X-Git-Url: http://plrg.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FTarget%2FMips%2FMipsISelLowering.cpp;h=9efd0f757b7af11c885497a3078d544efc52002f;hb=edacba83dc6b34676fe2be60e0bac487a6d1152d;hp=786add66d0c34bab202f750ae032ed199778784a;hpb=8be76112454e736db1815e6159644bf56ce04ac0;p=oota-llvm.git diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 786add66d0c..9efd0f757b7 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mips-lower" +//#include #include "MipsISelLowering.h" #include "MipsMachineFunction.h" #include "MipsTargetMachine.h" @@ -36,21 +37,25 @@ using namespace llvm; const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { - case MipsISD::JmpLink : return "MipsISD::JmpLink"; - case MipsISD::Hi : return "MipsISD::Hi"; - case MipsISD::Lo : return "MipsISD::Lo"; - case MipsISD::GPRel : return "MipsISD::GPRel"; - case MipsISD::Ret : return "MipsISD::Ret"; - case MipsISD::SelectCC : return "MipsISD::SelectCC"; - case MipsISD::FPSelectCC : return "MipsISD::FPSelectCC"; - case MipsISD::FPBrcond : return "MipsISD::FPBrcond"; - case MipsISD::FPCmp : return "MipsISD::FPCmp"; - case MipsISD::FPRound : return "MipsISD::FPRound"; - case MipsISD::MAdd : return "MipsISD::MAdd"; - case MipsISD::MAddu : return "MipsISD::MAddu"; - case MipsISD::MSub : return "MipsISD::MSub"; - case MipsISD::MSubu : return "MipsISD::MSubu"; - default : return NULL; + case MipsISD::JmpLink: return "MipsISD::JmpLink"; + case MipsISD::Hi: return "MipsISD::Hi"; + case MipsISD::Lo: return "MipsISD::Lo"; + case MipsISD::GPRel: return "MipsISD::GPRel"; + case MipsISD::Ret: return "MipsISD::Ret"; + case MipsISD::FPBrcond: return "MipsISD::FPBrcond"; + case MipsISD::FPCmp: return "MipsISD::FPCmp"; + case MipsISD::CMovFP_T: return "MipsISD::CMovFP_T"; + case MipsISD::CMovFP_F: return "MipsISD::CMovFP_F"; + case MipsISD::FPRound: return "MipsISD::FPRound"; + case MipsISD::MAdd: return "MipsISD::MAdd"; + case MipsISD::MAddu: return "MipsISD::MAddu"; + case MipsISD::MSub: return "MipsISD::MSub"; + case MipsISD::MSubu: return "MipsISD::MSubu"; + case MipsISD::DivRem: return "MipsISD::DivRem"; + case MipsISD::DivRemU: return "MipsISD::DivRemU"; + case MipsISD::BuildPairF64: return "MipsISD::BuildPairF64"; + case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64"; + default: return NULL; } } @@ -89,25 +94,22 @@ MipsTargetLowering(MipsTargetMachine &TM) // Mips Custom Operations setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::BlockAddress, MVT::i32, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); setOperationAction(ISD::JumpTable, MVT::i32, Custom); setOperationAction(ISD::ConstantPool, MVT::i32, Custom); setOperationAction(ISD::SELECT, MVT::f32, Custom); setOperationAction(ISD::SELECT, MVT::f64, Custom); setOperationAction(ISD::SELECT, MVT::i32, Custom); - setOperationAction(ISD::SETCC, MVT::f32, Custom); - setOperationAction(ISD::SETCC, MVT::f64, Custom); setOperationAction(ISD::BRCOND, MVT::Other, Custom); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); setOperationAction(ISD::VASTART, MVT::Other, Custom); - - // We custom lower AND/OR to handle the case where the DAG contain 'ands/ors' - // with operands comming from setcc fp comparions. This is necessary since - // the result from these setcc are in a flag registers (FCR31). - setOperationAction(ISD::AND, MVT::i32, Custom); - setOperationAction(ISD::OR, MVT::i32, Custom); + setOperationAction(ISD::SDIV, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::UDIV, MVT::i32, Expand); + setOperationAction(ISD::UREM, MVT::i32, Expand); // Operations not directly supported by Mips. setOperationAction(ISD::BR_JT, MVT::Other, Expand); @@ -129,9 +131,12 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FSIN, MVT::f64, Expand); setOperationAction(ISD::FCOS, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f64, Expand); setOperationAction(ISD::FPOWI, MVT::f32, Expand); setOperationAction(ISD::FPOW, MVT::f32, Expand); + setOperationAction(ISD::FPOW, MVT::f64, Expand); setOperationAction(ISD::FLOG, MVT::f32, Expand); setOperationAction(ISD::FLOG2, MVT::f32, Expand); setOperationAction(ISD::FLOG10, MVT::f32, Expand); @@ -139,6 +144,10 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); + setOperationAction(ISD::VAARG, MVT::Other, Expand); + setOperationAction(ISD::VACOPY, MVT::Other, Expand); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + // Use the default for now setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); @@ -160,6 +169,11 @@ MipsTargetLowering(MipsTargetMachine &TM) setTargetDAGCombine(ISD::ADDE); setTargetDAGCombine(ISD::SUBE); + setTargetDAGCombine(ISD::SDIVREM); + setTargetDAGCombine(ISD::UDIVREM); + setTargetDAGCombine(ISD::SETCC); + + setMinFunctionAlignment(2); setStackPointerRegisterToSaveRestore(Mips::SP); computeRegisterProperties(); @@ -169,21 +183,16 @@ MVT::SimpleValueType MipsTargetLowering::getSetCCResultType(EVT VT) const { return MVT::i32; } -/// getFunctionAlignment - Return the Log2 alignment of this function. -unsigned MipsTargetLowering::getFunctionAlignment(const Function *) const { - return 2; -} - // SelectMadd - // Transforms a subgraph in CurDAG if the following pattern is found: // (addc multLo, Lo0), (adde multHi, Hi0), // where, // multHi/Lo: product of multiplication -// Lo0: initial value of Lo register -// Hi0: initial value of Hi register -// Return true if mattern matching was successful. +// Lo0: initial value of Lo register +// Hi0: initial value of Hi register +// Return true if pattern matching was successful. static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) { - // ADDENode's second operand must be a flag output of an ADDC node in order + // ADDENode's second operand must be a flag output of an ADDC node in order // for the matching to be successful. SDNode* ADDCNode = ADDENode->getOperand(2).getNode(); @@ -192,7 +201,7 @@ static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) { SDValue MultHi = ADDENode->getOperand(0); SDValue MultLo = ADDCNode->getOperand(0); - SDNode* MultNode = MultHi.getNode(); + SDNode* MultNode = MultHi.getNode(); unsigned MultOpc = MultHi.getOpcode(); // MultHi and MultLo must be generated by the same node, @@ -202,39 +211,39 @@ static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) { // and it must be a multiplication. if (MultOpc != ISD::SMUL_LOHI && MultOpc != ISD::UMUL_LOHI) return false; - - // MultLo amd MultHi must be the first and second output of MultNode - // respectively. + + // MultLo amd MultHi must be the first and second output of MultNode + // respectively. if (MultHi.getResNo() != 1 || MultLo.getResNo() != 0) return false; - // Transform this to a MADD only if ADDENode and ADDCNode are the only users + // Transform this to a MADD only if ADDENode and ADDCNode are the only users // of the values of MultNode, in which case MultNode will be removed in later // phases. // If there exist users other than ADDENode or ADDCNode, this function returns - // here, which will result in MultNode being mapped to a single MULT - // instruction node rather than a pair of MULT and MADD instructions being + // here, which will result in MultNode being mapped to a single MULT + // instruction node rather than a pair of MULT and MADD instructions being // produced. if (!MultHi.hasOneUse() || !MultLo.hasOneUse()) return false; - SDValue Chain = CurDAG->getEntryNode(); + SDValue Chain = CurDAG->getEntryNode(); DebugLoc dl = ADDENode->getDebugLoc(); // create MipsMAdd(u) node MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd; - + SDValue MAdd = CurDAG->getNode(MultOpc, dl, MVT::Glue, MultNode->getOperand(0),// Factor 0 MultNode->getOperand(1),// Factor 1 - ADDCNode->getOperand(1),// Lo0 + ADDCNode->getOperand(1),// Lo0 ADDENode->getOperand(1));// Hi0 // create CopyFromReg nodes SDValue CopyFromLo = CurDAG->getCopyFromReg(Chain, dl, Mips::LO, MVT::i32, MAdd); - SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), dl, + SDValue CopyFromHi = CurDAG->getCopyFromReg(CopyFromLo.getValue(1), dl, Mips::HI, MVT::i32, CopyFromLo.getValue(2)); @@ -245,7 +254,7 @@ static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) { if (!SDValue(ADDENode, 0).use_empty()) CurDAG->ReplaceAllUsesOfValueWith(SDValue(ADDENode, 0), CopyFromHi); - return true; + return true; } // SelectMsub - @@ -253,11 +262,11 @@ static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) { // (addc Lo0, multLo), (sube Hi0, multHi), // where, // multHi/Lo: product of multiplication -// Lo0: initial value of Lo register -// Hi0: initial value of Hi register -// Return true if mattern matching was successful. +// Lo0: initial value of Lo register +// Hi0: initial value of Hi register +// Return true if pattern matching was successful. static bool SelectMsub(SDNode* SUBENode, SelectionDAG* CurDAG) { - // SUBENode's second operand must be a flag output of an SUBC node in order + // SUBENode's second operand must be a flag output of an SUBC node in order // for the matching to be successful. SDNode* SUBCNode = SUBENode->getOperand(2).getNode(); @@ -266,7 +275,7 @@ static bool SelectMsub(SDNode* SUBENode, SelectionDAG* CurDAG) { SDValue MultHi = SUBENode->getOperand(1); SDValue MultLo = SUBCNode->getOperand(1); - SDNode* MultNode = MultHi.getNode(); + SDNode* MultNode = MultHi.getNode(); unsigned MultOpc = MultHi.getOpcode(); // MultHi and MultLo must be generated by the same node, @@ -330,9 +339,9 @@ static SDValue PerformADDECombine(SDNode *N, SelectionDAG& DAG, if (Subtarget->isMips32() && SelectMadd(N, &DAG)) return SDValue(N, 0); - + return SDValue(); -} +} static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG, TargetLowering::DAGCombinerInfo &DCI, @@ -342,11 +351,135 @@ static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG, if (Subtarget->isMips32() && SelectMsub(N, &DAG)) return SDValue(N, 0); - + + return SDValue(); +} + +static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget* Subtarget) { + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + unsigned opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem : + MipsISD::DivRemU; + DebugLoc dl = N->getDebugLoc(); + + SDValue DivRem = DAG.getNode(opc, dl, MVT::Glue, + N->getOperand(0), N->getOperand(1)); + SDValue InChain = DAG.getEntryNode(); + SDValue InGlue = DivRem; + + // insert MFLO + if (N->hasAnyUseOfValue(0)) { + SDValue CopyFromLo = DAG.getCopyFromReg(InChain, dl, Mips::LO, MVT::i32, + InGlue); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), CopyFromLo); + InChain = CopyFromLo.getValue(1); + InGlue = CopyFromLo.getValue(2); + } + + // insert MFHI + if (N->hasAnyUseOfValue(1)) { + SDValue CopyFromHi = DAG.getCopyFromReg(InChain, dl, + Mips::HI, MVT::i32, InGlue); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), CopyFromHi); + } + return SDValue(); -} +} + +static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) { + switch (CC) { + default: llvm_unreachable("Unknown fp condition code!"); + case ISD::SETEQ: + case ISD::SETOEQ: return Mips::FCOND_OEQ; + case ISD::SETUNE: return Mips::FCOND_UNE; + case ISD::SETLT: + case ISD::SETOLT: return Mips::FCOND_OLT; + case ISD::SETGT: + case ISD::SETOGT: return Mips::FCOND_OGT; + case ISD::SETLE: + case ISD::SETOLE: return Mips::FCOND_OLE; + case ISD::SETGE: + case ISD::SETOGE: return Mips::FCOND_OGE; + case ISD::SETULT: return Mips::FCOND_ULT; + case ISD::SETULE: return Mips::FCOND_ULE; + case ISD::SETUGT: return Mips::FCOND_UGT; + case ISD::SETUGE: return Mips::FCOND_UGE; + case ISD::SETUO: return Mips::FCOND_UN; + case ISD::SETO: return Mips::FCOND_OR; + case ISD::SETNE: + case ISD::SETONE: return Mips::FCOND_ONE; + case ISD::SETUEQ: return Mips::FCOND_UEQ; + } +} + + +// Returns true if condition code has to be inverted. +static bool InvertFPCondCode(Mips::CondCode CC) { + if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT) + return false; + + if (CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT) + return true; + + assert(false && "Illegal Condition Code"); + return false; +} + +// Creates and returns an FPCmp node from a setcc node. +// Returns Op if setcc is not a floating point comparison. +static SDValue CreateFPCmp(SelectionDAG& DAG, const SDValue& Op) { + // must be a SETCC node + if (Op.getOpcode() != ISD::SETCC) + return Op; + + SDValue LHS = Op.getOperand(0); + + if (!LHS.getValueType().isFloatingPoint()) + return Op; + + SDValue RHS = Op.getOperand(1); + DebugLoc dl = Op.getDebugLoc(); + + // Assume the 3rd operand is a CondCodeSDNode. Add code to check the type of + // node if necessary. + ISD::CondCode CC = cast(Op.getOperand(2))->get(); -SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) + return DAG.getNode(MipsISD::FPCmp, dl, MVT::Glue, LHS, RHS, + DAG.getConstant(FPCondCCodeToFCC(CC), MVT::i32)); +} + +// Creates and returns a CMovFPT/F node. +static SDValue CreateCMovFP(SelectionDAG& DAG, SDValue Cond, SDValue True, + SDValue False, DebugLoc DL) { + bool invert = InvertFPCondCode((Mips::CondCode) + cast(Cond.getOperand(2)) + ->getSExtValue()); + + return DAG.getNode((invert ? MipsISD::CMovFP_F : MipsISD::CMovFP_T), DL, + True.getValueType(), True, False, Cond); +} + +static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG& DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget* Subtarget) { + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + SDValue Cond = CreateFPCmp(DAG, SDValue(N, 0)); + + if (Cond.getOpcode() != MipsISD::FPCmp) + return SDValue(); + + SDValue True = DAG.getConstant(1, MVT::i32); + SDValue False = DAG.getConstant(0, MVT::i32); + + return CreateCMovFP(DAG, Cond, True, False, N->getDebugLoc()); +} + +SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; unsigned opc = N->getOpcode(); @@ -357,6 +490,11 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) return PerformADDECombine(N, DAG, DCI, Subtarget); case ISD::SUBE: return PerformSUBECombine(N, DAG, DCI, Subtarget); + case ISD::SDIVREM: + case ISD::UDIVREM: + return PerformDivRemCombine(N, DAG, DCI, Subtarget); + case ISD::SETCC: + return PerformSETCCCombine(N, DAG, DCI, Subtarget); } return SDValue(); @@ -367,17 +505,15 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { - case ISD::AND: return LowerANDOR(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); - case ISD::OR: return LowerANDOR(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); - case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); } return SDValue(); @@ -410,122 +546,110 @@ static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) { return Mips::BRANCH_INVALID; } -static unsigned FPBranchCodeToOpc(Mips::FPBranchCode BC) { - switch(BC) { - default: - llvm_unreachable("Unknown branch code"); - case Mips::BRANCH_T : return Mips::BC1T; - case Mips::BRANCH_F : return Mips::BC1F; - case Mips::BRANCH_TL : return Mips::BC1TL; - case Mips::BRANCH_FL : return Mips::BC1FL; - } -} - -static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) { - switch (CC) { - default: llvm_unreachable("Unknown fp condition code!"); - case ISD::SETEQ: - case ISD::SETOEQ: return Mips::FCOND_EQ; - case ISD::SETUNE: return Mips::FCOND_OGL; - case ISD::SETLT: - case ISD::SETOLT: return Mips::FCOND_OLT; - case ISD::SETGT: - case ISD::SETOGT: return Mips::FCOND_OGT; - case ISD::SETLE: - case ISD::SETOLE: return Mips::FCOND_OLE; - case ISD::SETGE: - case ISD::SETOGE: return Mips::FCOND_OGE; - case ISD::SETULT: return Mips::FCOND_ULT; - case ISD::SETULE: return Mips::FCOND_ULE; - case ISD::SETUGT: return Mips::FCOND_UGT; - case ISD::SETUGE: return Mips::FCOND_UGE; - case ISD::SETUO: return Mips::FCOND_UN; - case ISD::SETO: return Mips::FCOND_OR; - case ISD::SETNE: - case ISD::SETONE: return Mips::FCOND_NEQ; - case ISD::SETUEQ: return Mips::FCOND_UEQ; - } -} - MachineBasicBlock * MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { + // There is no need to expand CMov instructions if target has + // conditional moves. + if (Subtarget->hasCondMov()) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); bool isFPCmp = false; DebugLoc dl = MI->getDebugLoc(); + unsigned Opc; switch (MI->getOpcode()) { default: assert(false && "Unexpected instr type to insert"); - case Mips::Select_FCC: - case Mips::Select_FCC_S32: - case Mips::Select_FCC_D32: - isFPCmp = true; // FALL THROUGH - case Mips::Select_CC: - case Mips::Select_CC_S32: - case Mips::Select_CC_D32: { - // To "insert" a SELECT_CC instruction, we actually have to insert the - // diamond control-flow pattern. The incoming instruction knows the - // destination vreg to set, the condition code register to branch on, the - // true/false values to select between, and a branch opcode to use. - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; - - // thisMBB: - // ... - // TrueVal = ... - // setcc r1, r2, r3 - // bNE r1, r0, copy1MBB - // fallthrough --> copy0MBB - MachineBasicBlock *thisMBB = BB; - MachineFunction *F = BB->getParent(); - MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, copy0MBB); - F->insert(It, sinkMBB); - - // Transfer the remainder of BB and its successor edges to sinkMBB. - sinkMBB->splice(sinkMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - sinkMBB->transferSuccessorsAndUpdatePHIs(BB); - - // Next, add the true and fallthrough blocks as its successors. - BB->addSuccessor(copy0MBB); - BB->addSuccessor(sinkMBB); - - // Emit the right instruction according to the type of the operands compared - if (isFPCmp) { - // Find the condiction code present in the setcc operation. - Mips::CondCode CC = (Mips::CondCode)MI->getOperand(4).getImm(); - // Get the branch opcode from the branch code. - unsigned Opc = FPBranchCodeToOpc(GetFPBranchCodeFromCond(CC)); - BuildMI(BB, dl, TII->get(Opc)).addMBB(sinkMBB); - } else - BuildMI(BB, dl, TII->get(Mips::BNE)).addReg(MI->getOperand(1).getReg()) - .addReg(Mips::ZERO).addMBB(sinkMBB); - - // copy0MBB: - // %FalseValue = ... - // # fallthrough to sinkMBB - BB = copy0MBB; - - // Update machine-CFG edges - BB->addSuccessor(sinkMBB); - - // sinkMBB: - // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] - // ... - BB = sinkMBB; + case Mips::MOVT: + case Mips::MOVT_S: + case Mips::MOVT_D: + isFPCmp = true; + Opc = Mips::BC1F; + break; + case Mips::MOVF: + case Mips::MOVF_S: + case Mips::MOVF_D: + isFPCmp = true; + Opc = Mips::BC1T; + break; + case Mips::MOVZ_I: + case Mips::MOVZ_S: + case Mips::MOVZ_D: + Opc = Mips::BNE; + break; + case Mips::MOVN_I: + case Mips::MOVN_S: + case Mips::MOVN_D: + Opc = Mips::BEQ; + break; + } + + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... + // setcc r1, r2, r3 + // bNE r1, r0, copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + // Emit the right instruction according to the type of the operands compared + if (isFPCmp) + BuildMI(BB, dl, TII->get(Opc)).addMBB(sinkMBB); + else + BuildMI(BB, dl, TII->get(Opc)).addReg(MI->getOperand(2).getReg()) + .addReg(Mips::ZERO).addMBB(sinkMBB); + + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] + // ... + BB = sinkMBB; + + if (isFPCmp) BuildMI(*BB, BB->begin(), dl, TII->get(Mips::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB) - .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB); + .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB); + else + BuildMI(*BB, BB->begin(), dl, + TII->get(Mips::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(3).getReg()).addMBB(thisMBB) + .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB); - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; - } - } + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; } //===----------------------------------------------------------------------===// @@ -568,6 +692,13 @@ LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const SDValue MipsTargetLowering:: LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { + unsigned StackAlignment = + getTargetMachine().getFrameLowering()->getStackAlignment(); + assert(StackAlignment >= + cast(Op.getOperand(2).getNode())->getZExtValue() && + "Cannot lower if the alignment of the allocated space is larger than \ + that of the stack."); + SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); DebugLoc dl = Op.getDebugLoc(); @@ -581,35 +712,28 @@ LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const // The Sub result contains the new stack start address, so it // must be placed in the stack pointer register. - Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, Mips::SP, Sub); + Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, Mips::SP, Sub, + SDValue()); + // Retrieve updated $sp. There is a glue input to prevent instructions that + // clobber $sp from being inserted between copytoreg and copyfromreg. + SDValue NewSP = DAG.getCopyFromReg(Chain, dl, Mips::SP, MVT::i32, + Chain.getValue(1)); + + // The stack space reserved by alloca is located right above the argument + // area. It is aligned on a boundary that is a multiple of StackAlignment. + MachineFunction &MF = DAG.getMachineFunction(); + MipsFunctionInfo *MipsFI = MF.getInfo(); + unsigned SPOffset = (MipsFI->getMaxCallFrameSize() + StackAlignment - 1) / + StackAlignment * StackAlignment; + SDValue AllocPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, NewSP, + DAG.getConstant(SPOffset, MVT::i32)); // This node always has two return values: a new stack pointer // value and a chain - SDValue Ops[2] = { Sub, Chain }; + SDValue Ops[2] = { AllocPtr, NewSP.getValue(1) }; return DAG.getMergeValues(Ops, 2, dl); } -SDValue MipsTargetLowering:: -LowerANDOR(SDValue Op, SelectionDAG &DAG) const -{ - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - DebugLoc dl = Op.getDebugLoc(); - - if (LHS.getOpcode() != MipsISD::FPCmp || RHS.getOpcode() != MipsISD::FPCmp) - return Op; - - SDValue True = DAG.getConstant(1, MVT::i32); - SDValue False = DAG.getConstant(0, MVT::i32); - - SDValue LSEL = DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(), - LHS, True, False, LHS.getOperand(2)); - SDValue RSEL = DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(), - RHS, True, False, RHS.getOperand(2)); - - return DAG.getNode(Op.getOpcode(), dl, MVT::i32, LSEL, RSEL); -} - SDValue MipsTargetLowering:: LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { @@ -619,58 +743,32 @@ LowerBRCOND(SDValue Op, SelectionDAG &DAG) const SDValue Dest = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); - if (Op.getOperand(1).getOpcode() != MipsISD::FPCmp) + SDValue CondRes = CreateFPCmp(DAG, Op.getOperand(1)); + + // Return if flag is not set by a floating point comparison. + if (CondRes.getOpcode() != MipsISD::FPCmp) return Op; - SDValue CondRes = Op.getOperand(1); SDValue CCNode = CondRes.getOperand(2); Mips::CondCode CC = (Mips::CondCode)cast(CCNode)->getZExtValue(); SDValue BrCode = DAG.getConstant(GetFPBranchCodeFromCond(CC), MVT::i32); return DAG.getNode(MipsISD::FPBrcond, dl, Op.getValueType(), Chain, BrCode, - Dest, CondRes); -} - -SDValue MipsTargetLowering:: -LowerSETCC(SDValue Op, SelectionDAG &DAG) const -{ - // The operands to this are the left and right operands to compare (ops #0, - // and #1) and the condition code to compare them with (op #2) as a - // CondCodeSDNode. - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - DebugLoc dl = Op.getDebugLoc(); - - ISD::CondCode CC = cast(Op.getOperand(2))->get(); - - return DAG.getNode(MipsISD::FPCmp, dl, Op.getValueType(), LHS, RHS, - DAG.getConstant(FPCondCCodeToFCC(CC), MVT::i32)); + Dest, CondRes); } SDValue MipsTargetLowering:: LowerSELECT(SDValue Op, SelectionDAG &DAG) const { - SDValue Cond = Op.getOperand(0); - SDValue True = Op.getOperand(1); - SDValue False = Op.getOperand(2); - DebugLoc dl = Op.getDebugLoc(); + SDValue Cond = CreateFPCmp(DAG, Op.getOperand(0)); - // if the incomming condition comes from a integer compare, the select - // operation must be SelectCC or a conditional move if the subtarget - // supports it. - if (Cond.getOpcode() != MipsISD::FPCmp) { - if (Subtarget->hasCondMov() && !True.getValueType().isFloatingPoint()) - return Op; - return DAG.getNode(MipsISD::SelectCC, dl, True.getValueType(), - Cond, True, False); - } + // Return if flag is not set by a floating point comparison. + if (Cond.getOpcode() != MipsISD::FPCmp) + return Op; - // if the incomming condition comes from fpcmp, the select - // operation must use FPSelectCC. - SDValue CCNode = Cond.getOperand(2); - return DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(), - Cond, True, False, CCNode); + return CreateCMovFP(DAG, Cond, Op.getOperand(1), Op.getOperand(2), + Op.getDebugLoc()); } SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, @@ -693,12 +791,13 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, return DAG.getNode(ISD::ADD, dl, MVT::i32, GOT, GPRelNode); } // %hi/%lo relocation - SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, - MipsII::MO_ABS_HILO); - SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, &GA, 1); - SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GA); + SDValue GAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, + MipsII::MO_ABS_HI); + SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, + MipsII::MO_ABS_LO); + SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, &GAHi, 1); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GALo); return DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo); - } else { SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, MipsII::MO_GOT); @@ -707,9 +806,12 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, false, false, 0); // On functions and global targets not internal linked only // a load from got/GP is necessary for PIC to work. - if (!GV->hasLocalLinkage() || isa(GV)) + if (!GV->hasInternalLinkage() && + (!GV->hasLocalLinkage() || isa(GV))) return ResNode; - SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GA); + SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, + MipsII::MO_ABS_LO); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GALo); return DAG.getNode(ISD::ADD, dl, MVT::i32, ResNode, Lo); } @@ -717,6 +819,34 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, return SDValue(0,0); } +SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op, + SelectionDAG &DAG) const { + const BlockAddress *BA = cast(Op)->getBlockAddress(); + // FIXME there isn't actually debug info here + DebugLoc dl = Op.getDebugLoc(); + + if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { + // %hi/%lo relocation + SDValue BAHi = DAG.getBlockAddress(BA, MVT::i32, true, + MipsII::MO_ABS_HI); + SDValue BALo = DAG.getBlockAddress(BA, MVT::i32, true, + MipsII::MO_ABS_LO); + SDValue Hi = DAG.getNode(MipsISD::Hi, dl, MVT::i32, BAHi); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, BALo); + return DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo); + } + + SDValue BAGOTOffset = DAG.getBlockAddress(BA, MVT::i32, true, + MipsII::MO_GOT); + SDValue BALOOffset = DAG.getBlockAddress(BA, MVT::i32, true, + MipsII::MO_ABS_LO); + SDValue Load = DAG.getLoad(MVT::i32, dl, + DAG.getEntryNode(), BAGOTOffset, + MachinePointerInfo(), false, false, 0); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, BALOOffset); + return DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo); +} + SDValue MipsTargetLowering:: LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { @@ -732,7 +862,7 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_; - unsigned char OpFlag = IsPIC ? MipsII::MO_GOT : MipsII::MO_ABS_HILO; + unsigned char OpFlag = IsPIC ? MipsII::MO_GOT : MipsII::MO_ABS_HI; EVT PtrVT = Op.getValueType(); JumpTableSDNode *JT = cast(Op); @@ -747,7 +877,9 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const MachinePointerInfo(), false, false, 0); - SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTI); + SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, + MipsII::MO_ABS_LO); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTILo); ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo); return ResNode; @@ -764,7 +896,7 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const // gp_rel relocation // FIXME: we should reference the constant pool using small data sections, - // but the asm printer currently doens't support this feature without + // but the asm printer currently doesn't support this feature without // hacking it. This feature should come soon so we can uncomment the // stuff below. //if (IsInSmallSection(C->getType())) { @@ -773,18 +905,22 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const // ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode); if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { - SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), - N->getOffset(), MipsII::MO_ABS_HILO); - SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, MVT::i32, CP); - SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP); + SDValue CPHi = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), + N->getOffset(), MipsII::MO_ABS_HI); + SDValue CPLo = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), + N->getOffset(), MipsII::MO_ABS_LO); + SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, MVT::i32, CPHi); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CPLo); ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo); } else { SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), - N->getOffset(), MipsII::MO_GOT); + N->getOffset(), MipsII::MO_GOT); SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), CP, MachinePointerInfo::getConstantPool(), false, false, 0); - SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP); + SDValue CPLo = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), + N->getOffset(), MipsII::MO_ABS_LO); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CPLo); ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo); } @@ -824,6 +960,8 @@ SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { // yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is // not used, it must be shadowed. If only A3 is avaiable, shadow it and // go to stack. +// +// For vararg functions, all arguments are passed in A0, A1, A2, A3 and stack. //===----------------------------------------------------------------------===// static bool CC_MipsO32(unsigned ValNo, MVT ValVT, @@ -842,9 +980,16 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, Mips::D6, Mips::D7 }; - unsigned Reg=0; - unsigned UnallocIntReg = State.getFirstUnallocated(IntRegs, IntRegsSize); - bool IntRegUsed = (IntRegs[UnallocIntReg] != (unsigned (Mips::A0))); + // ByVal Args + if (ArgFlags.isByVal()) { + State.HandleByVal(ValNo, ValVT, LocVT, LocInfo, + 1 /*MinSize*/, 4 /*MinAlign*/, ArgFlags); + unsigned NextReg = (State.getNextStackOffset() + 3) / 4; + for (unsigned r = State.getFirstUnallocated(IntRegs, IntRegsSize); + r < std::min(IntRegsSize, NextReg); ++r) + State.AllocateReg(IntRegs[r]); + return false; + } // Promote i8 and i16 if (LocVT == MVT::i8 || LocVT == MVT::i16) { @@ -857,108 +1002,113 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, LocInfo = CCValAssign::AExt; } - if (ValVT == MVT::i32 || (ValVT == MVT::f32 && IntRegUsed)) { + unsigned Reg; + + // f32 and f64 are allocated in A0, A1, A2, A3 when either of the following + // is true: function is vararg, argument is 3rd or higher, there is previous + // argument which is not f32 or f64. + bool AllocateFloatsInIntReg = State.isVarArg() || ValNo > 1 + || State.getFirstUnallocated(F32Regs, FloatRegsSize) != ValNo; + unsigned OrigAlign = ArgFlags.getOrigAlign(); + bool isI64 = (ValVT == MVT::i32 && OrigAlign == 8); + + if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) { Reg = State.AllocateReg(IntRegs, IntRegsSize); - IntRegUsed = true; + // If this is the first part of an i64 arg, + // the allocated register must be either A0 or A2. + if (isI64 && (Reg == Mips::A1 || Reg == Mips::A3)) + Reg = State.AllocateReg(IntRegs, IntRegsSize); LocVT = MVT::i32; - } - - if (ValVT.isFloatingPoint() && !IntRegUsed) { - if (ValVT == MVT::f32) + } else if (ValVT == MVT::f64 && AllocateFloatsInIntReg) { + // Allocate int register and shadow next int register. If first + // available register is Mips::A1 or Mips::A3, shadow it too. + Reg = State.AllocateReg(IntRegs, IntRegsSize); + if (Reg == Mips::A1 || Reg == Mips::A3) + Reg = State.AllocateReg(IntRegs, IntRegsSize); + State.AllocateReg(IntRegs, IntRegsSize); + LocVT = MVT::i32; + } else if (ValVT.isFloatingPoint() && !AllocateFloatsInIntReg) { + // we are guaranteed to find an available float register + if (ValVT == MVT::f32) { Reg = State.AllocateReg(F32Regs, FloatRegsSize); - else + // Shadow int register + State.AllocateReg(IntRegs, IntRegsSize); + } else { Reg = State.AllocateReg(F64Regs, FloatRegsSize); - } - - if (ValVT == MVT::f64 && IntRegUsed) { - if (UnallocIntReg != IntRegsSize) { - // If we hit register A3 as the first not allocated, we must - // mark it as allocated (shadow) and use the stack instead. - if (IntRegs[UnallocIntReg] != (unsigned (Mips::A3))) - Reg = Mips::A2; - for (;UnallocIntReg < IntRegsSize; ++UnallocIntReg) - State.AllocateReg(UnallocIntReg); + // Shadow int registers + unsigned Reg2 = State.AllocateReg(IntRegs, IntRegsSize); + if (Reg2 == Mips::A1 || Reg2 == Mips::A3) + State.AllocateReg(IntRegs, IntRegsSize); + State.AllocateReg(IntRegs, IntRegsSize); } - LocVT = MVT::i32; - } + } else + llvm_unreachable("Cannot handle this ValVT."); + + unsigned SizeInBytes = ValVT.getSizeInBits() >> 3; + unsigned Offset = State.AllocateStack(SizeInBytes, OrigAlign); - if (!Reg) { - unsigned SizeInBytes = ValVT.getSizeInBits() >> 3; - unsigned Offset = State.AllocateStack(SizeInBytes, SizeInBytes); + if (!Reg) State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - } else + else State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; // CC must always match } -static bool CC_MipsO32_VarArgs(unsigned ValNo, MVT ValVT, - MVT LocVT, CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State) { - - static const unsigned IntRegsSize=4; - - static const unsigned IntRegs[] = { - Mips::A0, Mips::A1, Mips::A2, Mips::A3 - }; - - // Promote i8 and i16 - if (LocVT == MVT::i8 || LocVT == MVT::i16) { - LocVT = MVT::i32; - if (ArgFlags.isSExt()) - LocInfo = CCValAssign::SExt; - else if (ArgFlags.isZExt()) - LocInfo = CCValAssign::ZExt; - else - LocInfo = CCValAssign::AExt; - } +//===----------------------------------------------------------------------===// +// Call Calling Convention Implementation +//===----------------------------------------------------------------------===// - if (ValVT == MVT::i32 || ValVT == MVT::f32) { - if (unsigned Reg = State.AllocateReg(IntRegs, IntRegsSize)) { - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, MVT::i32, LocInfo)); - return false; - } - unsigned Off = State.AllocateStack(4, 4); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Off, LocVT, LocInfo)); - return false; +static const unsigned O32IntRegsSize = 4; + +static const unsigned O32IntRegs[] = { + Mips::A0, Mips::A1, Mips::A2, Mips::A3 +}; + +// Write ByVal Arg to arg registers and stack. +static void +WriteByValArg(SDValue& Chain, DebugLoc dl, + SmallVector, 16>& RegsToPass, + SmallVector& MemOpChains, int& LastFI, + MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, + const CCValAssign &VA, const ISD::ArgFlagsTy& Flags, + MVT PtrType) { + unsigned FirstWord = VA.getLocMemOffset() / 4; + unsigned NumWords = (Flags.getByValSize() + 3) / 4; + unsigned LastWord = FirstWord + NumWords; + unsigned CurWord; + + // copy the first 4 words of byval arg to registers A0 - A3 + for (CurWord = FirstWord; CurWord < std::min(LastWord, O32IntRegsSize); + ++CurWord) { + SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, + DAG.getConstant((CurWord - FirstWord) * 4, + MVT::i32)); + SDValue LoadVal = DAG.getLoad(MVT::i32, dl, Chain, LoadPtr, + MachinePointerInfo(), + false, false, 0); + MemOpChains.push_back(LoadVal.getValue(1)); + unsigned DstReg = O32IntRegs[CurWord]; + RegsToPass.push_back(std::make_pair(DstReg, LoadVal)); } - unsigned UnallocIntReg = State.getFirstUnallocated(IntRegs, IntRegsSize); - if (ValVT == MVT::f64) { - if (IntRegs[UnallocIntReg] == (unsigned (Mips::A1))) { - // A1 can't be used anymore, because 64 bit arguments - // must be aligned when copied back to the caller stack - State.AllocateReg(IntRegs, IntRegsSize); - UnallocIntReg++; - } - - if (IntRegs[UnallocIntReg] == (unsigned (Mips::A0)) || - IntRegs[UnallocIntReg] == (unsigned (Mips::A2))) { - unsigned Reg = State.AllocateReg(IntRegs, IntRegsSize); - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, MVT::i32, LocInfo)); - // Shadow the next register so it can be used - // later to get the other 32bit part. - State.AllocateReg(IntRegs, IntRegsSize); - return false; - } - - // Register is shadowed to preserve alignment, and the - // argument goes to a stack location. - if (UnallocIntReg != IntRegsSize) - State.AllocateReg(IntRegs, IntRegsSize); - - unsigned Off = State.AllocateStack(8, 8); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Off, LocVT, LocInfo)); - return false; + // copy remaining part of byval arg to stack. + if (CurWord < LastWord) { + unsigned SizeInBytes = (LastWord - CurWord) * 4; + SDValue Src = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg, + DAG.getConstant((CurWord - FirstWord) * 4, + MVT::i32)); + LastFI = MFI->CreateFixedObject(SizeInBytes, CurWord * 4, true); + SDValue Dst = DAG.getFrameIndex(LastFI, PtrType); + Chain = DAG.getMemcpy(Chain, dl, Dst, Src, + DAG.getConstant(SizeInBytes, MVT::i32), + /*Align*/4, + /*isVolatile=*/false, /*AlwaysInline=*/false, + MachinePointerInfo(0), MachinePointerInfo(0)); + MemOpChains.push_back(Chain); } - - return true; // CC didn't match } -//===----------------------------------------------------------------------===// -// Call Calling Convention Implementation -//===----------------------------------------------------------------------===// - /// LowerCall - functions arguments are copied from virtual regs to /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. /// TODO: isTailCall. @@ -976,21 +1126,18 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering(); bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_; + MipsFunctionInfo *MipsFI = MF.getInfo(); // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, *DAG.getContext()); - // To meet O32 ABI, Mips must always allocate 16 bytes on - // the stack (even if less than 4 are used as arguments) - if (Subtarget->isABI_O32()) { - int VTsize = MVT(MVT::i32).getSizeInBits()/8; - MFI->CreateFixedObject(VTsize, (VTsize*3), true); - CCInfo.AnalyzeCallOperands(Outs, - isVarArg ? CC_MipsO32_VarArgs : CC_MipsO32); - } else + if (Subtarget->isABI_O32()) + CCInfo.AnalyzeCallOperands(Outs, CC_MipsO32); + else CCInfo.AnalyzeCallOperands(Outs, CC_Mips); // Get a count of how many bytes are to be pushed on the stack. @@ -1001,10 +1148,16 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SmallVector, 16> RegsToPass; SmallVector MemOpChains; - // First/LastArgStackLoc contains the first/last - // "at stack" argument location. - int LastArgStackLoc = 0; - unsigned FirstStackArgLoc = (Subtarget->isABI_EABI() ? 0 : 16); + MipsFI->setHasCall(); + + // If this is the first call, create a stack frame object that points to + // a location to which .cprestore saves $gp. The offset of this frame object + // is set to 0, since we know nothing about the size of the argument area at + // this point. + if (IsPIC && !MipsFI->getGPFI()) + MipsFI->setGPFI(MFI->CreateFixedObject(4, 0, true)); + + int FirstFI = -MFI->getNumFixedObjects() - 1, LastFI = 0; // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { @@ -1019,11 +1172,12 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i32) Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); if (VA.getValVT() == MVT::f64 && VA.getLocVT() == MVT::i32) { - Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); - SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Arg, - DAG.getConstant(0, getPointerTy())); - SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Arg, - DAG.getConstant(1, getPointerTy())); + SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32, + Arg, DAG.getConstant(0, MVT::i32)); + SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32, + Arg, DAG.getConstant(1, MVT::i32)); + if (!Subtarget->isLittle()) + std::swap(Lo, Hi); RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo)); RegsToPass.push_back(std::make_pair(VA.getLocReg()+1, Hi)); continue; @@ -1051,15 +1205,22 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Register can't get to this point... assert(VA.isMemLoc()); - // Create the frame index object for this incoming parameter - // This guarantees that when allocating Local Area the firsts - // 16 bytes which are alwayes reserved won't be overwritten - // if O32 ABI is used. For EABI the first address is zero. - LastArgStackLoc = (FirstStackArgLoc + VA.getLocMemOffset()); - int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8, - LastArgStackLoc, true); + // ByVal Arg. + ISD::ArgFlagsTy Flags = Outs[i].Flags; + if (Flags.isByVal()) { + assert(Subtarget->isABI_O32() && + "No support for ByVal args by ABIs other than O32 yet."); + assert(Flags.getByValSize() && + "ByVal args of size 0 should have been ignored by front-end."); + WriteByValArg(Chain, dl, RegsToPass, MemOpChains, LastFI, MFI, DAG, Arg, + VA, Flags, getPointerTy()); + continue; + } - SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy()); + // Create the frame index object for this incoming parameter + LastFI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8, + VA.getLocMemOffset(), true); + SDValue PtrOff = DAG.getFrameIndex(LastFI, getPointerTy()); // emit ISD::STORE whichs stores the // parameter value to a stack Location @@ -1074,28 +1235,69 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every + // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol + // node so that legalize doesn't hack it. + unsigned char OpFlag = IsPIC ? MipsII::MO_GOT_CALL : MipsII::MO_NO_FLAG; + bool LoadSymAddr = false; + SDValue CalleeLo; + + if (GlobalAddressSDNode *G = dyn_cast(Callee)) { + if (IsPIC && G->getGlobal()->hasInternalLinkage()) { + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, + getPointerTy(), 0,MipsII:: MO_GOT); + CalleeLo = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(), + 0, MipsII::MO_ABS_LO); + } else { + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, + getPointerTy(), 0, OpFlag); + } + + LoadSymAddr = true; + } + else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), + getPointerTy(), OpFlag); + LoadSymAddr = true; + } + + SDValue InFlag; + + // Create nodes that load address of callee and copy it to T9 + if (IsPIC) { + if (LoadSymAddr) { + // Load callee address + SDValue LoadValue = DAG.getLoad(MVT::i32, dl, Chain, Callee, + MachinePointerInfo::getGOT(), + false, false, 0); + + // Use GOT+LO if callee has internal linkage. + if (CalleeLo.getNode()) { + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CalleeLo); + Callee = DAG.getNode(ISD::ADD, dl, MVT::i32, LoadValue, Lo); + } else + Callee = LoadValue; + + // Use chain output from LoadValue + Chain = LoadValue.getValue(1); + } + + // copy to T9 + Chain = DAG.getCopyToReg(Chain, dl, Mips::T9, Callee, SDValue(0, 0)); + InFlag = Chain.getValue(1); + Callee = DAG.getRegister(Mips::T9, MVT::i32); + } + // Build a sequence of copy-to-reg nodes chained together with token // chain and flag operands which copy the outgoing args into registers. - // The InFlag in necessary since all emited instructions must be + // The InFlag in necessary since all emitted instructions must be // stuck together. - SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } - // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every - // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol - // node so that legalize doesn't hack it. - unsigned char OpFlag = IsPIC ? MipsII::MO_GOT_CALL : MipsII::MO_NO_FLAG; - if (GlobalAddressSDNode *G = dyn_cast(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, - getPointerTy(), 0, OpFlag); - else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) - Callee = DAG.getTargetExternalSymbol(S->getSymbol(), - getPointerTy(), OpFlag); - // MipsJmpLink = #chain, #target_address, #opt_in_flags... // = Chain, Callee, Reg#1, Reg#2, ... // @@ -1117,37 +1319,35 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Chain = DAG.getNode(MipsISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size()); InFlag = Chain.getValue(1); - // Create a stack location to hold GP when PIC is used. This stack - // location is used on function prologue to save GP and also after all - // emited CALL's to restore GP. if (IsPIC) { - // Function can have an arbitrary number of calls, so - // hold the LastArgStackLoc with the biggest offset. - int FI; - MipsFunctionInfo *MipsFI = MF.getInfo(); - if (LastArgStackLoc >= MipsFI->getGPStackOffset()) { - LastArgStackLoc = (!LastArgStackLoc) ? (16) : (LastArgStackLoc+4); - // Create the frame index only once. SPOffset here can be anything - // (this will be fixed on processFunctionBeforeFrameFinalized) - if (MipsFI->getGPStackOffset() == -1) { - FI = MFI->CreateFixedObject(4, 0, true); - MipsFI->setGPFI(FI); - } - MipsFI->setGPStackOffset(LastArgStackLoc); - } - - // Reload GP value. - FI = MipsFI->getGPFI(); - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); - SDValue GPLoad = DAG.getLoad(MVT::i32, dl, Chain, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, 0); - Chain = GPLoad.getValue(1); - Chain = DAG.getCopyToReg(Chain, dl, DAG.getRegister(Mips::GP, MVT::i32), - GPLoad, SDValue(0,0)); - InFlag = Chain.getValue(1); + // Function can have an arbitrary number of calls, so + // hold the LastArgStackLoc with the biggest offset. + int MaxCallFrameSize = MipsFI->getMaxCallFrameSize(); + unsigned NextStackOffset = CCInfo.getNextStackOffset(); + + // For O32, a minimum of four words (16 bytes) of argument space is + // allocated. + if (Subtarget->isABI_O32()) + NextStackOffset = std::max(NextStackOffset, (unsigned)16); + + if (MaxCallFrameSize < (int)NextStackOffset) { + MipsFI->setMaxCallFrameSize(NextStackOffset); + + // $gp restore slot must be aligned. + unsigned StackAlignment = TFL->getStackAlignment(); + NextStackOffset = (NextStackOffset + StackAlignment - 1) / + StackAlignment * StackAlignment; + int GPFI = MipsFI->getGPFI(); + MFI->setObjectOffset(GPFI, NextStackOffset); + } } + // Extend range of indices of frame objects for outgoing arguments that were + // created during this function call. Skip this step if no such objects were + // created. + if (LastFI) + MipsFI->extendOutArgFIRange(FirstFI, LastFI); + // Create the CALLSEQ_END node. Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), DAG.getIntPtrConstant(0, true), InFlag); @@ -1189,45 +1389,61 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, //===----------------------------------------------------------------------===// // Formal Arguments Calling Convention Implementation //===----------------------------------------------------------------------===// +static void ReadByValArg(MachineFunction &MF, SDValue Chain, DebugLoc dl, + std::vector& OutChains, + SelectionDAG &DAG, unsigned NumWords, SDValue FIN, + const CCValAssign &VA, const ISD::ArgFlagsTy& Flags) { + unsigned LocMem = VA.getLocMemOffset(); + unsigned FirstWord = LocMem / 4; + + // copy register A0 - A3 to frame object + for (unsigned i = 0; i < NumWords; ++i) { + unsigned CurWord = FirstWord + i; + if (CurWord >= O32IntRegsSize) + break; + + unsigned SrcReg = O32IntRegs[CurWord]; + unsigned Reg = AddLiveIn(MF, SrcReg, Mips::CPURegsRegisterClass); + SDValue StorePtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIN, + DAG.getConstant(i * 4, MVT::i32)); + SDValue Store = DAG.getStore(Chain, dl, DAG.getRegister(Reg, MVT::i32), + StorePtr, MachinePointerInfo(), false, + false, 0); + OutChains.push_back(Store); + } +} /// LowerFormalArguments - transform physical registers into virtual registers /// and generate load operations for arguments places on the stack. SDValue MipsTargetLowering::LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl - &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl + &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) const { - MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MipsFunctionInfo *MipsFI = MF.getInfo(); - unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF); MipsFI->setVarArgsFrameIndex(0); // Used with vargs to acumulate store chains. std::vector OutChains; - // Keep track of the last register used for arguments - unsigned ArgRegEnd = 0; - // Assign locations to all of the incoming arguments. SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, *DAG.getContext()); if (Subtarget->isABI_O32()) - CCInfo.AnalyzeFormalArguments(Ins, - isVarArg ? CC_MipsO32_VarArgs : CC_MipsO32); + CCInfo.AnalyzeFormalArguments(Ins, CC_MipsO32); else CCInfo.AnalyzeFormalArguments(Ins, CC_Mips); - SDValue StackPtr; - - unsigned FirstStackArgLoc = (Subtarget->isABI_EABI() ? 0 : 16); + int LastFI = 0;// MipsFI->LastInArgFI is 0 at the entry of this function. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; @@ -1235,7 +1451,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // Arguments stored on registers if (VA.isRegLoc()) { EVT RegVT = VA.getLocVT(); - ArgRegEnd = VA.getLocReg(); + unsigned ArgReg = VA.getLocReg(); TargetRegisterClass *RC = 0; if (RegVT == MVT::i32) @@ -1250,7 +1466,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // Transform the arguments stored on // physical registers into virtual ones - unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgRegEnd, RC); + unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgReg, RC); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); // If this is an 8 or 16-bit value, it has been passed promoted @@ -1276,9 +1492,10 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg()+1, RC); SDValue ArgValue2 = DAG.getCopyFromReg(Chain, dl, Reg2, RegVT); - SDValue Hi = DAG.getNode(ISD::BITCAST, dl, MVT::f32, ArgValue); - SDValue Lo = DAG.getNode(ISD::BITCAST, dl, MVT::f32, ArgValue2); - ArgValue = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::f64, Lo, Hi); + if (!Subtarget->isLittle()) + std::swap(ArgValue, ArgValue2); + ArgValue = DAG.getNode(MipsISD::BuildPairF64, dl, MVT::f64, + ArgValue, ArgValue2); } } @@ -1288,26 +1505,31 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // sanity check assert(VA.isMemLoc()); - // The last argument is not a register anymore - ArgRegEnd = 0; + ISD::ArgFlagsTy Flags = Ins[i].Flags; + + if (Flags.isByVal()) { + assert(Subtarget->isABI_O32() && + "No support for ByVal args by ABIs other than O32 yet."); + assert(Flags.getByValSize() && + "ByVal args of size 0 should have been ignored by front-end."); + unsigned NumWords = (Flags.getByValSize() + 3) / 4; + LastFI = MFI->CreateFixedObject(NumWords * 4, VA.getLocMemOffset(), + true); + SDValue FIN = DAG.getFrameIndex(LastFI, getPointerTy()); + InVals.push_back(FIN); + ReadByValArg(MF, Chain, dl, OutChains, DAG, NumWords, FIN, VA, Flags); + + continue; + } // The stack pointer offset is relative to the caller stack frame. - // Since the real stack size is unknown here, a negative SPOffset - // is used so there's a way to adjust these offsets when the stack - // size get known (on EliminateFrameIndex). A dummy SPOffset is - // used instead of a direct negative address (which is recorded to - // be used on emitPrologue) to avoid mis-calc of the first stack - // offset on PEI::calculateFrameObjectOffsets. - // Arguments are always 32-bit. - unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; - int FI = MFI->CreateFixedObject(ArgSize, 0, true); - MipsFI->recordLoadArgsFI(FI, -(ArgSize+ - (FirstStackArgLoc + VA.getLocMemOffset()))); + LastFI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8, + VA.getLocMemOffset(), true); // Create load nodes to retrieve arguments from the stack - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + SDValue FIN = DAG.getFrameIndex(LastFI, getPointerTy()); InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, - MachinePointerInfo::getFixedStack(FI), + MachinePointerInfo::getFixedStack(LastFI), false, false, 0)); } } @@ -1325,35 +1547,33 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain); } - // To meet ABI, when VARARGS are passed on registers, the registers - // must have their values written to the caller stack frame. If the last - // argument was placed in the stack, there's no need to save any register. - if ((isVarArg) && (Subtarget->isABI_O32() && ArgRegEnd)) { - if (StackPtr.getNode() == 0) - StackPtr = DAG.getRegister(StackReg, getPointerTy()); - - // The last register argument that must be saved is Mips::A3 - TargetRegisterClass *RC = Mips::CPURegsRegisterClass; - unsigned StackLoc = ArgLocs.size()-1; - - for (++ArgRegEnd; ArgRegEnd <= Mips::A3; ++ArgRegEnd, ++StackLoc) { - unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgRegEnd, RC); + if (isVarArg && Subtarget->isABI_O32()) { + // Record the frame index of the first variable argument + // which is a value necessary to VASTART. + unsigned NextStackOffset = CCInfo.getNextStackOffset(); + assert(NextStackOffset % 4 == 0 && + "NextStackOffset must be aligned to 4-byte boundaries."); + LastFI = MFI->CreateFixedObject(4, NextStackOffset, true); + MipsFI->setVarArgsFrameIndex(LastFI); + + // If NextStackOffset is smaller than o32's 16-byte reserved argument area, + // copy the integer registers that have not been used for argument passing + // to the caller's stack frame. + for (; NextStackOffset < 16; NextStackOffset += 4) { + TargetRegisterClass *RC = Mips::CPURegsRegisterClass; + unsigned Idx = NextStackOffset / 4; + unsigned Reg = AddLiveIn(DAG.getMachineFunction(), O32IntRegs[Idx], RC); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, MVT::i32); - - int FI = MFI->CreateFixedObject(4, 0, true); - MipsFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4))); - SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy()); + LastFI = MFI->CreateFixedObject(4, NextStackOffset, true); + SDValue PtrOff = DAG.getFrameIndex(LastFI, getPointerTy()); OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, MachinePointerInfo(), false, false, 0)); - - // Record the frame index of the first variable argument - // which is a value necessary to VASTART. - if (!MipsFI->getVarArgsFrameIndex()) - MipsFI->setVarArgsFrameIndex(FI); } } + MipsFI->setLastInArgFI(LastFI); + // All stores are grouped in one node to allow the matching between // the size of Ins and InVals. This only happens when on varg functions if (!OutChains.empty()) { @@ -1571,5 +1791,7 @@ MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { bool MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { if (VT != MVT::f32 && VT != MVT::f64) return false; + if (Imm.isNegZero()) + return false; return Imm.isZero(); }