diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 0d5b6932a7e..36fbcfa71a8 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -2,8 +2,8 @@
 //
 //                     The LLVM Compiler Infrastructure
 //
-// This file was developed by Chris Lattner and is distributed under
-// the University of Illinois Open Source License. See LICENSE.TXT for details.
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
@@ -16,14 +16,16 @@
 #include "PPCPredicates.h"
 #include "PPCTargetMachine.h"
 #include "PPCPerfectShuffle.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/VectorExtras.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SSARegMap.h"
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
 #include "llvm/Intrinsics.h"
@@ -32,7 +34,9 @@
 #include "llvm/Support/CommandLine.h"
 using namespace llvm;

-static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc");
+static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc",
+cl::desc("enable preincrement load/store generation on PPC (experimental)"),
+                                     cl::Hidden);

 PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
   : TargetLowering(TM), PPCSubTarget(*TM.getSubtargetImpl()) {
@@ -49,12 +53,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
 addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);

 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
- setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);
+ setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
 setLoadXAction(ISD::SEXTLOAD, MVT::i8, Expand);
-
- // PowerPC does not have truncstore for i1.
- setStoreXAction(MVT::i1, Promote);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);

+ // PowerPC has pre-inc loads and stores.
 setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
 setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
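// Editor's sketch (not part of the patch): "pre-inc" refers to PowerPC's
// update-form memory instructions (lwzu, stwu, ...), which write the computed
// effective address back into the base register. The indexed-load/store
// legality hooks above let the backend fold the pointer bump and the access
// in loops like the one below into a single update-form instruction per
// iteration. The function name is hypothetical, for illustration only.
void zero_fill(int *p, int n) {
  for (int i = 0; i < n; ++i)
    *++p = 0;   // pointer update + store can become one stwu
}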
@@ -67,27 +70,46 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
 setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
 setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);

- setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
- setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
+ // Shortening conversions involving ppcf128 get expanded (2 regs -> 1 reg)
+ setConvertAction(MVT::ppcf128, MVT::f64, Expand);
+ setConvertAction(MVT::ppcf128, MVT::f32, Expand);
+ // This is used in the ppcf128->int sequence. Note it has different semantics
+ // from FP_ROUND: that rounds to nearest, this rounds to zero.
+ setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);

 // PowerPC has no intrinsics for these particular operations
 setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
 setOperationAction(ISD::MEMSET, MVT::Other, Expand);
 setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
-
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+
 // PowerPC has no SREM/UREM instructions
 setOperationAction(ISD::SREM, MVT::i32, Expand);
 setOperationAction(ISD::UREM, MVT::i32, Expand);
 setOperationAction(ISD::SREM, MVT::i64, Expand);
 setOperationAction(ISD::UREM, MVT::i64, Expand);
-
- // We don't support sin/cos/sqrt/fmod
+
+ // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+
+ // We don't support sin/cos/sqrt/fmod/pow
 setOperationAction(ISD::FSIN , MVT::f64, Expand);
 setOperationAction(ISD::FCOS , MVT::f64, Expand);
 setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FPOW , MVT::f64, Expand);
 setOperationAction(ISD::FSIN , MVT::f32, Expand);
 setOperationAction(ISD::FCOS , MVT::f32, Expand);
 setOperationAction(ISD::FREM , MVT::f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::f32, Expand);
+
+ setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

 // If we're enabling GP optimizations, use hardware square root
 if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
@@ -145,32 +167,37 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
 // Support label based line numbers.
 setOperationAction(ISD::LOCATION, MVT::Other, Expand);
 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
- if (!TM.getSubtarget<PPCSubtarget>().isDarwin()) {
- setOperationAction(ISD::LABEL, MVT::Other, Expand);
- } else {
- setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
- setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
- setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
- setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
- }
+
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+
 // We want to legalize GlobalAddress and ConstantPool nodes into the
 // appropriate instructions to materialize the address.
 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
 setOperationAction(ISD::JumpTable, MVT::i32, Custom);
 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
 setOperationAction(ISD::JumpTable, MVT::i64, Custom);

 // RET must be custom lowered, to meet ABI requirements
 setOperationAction(ISD::RET , MVT::Other, Custom);
-
+
 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
 setOperationAction(ISD::VASTART , MVT::Other, Custom);

+ // VAARG is custom lowered with ELF 32 ABI
+ if (TM.getSubtarget<PPCSubtarget>().isELF32_ABI())
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ else
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+
 // Use the default implementation.
- setOperationAction(ISD::VAARG , MVT::Other, Expand);
 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
 setOperationAction(ISD::VAEND , MVT::Other, Expand);
 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
@@ -202,12 +229,12 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
 }

 if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) {
- // 64 bit PowerPC implementations can support i64 types directly
+ // 64-bit PowerPC implementations can support i64 types directly
 addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 } else {
- // 32 bit PowerPC wants to expand i64 shifts itself.
+ // 32-bit PowerPC wants to expand i64 shifts itself.
 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
@@ -217,7 +244,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
 // First set operation action for all vector types to expand. Then we
 // will selectively turn on ones that can be effectively codegen'd.
 for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
+ VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
 // add/sub are legal for all supported vector VT's.
 setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
 setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
@@ -247,11 +274,19 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
 setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
 setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
 setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand);
 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
 setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
 setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Expand);
-
+ setOperationAction(ISD::UMUL_LOHI, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::UDIVREM, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::SDIVREM, (MVT::ValueType)VT, Expand);
 setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::FPOW, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::CTPOP, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::CTLZ, (MVT::ValueType)VT, Expand);
+ setOperationAction(ISD::CTTZ, (MVT::ValueType)VT, Expand);
 }

 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
@@ -304,9 +339,29 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
 setTargetDAGCombine(ISD::BR_CC);
 setTargetDAGCombine(ISD::BSWAP);

+ // Darwin long double math library functions have $LDBL128 appended.
+ if (TM.getSubtarget<PPCSubtarget>().isDarwin()) {
+ setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
+ setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
+ setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
+ setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
+ setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
+ }
+
 computeRegisterProperties();
 }

+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area.
+unsigned PPCTargetLowering::getByValTypeAlignment(const Type *Ty) const {
+ TargetMachine &TM = getTargetMachine();
+ // Darwin passes everything on 4 byte boundary.
+ if (TM.getSubtarget<PPCSubtarget>().isDarwin())
+ return 4;
+ // FIXME Elf TBD
+ return 4;
+}
+
 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
 switch (Opcode) {
 default: return 0;
@@ -339,6 +394,11 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
 case PPCISD::LBRX: return "PPCISD::LBRX";
 case PPCISD::STBRX: return "PPCISD::STBRX";
 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
+ case PPCISD::MFFS: return "PPCISD::MFFS";
+ case PPCISD::MTFSB0: return "PPCISD::MTFSB0";
+ case PPCISD::MTFSB1: return "PPCISD::MTFSB1";
+ case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
+ case PPCISD::MTFSF: return "PPCISD::MTFSF";
 }
 }

@@ -349,12 +409,12 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
 static bool isFloatingPointZero(SDOperand Op) {
 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
- return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
+ return CFP->getValueAPF().isZero();
 else if (ISD::isEXTLoad(Op.Val) || ISD::isNON_EXTLoad(Op.Val)) {
 // Maybe this has already been legalized into the constant pool?
 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
 if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
- return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
+ return CFP->getValueAPF().isZero();
 }
 return false;
 }
@@ -511,6 +571,16 @@ bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) {
 return true;
 }

+/// isAllNegativeZeroVector - Returns true if all elements of build_vector
+/// are -0.0.
+bool PPC::isAllNegativeZeroVector(SDNode *N) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR);
+ if (PPC::isSplatShuffleMask(N, N->getNumOperands()))
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
+ return CFP->getValueAPF().isNegZero();
+ return false;
+}
+
 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
 unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
@@ -599,7 +669,7 @@ SDOperand PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
 ValSizeInBytes = MVT::getSizeInBits(CN->getValueType(0))/8;
 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
- Value = FloatToBits(CN->getValue());
+ Value = FloatToBits(CN->getValueAPF().convertToFloat());
 ValSizeInBytes = 4;
 }

@@ -679,15 +749,21 @@ bool PPCTargetLowering::SelectAddressRegReg(SDOperand N, SDOperand &Base,
 // If this is an or of disjoint bitfields, we can codegen this as an add
 // (for better address arithmetic) if the LHS and RHS of the OR are provably
 // disjoint.
- uint64_t LHSKnownZero, LHSKnownOne;
- uint64_t RHSKnownZero, RHSKnownOne;
- ComputeMaskedBits(N.getOperand(0), ~0U, LHSKnownZero, LHSKnownOne);
+ APInt LHSKnownZero, LHSKnownOne;
+ APInt RHSKnownZero, RHSKnownOne;
+ DAG.ComputeMaskedBits(N.getOperand(0),
+ APInt::getAllOnesValue(N.getOperand(0)
+ .getValueSizeInBits()),
+ LHSKnownZero, LHSKnownOne);

- if (LHSKnownZero) {
- ComputeMaskedBits(N.getOperand(1), ~0U, RHSKnownZero, RHSKnownOne);
+ if (LHSKnownZero.getBoolValue()) {
+ DAG.ComputeMaskedBits(N.getOperand(1),
+ APInt::getAllOnesValue(N.getOperand(1)
+ .getValueSizeInBits()),
+ RHSKnownZero, RHSKnownOne);
 // If all of the bits are known zero on the LHS or RHS, the add won't
 // carry.
- if ((LHSKnownZero | RHSKnownZero) == ~0U) {
+ if (~(LHSKnownZero | RHSKnownZero) == 0) {
 Base = N.getOperand(0);
 Index = N.getOperand(1);
 return true;
@@ -734,9 +810,11 @@ bool PPCTargetLowering::SelectAddressRegImm(SDOperand N, SDOperand &Disp,
 // If this is an or of disjoint bitfields, we can codegen this as an add
 // (for better address arithmetic) if the LHS and RHS of the OR are
 // provably disjoint.
- uint64_t LHSKnownZero, LHSKnownOne;
- ComputeMaskedBits(N.getOperand(0), ~0U, LHSKnownZero, LHSKnownOne);
- if ((LHSKnownZero|~(unsigned)imm) == ~0U) {
+ APInt LHSKnownZero, LHSKnownOne;
+ DAG.ComputeMaskedBits(N.getOperand(0),
+ APInt::getAllOnesValue(32),
+ LHSKnownZero, LHSKnownOne);
+ if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
 // If all of the bits are known zero on the LHS or RHS, the add won't
 // carry.
 Base = N.getOperand(0);
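// Editor's sketch (not part of the patch): the "disjoint bitfields" tests
// above rest on the identity that a | b == a + b whenever a and b share no
// set bits, because the addition then produces no carries. A minimal
// standalone check of that identity:
#include <cassert>
#include <cstdint>

int main() {
  uint32_t hi = 0xABCD0000;   // known-zero in the low 16 bits
  uint32_t lo = 0x00001234;   // known-zero in the high 16 bits
  // Every bit position is known zero on one side or the other...
  assert((0x0000FFFFu | 0xFFFF0000u) == ~0u);
  // ...so OR and ADD agree, and the backend may use an add for addressing.
  assert((hi | lo) == hi + lo);
  return 0;
}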
@@ -842,9 +920,11 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDOperand N, SDOperand &Disp,
 // If this is an or of disjoint bitfields, we can codegen this as an add
 // (for better address arithmetic) if the LHS and RHS of the OR are
 // provably disjoint.
- uint64_t LHSKnownZero, LHSKnownOne;
- ComputeMaskedBits(N.getOperand(0), ~0U, LHSKnownZero, LHSKnownOne);
- if ((LHSKnownZero|~(unsigned)imm) == ~0U) {
+ APInt LHSKnownZero, LHSKnownOne;
+ DAG.ComputeMaskedBits(N.getOperand(0),
+ APInt::getAllOnesValue(32),
+ LHSKnownZero, LHSKnownOne);
+ if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
 // If all of the bits are known zero on the LHS or RHS, the add won't
 // carry.
 Base = N.getOperand(0);
@@ -903,12 +983,12 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDOperand &Base,
 MVT::ValueType VT;
 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
 Ptr = LD->getBasePtr();
- VT = LD->getLoadedVT();
+ VT = LD->getMemoryVT();
 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
 ST = ST;
 Ptr = ST->getBasePtr();
- VT = ST->getStoredVT();
+ VT = ST->getMemoryVT();
 } else
 return false;

@@ -932,7 +1012,7 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDOperand &Base,
 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
 // sext i32 to i64 when addr mode is r+i.
- if (LD->getValueType(0) == MVT::i64 && LD->getLoadedVT() == MVT::i32 &&
+ if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
 LD->getExtensionType() == ISD::SEXTLOAD &&
 isa<ConstantSDNode>(Offset))
 return false;

@@ -946,7 +1026,8 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDOperand &Base,
 // LowerOperation implementation
 //===----------------------------------------------------------------------===//

-static SDOperand LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
+SDOperand PPCTargetLowering::LowerConstantPool(SDOperand Op,
+ SelectionDAG &DAG) {
 MVT::ValueType PtrVT = Op.getValueType();
 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
 Constant *C = CP->getConstVal();
@@ -977,7 +1058,7 @@ static SDOperand LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
 return Lo;
 }

-static SDOperand LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
+SDOperand PPCTargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
 MVT::ValueType PtrVT = Op.getValueType();
 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
 SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
@@ -1007,11 +1088,20 @@ static SDOperand LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
 return Lo;
 }

-static SDOperand LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
+SDOperand PPCTargetLowering::LowerGlobalTLSAddress(SDOperand Op,
+ SelectionDAG &DAG) {
+ assert(0 && "TLS not implemented for PPC.");
+}
+
+SDOperand PPCTargetLowering::LowerGlobalAddress(SDOperand Op,
+ SelectionDAG &DAG) {
 MVT::ValueType PtrVT = Op.getValueType();
 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
 GlobalValue *GV = GSDN->getGlobal();
 SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
+ // If it's a debug information descriptor, don't mess with it.
+ if (DAG.isVerifiedDebugInfoDesc(Op))
+ return GA;
 SDOperand Zero = DAG.getConstant(0, PtrVT);
 const TargetMachine &TM = DAG.getTarget();

@@ -1044,7 +1134,7 @@ static SDOperand LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
 return DAG.getLoad(PtrVT, DAG.getEntryNode(), Lo, NULL, 0);
 }

-static SDOperand LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
+SDOperand PPCTargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();

 // If we're comparing for equality to zero, expose the fact that this is
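// Editor's sketch (not part of the patch): the "equality to zero" comment
// above refers to the classic PowerPC idiom for materializing (x == 0) as an
// integer: cntlzw returns 32 exactly when x is zero, so shifting the count
// right by 5 yields 1 for zero and 0 otherwise. Portable C++ equivalent of
// the idea (the helper name is hypothetical):
#include <cassert>

static unsigned seteq0(unsigned x) {
  unsigned n = 0;                                  // stand-in for cntlzw
  while (n < 32 && !(x & (1u << (31 - n)))) ++n;   // count leading zeros
  return n >> 5;                                   // 1 iff n == 32 iff x == 0
}

int main() {
  assert(seteq0(0) == 1 && seteq0(7) == 0);
  return 0;
}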
+ // */ + // char *overflow_arg_area; + // /* location on stack that holds + // * the next overflow argument + // */ + // char *reg_save_area; + // /* where r3:r10 and f1:f8 (if saved) + // * are stored + // */ + // } va_list[1]; + + + SDOperand ArgGPR = DAG.getConstant(VarArgsNumGPR, MVT::i8); + SDOperand ArgFPR = DAG.getConstant(VarArgsNumFPR, MVT::i8); + + MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + + SDOperand StackOffsetFI = DAG.getFrameIndex(VarArgsStackOffset, PtrVT); SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); - SrcValueSDNode *SV = cast(Op.getOperand(2)); - return DAG.getStore(Op.getOperand(0), FR, Op.getOperand(1), SV->getValue(), - SV->getOffset()); + + uint64_t FrameOffset = MVT::getSizeInBits(PtrVT)/8; + SDOperand ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT); + + uint64_t StackOffset = MVT::getSizeInBits(PtrVT)/8 - 1; + SDOperand ConstStackOffset = DAG.getConstant(StackOffset, PtrVT); + + uint64_t FPROffset = 1; + SDOperand ConstFPROffset = DAG.getConstant(FPROffset, PtrVT); + + const Value *SV = cast(Op.getOperand(2))->getValue(); + + // Store first byte : number of int regs + SDOperand firstStore = DAG.getStore(Op.getOperand(0), ArgGPR, + Op.getOperand(1), SV, 0); + uint64_t nextOffset = FPROffset; + SDOperand nextPtr = DAG.getNode(ISD::ADD, PtrVT, Op.getOperand(1), + ConstFPROffset); + + // Store second byte : number of float regs + SDOperand secondStore = + DAG.getStore(firstStore, ArgFPR, nextPtr, SV, nextOffset); + nextOffset += StackOffset; + nextPtr = DAG.getNode(ISD::ADD, PtrVT, nextPtr, ConstStackOffset); + + // Store second word : arguments given on stack + SDOperand thirdStore = + DAG.getStore(secondStore, StackOffsetFI, nextPtr, SV, nextOffset); + nextOffset += FrameOffset; + nextPtr = DAG.getNode(ISD::ADD, PtrVT, nextPtr, ConstFrameOffset); + + // Store third word : arguments given in registers + return DAG.getStore(thirdStore, FR, nextPtr, SV, nextOffset); + } #include "PPCGenCallingConv.inc" @@ -1113,26 +1287,30 @@ static const unsigned *GetFPR(const PPCSubtarget &Subtarget) { static const unsigned FPR[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, - PPC::F8, PPC::F9, PPC::F10 + PPC::F8 }; return FPR; } -static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, +SDOperand PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, + SelectionDAG &DAG, int &VarArgsFrameIndex, + int &VarArgsStackOffset, + unsigned &VarArgsNumGPR, + unsigned &VarArgsNumFPR, const PPCSubtarget &Subtarget) { // TODO: add description of PPC stack frame format, or at least some docs. // MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - SSARegMap *RegMap = MF.getSSARegMap(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); SmallVector ArgValues; SDOperand Root = Op.getOperand(0); MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = PtrVT == MVT::i64; bool isMachoABI = Subtarget.isMachoABI(); - bool isELF_ABI = Subtarget.isELF_ABI(); + bool isELF32_ABI = Subtarget.isELF32_ABI(); unsigned PtrByteSize = isPPC64 ? 8 : 4; unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI); @@ -1153,9 +1331,9 @@ static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; - const unsigned Num_GPR_Regs = sizeof(GPR_32)/sizeof(GPR_32[0]); - const unsigned Num_FPR_Regs = isMachoABI ? 
 #include "PPCGenCallingConv.inc"

@@ -1113,26 +1287,30 @@ static const unsigned *GetFPR(const PPCSubtarget &Subtarget) {
 static const unsigned FPR[] = {
 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
- PPC::F8, PPC::F9, PPC::F10
+ PPC::F8
 };
 return FPR;
 }

-static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG,
+SDOperand PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op,
+ SelectionDAG &DAG,
 int &VarArgsFrameIndex,
+ int &VarArgsStackOffset,
+ unsigned &VarArgsNumGPR,
+ unsigned &VarArgsNumFPR,
 const PPCSubtarget &Subtarget) {
 // TODO: add description of PPC stack frame format, or at least some docs.
 //
 MachineFunction &MF = DAG.getMachineFunction();
 MachineFrameInfo *MFI = MF.getFrameInfo();
- SSARegMap *RegMap = MF.getSSARegMap();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
 SmallVector<SDOperand, 8> ArgValues;
 SDOperand Root = Op.getOperand(0);

 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 bool isPPC64 = PtrVT == MVT::i64;
 bool isMachoABI = Subtarget.isMachoABI();
- bool isELF_ABI = Subtarget.isELF_ABI();
+ bool isELF32_ABI = Subtarget.isELF32_ABI();
 unsigned PtrByteSize = isPPC64 ? 8 : 4;

 unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
@@ -1153,9 +1331,9 @@ static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG,
 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
 };

- const unsigned Num_GPR_Regs = sizeof(GPR_32)/sizeof(GPR_32[0]);
- const unsigned Num_FPR_Regs = isMachoABI ? 13 : 10;
- const unsigned Num_VR_Regs = sizeof( VR)/sizeof( VR[0]);
+ const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
+ const unsigned Num_FPR_Regs = isMachoABI ? 13 : 8;
+ const unsigned Num_VR_Regs = array_lengthof( VR);

 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

@@ -1165,10 +1343,12 @@ static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG,
 // entry to a function on PPC, the arguments start after the linkage area,
 // although the first ones are often in registers.
 //
- // In the ELF ABI, GPRs and stack are double word align: an argument
+ // In the ELF 32 ABI, GPRs and stack are double word align: an argument
 // represented with two words (long long or double) must be copied to an
 // even GPR_idx value or to an even ArgOffset value.

+ SmallVector<SDOperand, 8> MemOps;
+
 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
 SDOperand ArgVal;
 bool needsLoad = false;
@@ -1177,20 +1357,58 @@ static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG,
 unsigned ArgSize = ObjSize;
 unsigned Flags = cast<ConstantSDNode>(Op.getOperand(ArgNo+3))->getValue();
 unsigned AlignFlag = 1 << ISD::ParamFlags::OrigAlignmentOffs;
+ unsigned isByVal = Flags & ISD::ParamFlags::ByVal;
 // See if next argument requires stack alignment in ELF
 bool Expand = (ObjectVT == MVT::f64) || ((ArgNo + 1 < e) &&
 (cast<ConstantSDNode>(Op.getOperand(ArgNo+4))->getValue() & AlignFlag) &&
 (!(Flags & AlignFlag)));

 unsigned CurArgOffset = ArgOffset;
+
+ // FIXME alignment for ELF may not be right
+ // FIXME the codegen can be much improved in some cases.
+ // We do not have to keep everything in memory.
+ if (isByVal) {
+ // Double word align in ELF
+ if (Expand && isELF32_ABI) GPR_idx += (GPR_idx % 2);
+ // ObjSize is the true size, ArgSize rounded up to multiple of registers.
+ ObjSize = (Flags & ISD::ParamFlags::ByValSize) >>
+ ISD::ParamFlags::ByValSizeOffs;
+ ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ // The value of the object is its address.
+ int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset);
+ SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
+ ArgValues.push_back(FIN);
+ for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
+ // Store whatever pieces of the object are in registers
+ // to memory. ArgVal will be address of the beginning of
+ // the object.
+ if (GPR_idx != Num_GPR_Regs) { + unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); + RegInfo.addLiveIn(GPR[GPR_idx], VReg); + int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset); + SDOperand FIN = DAG.getFrameIndex(FI, PtrVT); + SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT); + SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0); + MemOps.push_back(Store); + ++GPR_idx; + if (isMachoABI) ArgOffset += PtrByteSize; + } else { + ArgOffset += ArgSize - (ArgOffset-CurArgOffset); + break; + } + } + continue; + } + switch (ObjectVT) { default: assert(0 && "Unhandled argument type!"); case MVT::i32: // Double word align in ELF - if (Expand && isELF_ABI && !isPPC64) GPR_idx += (GPR_idx % 2); + if (Expand && isELF32_ABI) GPR_idx += (GPR_idx % 2); if (GPR_idx != Num_GPR_Regs) { - unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass); - MF.addLiveIn(GPR[GPR_idx], VReg); + unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); + RegInfo.addLiveIn(GPR[GPR_idx], VReg); ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32); ++GPR_idx; } else { @@ -1198,7 +1416,7 @@ static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, ArgSize = PtrByteSize; } // Stack align in ELF - if (needsLoad && Expand && isELF_ABI && !isPPC64) + if (needsLoad && Expand && isELF32_ABI) ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize; // All int arguments reserve stack space in Macho ABI. if (isMachoABI || needsLoad) ArgOffset += PtrByteSize; @@ -1206,8 +1424,8 @@ static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, case MVT::i64: // PPC64 if (GPR_idx != Num_GPR_Regs) { - unsigned VReg = RegMap->createVirtualRegister(&PPC::G8RCRegClass); - MF.addLiveIn(GPR[GPR_idx], VReg); + unsigned VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); + RegInfo.addLiveIn(GPR[GPR_idx], VReg); ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64); ++GPR_idx; } else { @@ -1229,10 +1447,10 @@ static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, if (FPR_idx != Num_FPR_Regs) { unsigned VReg; if (ObjectVT == MVT::f32) - VReg = RegMap->createVirtualRegister(&PPC::F4RCRegClass); + VReg = RegInfo.createVirtualRegister(&PPC::F4RCRegClass); else - VReg = RegMap->createVirtualRegister(&PPC::F8RCRegClass); - MF.addLiveIn(FPR[FPR_idx], VReg); + VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); + RegInfo.addLiveIn(FPR[FPR_idx], VReg); ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT); ++FPR_idx; } else { @@ -1240,7 +1458,7 @@ static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, } // Stack align in ELF - if (needsLoad && Expand && isELF_ABI && !isPPC64) + if (needsLoad && Expand && isELF32_ABI) ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize; // All FP arguments reserve stack space in Macho ABI. if (isMachoABI || needsLoad) ArgOffset += isPPC64 ? 8 : ObjSize; @@ -1251,8 +1469,8 @@ static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, case MVT::v16i8: // Note that vector arguments in registers don't reserve stack space. 
 if (VR_idx != Num_VR_Regs) {
- unsigned VReg = RegMap->createVirtualRegister(&PPC::VRRCRegClass);
- MF.addLiveIn(VR[VR_idx], VReg);
+ unsigned VReg = RegInfo.createVirtualRegister(&PPC::VRRCRegClass);
+ RegInfo.addLiveIn(VR[VR_idx], VReg);
 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
 ++VR_idx;
 } else {
@@ -1265,43 +1483,68 @@ static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG,
 }

 // We need to load the argument to a virtual register if we determined above
- // that we ran out of physical registers of the appropriate type
+ // that we ran out of physical registers of the appropriate type.
 if (needsLoad) {
- // If the argument is actually used, emit a load from the right stack
- // slot.
- if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
- int FI = MFI->CreateFixedObject(ObjSize,
- CurArgOffset + (ArgSize - ObjSize));
- SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
- ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
- } else {
- // Don't emit a dead load.
- ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
- }
+ int FI = MFI->CreateFixedObject(ObjSize,
+ CurArgOffset + (ArgSize - ObjSize));
+ SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
+ ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
 }

 ArgValues.push_back(ArgVal);
 }
-
+
 // If the function takes variable number of arguments, make a frame index for
 // the start of the first vararg value... for expansion of llvm.va_start.
 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
 if (isVarArg) {
+
+ int depth;
+ if (isELF32_ABI) {
+ VarArgsNumGPR = GPR_idx;
+ VarArgsNumFPR = FPR_idx;
+
+ // Make room for Num_GPR_Regs, Num_FPR_Regs and for a possible frame
+ // pointer.
+ depth = -(Num_GPR_Regs * MVT::getSizeInBits(PtrVT)/8 +
+ Num_FPR_Regs * MVT::getSizeInBits(MVT::f64)/8 +
+ MVT::getSizeInBits(PtrVT)/8);
+
+ VarArgsStackOffset = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
+ ArgOffset);
+
+ }
+ else
+ depth = ArgOffset;
+
 VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8,
- ArgOffset);
+ depth);
 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
+
+ // In ELF 32 ABI, the fixed integer arguments of a variadic function are
+ // stored to the VarArgsFrameIndex on the stack.
+ if (isELF32_ABI) {
+ for (GPR_idx = 0; GPR_idx != VarArgsNumGPR; ++GPR_idx) {
+ SDOperand Val = DAG.getRegister(GPR[GPR_idx], PtrVT);
+ SDOperand Store = DAG.getStore(Root, Val, FIN, NULL, 0);
+ MemOps.push_back(Store);
+ // Increment the address by four for the next argument to store
+ SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
+ FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
+ }
+ }
+
 // If this function is vararg, store any remaining integer argument regs
 // to their spots on the stack so that they may be loaded by dereferencing
 // the result of va_next.
- SmallVector<SDOperand, 8> MemOps;
 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
 unsigned VReg;
 if (isPPC64)
- VReg = RegMap->createVirtualRegister(&PPC::G8RCRegClass);
+ VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
 else
- VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
+ VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);

- MF.addLiveIn(GPR[GPR_idx], VReg);
+ RegInfo.addLiveIn(GPR[GPR_idx], VReg);
 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
 MemOps.push_back(Store);
@@ -1309,10 +1552,39 @@ static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG,
 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT);
 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
 }
- if (!MemOps.empty())
- Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
+
+ // In ELF 32 ABI, the double arguments are stored to the VarArgsFrameIndex
+ // on the stack.
+ if (isELF32_ABI) {
+ for (FPR_idx = 0; FPR_idx != VarArgsNumFPR; ++FPR_idx) {
+ SDOperand Val = DAG.getRegister(FPR[FPR_idx], MVT::f64);
+ SDOperand Store = DAG.getStore(Root, Val, FIN, NULL, 0);
+ MemOps.push_back(Store);
+ // Increment the address by eight for the next argument to store
+ SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(MVT::f64)/8,
+ PtrVT);
+ FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
+ }
+
+ for (; FPR_idx != Num_FPR_Regs; ++FPR_idx) {
+ unsigned VReg;
+ VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
+
+ RegInfo.addLiveIn(FPR[FPR_idx], VReg);
+ SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::f64);
+ SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
+ MemOps.push_back(Store);
+ // Increment the address by eight for the next argument to store
+ SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(MVT::f64)/8,
+ PtrVT);
+ FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
+ }
+ }
 }

+ if (!MemOps.empty())
+ Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());
+
 ArgValues.push_back(Root);

 // Return the new list of results.
@@ -1332,19 +1604,36 @@ static SDNode *isBLACompatibleAddress(SDOperand Op, SelectionDAG &DAG) {
 (Addr << 6 >> 6) != Addr)
 return 0; // Top 6 bits have to be sext of immediate.

- return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
+ return DAG.getConstant((int)C->getValue() >> 2,
+ DAG.getTargetLoweringInfo().getPointerTy()).Val;
 }
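// Editor's sketch (not part of the patch): the (Addr << 6 >> 6) != Addr test
// above checks whether a 32-bit address survives truncation to a 26-bit
// signed immediate (the field used by the absolute bla/ba branches).
// Shifting left then arithmetic-shifting right sign-extends bit 25 through
// the top six bits; the value is representable iff that round trip is
// lossless. A standalone check (the helper name is hypothetical; the left
// shift goes through uint32_t to keep the C++ well defined):
#include <cassert>
#include <cstdint>

static bool fits_in_26_bit_sext(int32_t addr) {
  return ((int32_t)((uint32_t)addr << 6) >> 6) == addr;
}

int main() {
  assert(fits_in_26_bit_sext(0x01FFFFFF));    // largest positive 26-bit value
  assert(fits_in_26_bit_sext(-0x02000000));   // most negative 26-bit value
  assert(!fits_in_26_bit_sext(0x02000000));   // one past the top: rejected
  return 0;
}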
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
+/// by "Src" to address "Dst" of size "Size". Alignment information is
+/// specified by the specific parameter attribute. The copy will be passed as
+/// a byval function parameter.
+/// Sometimes what we are copying is the end of a larger object, the part that
+/// does not fit in registers.
+static SDOperand
+CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand Chain,
+ unsigned Flags, SelectionDAG &DAG, unsigned Size) {
+ unsigned Align = 1 <<
+ ((Flags & ISD::ParamFlags::ByValAlign) >> ISD::ParamFlags::ByValAlignOffs);
+ SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
+ SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
+ SDOperand AlwaysInline = DAG.getConstant(0, MVT::i32);
+ return DAG.getMemcpy(Chain, Dst, Src, SizeNode, AlignNode, AlwaysInline);
+}

-static SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG,
- const PPCSubtarget &Subtarget) {
+SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) {
 SDOperand Chain = Op.getOperand(0);
 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
 SDOperand Callee = Op.getOperand(4);
 unsigned NumOps = (Op.getNumOperands() - 5) / 2;

 bool isMachoABI = Subtarget.isMachoABI();
- bool isELF_ABI = Subtarget.isELF_ABI();
+ bool isELF32_ABI = Subtarget.isELF32_ABI();

 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
 bool isPPC64 = PtrVT == MVT::i64;
@@ -1361,7 +1650,11 @@ static SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG,
 // Add up all the space actually used.
 for (unsigned i = 0; i != NumOps; ++i) {
+ unsigned Flags = cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue();
 unsigned ArgSize =MVT::getSizeInBits(Op.getOperand(5+2*i).getValueType())/8;
+ if (Flags & ISD::ParamFlags::ByVal)
+ ArgSize = (Flags & ISD::ParamFlags::ByValSize) >>
+ ISD::ParamFlags::ByValSizeOffs;
 ArgSize = std::max(ArgSize, PtrByteSize);
 NumBytes += ArgSize;
 }
@@ -1378,6 +1671,7 @@ static SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG,
 // These operations are automatically eliminated by the prolog/epilog pass
 Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, PtrVT));
+ SDOperand CallSeqStart = Chain;

 // Set up a copy of the stack pointer for use loading and storing any
 // arguments that may not fit in the registers available for argument
@@ -1409,9 +1703,9 @@ static SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG,
 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
 };
- const unsigned NumGPRs = sizeof(GPR_32)/sizeof(GPR_32[0]);
- const unsigned NumFPRs = isMachoABI ? 13 : 10;
- const unsigned NumVRs = sizeof( VR)/sizeof( VR[0]);
+ const unsigned NumGPRs = array_lengthof(GPR_32);
+ const unsigned NumFPRs = isMachoABI ? 13 : 8;
+ const unsigned NumVRs = array_lengthof( VR);

 const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
@@ -1432,8 +1726,8 @@ static SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG,
 // register cannot be found for it.
 SDOperand PtrOff;

- // Stack align in ELF
- if (isELF_ABI && Expand && !isPPC64)
+ // Stack align in ELF 32
+ if (isELF32_ABI && Expand)
 PtrOff = DAG.getConstant(ArgOffset + ((ArgOffset/4) % 2) * PtrByteSize,
 StackPtr.getValueType());
 else
@@ -1447,13 +1741,71 @@ static SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG,
 Arg = DAG.getNode(ExtOp, MVT::i64, Arg);
 }
-
+
+ // FIXME Elf untested, what are alignment rules?
+ // FIXME memcpy is used way more than necessary. Correctness first.
+ if (Flags & ISD::ParamFlags::ByVal) {
+ unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
+ ISD::ParamFlags::ByValSizeOffs;
+ if (isELF32_ABI && Expand) GPR_idx += (GPR_idx % 2);
+ if (Size==1 || Size==2) {
+ // Very small objects are passed right-justified.
+ // Everything else is passed left-justified.
+ MVT::ValueType VT = (Size==1) ? MVT::i8 : MVT::i16;
+ if (GPR_idx != NumGPRs) {
+ SDOperand Load = DAG.getExtLoad(ISD::EXTLOAD, PtrVT, Chain, Arg,
+ NULL, 0, VT);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ if (isMachoABI)
+ ArgOffset += PtrByteSize;
+ } else {
+ SDOperand Const = DAG.getConstant(4 - Size, PtrOff.getValueType());
+ SDOperand AddPtr = DAG.getNode(ISD::ADD, PtrVT, PtrOff, Const);
+ SDOperand MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
+ CallSeqStart.Val->getOperand(0),
+ Flags, DAG, Size);
+ // This must go outside the CALLSEQ_START..END.
+ SDOperand NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.Val->getOperand(1));
+ DAG.ReplaceAllUsesWith(CallSeqStart.Val, NewCallSeqStart.Val);
+ Chain = CallSeqStart = NewCallSeqStart;
+ ArgOffset += PtrByteSize;
+ }
+ continue;
+ }
+ for (unsigned j=0; j<Size; j+=PtrByteSize) {
+ SDOperand Const = DAG.getConstant(j, PtrOff.getValueType());
+ SDOperand AddArg = DAG.getNode(ISD::ADD, PtrVT, Arg, Const);
+ if (GPR_idx != NumGPRs) {
+ SDOperand Load = DAG.getLoad(PtrVT, Chain, AddArg, NULL, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ if (isMachoABI)
+ ArgOffset += PtrByteSize;
+ } else {
+ SDOperand AddPtr = DAG.getNode(ISD::ADD, PtrVT, PtrOff, Const);
+ SDOperand MemcpyCall = CreateCopyOfByValArgument(AddArg, AddPtr,
+ CallSeqStart.Val->getOperand(0),
+ Flags, DAG, Size - j);
+ // This must go outside the CALLSEQ_START..END.
+ SDOperand NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.Val->getOperand(1));
+ DAG.ReplaceAllUsesWith(CallSeqStart.Val, NewCallSeqStart.Val);
+ Chain = CallSeqStart = NewCallSeqStart;
+ ArgOffset += ((Size - j + 3)/4)*4;
+ break;
+ }
+ }
+ continue;
+ }
+
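// Editor's sketch (not part of the patch): "right-justified" above means a
// 1- or 2-byte aggregate occupies the low-order end of its 4-byte argument
// slot, which is why the memory path biases the store address by 4 - Size.
// On a big-endian target that places the payload where the callee expects it:
#include <cassert>
#include <cstring>

int main() {
  unsigned char slot[4] = {0, 0, 0, 0};  // one argument slot
  unsigned char obj = 0xAB;              // a Size==1 byval object
  unsigned Size = 1;
  std::memcpy(slot + (4 - Size), &obj, Size);  // store at the biased address
  assert(slot[3] == 0xAB);               // low-order end of a big-endian word
  return 0;
}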
 switch (Arg.getValueType()) {
 default: assert(0 && "Unexpected ValueType for argument!");
 case MVT::i32:
 case MVT::i64:
 // Double word align in ELF
- if (isELF_ABI && Expand && !isPPC64) GPR_idx += (GPR_idx % 2);
+ if (isELF32_ABI && Expand) GPR_idx += (GPR_idx % 2);
 if (GPR_idx != NumGPRs) {
 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
 } else {
@@ -1462,7 +1814,7 @@ static SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG,
 }
 if (inMem || isMachoABI) {
 // Stack align in ELF
- if (isELF_ABI && Expand && !isPPC64)
+ if (isELF32_ABI && Expand)
 ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;

 ArgOffset += PtrByteSize;
@@ -1516,7 +1868,7 @@ static SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG,
 }
 if (inMem || isMachoABI) {
 // Stack align in ELF
- if (isELF_ABI && Expand && !isPPC64)
+ if (isELF32_ABI && Expand)
 ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;

 if (isPPC64)
 ArgOffset += 8;
@@ -1548,8 +1900,8 @@ static SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG,
 InFlag = Chain.getValue(1);
 }

- // With the ELF ABI, set CR6 to true if this is a vararg call.
- if (isVarArg && isELF_ABI) {
+ // With the ELF 32 ABI, set CR6 to true if this is a vararg call.
+ if (isVarArg && isELF32_ABI) {
 SDOperand SetCR(DAG.getTargetNode(PPC::SETCR, MVT::i32), 0);
 Chain = DAG.getCopyToReg(Chain, PPC::CR6, SetCR, InFlag);
 InFlag = Chain.getValue(1);
@@ -1610,6 +1962,13 @@ static SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG,
 Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
 InFlag = Chain.getValue(1);

+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getConstant(NumBytes, PtrVT),
+ DAG.getConstant(0, PtrVT),
+ InFlag);
+ if (Op.Val->getValueType(0) != MVT::Other)
+ InFlag = Chain.getValue(1);
+
 SDOperand ResultVals[3];
 unsigned NumResults = 0;
 NodeTys.clear();
@@ -1620,9 +1979,9 @@ static SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG,
 case MVT::Other: break;
 case MVT::i32:
 if (Op.Val->getValueType(1) == MVT::i32) {
- Chain = DAG.getCopyFromReg(Chain, PPC::R4, MVT::i32, InFlag).getValue(1);
+ Chain = DAG.getCopyFromReg(Chain, PPC::R3, MVT::i32, InFlag).getValue(1);
 ResultVals[0] = Chain.getValue(0);
- Chain = DAG.getCopyFromReg(Chain, PPC::R3, MVT::i32,
+ Chain = DAG.getCopyFromReg(Chain, PPC::R4, MVT::i32,
 Chain.getValue(2)).getValue(1);
 ResultVals[1] = Chain.getValue(0);
 NumResults = 2;
@@ -1640,8 +1999,20 @@ static SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG,
 NumResults = 1;
 NodeTys.push_back(MVT::i64);
 break;
- case MVT::f32:
 case MVT::f64:
+ if (Op.Val->getValueType(1) == MVT::f64) {
+ Chain = DAG.getCopyFromReg(Chain, PPC::F1, MVT::f64, InFlag).getValue(1);
+ ResultVals[0] = Chain.getValue(0);
+ Chain = DAG.getCopyFromReg(Chain, PPC::F2, MVT::f64,
+ Chain.getValue(2)).getValue(1);
+ ResultVals[1] = Chain.getValue(0);
+ NumResults = 2;
+ NodeTys.push_back(MVT::f64);
+ NodeTys.push_back(MVT::f64);
+ break;
+ }
+ // else fall through
+ case MVT::f32:
 Chain = DAG.getCopyFromReg(Chain, PPC::F1, Op.Val->getValueType(0),
 InFlag).getValue(1);
 ResultVals[0] = Chain.getValue(0);
@@ -1660,8 +2031,6 @@ static SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG,
 break;
 }

- Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
- DAG.getConstant(NumBytes, PtrVT));
 NodeTys.push_back(MVT::Other);

 // If the function returns void, just return the chain.
@@ -1675,17 +2044,19 @@ static SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG,
 return Res.getValue(Op.ResNo);
 }

-static SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
+SDOperand PPCTargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG,
+ TargetMachine &TM) {
 SmallVector<CCValAssign, 16> RVLocs;
 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
- CCState CCInfo(CC, TM, RVLocs);
+ bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+ CCState CCInfo(CC, isVarArg, TM, RVLocs);
 CCInfo.AnalyzeReturn(Op.Val, RetCC_PPC);

 // If this is the first return lowered for this function, add the regs to the
 // liveout set for the function.
- if (DAG.getMachineFunction().liveout_empty()) { + if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { for (unsigned i = 0; i != RVLocs.size(); ++i) - DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg()); + DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); } SDOperand Chain = Op.getOperand(0); @@ -1705,7 +2076,7 @@ static SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) { return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Chain); } -static SDOperand LowerSTACKRESTORE(SDOperand Op, SelectionDAG &DAG, +SDOperand PPCTargetLowering::LowerSTACKRESTORE(SDOperand Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) { // When we pop the dynamic allocation we need to restore the SP link. @@ -1731,7 +2102,8 @@ static SDOperand LowerSTACKRESTORE(SDOperand Op, SelectionDAG &DAG, return DAG.getStore(Chain, LoadLinkSP, StackPtr, NULL, 0); } -static SDOperand LowerDYNAMIC_STACKALLOC(SDOperand Op, SelectionDAG &DAG, +SDOperand PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op, + SelectionDAG &DAG, const PPCSubtarget &Subtarget) { MachineFunction &MF = DAG.getMachineFunction(); bool IsPPC64 = Subtarget.isPPC64(); @@ -1773,7 +2145,7 @@ static SDOperand LowerDYNAMIC_STACKALLOC(SDOperand Op, SelectionDAG &DAG, /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when /// possible. -static SDOperand LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG) { +SDOperand PPCTargetLowering::LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG) { // Not FP? Not a fsel. if (!MVT::isFloatingPoint(Op.getOperand(0).getValueType()) || !MVT::isFloatingPoint(Op.getOperand(2).getValueType())) @@ -1817,7 +2189,7 @@ static SDOperand LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG) { DAG.getNode(ISD::FNEG, MVT::f64, LHS), TV, FV); } - SDOperand Cmp; + SDOperand Cmp; switch (CC) { default: break; // SETUO etc aren't handled by fsel. case ISD::SETULT: @@ -1852,7 +2224,8 @@ static SDOperand LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG) { return SDOperand(); } -static SDOperand LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { +// FIXME: Split this code up when LegalizeDAGTypes lands. +SDOperand PPCTargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { assert(MVT::isFloatingPoint(Op.getOperand(0).getValueType())); SDOperand Src = Op.getOperand(0); if (Src.getValueType() == MVT::f32) @@ -1870,18 +2243,84 @@ static SDOperand LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) { } // Convert the FP value to an int value through memory. - SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Tmp); + SDOperand FIPtr = DAG.CreateStackTemporary(MVT::f64); + + // Emit a store to the stack slot. + SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Tmp, FIPtr, NULL, 0); + + // Result is a load from the stack slot. If loading 4 bytes, make sure to + // add in a bias. 
 if (Op.getValueType() == MVT::i32)
- Bits = DAG.getNode(ISD::TRUNCATE, MVT::i32, Bits);
- return Bits;
+ FIPtr = DAG.getNode(ISD::ADD, FIPtr.getValueType(), FIPtr,
+ DAG.getConstant(4, FIPtr.getValueType()));
+ return DAG.getLoad(Op.getValueType(), Chain, FIPtr, NULL, 0);
 }

+SDOperand PPCTargetLowering::LowerFP_ROUND_INREG(SDOperand Op,
+ SelectionDAG &DAG) {
+ assert(Op.getValueType() == MVT::ppcf128);
+ SDNode *Node = Op.Val;
+ assert(Node->getOperand(0).getValueType() == MVT::ppcf128);
+ assert(Node->getOperand(0).Val->getOpcode() == ISD::BUILD_PAIR);
+ SDOperand Lo = Node->getOperand(0).Val->getOperand(0);
+ SDOperand Hi = Node->getOperand(0).Val->getOperand(1);
+
+ // This sequence changes FPSCR to do round-to-zero, adds the two halves
+ // of the long double, and puts FPSCR back the way it was. We do not
+ // actually model FPSCR.
+ std::vector<MVT::ValueType> NodeTys;
+ SDOperand Ops[4], Result, MFFSreg, InFlag, FPreg;
+
+ NodeTys.push_back(MVT::f64); // Return register
+ NodeTys.push_back(MVT::Flag); // Returns a flag for later insns
+ Result = DAG.getNode(PPCISD::MFFS, NodeTys, &InFlag, 0);
+ MFFSreg = Result.getValue(0);
+ InFlag = Result.getValue(1);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::Flag); // Returns a flag
+ Ops[0] = DAG.getConstant(31, MVT::i32);
+ Ops[1] = InFlag;
+ Result = DAG.getNode(PPCISD::MTFSB1, NodeTys, Ops, 2);
+ InFlag = Result.getValue(0);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::Flag); // Returns a flag
+ Ops[0] = DAG.getConstant(30, MVT::i32);
+ Ops[1] = InFlag;
+ Result = DAG.getNode(PPCISD::MTFSB0, NodeTys, Ops, 2);
+ InFlag = Result.getValue(0);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::f64); // result of add
+ NodeTys.push_back(MVT::Flag); // Returns a flag
+ Ops[0] = Lo;
+ Ops[1] = Hi;
+ Ops[2] = InFlag;
+ Result = DAG.getNode(PPCISD::FADDRTZ, NodeTys, Ops, 3);
+ FPreg = Result.getValue(0);
+ InFlag = Result.getValue(1);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::f64);
+ Ops[0] = DAG.getConstant(1, MVT::i32);
+ Ops[1] = MFFSreg;
+ Ops[2] = FPreg;
+ Ops[3] = InFlag;
+ Result = DAG.getNode(PPCISD::MTFSF, NodeTys, Ops, 4);
+ FPreg = Result.getValue(0);
+
+ // We know the low half is about to be thrown away, so just use something
+ // convenient.
+ return DAG.getNode(ISD::BUILD_PAIR, Lo.getValueType(), FPreg, FPreg);
+}
+
-static SDOperand LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
+SDOperand PPCTargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
 if (Op.getOperand(0).getValueType() == MVT::i64) {
 SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0));
 SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits);
 if (Op.getValueType() == MVT::f32)
- FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
+ FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP, DAG.getIntPtrConstant(0));
 return FP;
 }

@@ -1900,20 +2339,83 @@ static SDOperand LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
 Op.getOperand(0));

 // STD the extended value into the stack slot.
+ MemOperand MO(PseudoSourceValue::getFixedStack(),
+ MemOperand::MOStore, FrameIdx, 8, 8);
+
 SDOperand Store = DAG.getNode(PPCISD::STD_32, MVT::Other,
 DAG.getEntryNode(), Ext64, FIdx,
- DAG.getSrcValue(NULL));
+ DAG.getMemOperand(MO));

 // Load the value as a double.
 SDOperand Ld = DAG.getLoad(MVT::f64, Store, FIdx, NULL, 0);

 // FCFID it and return it.
 SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Ld);
 if (Op.getValueType() == MVT::f32)
- FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
+ FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP, DAG.getIntPtrConstant(0));
 return FP;
 }

+SDOperand PPCTargetLowering::LowerFLT_ROUNDS_(SDOperand Op, SelectionDAG &DAG) {
+ /*
+ The rounding mode is in bits 30:31 of FPSR, and has the following
+ settings:
+ 00 Round to nearest
+ 01 Round to 0
+ 10 Round to +inf
+ 11 Round to -inf
+
+ FLT_ROUNDS, on the other hand, expects the following:
+ -1 Undefined
+ 0 Round to 0
+ 1 Round to nearest
+ 2 Round to +inf
+ 3 Round to -inf
+
+ To perform the conversion, we do:
+ ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
+ */
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MVT::ValueType VT = Op.getValueType();
+ MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ std::vector<MVT::ValueType> NodeTys;
+ SDOperand MFFSreg, InFlag;
+
+ // Save FP Control Word to register
+ NodeTys.push_back(MVT::f64); // return register
+ NodeTys.push_back(MVT::Flag); // unused in this context
+ SDOperand Chain = DAG.getNode(PPCISD::MFFS, NodeTys, &InFlag, 0);
+
+ // Save FP register to stack slot
+ int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
+ SDOperand StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
+ SDOperand Store = DAG.getStore(DAG.getEntryNode(), Chain,
+ StackSlot, NULL, 0);
+
+ // Load FP Control Word from low 32 bits of stack slot.
+ SDOperand Four = DAG.getConstant(4, PtrVT);
+ SDOperand Addr = DAG.getNode(ISD::ADD, PtrVT, StackSlot, Four);
+ SDOperand CWD = DAG.getLoad(MVT::i32, Store, Addr, NULL, 0);
+
+ // Transform as necessary
+ SDOperand CWD1 =
+ DAG.getNode(ISD::AND, MVT::i32,
+ CWD, DAG.getConstant(3, MVT::i32));
+ SDOperand CWD2 =
+ DAG.getNode(ISD::SRL, MVT::i32,
+ DAG.getNode(ISD::AND, MVT::i32,
+ DAG.getNode(ISD::XOR, MVT::i32,
+ CWD, DAG.getConstant(3, MVT::i32)),
+ DAG.getConstant(3, MVT::i32)),
+ DAG.getConstant(1, MVT::i8));
+
+ SDOperand RetVal =
+ DAG.getNode(ISD::XOR, MVT::i32, CWD1, CWD2);
+
+ return DAG.getNode((MVT::getSizeInBits(VT) < 16 ?
+ ISD::TRUNCATE : ISD::ZERO_EXTEND), VT, RetVal);
+}
+
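// Editor's check (not part of the patch): the bit-twiddling formula in the
// FLT_ROUNDS comment above can be verified exhaustively over the four FPSCR
// RN encodings:
#include <cassert>

int main() {
  // FPSCR RN encoding -> expected FLT_ROUNDS value
  const int expected[4] = {1 /*nearest*/, 0 /*zero*/, 2 /*+inf*/, 3 /*-inf*/};
  for (int rn = 0; rn < 4; ++rn) {
    int flt_rounds = (rn & 0x3) ^ ((~rn & 0x3) >> 1);
    assert(flt_rounds == expected[rn]);
  }
  return 0;
}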
-static SDOperand LowerSHL_PARTS(SDOperand Op, SelectionDAG &DAG) {
+SDOperand PPCTargetLowering::LowerSHL_PARTS(SDOperand Op, SelectionDAG &DAG) {
 assert(Op.getNumOperands() == 3 &&
 Op.getValueType() == MVT::i32 &&
 Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!");
@@ -1938,7 +2440,7 @@ static SDOperand LowerSHL_PARTS(SDOperand Op, SelectionDAG &DAG) {
 OutOps, 2);
 }

-static SDOperand LowerSRL_PARTS(SDOperand Op, SelectionDAG &DAG) {
+SDOperand PPCTargetLowering::LowerSRL_PARTS(SDOperand Op, SelectionDAG &DAG) {
 assert(Op.getNumOperands() == 3 &&
 Op.getValueType() == MVT::i32 &&
 Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRL!");
@@ -1963,7 +2465,7 @@ static SDOperand LowerSRL_PARTS(SDOperand Op, SelectionDAG &DAG) {
 OutOps, 2);
 }

-static SDOperand LowerSRA_PARTS(SDOperand Op, SelectionDAG &DAG) {
+SDOperand PPCTargetLowering::LowerSRA_PARTS(SDOperand Op, SelectionDAG &DAG) {
 assert(Op.getNumOperands() == 3 &&
 Op.getValueType() == MVT::i32 &&
 Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRA!");
@@ -2019,7 +2521,7 @@ static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
- EltBits = FloatToBits(CN->getValue());
+ EltBits = FloatToBits(CN->getValueAPF().convertToFloat());
 } else {
 // Nonconstant element.
 return true;
 }
@@ -2105,7 +2607,7 @@ static SDOperand BuildSplatI(int Val, unsigned SplatSize, MVT::ValueType VT,
 MVT::ValueType CanonicalVT = VTys[SplatSize-1];

 // Build a canonical splat for this value.
- SDOperand Elt = DAG.getConstant(Val, MVT::getVectorBaseType(CanonicalVT));
+ SDOperand Elt = DAG.getConstant(Val, MVT::getVectorElementType(CanonicalVT));
 SmallVector<SDOperand, 8> Ops;
 Ops.assign(MVT::getVectorNumElements(CanonicalVT), Elt);
 SDOperand Res = DAG.getNode(ISD::BUILD_VECTOR, CanonicalVT,
@@ -2155,7 +2657,8 @@ static SDOperand BuildVSLDOI(SDOperand LHS, SDOperand RHS, unsigned Amt,
 // selects to a single instruction, return Op. Otherwise, if we can codegen
 // this case more efficiently than a constant pool load, lower it to the
 // sequence of ops that should be used.
-static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
+SDOperand PPCTargetLowering::LowerBUILD_VECTOR(SDOperand Op,
+ SelectionDAG &DAG) {
 // If this is a vector of constants or undefs, get the bits. A bit in
 // UndefBits is set if the corresponding element of the vector is an
 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are
@@ -2224,7 +2727,7 @@ static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
 };

- for (unsigned idx = 0; idx < sizeof(SplatCsts)/sizeof(SplatCsts[0]); ++idx){
+ for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'
 int i = SplatCsts[idx];
@@ -2301,14 +2804,14 @@ static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
 if (SextVal >= 0 && SextVal <= 31) {
 SDOperand LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG);
 SDOperand RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG);
- LHS = DAG.getNode(ISD::SUB, Op.getValueType(), LHS, RHS);
+ LHS = DAG.getNode(ISD::SUB, LHS.getValueType(), LHS, RHS);
 return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), LHS);
 }
 // Odd, in range [-31,-17]: (vsplti C)+(vsplti -16).
 if (SextVal >= -31 && SextVal <= 0) {
 SDOperand LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG);
 SDOperand RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG);
- LHS = DAG.getNode(ISD::ADD, Op.getValueType(), LHS, RHS);
+ LHS = DAG.getNode(ISD::ADD, LHS.getValueType(), LHS, RHS);
 return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), LHS);
 }
 }
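// Editor's check (not part of the patch): the (vsplti C)±(vsplti -16) tricks
// above build splat constants outside the signed 5-bit vspltis range
// [-16,15] from two constants that are inside it. Both identities, checked
// over the ranges the code handles (each operand stays within [-16,15]):
#include <cassert>

int main() {
  for (int SextVal = 16; SextVal <= 31; ++SextVal)    // high range
    assert((SextVal - 16) - (-16) == SextVal);        // vsplti(C-16) - vsplti(-16)
  for (int SextVal = -31; SextVal <= -17; ++SextVal)  // low range
    assert((SextVal + 16) + (-16) == SextVal);        // vsplti(C+16) + vsplti(-16)
  return 0;
}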
- MVT::ValueType EltVT = MVT::getVectorBaseType(V1.getValueType()); + MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType()); unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8; SmallVector ResultMask; @@ -2561,7 +3065,8 @@ static bool getAltivecCompareInfo(SDOperand Intrin, int &CompareOpc, /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom /// lower, do it, otherwise return null. -static SDOperand LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) { +SDOperand PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, + SelectionDAG &DAG) { // If this is a lowered altivec predicate compare, CompareOpc is set to the // opcode number of the comparison. int CompareOpc; @@ -2627,7 +3132,8 @@ static SDOperand LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) { return Flags; } -static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) { +SDOperand PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, + SelectionDAG &DAG) { // Create a stack slot that is 16-byte aligned. MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16); @@ -2641,7 +3147,7 @@ static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) { return DAG.getLoad(Op.getValueType(), Store, FIdx, NULL, 0); } -static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG) { +SDOperand PPCTargetLowering::LowerMUL(SDOperand Op, SelectionDAG &DAG) { if (Op.getValueType() == MVT::v4i32) { SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1); @@ -2707,11 +3213,22 @@ SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { default: assert(0 && "Wasn't expecting to be able to lower this!"); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); - case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex); + case ISD::VASTART: + return LowerVASTART(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset, + VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget); + + case ISD::VAARG: + return LowerVAARG(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset, + VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget); + case ISD::FORMAL_ARGUMENTS: - return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex, PPCSubTarget); + return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex, + VarArgsStackOffset, VarArgsNumGPR, + VarArgsNumFPR, PPCSubTarget); + case ISD::CALL: return LowerCALL(Op, DAG, PPCSubTarget); case ISD::RET: return LowerRET(Op, DAG, getTargetMachine()); case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget); @@ -2721,6 +3238,8 @@ SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); + case ISD::FP_ROUND_INREG: return LowerFP_ROUND_INREG(Op, DAG); + case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); // Lower 64-bit shifts. case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG); @@ -2734,20 +3253,28 @@ SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); - // Frame & Return address. 
@@ -2734,20 +3253,28 @@ SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
   case ISD::MUL:                return LowerMUL(Op, DAG);
 
-  // Frame & Return address.  Currently unimplemented
-  case ISD::RETURNADDR:         break;
+  // Frame & Return address.
+  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
   case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
   }
   return SDOperand();
 }
 
+SDNode *PPCTargetLowering::ExpandOperationResult(SDNode *N, SelectionDAG &DAG) {
+  switch (N->getOpcode()) {
+  default: assert(0 && "Wasn't expecting to be able to lower this!");
+  case ISD::FP_TO_SINT: return LowerFP_TO_SINT(SDOperand(N, 0), DAG).Val;
+  }
+}
+
+
 //===----------------------------------------------------------------------===//
 //  Other Lowering Code
 //===----------------------------------------------------------------------===//
 
 MachineBasicBlock *
-PPCTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
-                                           MachineBasicBlock *BB) {
+PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+                                               MachineBasicBlock *BB) {
   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   assert((MI->getOpcode() == PPC::SELECT_CC_I4 ||
           MI->getOpcode() == PPC::SELECT_CC_I8 ||
@@ -2847,7 +3374,8 @@ SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N,
     // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
     // We allow the src/dst to be either f32/f64, but the intermediate
     // type must be i64.
-    if (N->getOperand(0).getValueType() == MVT::i64) {
+    if (N->getOperand(0).getValueType() == MVT::i64 &&
+        N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
       SDOperand Val = N->getOperand(0).getOperand(0);
       if (Val.getValueType() == MVT::f32) {
         Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
@@ -2859,7 +3387,8 @@ SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N,
       Val = DAG.getNode(PPCISD::FCFID, MVT::f64, Val);
       DCI.AddToWorklist(Val.Val);
       if (N->getValueType(0) == MVT::f32) {
-        Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val);
+        Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val,
+                          DAG.getIntPtrConstant(0));
        DCI.AddToWorklist(Val.Val);
       }
       return Val;
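The SINT_TO_FP combine above recognizes (sint_to_fp (fp_to_sint X)), which is
just "truncate X toward zero", and keeps the whole round trip in FP registers
as fctidz/fcfid instead of bouncing through memory. Numerically it computes
the following (standalone C++ model; the new ppcf128 guard exists because the
two-double long double format cannot take this register path):

#include <cassert>

// What the fctidz/fcfid pair computes: round a double toward zero without
// leaving the FP register file. fctidz is the truncating f64->i64 convert,
// fcfid the i64->f64 convert (exact whenever |i| < 2^53, which holds for any
// double that actually had a fractional part to discard).
static double roundTowardZero(double X) {
  long long I = static_cast<long long>(X); // fctidz
  return static_cast<double>(I);           // fcfid
}

int main() {
  assert(roundTowardZero(2.9)  == 2.0);
  assert(roundTowardZero(-2.9) == -2.0);
  return 0;
}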
@@ -2873,8 +3402,10 @@ SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::STORE:
     // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
     if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
+        !cast<StoreSDNode>(N)->isTruncatingStore() &&
         N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
-        N->getOperand(1).getValueType() == MVT::i32) {
+        N->getOperand(1).getValueType() == MVT::i32 &&
+        N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
       SDOperand Val = N->getOperand(1).getOperand(0);
       if (Val.getValueType() == MVT::f32) {
         Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
@@ -2915,11 +3446,11 @@ SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N,
       std::vector<MVT::ValueType> VTs;
       VTs.push_back(MVT::i32);
       VTs.push_back(MVT::Other);
-      SDOperand SV = DAG.getSrcValue(LD->getSrcValue(), LD->getSrcValueOffset());
+      SDOperand MO = DAG.getMemOperand(LD->getMemOperand());
       SDOperand Ops[] = {
         LD->getChain(),    // Chain
         LD->getBasePtr(),  // Ptr
-        SV,                // SrcValue
+        MO,                // MemOperand
         DAG.getValueType(N->getValueType(0)) // VT
       };
       SDOperand BSLoad = DAG.getNode(PPCISD::LBRX, VTs, Ops, 4);
@@ -3067,12 +3598,12 @@ SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N,
 //===----------------------------------------------------------------------===//
 
 void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
-                                                       uint64_t Mask,
-                                                       uint64_t &KnownZero,
-                                                       uint64_t &KnownOne,
+                                                       const APInt &Mask,
+                                                       APInt &KnownZero,
+                                                       APInt &KnownOne,
+                                                       const SelectionDAG &DAG,
                                                        unsigned Depth) const {
-  KnownZero = 0;
-  KnownOne = 0;
+  KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
   switch (Op.getOpcode()) {
   default: break;
   case PPCISD::LBRX: {
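The computeMaskedBitsForTargetNode hook (now APInt-based, and handed the DAG)
tells the optimizer, among other things, that a PPCISD::LBRX byte-reversed
halfword load zero-extends: bits 16..31 of the result are known zero no matter
what bytes were loaded. A standalone model of that fact (illustrative only;
lhbrx is the underlying halfword instruction):

#include <cassert>
#include <stdint.h>

// A byte-reversed halfword load: read two bytes, swap them, zero-extend.
static uint32_t lhbrx(const uint8_t *P) {
  return static_cast<uint32_t>(P[1]) << 8 | P[0];
}

int main() {
  const uint8_t Buf[2] = {0x34, 0x12};
  uint32_t V = lhbrx(Buf);
  assert(V == 0x1234);
  assert((V & 0xFFFF0000u) == 0); // the "known zero" high bits
  return 0;
}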
@@ -3151,9 +3682,12 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
 }
 
 
-// isOperandValidForConstraint
-SDOperand PPCTargetLowering::
-isOperandValidForConstraint(SDOperand Op, char Letter, SelectionDAG &DAG) {
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector.  If it is invalid, don't add anything to Ops.
+void PPCTargetLowering::LowerAsmOperandForConstraint(SDOperand Op, char Letter,
+                                                     std::vector<SDOperand>&Ops,
+                                                     SelectionDAG &DAG) {
+  SDOperand Result(0,0);
   switch (Letter) {
   default: break;
   case 'I':
@@ -3164,39 +3698,88 @@ isOperandValidForConstraint(SDOperand Op, char Letter, SelectionDAG &DAG) {
   case 'N':
   case 'O':
   case 'P': {
-    if (!isa<ConstantSDNode>(Op)) return SDOperand(0,0);// Must be an immediate.
-    unsigned Value = cast<ConstantSDNode>(Op)->getValue();
+    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
+    if (!CST) return; // Must be an immediate to match.
+    unsigned Value = CST->getValue();
     switch (Letter) {
     default: assert(0 && "Unknown constraint letter!");
     case 'I':  // "I" is a signed 16-bit constant.
-      if ((short)Value == (int)Value) return Op;
+      if ((short)Value == (int)Value)
+        Result = DAG.getTargetConstant(Value, Op.getValueType());
       break;
     case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
     case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
-      if ((short)Value == 0) return Op;
+      if ((short)Value == 0)
+        Result = DAG.getTargetConstant(Value, Op.getValueType());
       break;
     case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
-      if ((Value >> 16) == 0) return Op;
+      if ((Value >> 16) == 0)
+        Result = DAG.getTargetConstant(Value, Op.getValueType());
       break;
     case 'M':  // "M" is a constant that is greater than 31.
-      if (Value > 31) return Op;
+      if (Value > 31)
+        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
     case 'N':  // "N" is a positive constant that is an exact power of two.
-      if ((int)Value > 0 && isPowerOf2_32(Value)) return Op;
+      if ((int)Value > 0 && isPowerOf2_32(Value))
+        Result = DAG.getTargetConstant(Value, Op.getValueType());
       break;
     case 'O':  // "O" is the constant zero.
-      if (Value == 0) return Op;
+      if (Value == 0)
+        Result = DAG.getTargetConstant(Value, Op.getValueType());
       break;
     case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
-      if ((short)-Value == (int)-Value) return Op;
+      if ((short)-Value == (int)-Value)
+        Result = DAG.getTargetConstant(Value, Op.getValueType());
       break;
     }
     break;
   }
   }
 
+  if (Result.Val) {
+    Ops.push_back(Result);
+    return;
+  }
+
   // Handle standard constraint letters.
-  return TargetLowering::isOperandValidForConstraint(Op, Letter, DAG);
+  TargetLowering::LowerAsmOperandForConstraint(Op, Letter, Ops, DAG);
+}
+
+// isLegalAddressingMode - Return true if the addressing mode represented
+// by AM is legal for this target, for a load/store of the specified type.
+bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+                                              const Type *Ty) const {
+  // FIXME: PPC does not allow r+i addressing modes for vectors!
+
+  // PPC allows a sign-extended 16-bit immediate field.
+  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+    return false;
+
+  // No global is ever allowed as a base.
+  if (AM.BaseGV)
+    return false;
+
+  // PPC only supports r+r addressing beyond this point.
+  switch (AM.Scale) {
+  case 0:  // "r+i" or just "i", depending on HasBaseReg.
+    break;
+  case 1:
+    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
+      return false;
+    // Otherwise we have r+r or r+i.
+    break;
+  case 2:
+    if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r or 2*r+i is not allowed.
+      return false;
+    // Allow 2*r as r+r.
+    break;
+  default:
+    // No other scales are supported.
+    return false;
  }

+  return true;
 }
 
 /// isLegalAddressImmediate - Return true if the integer value can be used
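isLegalAddressingMode is new in this revision: callers such as loop strength
reduction ask it whether a candidate base+offset+scale combination is worth
forming before committing to it. The displacement screen is deliberately
coarse, testing against roughly +/-2^16 rather than the exact signed 16-bit
D-form field. A standalone restatement of just that screen (plain C++,
illustrative name):

#include <cassert>
#include <stdint.h>

static bool offsetRoughlyFits(int64_t BaseOffs) {
  // Mirrors the check above: reject anything at or beyond +/-2^16.
  return BaseOffs > -(1LL << 16) && BaseOffs < (1LL << 16) - 1;
}

int main() {
  assert(offsetRoughlyFits(32000));        // plausible r+i displacement
  assert(!offsetRoughlyFits(1LL << 16));   // too large to fold
  assert(!offsetRoughlyFits(-(1LL << 16)));
  return 0;
}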
@@ -3208,11 +3791,39 @@ bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,const Type *Ty) const{
 }
 
 bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
-  return TargetLowering::isLegalAddressImmediate(GV);
+  return false;
+}
+
+SDOperand PPCTargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) {
+  // Depths > 0 not supported yet!
+  if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
+    return SDOperand();
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+  int RAIdx = FuncInfo->getReturnAddrSaveIndex();
+  if (RAIdx == 0) {
+    bool isPPC64 = PPCSubTarget.isPPC64();
+    int Offset =
+      PPCFrameInfo::getReturnSaveOffset(isPPC64, PPCSubTarget.isMachoABI());
+
+    // Set up a frame object for the return address.
+    RAIdx = MF.getFrameInfo()->CreateFixedObject(isPPC64 ? 8 : 4, Offset);
+
+    // Remember it for next time.
+    FuncInfo->setReturnAddrSaveIndex(RAIdx);
+
+    // Make sure the function really does not optimize away the store of the
+    // RA to the stack.
+    FuncInfo->setLRStoreRequired();
+  }
+
+  // Just load the return address off the stack.
+  SDOperand RetAddrFI = DAG.getFrameIndex(RAIdx, getPointerTy());
+  return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0);
+}
 
-SDOperand PPCTargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG)
-{
+SDOperand PPCTargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) {
   // Depths > 0 not supported yet!
   if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
     return SDOperand();
 
@@ -3227,7 +3838,7 @@ SDOperand PPCTargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG)
 
   if (isPPC64)
     return DAG.getCopyFromReg(DAG.getEntryNode(), is31 ? PPC::X31 : PPC::X1,
-                              MVT::i32);
+                              MVT::i64);
   else
     return DAG.getCopyFromReg(DAG.getEntryNode(), is31 ? PPC::R31 : PPC::R1,
                               MVT::i32);
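LowerRETURNADDR and LowerFRAMEADDR are what service the GCC-style builtins;
both still bail out (returning an empty SDOperand) for any depth other than
zero. A usage sketch:

#include <cstdio>

int main() {
  // Depth 0 is the only depth these lowerings currently accept.
  void *RA = __builtin_return_address(0); // ISD::RETURNADDR -> LowerRETURNADDR
  void *FP = __builtin_frame_address(0);  // ISD::FRAMEADDR  -> LowerFRAMEADDR
  std::printf("ra=%p fp=%p\n", RA, FP);
  return 0;
}

Note the companion fix in the last hunk: on PPC64 the frame-pointer copy is
now typed MVT::i64 to match the 64-bit X31/X1 registers, where it was
previously (incorrectly) MVT::i32.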