//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// This file implements the interfaces that AMDIL uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#include "AMDILISelLowering.h"
#include "AMDILDevices.h"
#include "AMDILIntrinsicInfo.h"
#include "AMDILRegisterInfo.h"
#include "AMDILSubtarget.h"
#include "AMDILUtilityFunctions.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"

#define ISDBITCAST ISD::BITCAST
#define MVTGLUE    MVT::Glue

//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
#include "AMDGPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// TargetLowering Implementation Help Functions Begin
//===----------------------------------------------------------------------===//
static SDValue
getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType)
{
  DebugLoc DL = Src.getDebugLoc();
  EVT svt = Src.getValueType().getScalarType();
  EVT dvt = Dst.getValueType().getScalarType();
  if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
    if (dvt.bitsGT(svt)) {
      Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
    } else if (svt.bitsGT(dvt)) {
      Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
          DAG.getConstant(1, MVT::i32));
    }
  } else if (svt.isInteger() && dvt.isInteger()) {
    if (!svt.bitsEq(dvt)) {
      Src = DAG.getSExtOrTrunc(Src, DL, dvt);
    }
  } else if (svt.isInteger()) {
    unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP;
    if (!svt.bitsEq(dvt)) {
      if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
      } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
      } else {
        assert(0 && "We only support 32 and 64bit fp types");
      }
    }
    Src = DAG.getNode(opcode, DL, dvt, Src);
  } else if (dvt.isInteger()) {
    unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT;
    if (svt.getSimpleVT().SimpleTy == MVT::f32) {
      Src = DAG.getNode(opcode, DL, MVT::i32, Src);
    } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
      Src = DAG.getNode(opcode, DL, MVT::i64, Src);
    } else {
      assert(0 && "We only support 32 and 64bit fp types");
    }
    Src = DAG.getSExtOrTrunc(Src, DL, dvt);
  }
  return Src;
}
// CondCCodeToCC - Convert a DAG condition code to an AMDIL CC
static AMDILCC::CondCodes
CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
{
  switch (CC) {
  default:
    errs() << "Condition Code: " << (unsigned int)CC << "\n";
    assert(0 && "Unknown condition code!");
    return AMDILCC::COND_ERROR;
  case ISD::SETO:
    switch (type) {
    case MVT::f32: return AMDILCC::IL_CC_F_O;
    case MVT::f64: return AMDILCC::IL_CC_D_O;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETUO:
    switch (type) {
    case MVT::f32: return AMDILCC::IL_CC_F_UO;
    case MVT::f64: return AMDILCC::IL_CC_D_UO;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETGT:
    switch (type) {
    case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
      return AMDILCC::IL_CC_I_GT;
    case MVT::f32: return AMDILCC::IL_CC_F_GT;
    case MVT::f64: return AMDILCC::IL_CC_D_GT;
    case MVT::i64: return AMDILCC::IL_CC_L_GT;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETGE:
    switch (type) {
    case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
      return AMDILCC::IL_CC_I_GE;
    case MVT::f32: return AMDILCC::IL_CC_F_GE;
    case MVT::f64: return AMDILCC::IL_CC_D_GE;
    case MVT::i64: return AMDILCC::IL_CC_L_GE;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETLT:
    switch (type) {
    case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
      return AMDILCC::IL_CC_I_LT;
    case MVT::f32: return AMDILCC::IL_CC_F_LT;
    case MVT::f64: return AMDILCC::IL_CC_D_LT;
    case MVT::i64: return AMDILCC::IL_CC_L_LT;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETLE:
    switch (type) {
    case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
      return AMDILCC::IL_CC_I_LE;
    case MVT::f32: return AMDILCC::IL_CC_F_LE;
    case MVT::f64: return AMDILCC::IL_CC_D_LE;
    case MVT::i64: return AMDILCC::IL_CC_L_LE;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETNE:
    switch (type) {
    case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
      return AMDILCC::IL_CC_I_NE;
    case MVT::f32: return AMDILCC::IL_CC_F_NE;
    case MVT::f64: return AMDILCC::IL_CC_D_NE;
    case MVT::i64: return AMDILCC::IL_CC_L_NE;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETEQ:
    switch (type) {
    case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
      return AMDILCC::IL_CC_I_EQ;
    case MVT::f32: return AMDILCC::IL_CC_F_EQ;
    case MVT::f64: return AMDILCC::IL_CC_D_EQ;
    case MVT::i64: return AMDILCC::IL_CC_L_EQ;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETUGT:
    switch (type) {
    case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
      return AMDILCC::IL_CC_U_GT;
    case MVT::f32: return AMDILCC::IL_CC_F_UGT;
    case MVT::f64: return AMDILCC::IL_CC_D_UGT;
    case MVT::i64: return AMDILCC::IL_CC_UL_GT;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETUGE:
    switch (type) {
    case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
      return AMDILCC::IL_CC_U_GE;
    case MVT::f32: return AMDILCC::IL_CC_F_UGE;
    case MVT::f64: return AMDILCC::IL_CC_D_UGE;
    case MVT::i64: return AMDILCC::IL_CC_UL_GE;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETULT:
    switch (type) {
    case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
      return AMDILCC::IL_CC_U_LT;
    case MVT::f32: return AMDILCC::IL_CC_F_ULT;
    case MVT::f64: return AMDILCC::IL_CC_D_ULT;
    case MVT::i64: return AMDILCC::IL_CC_UL_LT;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETULE:
    switch (type) {
    case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
      return AMDILCC::IL_CC_U_LE;
    case MVT::f32: return AMDILCC::IL_CC_F_ULE;
    case MVT::f64: return AMDILCC::IL_CC_D_ULE;
    case MVT::i64: return AMDILCC::IL_CC_UL_LE;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETUNE:
    switch (type) {
    case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
      return AMDILCC::IL_CC_U_NE;
    case MVT::f32: return AMDILCC::IL_CC_F_UNE;
    case MVT::f64: return AMDILCC::IL_CC_D_UNE;
    case MVT::i64: return AMDILCC::IL_CC_UL_NE;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETUEQ:
    switch (type) {
    case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
      return AMDILCC::IL_CC_U_EQ;
    case MVT::f32: return AMDILCC::IL_CC_F_UEQ;
    case MVT::f64: return AMDILCC::IL_CC_D_UEQ;
    case MVT::i64: return AMDILCC::IL_CC_UL_EQ;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETOGT:
    switch (type) {
    case MVT::f32: return AMDILCC::IL_CC_F_OGT;
    case MVT::f64: return AMDILCC::IL_CC_D_OGT;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETOGE:
    switch (type) {
    case MVT::f32: return AMDILCC::IL_CC_F_OGE;
    case MVT::f64: return AMDILCC::IL_CC_D_OGE;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETOLT:
    switch (type) {
    case MVT::f32: return AMDILCC::IL_CC_F_OLT;
    case MVT::f64: return AMDILCC::IL_CC_D_OLT;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETOLE:
    switch (type) {
    case MVT::f32: return AMDILCC::IL_CC_F_OLE;
    case MVT::f64: return AMDILCC::IL_CC_D_OLE;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETONE:
    switch (type) {
    case MVT::f32: return AMDILCC::IL_CC_F_ONE;
    case MVT::f64: return AMDILCC::IL_CC_D_ONE;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  case ISD::SETOEQ:
    switch (type) {
    case MVT::f32: return AMDILCC::IL_CC_F_OEQ;
    case MVT::f64: return AMDILCC::IL_CC_D_OEQ;
    default:
      assert(0 && "Opcode combination not generated correctly!");
      return AMDILCC::COND_ERROR;
    }
  }
}
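// For example, CondCCodeToCC(ISD::SETGT, MVT::i32) yields AMDILCC::IL_CC_I_GT
// (signed integer compare), while the same condition code on MVT::f32 yields
// AMDILCC::IL_CC_F_GT and on MVT::i64 yields AMDILCC::IL_CC_L_GT.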
SDValue
AMDILTargetLowering::LowerMemArgument(
    SDValue Chain,
    CallingConv::ID CallConv,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    const CCValAssign &VA,
    MachineFrameInfo *MFI,
    unsigned i) const
{
  // Create the nodes corresponding to a load from this parameter slot.
  ISD::ArgFlagsTy Flags = Ins[i].Flags;

  bool AlwaysUseMutable = (CallConv == CallingConv::Fast) &&
    getTargetMachine().Options.GuaranteedTailCallOpt;
  bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();

  // FIXME: For now, all byval parameter objects are marked mutable. This can
  // be changed with more analysis.
  // In case of tail call optimization, mark all arguments mutable, since they
  // could be overwritten by the lowering of arguments in case of a tail call.
  int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
      VA.getLocMemOffset(), isImmutable);
  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
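  // Each stack-passed argument gets a fixed frame object at the offset the
  // calling convention assigned to it; loading through that frame index is
  // what lets later frame lowering turn this into a direct stack access.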
  return DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
      MachinePointerInfo::getFixedStack(FI),
      false, false, false, 0);
}
//===----------------------------------------------------------------------===//
// TargetLowering Implementation Help Functions End
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// TargetLowering Class Implementation Begins
//===----------------------------------------------------------------------===//
AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF())
{
  size_t numTypes = sizeof(types) / sizeof(*types);
  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);

  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
  // These are the current register classes that are
  // supported.
  for (unsigned int x = 0; x < numTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];

    // FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types.
    // We cannot sextinreg; expand to shifts.
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Custom);
    setOperationAction(ISD::BRCOND, VT, Custom);
    setOperationAction(ISD::BR_CC, VT, Custom);
    setOperationAction(ISD::BR_JT, VT, Expand);
    setOperationAction(ISD::BRIND, VT, Expand);
    // TODO: Implement custom UREM/SREM routines
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::JumpTable, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SELECT, VT, Custom);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    if (VT != MVT::i64 && VT != MVT::v2i64) {
      setOperationAction(ISD::SDIV, VT, Custom);
    }
  }
  for (unsigned int x = 0; x < numFloatTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];

    // IL does not have these operations for floating point types
    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
    setOperationAction(ISD::SETOLT, VT, Expand);
    setOperationAction(ISD::SETOGE, VT, Expand);
    setOperationAction(ISD::SETOGT, VT, Expand);
    setOperationAction(ISD::SETOLE, VT, Expand);
    setOperationAction(ISD::SETULT, VT, Expand);
    setOperationAction(ISD::SETUGE, VT, Expand);
    setOperationAction(ISD::SETUGT, VT, Expand);
    setOperationAction(ISD::SETULE, VT, Expand);
  }
  for (unsigned int x = 0; x < numIntTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];

    // GPU also does not have divrem function for signed or unsigned
    setOperationAction(ISD::SDIVREM, VT, Expand);

    // GPU does not have [S|U]MUL_LOHI functions as a single instruction
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    // GPU doesn't have a rotl, rotr, or byteswap instruction
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);

    // GPU doesn't have any counting operators
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
  }
  for (unsigned int ii = 0; ii < numVectorTypes; ++ii) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];

    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    // setOperationAction(ISD::VSETCC, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
    setOperationAction(ISD::SELECT, VT, Expand);
  }
  if (STM.device()->isSupported(AMDILDeviceInfo::LongOps)) {
    setOperationAction(ISD::MULHU, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
    setOperationAction(ISD::MULHS, MVT::i64, Expand);
    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
    setOperationAction(ISD::Constant, MVT::i64, Legal);
    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
  }
  if (STM.device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
    // we support loading/storing v2f64 but not operations on the type
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
    // We want to expand vector conversions into their scalar
    // counterparts.
    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
  }
  // TODO: Fix the UDIV24 algorithm so it works for these
  // types correctly. This needs vector comparisons
  // for this to work correctly.
  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
  setOperationAction(ISD::SUBC, MVT::Other, Expand);
  setOperationAction(ISD::ADDE, MVT::Other, Expand);
  setOperationAction(ISD::ADDC, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_CC, MVT::Other, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::SETCC, MVT::Other, Custom);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);

  setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
  // Use the default implementation.
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::Constant, MVT::i32, Legal);
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  setStackPointerRegisterToSaveRestore(AMDGPU::SP);
  setSchedulingPreference(Sched::RegPressure);
  setPow2DivIsCheap(false);
  setPrefLoopAlignment(16);
  setSelectIsExpensive(true);
  setJumpIsExpensive(true);
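  // The store thresholds below are deliberately huge, presumably so that
  // memcpy, memmove, and memset are always expanded inline as store
  // sequences; the target has no C library calls to fall back on.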
  maxStoresPerMemcpy = 4096;
  maxStoresPerMemmove = 4096;
  maxStoresPerMemset = 4096;

#undef numVectorTypes
}
const char *
AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  switch (Opcode) {
  default: return 0;
  case AMDILISD::CMOVLOG: return "AMDILISD::CMOVLOG";
  case AMDILISD::MAD: return "AMDILISD::MAD";
  case AMDILISD::CALL: return "AMDILISD::CALL";
  case AMDILISD::SELECT_CC: return "AMDILISD::SELECT_CC";
  case AMDILISD::UMUL: return "AMDILISD::UMUL";
  case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF";
  case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
  case AMDILISD::CMP: return "AMDILISD::CMP";
  case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT";
  case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE";
  case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT";
  case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE";
  case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ";
  case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE";
  case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
  case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
  }
}
bool
AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    const CallInst &I, unsigned Intrinsic) const
{
  return false;
}

// The backend supports 32 and 64 bit floating point immediates
bool
AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
{
  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
    return true;
  } else {
    return false;
  }
}
bool
AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
{
  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
    return false;
  } else {
    return true;
  }
}
// computeMaskedBitsForTargetNode - Determine which bits of Op are known to
// be zero or one. Op is expected to be a target specific node. Used by the
// DAG combiner.
void
AMDILTargetLowering::computeMaskedBitsForTargetNode(
    const SDValue Op,
    APInt &KnownZero,
    APInt &KnownOne,
    const SelectionDAG &DAG,
    unsigned Depth) const
{
  APInt KnownZero2;
  APInt KnownOne2;
  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
  switch (Op.getOpcode()) {
  default: break;
  case AMDILISD::SELECT_CC:
    DAG.ComputeMaskedBits(
        Op.getOperand(1),
        KnownZero,
        KnownOne,
        Depth + 1);
    DAG.ComputeMaskedBits(
        Op.getOperand(2),
        KnownZero2,
        KnownOne2,
        Depth + 1);
    assert((KnownZero & KnownOne) == 0
        && "Bits known to be one AND zero?");
    assert((KnownZero2 & KnownOne2) == 0
        && "Bits known to be one AND zero?");
    // Only known if known in both the LHS and RHS
    KnownOne &= KnownOne2;
    KnownZero &= KnownZero2;
    break;
  }
}
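// Example: in the SELECT_CC case above, if bit 31 is known zero in both the
// true and false operands, it remains known zero in the result; any bit
// known only on one side is dropped, since either operand may be chosen at
// run time.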
// This is the function that determines which calling convention should
// be used. Currently there is only one calling convention.
CCAssignFn*
AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const
{
  //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  return CC_AMDIL32;
}
// LowerCallResult - Lower the result values of an ISD::CALL into the
// appropriate copies out of appropriate physical registers. This assumes that
// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
// being lowered. This returns an SDNode with the same number of values as the
// ISD::CALL.
SDValue
AMDILTargetLowering::LowerCallResult(
    SDValue Chain,
    SDValue InFlag,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const
{
  // Assign locations to each value returned by this call
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
      getTargetMachine(), RVLocs, *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    EVT CopyVT = RVLocs[i].getValVT();
    if (RVLocs[i].isRegLoc()) {
      Chain = DAG.getCopyFromReg(
          Chain,
          dl,
          RVLocs[i].getLocReg(),
          CopyVT,
          InFlag).getValue(1);
      SDValue Val = Chain.getValue(0);
      InFlag = Chain.getValue(2);
      InVals.push_back(Val);
    }
  }

  return Chain;
}
//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//

// Recursively assign SDNodeOrdering to any unordered nodes.
// This is necessary to maintain source ordering of instructions
// under -O0 to avoid odd-looking "skipping around" issues.
static SDValue
Ordered( SelectionDAG &DAG, unsigned order, const SDValue New )
{
  if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) {
    DAG.AssignOrdering( New.getNode(), order );
    for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i)
      Ordered( DAG, order, New.getOperand(i) );
  }
  return New;
}

#define LOWER(A) \
  case ISD:: A: \
    return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )
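// For instance, LOWER(SELECT) expands to:
//   case ISD::SELECT:
//     return Ordered(DAG, DAG.GetOrdering(Op.getNode()), LowerSELECT(Op, DAG));
// so every custom-lowered node inherits the ordering of the node it replaces.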
SDValue
AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
  default:
    Op.getNode()->dump();
    assert(0 && "Custom lowering code for this "
        "instruction is not implemented yet!");
    break;
  LOWER(GlobalAddress);
  LOWER(JumpTable);
  LOWER(ConstantPool);
  LOWER(ExternalSymbol);
  LOWER(SDIV);
  LOWER(SREM);
  LOWER(BUILD_VECTOR);
  LOWER(SELECT);
  LOWER(SETCC);
  LOWER(SIGN_EXTEND_INREG);
  LOWER(DYNAMIC_STACKALLOC);
  LOWER(BRCOND);
  LOWER(BR_CC);
  }
  return Op;
}
SDValue
AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
{
  SDValue DST;
  const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
  const GlobalValue *G = GADN->getGlobal();
  DebugLoc DL = Op.getDebugLoc();
  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
  if (!GV) {
    DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
  } else {
    if (GV->hasInitializer()) {
      const Constant *C = dyn_cast<Constant>(GV->getInitializer());
      if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
        DST = DAG.getConstant(CI->getValue(), Op.getValueType());
      } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
        DST = DAG.getConstantFP(CF->getValueAPF(),
            Op.getValueType());
      } else if (dyn_cast<ConstantAggregateZero>(C)) {
        EVT VT = Op.getValueType();
        if (VT.isInteger()) {
          DST = DAG.getConstant(0, VT);
        } else {
          DST = DAG.getConstantFP(0, VT);
        }
      } else {
        assert(!"lowering this type of Global Address "
            "not implemented yet!");
        DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
      }
    } else {
      DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
    }
  }
  return DST;
}
SDValue
AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
{
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
  return Result;
}
SDValue
AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
{
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  EVT PtrVT = Op.getValueType();
  SDValue Result;
  if (CP->isMachineConstantPoolEntry()) {
    Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
        CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
  } else {
    Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
        CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
  }
  return Result;
}
SDValue
AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
{
  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
  SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32);
  return Result;
}
/// LowerFormalArguments - transform physical registers into
/// virtual registers and generate load operations for
/// arguments placed on the stack.
/// TODO: isVarArg, hasStructRet, isMemReg
SDValue
AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  //const Function *Fn = MF.getFunction();
  //MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SmallVector<CCValAssign, 16> ArgLocs;
  CallingConv::ID CC = MF.getFunction()->getCallingConv();
  //bool hasStructRet = MF.getFunction()->hasStructRetAttr();

  CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
      getTargetMachine(), ArgLocs, *DAG.getContext());

  // When more calling conventions are added, they need to be chosen here
  CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);

  //unsigned int FirstStackArgLoc = 0;

  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (VA.isRegLoc()) {
      EVT RegVT = VA.getLocVT();
      const TargetRegisterClass *RC = getRegClassFor(
          RegVT.getSimpleVT().SimpleTy);

      unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(
          Chain,
          dl,
          Reg,
          RegVT);
      // If this is an 8 or 16-bit value, it is really passed
      // promoted to 32 bits. Insert an assert[sz]ext to capture
      // this, then truncate to the right size.

      if (VA.getLocInfo() == CCValAssign::SExt) {
        ArgValue = DAG.getNode(
            ISD::AssertSext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      } else if (VA.getLocInfo() == CCValAssign::ZExt) {
        ArgValue = DAG.getNode(
            ISD::AssertZext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      }
      if (VA.getLocInfo() != CCValAssign::Full) {
        ArgValue = DAG.getNode(
            ISD::TRUNCATE,
            dl,
            VA.getValVT(),
            ArgValue);
      }
      // Add the value to the list of arguments
      // to be passed in registers
      InVals.push_back(ArgValue);
      if (isVarArg) {
        assert(0 && "Variable arguments are not yet supported");
        // See MipsISelLowering.cpp for ideas on how to implement
      }
    } else if (VA.isMemLoc()) {
      InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
            dl, DAG, VA, MFI, i));
    } else {
      assert(0 && "found a Value Assign that is "
          "neither a register nor a memory location");
    }
  }
  /*if (hasStructRet) {
    assert(0 && "Has struct return is not yet implemented");
    // See MipsISelLowering.cpp for ideas on how to implement
  }*/

  if (isVarArg) {
    assert(0 && "Variable arguments are not yet supported");
    // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
  }
  // This needs to be changed to non-zero if the return function needs
  // to have a register argument
  return Chain;
}
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" with size and alignment information specified by
/// the specific parameter attribute. The copy will be passed as a byval
/// function parameter.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
    ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
  assert(0 && "MemCopy does not exist yet");
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);

  return DAG.getMemcpy(Chain,
      Src.getDebugLoc(),
      Dst, Src, SizeNode, Flags.getByValAlign(),
      /*IsVol=*/false, /*AlwaysInline=*/true,
      MachinePointerInfo(), MachinePointerInfo());
}
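// AlwaysInline is set above because the target appears to have no memcpy
// libcall to fall back on (see the assert), so any byval copy must be fully
// expanded into inline loads and stores.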
SDValue
AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
    SDValue StackPtr, SDValue Arg,
    DebugLoc dl, SelectionDAG &DAG,
    const CCValAssign &VA,
    ISD::ArgFlagsTy Flags) const
{
  unsigned int LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD,
      dl,
      getPointerTy(), StackPtr, PtrOff);
  if (Flags.isByVal()) {
    PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
  } else {
    PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
        MachinePointerInfo::getStack(LocMemOffset),
        false, false, 0);
  }
  return PtrOff;
}
/// LowerCall - Function arguments are copied from virtual
/// regs to (physical regs)/(stack frame); CALLSEQ_START and
/// CALLSEQ_END are emitted.
/// TODO: isVarArg, isTailCall, hasStructRet
SDValue
AMDILTargetLowering::LowerCall(CallLoweringInfo &CLI,
    SmallVectorImpl<SDValue> &InVals) const
#if 0
    SDValue Chain, SDValue Callee,
    CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
    bool &isTailCall,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const
#endif
{
  CLI.IsTailCall = false;
  MachineFunction& MF = CLI.DAG.getMachineFunction();
  // FIXME: Do we need to handle fast calling conventions and tail call
  // optimizations? X86/PPC ISelLowering
  /*bool hasStructRet = (TheCall->getNumArgs())
    ? TheCall->getArgFlags(0).device()->isSRet()
    : false;*/

  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Analyze operands of the call, assigning locations to each operand
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CLI.CallConv, CLI.IsVarArg, CLI.DAG.getMachineFunction(),
      getTargetMachine(), ArgLocs, *CLI.DAG.getContext());
  // Analyze the calling operands, but need to change
  // if we have more than one calling convention
  CCInfo.AnalyzeCallOperands(CLI.Outs, CCAssignFnForNode(CLI.CallConv));

  unsigned int NumBytes = CCInfo.getNextStackOffset();
  if (CLI.IsTailCall) {
    assert(CLI.IsTailCall && "Tail Call not handled yet!");
    // See X86/PPC ISelLowering
  }

  CLI.Chain = CLI.DAG.getCALLSEQ_START(CLI.Chain,
      CLI.DAG.getIntPtrConstant(NumBytes, true));

  SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  //unsigned int FirstStacArgLoc = 0;
  //int LastArgStackLoc = 0;
  // Walk the register/memloc assignments, insert copies/loads
  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
    // Arguments start after the 5 first operands of ISD::CALL
    SDValue Arg = CLI.OutVals[i];
    // Promote the value if needed
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = CLI.DAG.getNode(ISD::SIGN_EXTEND,
          CLI.DL,
          VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = CLI.DAG.getNode(ISD::ZERO_EXTEND,
          CLI.DL,
          VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = CLI.DAG.getNode(ISD::ANY_EXTEND,
          CLI.DL,
          VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (VA.isMemLoc()) {
      // Create the frame index object for this incoming parameter
      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
          VA.getLocMemOffset(), true);
      SDValue PtrOff = CLI.DAG.getFrameIndex(FI, getPointerTy());

      // Emit an ISD::STORE which stores the
      // parameter value to a stack location
      MemOpChains.push_back(CLI.DAG.getStore(CLI.Chain, CLI.DL, Arg, PtrOff,
            MachinePointerInfo::getFixedStack(FI),
            false, false, 0));
    } else {
      assert(0 && "Not a Reg/Mem Loc, major error!");
    }
  }
  if (!MemOpChains.empty()) {
    CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor,
        CLI.DL,
        MVT::Other,
        &MemOpChains[0],
        MemOpChains.size());
  }
  SDValue InFlag;
  if (!CLI.IsTailCall) {
    for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
      CLI.Chain = CLI.DAG.getCopyToReg(CLI.Chain,
          CLI.DL,
          RegsToPass[i].first,
          RegsToPass[i].second,
          InFlag);
      InFlag = CLI.Chain.getValue(1);
    }
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
  // every direct call is) turn it into a TargetGlobalAddress/
  // TargetExternalSymbol node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(CLI.Callee)) {
    CLI.Callee = CLI.DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL,
        getPointerTy());
  }
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(CLI.Callee)) {
    CLI.Callee = CLI.DAG.getTargetExternalSymbol(S->getSymbol(),
        getPointerTy());
  }
  else if (CLI.IsTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1708
  }
  // Returns a chain and a flag for the retval copy to use
  SDVTList NodeTys = CLI.DAG.getVTList(MVT::Other, MVTGLUE);
  SmallVector<SDValue, 8> Ops;

  if (CLI.IsTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1721
  }
  // If this is a direct call, pass the chain and the callee
  if (CLI.Callee.getNode()) {
    Ops.push_back(CLI.Chain);
    Ops.push_back(CLI.Callee);
  }

  if (CLI.IsTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1739
  }

  // Add argument registers to the end of the list so that they are known
  // live into the call
  for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
    Ops.push_back(CLI.DAG.getRegister(
          RegsToPass[i].first,
          RegsToPass[i].second.getValueType()));
  }
  if (InFlag.getNode()) {
    Ops.push_back(InFlag);
  }

  if (CLI.IsTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1762
  }

  CLI.Chain = CLI.DAG.getNode(AMDILISD::CALL,
      CLI.DL,
      NodeTys, &Ops[0], Ops.size());
  InFlag = CLI.Chain.getValue(1);

  // Create the CALLSEQ_END node
  CLI.Chain = CLI.DAG.getCALLSEQ_END(
      CLI.Chain,
      CLI.DAG.getIntPtrConstant(NumBytes, true),
      CLI.DAG.getIntPtrConstant(0, true),
      InFlag);
  InFlag = CLI.Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that
  // we return
  return LowerCallResult(CLI.Chain, InFlag, CLI.CallConv, CLI.IsVarArg,
      CLI.Ins, CLI.DL, CLI.DAG, InVals);
}
SDValue
AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSDIV64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSDIV32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16
      || OVT.getScalarType() == MVT::i8) {
    DST = LowerSDIV24(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}
SDValue
AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSREM64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSREM32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16) {
    DST = LowerSREM16(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i8) {
    DST = LowerSREM8(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}
SDValue
AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
{
  EVT VT = Op.getValueType();
  SDValue Nodes1;
  SDValue second;
  SDValue third;
  SDValue fourth;
  DebugLoc DL = Op.getDebugLoc();
  Nodes1 = DAG.getNode(AMDILISD::VBUILD,
      DL,
      VT, Op.getOperand(0));

  bool allEqual = true;
  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
    if (Op.getOperand(0) != Op.getOperand(x)) {
      allEqual = false;
      break;
    }
  }
  if (allEqual) {
    return Nodes1;
  }

  switch (Op.getNumOperands()) {
  default:
  case 1:
    break;
  case 4:
    fourth = Op.getOperand(3);
    if (fourth.getOpcode() != ISD::UNDEF) {
      Nodes1 = DAG.getNode(
          ISD::INSERT_VECTOR_ELT,
          DL,
          VT,
          Nodes1,
          fourth,
          DAG.getConstant(7, MVT::i32));
    }
    // fall through
  case 3:
    third = Op.getOperand(2);
    if (third.getOpcode() != ISD::UNDEF) {
      Nodes1 = DAG.getNode(
          ISD::INSERT_VECTOR_ELT,
          DL,
          VT,
          Nodes1,
          third,
          DAG.getConstant(6, MVT::i32));
    }
    // fall through
  case 2:
    second = Op.getOperand(1);
    if (second.getOpcode() != ISD::UNDEF) {
      Nodes1 = DAG.getNode(
          ISD::INSERT_VECTOR_ELT,
          DL,
          VT,
          Nodes1,
          second,
          DAG.getConstant(5, MVT::i32));
    }
    break;
  }
  return Nodes1;
}
SDValue
AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond = Op.getOperand(0);
  SDValue LHS = Op.getOperand(1);
  SDValue RHS = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  Cond = getConversionNode(DAG, Cond, Op, true);
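  // The condition is bitcast to the result's scalar width so that CMOVLOG,
  // IL's conditional move, can select between LHS and RHS with a mask of the
  // matching size.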
  Cond = DAG.getNode(AMDILISD::CMOVLOG,
      DL,
      Op.getValueType(), Cond, LHS, RHS);
  return Cond;
}
SDValue
AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  unsigned int AMDILCC = CondCCodeToCC(
      SetCCOpcode,
      LHS.getValueType().getSimpleVT().SimpleTy);
  assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
  SDValue Cond = DAG.getSelectCC(
      DL,
      LHS,
      RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      SetCCOpcode);
  Cond = getConversionNode(DAG, Cond, Op, true);
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      Cond.getValueType(),
      DAG.getConstant(1, Cond.getValueType()),
      Cond);
  return Cond;
}
SDValue
AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Data = Op.getOperand(0);
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  if (srcBits < 32) {
    // If the op is less than 32 bits, then it needs to extend to 32 bits
    // so it can properly keep the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
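  // Worked example: sign-extending the low 8 bits of an i32 uses
  // shiftBits = 24, so 0x000000FF becomes 0xFF000000 after the SHL and
  // 0xFFFFFFFF (i.e. -1) after the arithmetic SRA.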
  // Once the sign extension is done, the op needs to be converted to
  // its original type.
  Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  return Data;
}
EVT
AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
{
  int iSize = (size * numEle);
  int vEle = (iSize >> ((size == 64) ? 6 : 5));
  if (!vEle) {
    vEle = 1;
  }
  if (size == 64) {
    if (vEle == 1) {
      return EVT(MVT::i64);
    } else {
      return EVT(MVT::getVectorVT(MVT::i64, vEle));
    }
  } else {
    if (vEle == 1) {
      return EVT(MVT::i32);
    } else {
      return EVT(MVT::getVectorVT(MVT::i32, vEle));
    }
  }
}
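// For example, genIntType(32, 4) computes iSize = 128 and vEle = 128 >> 5 = 4
// and returns v4i32, while genIntType(64, 1) computes vEle = 64 >> 6 = 1 and
// returns the scalar i64.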
SDValue
AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
    SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  unsigned int SPReg = AMDGPU::SP;
  DebugLoc DL = Op.getDebugLoc();
  SDValue SP = DAG.getCopyFromReg(Chain,
      DL,
      SPReg, MVT::i32);
  SDValue NewSP = DAG.getNode(ISD::ADD,
      DL,
      MVT::i32, SP, Size);
  Chain = DAG.getCopyToReg(SP.getValue(1),
      DL,
      SPReg, NewSP);
  SDValue Ops[2] = {NewSP, Chain};
  Chain = DAG.getMergeValues(Ops, 2, DL);
  return Chain;
}
SDValue
AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Jump = Op.getOperand(2);
  SDValue Result;
  Result = DAG.getNode(
      AMDILISD::BRANCH_COND,
      Op.getDebugLoc(),
      Op.getValueType(),
      Chain, Jump, Cond);
  return Result;
}
SDValue
AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue CC = Op.getOperand(1);
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue JumpT = Op.getOperand(4);
  SDValue CmpValue;
  SDValue Result;
  CmpValue = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      MVT::i32,
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Result = DAG.getNode(
      AMDILISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
  return Result;
}
// LowerReturn - Lower an ISD::RET node.
SDValue
AMDILTargetLowering::LowerReturn(SDValue Chain,
    CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    DebugLoc dl, SelectionDAG &DAG) const
{
  //MachineFunction& MF = DAG.getMachineFunction();
  // CCValAssign - represent the assignment of the return value
  // to a location
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
      getTargetMachine(), RVLocs, *DAG.getContext());

  // Analyze return values of ISD::RET
  CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32);
  // If this is the first return lowered for this function, add
  // the regs to the liveout set for the function
  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) {
      MRI.addLiveOut(RVLocs[i].getLocReg());
    }
  }
  // FIXME: implement this when tail call is implemented
  // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
  // both x86 and ppc implement this in ISelLowering

  // Regular return here
  SDValue Flag;
  SmallVector<SDValue, 6> RetOps;
  RetOps.push_back(Chain);
  RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    SDValue ValToCopy = OutVals[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    // ISD::Ret => ret chain, (regnum1, val1), ...
    // So i * 2 + 1 indexes only the regnums
    Chain = DAG.getCopyToReg(Chain,
        dl,
        VA.getLocReg(),
        ValToCopy,
        Flag);
    // Guarantee that all emitted copies are stuck together,
    // avoiding something bad.
    Flag = Chain.getValue(1);
  }
  /*if (MF.getFunction()->hasStructRetAttr()) {
    assert(0 && "Struct returns are not yet implemented!");
    // Both MIPS and X86 have this
  }*/
  RetOps[0] = Chain;  // Update chain.
  // Add the flag if we have it
  if (Flag.getNode()) {
    RetOps.push_back(Flag);
  }
  Flag = DAG.getNode(AMDILISD::RET_FLAG,
      dl,
      MVT::Other, &RetOps[0], RetOps.size());
  return Flag;
}
uint32_t
AMDILTargetLowering::getFunctionAlignment(const Function *) const
{
  return 0;
}
SDValue
AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  MVT INTTY;
  MVT FLTTY;
  if (!OVT.isVector()) {
    INTTY = MVT::i32;
    FLTTY = MVT::f32;
  } else if (OVT.getVectorNumElements() == 2) {
    INTTY = MVT::v2i32;
    FLTTY = MVT::v2f32;
  } else if (OVT.getVectorNumElements() == 4) {
    INTTY = MVT::v4i32;
    FLTTY = MVT::v4f32;
  }
  unsigned bitsize = OVT.getScalarType().getSizeInBits();
  // char|short jq = ia ^ ib;
  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);

  // jq = jq >> (bitsize - 2)
  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));

  // jq = jq | 0x1
  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
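  // At this point jq is +1 when LHS and RHS have the same sign and -1 when
  // they differ; it is the conditional rounding correction added to the
  // truncated quotient at the end of this routine.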
  // jq = (int)jq
  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);

  // int ia = (int)LHS;
  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);

  // int ib = (int)RHS;
  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);

  // float fa = (float)ia;
  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);

  // float fb = (float)ib;
  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);

  // float fq = native_divide(fa, fb);
  SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);

  // fq = trunc(fq);
  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);

  // float fqneg = -fq;
  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);

  // float fr = mad(fqneg, fb, fa);
  SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa);

  // int iq = (int)fq;
  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);

  // fr = fabs(fr);
  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);

  // fb = fabs(fb);
  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);

  // int cv = fr >= fb;
  SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);

  // jq = (cv ? jq : 0);
  jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq,
      DAG.getConstant(0, OVT));

  // dst = iq + jq;
  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
  return iq;
}
SDValue
AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // The LowerSDIV32 function generates the equivalent of the following IL:
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r0, r0, r1
  // ixor r10, r10, r11
  // iadd r0, r0, r10
  // ixor DST, r0, r10

  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getSelectCC(DL,
      r0, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);

  // ilt r11, r1, 0
  SDValue r11 = DAG.getSelectCC(DL,
      r1, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r0, r0, r1
  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // ixor r10, r10, r11
  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}
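// The sequence above exploits a two's-complement identity: with the sign
// mask s = (x < 0 ? -1 : 0), the value (x + s) ^ s equals |x|, and applying
// the same transform with s = -1 negates a value. Both operands are made
// positive, divided with the unsigned UDIV, and the quotient's sign is then
// restored from the XOR of the two sign masks.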
SDValue
AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}
SDValue
AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i8) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i8) {
    INTTY = MVT::v4i32;
  }
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}
SDValue
AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i16) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i16) {
    INTTY = MVT::v4i32;
  }
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}
SDValue
AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // The LowerSREM32 function generates the equivalent of the following IL:
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r20, r0, r1
  // umul r20, r20, r1
  // isub r0, r0, r20
  // iadd r0, r0, r10
  // ixor DST, r0, r10

  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
      r0, DAG.getConstant(0, OVT));

  // ilt r11, r1, 0
  SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
      r1, DAG.getConstant(0, OVT));

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r20, r0, r1
  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // umul r20, r20, r1
  r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1);

  // isub r0, r0, r20
  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}
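// Unlike LowerSDIV32 above, only r10 (the sign mask of LHS) is folded back
// into the result: the sign of a remainder follows the dividend, as in C, so
// r11 is used only to make RHS positive before the unsigned division.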
SDValue
AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}