--- /dev/null
+//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPU_H
+#define AMDGPU_H
+
+#include "AMDGPUTargetMachine.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class FunctionPass;
+class AMDGPUTargetMachine;
+
+// R600 Passes
+FunctionPass* createR600KernelParametersPass(const TargetData* TD);
+FunctionPass *createR600CodeEmitterPass(formatted_raw_ostream &OS);
+
+// SI Passes
+FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
+FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
+
+// Passes common to R600 and SI
+FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
+
+} // End namespace llvm
+
+#endif // AMDGPU_H
--- /dev/null
+//===-- AMDIL.td - AMDIL Tablegen files --*- tablegen -*-------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+
+// Include AMDIL TD files
+include "AMDILBase.td"
+include "AMDILVersion.td"
+
+// Include AMDGPU TD files
+include "R600Schedule.td"
+include "SISchedule.td"
+include "Processors.td"
+include "AMDGPUInstrInfo.td"
+include "AMDGPUIntrinsics.td"
+include "AMDGPURegisterInfo.td"
+include "AMDGPUInstructions.td"
--- /dev/null
+//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers AMDIL machine instructions to the appropriate hardware
+// instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+#include <stdio.h>
+using namespace llvm;
+
+namespace {
+
+class AMDGPUConvertToISAPass : public MachineFunctionPass {
+
+private:
+ static char ID;
+ TargetMachine &TM;
+
+public:
+ AMDGPUConvertToISAPass(TargetMachine &tm) :
+ MachineFunctionPass(ID), TM(tm) { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {return "AMDGPU Convert to ISA";}
+
+};
+
+} // End anonymous namespace
+
+char AMDGPUConvertToISAPass::ID = 0;
+
+FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
+ return new AMDGPUConvertToISAPass(tm);
+}
+
+bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF)
+{
+ const AMDGPUInstrInfo * TII =
+ static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo());
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ MachineInstr &MI = *I;
+ TII->convertToISA(MI, MF, MBB.findDebugLoc(I));
+ }
+ }
+ return false;
+}
--- /dev/null
+//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the parent TargetLowering class for hardware code gen targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUISelLowering.h"
+#include "AMDILIntrinsicInfo.h"
+#include "AMDGPUUtil.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
+ AMDILTargetLowering(TM)
+{
+ // We need to custom lower some of the intrinsics
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+
+ // Library functions. These default to Expand, but we have instructions
+ // for them.
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FEXP2, MVT::f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
+
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+}
+
+SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
+ const
+{
+ switch (Op.getOpcode()) {
+ default: return AMDILTargetLowering::LowerOperation(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
+ }
+}
+
+SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const
+{
+ unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ switch (IntrinsicID) {
+ default: return Op;
+ case AMDGPUIntrinsic::AMDIL_abs:
+ return LowerIntrinsicIABS(Op, DAG);
+ case AMDGPUIntrinsic::AMDIL_exp:
+ return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
+ case AMDGPUIntrinsic::AMDIL_fabs:
+ return DAG.getNode(ISD::FABS, DL, VT, Op.getOperand(1));
+ case AMDGPUIntrinsic::AMDGPU_lrp:
+ return LowerIntrinsicLRP(Op, DAG);
+ case AMDGPUIntrinsic::AMDIL_fraction:
+ return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
+ case AMDGPUIntrinsic::AMDIL_mad:
+ return DAG.getNode(AMDILISD::MAD, DL, VT, Op.getOperand(1),
+ Op.getOperand(2), Op.getOperand(3));
+ case AMDGPUIntrinsic::AMDIL_max:
+ return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDGPU_imax:
+ return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDGPU_umax:
+ return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDIL_min:
+ return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDGPU_imin:
+ return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDGPU_umin:
+ return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
+ Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDIL_round_nearest:
+ return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
+ case AMDGPUIntrinsic::AMDIL_round_posinf:
+ return DAG.getNode(ISD::FCEIL, DL, VT, Op.getOperand(1));
+ }
+}
+
+///IABS(a) = SMAX(sub(0, a), a)
+SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
+ SelectionDAG &DAG) const
+{
+
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
+ Op.getOperand(1));
+
+ return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
+}
+
+/// Linear Interpolation
+/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
+SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
+ SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
+ DAG.getConstantFP(1.0f, MVT::f32),
+ Op.getOperand(1));
+ SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
+ Op.getOperand(3));
+ return DAG.getNode(AMDILISD::MAD, DL, VT, Op.getOperand(1),
+ Op.getOperand(2),
+ OneSubAC);
+}
+
+SDValue AMDGPUTargetLowering::LowerSELECT_CC(SDValue Op,
+ SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue True = Op.getOperand(2);
+ SDValue False = Op.getOperand(3);
+ SDValue CC = Op.getOperand(4);
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+ SDValue Temp;
+
+ // LHS and RHS are guaranteed to be the same value type
+ EVT CompareVT = LHS.getValueType();
+
+ // We need all the operands of SELECT_CC to have the same value type, so if
+ // necessary we need to convert LHS and RHS to be the same type True and
+ // False. True and False are guaranteed to have the same type as this
+ // SELECT_CC node.
+
+ if (CompareVT != VT) {
+ ISD::NodeType ConversionOp = ISD::DELETED_NODE;
+ if (VT == MVT::f32 && CompareVT == MVT::i32) {
+ if (isUnsignedIntSetCC(CCOpcode)) {
+ ConversionOp = ISD::UINT_TO_FP;
+ } else {
+ ConversionOp = ISD::SINT_TO_FP;
+ }
+ } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
+ ConversionOp = ISD::FP_TO_SINT;
+ } else {
+ // I don't think there will be any other type pairings.
+ assert(!"Unhandled operand type parings in SELECT_CC");
+ }
+ // XXX Check the value of LHS and RHS and avoid creating sequences like
+ // (FTOI (ITOF))
+ LHS = DAG.getNode(ConversionOp, DL, VT, LHS);
+ RHS = DAG.getNode(ConversionOp, DL, VT, RHS);
+ }
+
+ // If True is a hardware TRUE value and False is a hardware FALSE value or
+ // vice-versa we can handle this with a native instruction (SET* instructions).
+ if ((isHWTrueValue(True) && isHWFalseValue(False))) {
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
+ }
+
+ // XXX If True is a hardware TRUE value and False is a hardware FALSE value,
+ // we can handle this with a native instruction, but we need to swap true
+ // and false and change the conditional.
+ if (isHWTrueValue(False) && isHWFalseValue(True)) {
+ }
+
+ // XXX Check if we can lower this to a SELECT or if it is supported by a native
+ // operation. (The code below does this but we don't have the Instruction
+ // selection patterns to do this yet.
+#if 0
+ if (isZero(LHS) || isZero(RHS)) {
+ SDValue Cond = (isZero(LHS) ? RHS : LHS);
+ bool SwapTF = false;
+ switch (CCOpcode) {
+ case ISD::SETOEQ:
+ case ISD::SETUEQ:
+ case ISD::SETEQ:
+ SwapTF = true;
+ // Fall through
+ case ISD::SETONE:
+ case ISD::SETUNE:
+ case ISD::SETNE:
+ // We can lower to select
+ if (SwapTF) {
+ Temp = True;
+ True = False;
+ False = Temp;
+ }
+ // CNDE
+ return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
+ default:
+ // Supported by a native operation (CNDGE, CNDGT)
+ return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
+ }
+ }
+#endif
+
+ // If we make it this for it means we have no native instructions to handle
+ // this SELECT_CC, so we must lower it.
+ SDValue HWTrue, HWFalse;
+
+ if (VT == MVT::f32) {
+ HWTrue = DAG.getConstantFP(1.0f, VT);
+ HWFalse = DAG.getConstantFP(0.0f, VT);
+ } else if (VT == MVT::i32) {
+ HWTrue = DAG.getConstant(-1, VT);
+ HWFalse = DAG.getConstant(0, VT);
+ }
+ else {
+ assert(!"Unhandled value type in LowerSELECT_CC");
+ }
+
+ // Lower this unsupported SELECT_CC into a combination of two supported
+ // SELECT_CC operations.
+ SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC);
+
+ return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
+}
+
+
+SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
+ SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+
+ SDValue Num = Op.getOperand(0);
+ SDValue Den = Op.getOperand(1);
+
+ SmallVector<SDValue, 8> Results;
+
+ // RCP = URECIP(Den) = 2^32 / Den + e
+ // e is rounding error.
+ SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
+
+ // RCP_LO = umulo(RCP, Den) */
+ SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
+
+ // RCP_HI = mulhu (RCP, Den) */
+ SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
+
+ // NEG_RCP_LO = -RCP_LO
+ SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
+ RCP_LO);
+
+ // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
+ SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
+ NEG_RCP_LO, RCP_LO,
+ ISD::SETEQ);
+ // Calculate the rounding error from the URECIP instruction
+ // E = mulhu(ABS_RCP_LO, RCP)
+ SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
+
+ // RCP_A_E = RCP + E
+ SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
+
+ // RCP_S_E = RCP - E
+ SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
+
+ // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
+ SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
+ RCP_A_E, RCP_S_E,
+ ISD::SETEQ);
+ // Quotient = mulhu(Tmp0, Num)
+ SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
+
+ // Num_S_Remainder = Quotient * Den
+ SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
+
+ // Remainder = Num - Num_S_Remainder
+ SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
+
+ // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
+ SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
+ DAG.getConstant(-1, VT),
+ DAG.getConstant(0, VT),
+ ISD::SETGE);
+ // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
+ SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
+ DAG.getConstant(0, VT),
+ DAG.getConstant(-1, VT),
+ DAG.getConstant(0, VT),
+ ISD::SETGE);
+ // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
+ SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
+ Remainder_GE_Zero);
+
+ // Calculate Division result:
+
+ // Quotient_A_One = Quotient + 1
+ SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
+ DAG.getConstant(1, VT));
+
+ // Quotient_S_One = Quotient - 1
+ SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
+ DAG.getConstant(1, VT));
+
+ // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
+ SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
+ Quotient, Quotient_A_One, ISD::SETEQ);
+
+ // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
+ Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
+ Quotient_S_One, Div, ISD::SETEQ);
+
+ // Calculate Rem result:
+
+ // Remainder_S_Den = Remainder - Den
+ SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
+
+ // Remainder_A_Den = Remainder + Den
+ SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
+
+ // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
+ SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
+ Remainder, Remainder_S_Den, ISD::SETEQ);
+
+ // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
+ Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
+ Remainder_A_Den, Rem, ISD::SETEQ);
+
+ DAG.ReplaceAllUsesWith(Op.getValue(0).getNode(), &Div);
+ DAG.ReplaceAllUsesWith(Op.getValue(1).getNode(), &Rem);
+
+ return Op;
+}
+
+//===----------------------------------------------------------------------===//
+// Helper functions
+//===----------------------------------------------------------------------===//
+
+bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const
+{
+ if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
+ return CFP->isExactlyValue(1.0);
+ }
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ return C->isAllOnesValue();
+ }
+ return false;
+}
+
+bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const
+{
+ if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
+ return CFP->getValueAPF().isZero();
+ }
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ return C->isNullValue();
+ }
+ return false;
+}
+
+void AMDGPUTargetLowering::addLiveIn(MachineInstr * MI,
+ MachineFunction * MF, MachineRegisterInfo & MRI,
+ const TargetInstrInfo * TII, unsigned reg) const
+{
+ AMDGPU::utilAddLiveIn(MF, MRI, TII, reg, MI->getOperand(0).getReg());
+}
+
+#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
+
+const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const
+{
+ switch (Opcode) {
+ default: return AMDILTargetLowering::getTargetNodeName(Opcode);
+
+ NODE_NAME_CASE(FRACT)
+ NODE_NAME_CASE(FMAX)
+ NODE_NAME_CASE(SMAX)
+ NODE_NAME_CASE(UMAX)
+ NODE_NAME_CASE(FMIN)
+ NODE_NAME_CASE(SMIN)
+ NODE_NAME_CASE(UMIN)
+ NODE_NAME_CASE(URECIP)
+ }
+}
--- /dev/null
+//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the interface defintiion of the TargetLowering class
+// that is common to all AMD GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUISELLOWERING_H
+#define AMDGPUISELLOWERING_H
+
+#include "AMDILISelLowering.h"
+
+namespace llvm {
+
+class AMDGPUTargetLowering : public AMDILTargetLowering
+{
+private:
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
+
+protected:
+
+ /// addLiveIn - This functions adds reg to the live in list of the entry block
+ /// and emits a copy from reg to MI.getOperand(0).
+ ///
+ // Some registers are loaded with values before the program
+ /// begins to execute. The loading of these values is modeled with pseudo
+ /// instructions which are lowered using this function.
+ void addLiveIn(MachineInstr * MI, MachineFunction * MF,
+ MachineRegisterInfo & MRI, const TargetInstrInfo * TII,
+ unsigned reg) const;
+
+ bool isHWTrueValue(SDValue Op) const;
+ bool isHWFalseValue(SDValue Op) const;
+
+public:
+ AMDGPUTargetLowering(TargetMachine &TM);
+
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
+ virtual const char* getTargetNodeName(unsigned Opcode) const;
+
+};
+
+namespace AMDGPUISD
+{
+
+enum
+{
+ AMDGPU_FIRST = AMDILISD::LAST_ISD_NUMBER,
+ BITALIGN,
+ FRACT,
+ FMAX,
+ SMAX,
+ UMAX,
+ FMIN,
+ SMIN,
+ UMIN,
+ URECIP,
+ LAST_AMDGPU_ISD_NUMBER
+};
+
+
+} // End namespace AMDGPUISD
+
+} // End namespace llvm
+
+#endif // AMDGPUISELLOWERING_H
--- /dev/null
+//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the TargetInstrInfo class that is
+// common to all AMD GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+#include "AMDIL.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+AMDGPUInstrInfo::AMDGPUInstrInfo(AMDGPUTargetMachine &tm)
+ : AMDILInstrInfo(tm) { }
+
+void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
+ DebugLoc DL) const
+{
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const AMDGPURegisterInfo & RI = getRegisterInfo();
+
+ for (unsigned i = 0; i < MI.getNumOperands(); i++) {
+ MachineOperand &MO = MI.getOperand(i);
+ // Convert dst regclass to one that is supported by the ISA
+ if (MO.isReg() && MO.isDef()) {
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg());
+ const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass);
+
+ assert(newRegClass);
+
+ MRI.setRegClass(MO.getReg(), newRegClass);
+ }
+ }
+ }
+}
--- /dev/null
+//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definition of a TargetInstrInfo class that is common
+// to all AMD GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUINSTRUCTIONINFO_H_
+#define AMDGPUINSTRUCTIONINFO_H_
+
+#include "AMDGPURegisterInfo.h"
+#include "AMDILInstrInfo.h"
+
+#include <map>
+
+namespace llvm {
+
+class AMDGPUTargetMachine;
+class MachineFunction;
+class MachineInstr;
+class MachineInstrBuilder;
+
+class AMDGPUInstrInfo : public AMDILInstrInfo {
+
+public:
+ explicit AMDGPUInstrInfo(AMDGPUTargetMachine &tm);
+
+ virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
+
+ /// convertToISA - Convert the AMDIL MachineInstr to a supported ISA
+ /// MachineInstr
+ virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
+ DebugLoc DL) const;
+
+};
+
+} // End llvm namespace
+
+#endif // AMDGPUINSTRINFO_H_
--- /dev/null
+//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains DAG node defintions for the AMDGPU target.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// AMDGPU DAG Profiles
+//===----------------------------------------------------------------------===//
+
+def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
+]>;
+
+//===----------------------------------------------------------------------===//
+// AMDGPU DAG Nodes
+//
+
+// out = ((a << 32) | b) >> c)
+//
+// Can be used to optimize rtol:
+// rotl(a, b) = bitalign(a, a, 32 - b)
+def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;
+
+// out = a - floor(a)
+def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
+
+// out = max(a, b) a and b are floats
+def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = max(a, b) a and b are signed ints
+def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = max(a, b) a and b are unsigned ints
+def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = min(a, b) a and b are floats
+def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = min(a, b) a snd b are signed ints
+def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// out = min(a, b) a and b are unsigned ints
+def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+// urecip - This operation is a helper for integer division, it returns the
+// result of 1 / a as a fractional unsigned integer.
+// out = (2^32 / a) + e
+// e is rounding error
+def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
--- /dev/null
+//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains instruction defs that are common to all hw codegen
+// targets.
+//
+//===----------------------------------------------------------------------===//
+
+class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
+ field bits<16> AMDILOp = 0;
+ field bits<3> Gen = 0;
+
+ let Namespace = "AMDGPU";
+ let OutOperandList = outs;
+ let InOperandList = ins;
+ let AsmString = asm;
+ let Pattern = pattern;
+ let Itinerary = NullALU;
+ let TSFlags{42-40} = Gen;
+ let TSFlags{63-48} = AMDILOp;
+}
+
+class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
+ : AMDGPUInst<outs, ins, asm, pattern> {
+
+ field bits<32> Inst = 0xffffffff;
+
+}
+
+class Constants {
+int TWO_PI = 0x40c90fdb;
+int PI = 0x40490fdb;
+int TWO_PI_INV = 0x3e22f983;
+}
+def CONST : Constants;
+
+def FP_ZERO : PatLeaf <
+ (fpimm),
+ [{return N->getValueAPF().isZero();}]
+>;
+
+def FP_ONE : PatLeaf <
+ (fpimm),
+ [{return N->isExactlyValue(1.0);}]
+>;
+
+let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in {
+
+class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
+ (outs rc:$dst),
+ (ins rc:$src0),
+ "CLAMP $dst, $src0",
+ [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
+>;
+
+class FABS <RegisterClass rc> : AMDGPUShaderInst <
+ (outs rc:$dst),
+ (ins rc:$src0),
+ "FABS $dst, $src0",
+ [(set rc:$dst, (fabs rc:$src0))]
+>;
+
+class FNEG <RegisterClass rc> : AMDGPUShaderInst <
+ (outs rc:$dst),
+ (ins rc:$src0),
+ "FNEG $dst, $src0",
+ [(set rc:$dst, (fneg rc:$src0))]
+>;
+
+} // End isCodeGenOnly = 1, isPseudo = 1, hasCustomInserter = 1
+
+/* Generic helper patterns for intrinsics */
+/* -------------------------------------- */
+
+class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul,
+ RegisterClass rc> : Pat <
+ (int_AMDGPU_pow rc:$src0, rc:$src1),
+ (exp_ieee (mul rc:$src1, (log_ieee rc:$src0)))
+>;
+
+/* Other helper patterns */
+/* --------------------- */
+
+/* Extract element pattern */
+class Extract_Element <ValueType sub_type, ValueType vec_type,
+ RegisterClass vec_class, int sub_idx,
+ SubRegIndex sub_reg>: Pat<
+ (sub_type (vector_extract (vec_type vec_class:$src), sub_idx)),
+ (EXTRACT_SUBREG vec_class:$src, sub_reg)
+>;
+
+/* Insert element pattern */
+class Insert_Element <ValueType elem_type, ValueType vec_type,
+ RegisterClass elem_class, RegisterClass vec_class,
+ int sub_idx, SubRegIndex sub_reg> : Pat <
+
+ (vec_type (vector_insert (vec_type vec_class:$vec),
+ (elem_type elem_class:$elem), sub_idx)),
+ (INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg)
+>;
+
+// Vector Build pattern
+class Vector_Build <ValueType vecType, RegisterClass elemClass> : Pat <
+ (IL_vbuild elemClass:$src),
+ (INSERT_SUBREG (vecType (IMPLICIT_DEF)), elemClass:$src, sel_x)
+>;
+
+// bitconvert pattern
+class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
+ (dt (bitconvert (st rc:$src0))),
+ (dt rc:$src0)
+>;
+
+include "R600Instructions.td"
+
+include "SIInstrInfo.td"
+
--- /dev/null
+//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines intrinsics that are used by all hw codegen targets.
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "AMDGPU", isTarget = 1 in {
+
+ def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
+
+ def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_cos : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+ def int_AMDGPU_floor : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
+ def int_AMDGPU_kilp : Intrinsic<[], [], []>;
+ def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_sin : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_ssg : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+}
+
+let TargetPrefix = "TGSI", isTarget = 1 in {
+
+ def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[]>;
+}
+
+include "SIIntrinsics.td"
--- /dev/null
+//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Parent TargetRegisterInfo class common to all hw codegen targets.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPURegisterInfo.h"
+#include "AMDGPUTargetMachine.h"
+
+using namespace llvm;
+
+AMDGPURegisterInfo::AMDGPURegisterInfo(AMDGPUTargetMachine &tm,
+ const TargetInstrInfo &tii)
+: AMDILRegisterInfo(tm, tii),
+ TM(tm),
+ TII(tii)
+ { }
--- /dev/null
+//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the TargetRegisterInfo interface that is implemented
+// by all hw codegen targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPUREGISTERINFO_H_
+#define AMDGPUREGISTERINFO_H_
+
+#include "AMDILRegisterInfo.h"
+
+namespace llvm {
+
+class AMDGPUTargetMachine;
+class TargetInstrInfo;
+
+struct AMDGPURegisterInfo : public AMDILRegisterInfo
+{
+ AMDGPUTargetMachine &TM;
+ const TargetInstrInfo &TII;
+
+ AMDGPURegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
+
+ virtual BitVector getReservedRegs(const MachineFunction &MF) const = 0;
+
+ /// getISARegClass - rc is an AMDIL reg class. This function returns the
+ /// ISA reg class that is equivalent to the given AMDIL reg class.
+ virtual const TargetRegisterClass *
+ getISARegClass(const TargetRegisterClass * rc) const = 0;
+};
+
+} // End namespace llvm
+
+#endif // AMDIDSAREGISTERINFO_H_
--- /dev/null
+//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Tablegen register definitions common to all hw codegen targets.
+//
+//===----------------------------------------------------------------------===//
+
+let Namespace = "AMDGPU" in {
+ def sel_x : SubRegIndex;
+ def sel_y : SubRegIndex;
+ def sel_z : SubRegIndex;
+ def sel_w : SubRegIndex;
+}
+
+include "R600RegisterInfo.td"
+include "SIRegisterInfo.td"
--- /dev/null
+//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDIL ---*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file declares the AMDGPU specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _AMDGPUSUBTARGET_H_
+#define _AMDGPUSUBTARGET_H_
+#include "AMDILSubtarget.h"
+
+namespace llvm {
+
+class AMDGPUSubtarget : public AMDILSubtarget
+{
+ InstrItineraryData InstrItins;
+
+public:
+ AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
+ AMDILSubtarget(TT, CPU, FS)
+ {
+ InstrItins = getInstrItineraryForCPU(CPU);
+ }
+
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+};
+
+} // End namespace llvm
+
+#endif // AMDGPUSUBTARGET_H_
--- /dev/null
+//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The AMDGPU target machine contains all of the hardware specific information
+// needed to emit code for R600 and SI GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUTargetMachine.h"
+#include "AMDGPU.h"
+#include "R600ISelLowering.h"
+#include "R600InstrInfo.h"
+#include "SIISelLowering.h"
+#include "SIInstrInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+extern "C" void LLVMInitializeAMDGPUTarget() {
+ // Register the target
+ RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
+}
+
+AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ TargetOptions Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OptLevel
+)
+:
+ LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
+ Subtarget(TT, CPU, FS),
+ DataLayout(Subtarget.getDataLayout()),
+ FrameLowering(TargetFrameLowering::StackGrowsUp,
+ Subtarget.device()->getStackAlignment(), 0),
+ IntrinsicInfo(this),
+ InstrItins(&Subtarget.getInstrItineraryData()),
+ mDump(false)
+
+{
+ // TLInfo uses InstrInfo so it must be initialized after.
+ if (Subtarget.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
+ InstrInfo = new R600InstrInfo(*this);
+ TLInfo = new R600TargetLowering(*this);
+ } else {
+ InstrInfo = new SIInstrInfo(*this);
+ TLInfo = new SITargetLowering(*this);
+ }
+}
+
+AMDGPUTargetMachine::~AMDGPUTargetMachine()
+{
+}
+
+bool AMDGPUTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ bool DisableVerify,
+ AnalysisID StartAfter,
+ AnalysisID StopAfter) {
+ // XXX: Hack here addPassesToEmitFile will fail, but this is Ok since we are
+ // only using it to access addPassesToGenerateCode()
+ bool fail = LLVMTargetMachine::addPassesToEmitFile(PM, Out, FileType,
+ DisableVerify);
+ assert(fail);
+
+ const AMDILSubtarget &STM = getSubtarget<AMDILSubtarget>();
+ std::string gpu = STM.getDeviceName();
+ if (gpu == "SI") {
+ PM.add(createSICodeEmitterPass(Out));
+ } else if (Subtarget.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
+ PM.add(createR600CodeEmitterPass(Out));
+ } else {
+ abort();
+ return true;
+ }
+ PM.add(createGCInfoDeleter());
+
+ return false;
+}
+
+namespace {
+class AMDGPUPassConfig : public TargetPassConfig {
+public:
+ AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
+ return getTM<AMDGPUTargetMachine>();
+ }
+
+ virtual bool addPreISel();
+ virtual bool addInstSelector();
+ virtual bool addPreRegAlloc();
+ virtual bool addPostRegAlloc();
+ virtual bool addPreSched2();
+ virtual bool addPreEmitPass();
+};
+} // End of anonymous namespace
+
+TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new AMDGPUPassConfig(this, PM);
+}
+
+bool
+AMDGPUPassConfig::addPreISel()
+{
+ const AMDILSubtarget &ST = TM->getSubtarget<AMDILSubtarget>();
+ if (ST.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
+ addPass(createR600KernelParametersPass(
+ getAMDGPUTargetMachine().getTargetData()));
+ }
+ return false;
+}
+
+bool AMDGPUPassConfig::addInstSelector() {
+ addPass(createAMDILPeepholeOpt(*TM));
+ addPass(createAMDILISelDag(getAMDGPUTargetMachine()));
+ return false;
+}
+
+bool AMDGPUPassConfig::addPreRegAlloc() {
+ const AMDILSubtarget &ST = TM->getSubtarget<AMDILSubtarget>();
+
+ if (ST.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
+ addPass(createSIAssignInterpRegsPass(*TM));
+ }
+ addPass(createAMDGPUConvertToISAPass(*TM));
+ return false;
+}
+
+bool AMDGPUPassConfig::addPostRegAlloc() {
+ return false;
+}
+
+bool AMDGPUPassConfig::addPreSched2() {
+ return false;
+}
+
+bool AMDGPUPassConfig::addPreEmitPass() {
+ addPass(createAMDILCFGPreparationPass(*TM));
+ addPass(createAMDILCFGStructurizerPass(*TM));
+
+ return false;
+}
+
--- /dev/null
+//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The AMDGPU TargetMachine interface definition for hw codgen targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPU_TARGET_MACHINE_H
+#define AMDGPU_TARGET_MACHINE_H
+
+#include "AMDGPUInstrInfo.h"
+#include "AMDGPUSubtarget.h"
+#include "AMDILFrameLowering.h"
+#include "AMDILIntrinsicInfo.h"
+#include "R600ISelLowering.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+
+MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT);
+
+class AMDGPUTargetMachine : public LLVMTargetMachine {
+
+ AMDGPUSubtarget Subtarget;
+ const TargetData DataLayout;
+ AMDILFrameLowering FrameLowering;
+ AMDILIntrinsicInfo IntrinsicInfo;
+ const AMDGPUInstrInfo * InstrInfo;
+ AMDGPUTargetLowering * TLInfo;
+ const InstrItineraryData* InstrItins;
+ bool mDump;
+
+public:
+ AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef FS,
+ StringRef CPU,
+ TargetOptions Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+ ~AMDGPUTargetMachine();
+ virtual const AMDILFrameLowering* getFrameLowering() const {
+ return &FrameLowering;
+ }
+ virtual const AMDILIntrinsicInfo* getIntrinsicInfo() const {
+ return &IntrinsicInfo;
+ }
+ virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;}
+ virtual const AMDGPUSubtarget *getSubtargetImpl() const {return &Subtarget; }
+ virtual const AMDGPURegisterInfo *getRegisterInfo() const {
+ return &InstrInfo->getRegisterInfo();
+ }
+ virtual AMDGPUTargetLowering * getTargetLowering() const {
+ return TLInfo;
+ }
+ virtual const InstrItineraryData* getInstrItineraryData() const {
+ return InstrItins;
+ }
+ virtual const TargetData* getTargetData() const { return &DataLayout; }
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+ virtual bool addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ bool DisableVerify,
+ AnalysisID StartAfter = 0,
+ AnalysisID StopAfter = 0);
+};
+
+} // End namespace llvm
+
+#endif // AMDGPU_TARGET_MACHINE_H
--- /dev/null
+//===-- AMDGPUUtil.cpp - AMDGPU Utility functions -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Common utility functions used by hw codegen targets
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUUtil.h"
+#include "AMDGPURegisterInfo.h"
+#include "AMDIL.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+// Some instructions act as place holders to emulate operations that the GPU
+// hardware does automatically. This function can be used to check if
+// an opcode falls into this category.
+bool AMDGPU::isPlaceHolderOpcode(unsigned opcode)
+{
+ switch (opcode) {
+ default: return false;
+ case AMDGPU::RETURN:
+ case AMDGPU::LOAD_INPUT:
+ case AMDGPU::LAST:
+ case AMDGPU::MASK_WRITE:
+ case AMDGPU::RESERVE_REG:
+ return true;
+ }
+}
+
+bool AMDGPU::isTransOp(unsigned opcode)
+{
+ switch(opcode) {
+ default: return false;
+
+ case AMDGPU::COS_r600:
+ case AMDGPU::COS_eg:
+ case AMDGPU::MULLIT:
+ case AMDGPU::MUL_LIT_r600:
+ case AMDGPU::MUL_LIT_eg:
+ case AMDGPU::EXP_IEEE_r600:
+ case AMDGPU::EXP_IEEE_eg:
+ case AMDGPU::LOG_CLAMPED_r600:
+ case AMDGPU::LOG_IEEE_r600:
+ case AMDGPU::LOG_CLAMPED_eg:
+ case AMDGPU::LOG_IEEE_eg:
+ return true;
+ }
+}
+
+bool AMDGPU::isTexOp(unsigned opcode)
+{
+ switch(opcode) {
+ default: return false;
+ case AMDGPU::TEX_LD:
+ case AMDGPU::TEX_GET_TEXTURE_RESINFO:
+ case AMDGPU::TEX_SAMPLE:
+ case AMDGPU::TEX_SAMPLE_C:
+ case AMDGPU::TEX_SAMPLE_L:
+ case AMDGPU::TEX_SAMPLE_C_L:
+ case AMDGPU::TEX_SAMPLE_LB:
+ case AMDGPU::TEX_SAMPLE_C_LB:
+ case AMDGPU::TEX_SAMPLE_G:
+ case AMDGPU::TEX_SAMPLE_C_G:
+ case AMDGPU::TEX_GET_GRADIENTS_H:
+ case AMDGPU::TEX_GET_GRADIENTS_V:
+ case AMDGPU::TEX_SET_GRADIENTS_H:
+ case AMDGPU::TEX_SET_GRADIENTS_V:
+ return true;
+ }
+}
+
+bool AMDGPU::isReductionOp(unsigned opcode)
+{
+ switch(opcode) {
+ default: return false;
+ case AMDGPU::DOT4_r600:
+ case AMDGPU::DOT4_eg:
+ return true;
+ }
+}
+
+bool AMDGPU::isCubeOp(unsigned opcode)
+{
+ switch(opcode) {
+ default: return false;
+ case AMDGPU::CUBE_r600:
+ case AMDGPU::CUBE_eg:
+ return true;
+ }
+}
+
+
+bool AMDGPU::isFCOp(unsigned opcode)
+{
+ switch(opcode) {
+ default: return false;
+ case AMDGPU::BREAK_LOGICALZ_f32:
+ case AMDGPU::BREAK_LOGICALNZ_i32:
+ case AMDGPU::BREAK_LOGICALZ_i32:
+ case AMDGPU::BREAK_LOGICALNZ_f32:
+ case AMDGPU::CONTINUE_LOGICALNZ_f32:
+ case AMDGPU::IF_LOGICALNZ_i32:
+ case AMDGPU::IF_LOGICALZ_f32:
+ case AMDGPU::ELSE:
+ case AMDGPU::ENDIF:
+ case AMDGPU::ENDLOOP:
+ case AMDGPU::IF_LOGICALNZ_f32:
+ case AMDGPU::WHILELOOP:
+ return true;
+ }
+}
+
+void AMDGPU::utilAddLiveIn(MachineFunction * MF,
+ MachineRegisterInfo & MRI,
+ const TargetInstrInfo * TII,
+ unsigned physReg, unsigned virtReg)
+{
+ if (!MRI.isLiveIn(physReg)) {
+ MRI.addLiveIn(physReg, virtReg);
+ MF->front().addLiveIn(physReg);
+ BuildMI(MF->front(), MF->front().begin(), DebugLoc(),
+ TII->get(TargetOpcode::COPY), virtReg)
+ .addReg(physReg);
+ } else {
+ MRI.replaceRegWith(virtReg, MRI.getLiveInVirtReg(physReg));
+ }
+}
--- /dev/null
+//===-- AMDGPUUtil.h - AMDGPU Utility function declarations -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Declarations for utility functions common to all hw codegen targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDGPU_UTIL_H
+#define AMDGPU_UTIL_H
+
+namespace llvm {
+
+class MachineFunction;
+class MachineRegisterInfo;
+class TargetInstrInfo;
+
+namespace AMDGPU {
+
+bool isPlaceHolderOpcode(unsigned opcode);
+
+bool isTransOp(unsigned opcode);
+bool isTexOp(unsigned opcode);
+bool isReductionOp(unsigned opcode);
+bool isCubeOp(unsigned opcode);
+bool isFCOp(unsigned opcode);
+
+// XXX: Move these to AMDGPUInstrInfo.h
+#define MO_FLAG_CLAMP (1 << 0)
+#define MO_FLAG_NEG (1 << 1)
+#define MO_FLAG_ABS (1 << 2)
+#define MO_FLAG_MASK (1 << 3)
+
+void utilAddLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI,
+ const TargetInstrInfo * TII, unsigned physReg, unsigned virtReg);
+
+} // End namespace AMDGPU
+
+} // End namespace llvm
+
+#endif // AMDGPU_UTIL_H
--- /dev/null
+//===-- AMDIL.h - Top-level interface for AMDIL representation --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// AMDIL back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDIL_H_
+#define AMDIL_H_
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define AMDIL_MAJOR_VERSION 2
+#define AMDIL_MINOR_VERSION 0
+#define AMDIL_REVISION_NUMBER 74
+#define ARENA_SEGMENT_RESERVED_UAVS 12
+#define DEFAULT_ARENA_UAV_ID 8
+#define DEFAULT_RAW_UAV_ID 7
+#define GLOBAL_RETURN_RAW_UAV_ID 11
+#define HW_MAX_NUM_CB 8
+#define MAX_NUM_UNIQUE_UAVS 8
+#define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8
+#define OPENCL_MAX_READ_IMAGES 128
+#define OPENCL_MAX_WRITE_IMAGES 8
+#define OPENCL_MAX_SAMPLERS 16
+
+// The next two values can never be zero, as zero is the ID that is
+// used to assert against.
+#define DEFAULT_LDS_ID 1
+#define DEFAULT_GDS_ID 1
+#define DEFAULT_SCRATCH_ID 1
+#define DEFAULT_VEC_SLOTS 8
+
+// SC->CAL version matchings.
+#define CAL_VERSION_SC_150 1700
+#define CAL_VERSION_SC_149 1700
+#define CAL_VERSION_SC_148 1525
+#define CAL_VERSION_SC_147 1525
+#define CAL_VERSION_SC_146 1525
+#define CAL_VERSION_SC_145 1451
+#define CAL_VERSION_SC_144 1451
+#define CAL_VERSION_SC_143 1441
+#define CAL_VERSION_SC_142 1441
+#define CAL_VERSION_SC_141 1420
+#define CAL_VERSION_SC_140 1400
+#define CAL_VERSION_SC_139 1387
+#define CAL_VERSION_SC_138 1387
+#define CAL_APPEND_BUFFER_SUPPORT 1340
+#define CAL_VERSION_SC_137 1331
+#define CAL_VERSION_SC_136 982
+#define CAL_VERSION_SC_135 950
+#define CAL_VERSION_GLOBAL_RETURN_BUFFER 990
+
+#define OCL_DEVICE_RV710 0x0001
+#define OCL_DEVICE_RV730 0x0002
+#define OCL_DEVICE_RV770 0x0004
+#define OCL_DEVICE_CEDAR 0x0008
+#define OCL_DEVICE_REDWOOD 0x0010
+#define OCL_DEVICE_JUNIPER 0x0020
+#define OCL_DEVICE_CYPRESS 0x0040
+#define OCL_DEVICE_CAICOS 0x0080
+#define OCL_DEVICE_TURKS 0x0100
+#define OCL_DEVICE_BARTS 0x0200
+#define OCL_DEVICE_CAYMAN 0x0400
+#define OCL_DEVICE_ALL 0x3FFF
+
+/// The number of function ID's that are reserved for
+/// internal compiler usage.
+const unsigned int RESERVED_FUNCS = 1024;
+
+#define AMDIL_OPT_LEVEL_DECL
+#define AMDIL_OPT_LEVEL_VAR
+#define AMDIL_OPT_LEVEL_VAR_NO_COMMA
+
+namespace llvm {
+class AMDILInstrPrinter;
+class FunctionPass;
+class MCAsmInfo;
+class raw_ostream;
+class Target;
+class TargetMachine;
+
+/// Instruction selection passes.
+FunctionPass*
+ createAMDILISelDag(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
+FunctionPass*
+ createAMDILPeepholeOpt(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
+
+/// Pre emit passes.
+FunctionPass*
+ createAMDILCFGPreparationPass(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
+FunctionPass*
+ createAMDILCFGStructurizerPass(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
+
+extern Target TheAMDILTarget;
+extern Target TheAMDGPUTarget;
+} // end namespace llvm;
+
+#define GET_REGINFO_ENUM
+#include "AMDGPUGenRegisterInfo.inc"
+#define GET_INSTRINFO_ENUM
+#include "AMDGPUGenInstrInfo.inc"
+
+/// Include device information enumerations
+#include "AMDILDeviceInfo.h"
+
+namespace llvm {
+/// OpenCL uses address spaces to differentiate between
+/// various memory regions on the hardware. On the CPU
+/// all of the address spaces point to the same memory,
+/// however on the GPU, each address space points to
+/// a seperate piece of memory that is unique from other
+/// memory locations.
+namespace AMDILAS {
+enum AddressSpaces {
+ PRIVATE_ADDRESS = 0, // Address space for private memory.
+ GLOBAL_ADDRESS = 1, // Address space for global memory (RAT0, VTX0).
+ CONSTANT_ADDRESS = 2, // Address space for constant memory.
+ LOCAL_ADDRESS = 3, // Address space for local memory.
+ REGION_ADDRESS = 4, // Address space for region memory.
+ ADDRESS_NONE = 5, // Address space for unknown memory.
+ PARAM_D_ADDRESS = 6, // Address space for direct addressible parameter memory (CONST0)
+ PARAM_I_ADDRESS = 7, // Address space for indirect addressible parameter memory (VTX1)
+ USER_SGPR_ADDRESS = 8, // Address space for USER_SGPRS on SI
+ LAST_ADDRESS = 9
+};
+
+// This union/struct combination is an easy way to read out the
+// exact bits that are needed.
+typedef union ResourceRec {
+ struct {
+#ifdef __BIG_ENDIAN__
+ unsigned short isImage : 1; // Reserved for future use/llvm.
+ unsigned short ResourceID : 10; // Flag to specify the resourece ID for
+ // the op.
+ unsigned short HardwareInst : 1; // Flag to specify that this instruction
+ // is a hardware instruction.
+ unsigned short ConflictPtr : 1; // Flag to specify that the pointer has a
+ // conflict.
+ unsigned short ByteStore : 1; // Flag to specify if the op is a byte
+ // store op.
+ unsigned short PointerPath : 1; // Flag to specify if the op is on the
+ // pointer path.
+ unsigned short CacheableRead : 1; // Flag to specify if the read is
+ // cacheable.
+#else
+ unsigned short CacheableRead : 1; // Flag to specify if the read is
+ // cacheable.
+ unsigned short PointerPath : 1; // Flag to specify if the op is on the
+ // pointer path.
+ unsigned short ByteStore : 1; // Flag to specify if the op is byte
+ // store op.
+ unsigned short ConflictPtr : 1; // Flag to specify that the pointer has
+ // a conflict.
+ unsigned short HardwareInst : 1; // Flag to specify that this instruction
+ // is a hardware instruction.
+ unsigned short ResourceID : 10; // Flag to specify the resource ID for
+ // the op.
+ unsigned short isImage : 1; // Reserved for future use.
+#endif
+ } bits;
+ unsigned short u16all;
+} InstrResEnc;
+
+} // namespace AMDILAS
+
+// Enums corresponding to AMDIL condition codes for IL. These
+// values must be kept in sync with the ones in the .td file.
+namespace AMDILCC {
+enum CondCodes {
+ // AMDIL specific condition codes. These correspond to the IL_CC_*
+ // in AMDILInstrInfo.td and must be kept in the same order.
+ IL_CC_D_EQ = 0, // DEQ instruction.
+ IL_CC_D_GE = 1, // DGE instruction.
+ IL_CC_D_LT = 2, // DLT instruction.
+ IL_CC_D_NE = 3, // DNE instruction.
+ IL_CC_F_EQ = 4, // EQ instruction.
+ IL_CC_F_GE = 5, // GE instruction.
+ IL_CC_F_LT = 6, // LT instruction.
+ IL_CC_F_NE = 7, // NE instruction.
+ IL_CC_I_EQ = 8, // IEQ instruction.
+ IL_CC_I_GE = 9, // IGE instruction.
+ IL_CC_I_LT = 10, // ILT instruction.
+ IL_CC_I_NE = 11, // INE instruction.
+ IL_CC_U_GE = 12, // UGE instruction.
+ IL_CC_U_LT = 13, // ULE instruction.
+ // Pseudo IL Comparison instructions here.
+ IL_CC_F_GT = 14, // GT instruction.
+ IL_CC_U_GT = 15,
+ IL_CC_I_GT = 16,
+ IL_CC_D_GT = 17,
+ IL_CC_F_LE = 18, // LE instruction
+ IL_CC_U_LE = 19,
+ IL_CC_I_LE = 20,
+ IL_CC_D_LE = 21,
+ IL_CC_F_UNE = 22,
+ IL_CC_F_UEQ = 23,
+ IL_CC_F_ULT = 24,
+ IL_CC_F_UGT = 25,
+ IL_CC_F_ULE = 26,
+ IL_CC_F_UGE = 27,
+ IL_CC_F_ONE = 28,
+ IL_CC_F_OEQ = 29,
+ IL_CC_F_OLT = 30,
+ IL_CC_F_OGT = 31,
+ IL_CC_F_OLE = 32,
+ IL_CC_F_OGE = 33,
+ IL_CC_D_UNE = 34,
+ IL_CC_D_UEQ = 35,
+ IL_CC_D_ULT = 36,
+ IL_CC_D_UGT = 37,
+ IL_CC_D_ULE = 38,
+ IL_CC_D_UGE = 39,
+ IL_CC_D_ONE = 40,
+ IL_CC_D_OEQ = 41,
+ IL_CC_D_OLT = 42,
+ IL_CC_D_OGT = 43,
+ IL_CC_D_OLE = 44,
+ IL_CC_D_OGE = 45,
+ IL_CC_U_EQ = 46,
+ IL_CC_U_NE = 47,
+ IL_CC_F_O = 48,
+ IL_CC_D_O = 49,
+ IL_CC_F_UO = 50,
+ IL_CC_D_UO = 51,
+ IL_CC_L_LE = 52,
+ IL_CC_L_GE = 53,
+ IL_CC_L_EQ = 54,
+ IL_CC_L_NE = 55,
+ IL_CC_L_LT = 56,
+ IL_CC_L_GT = 57,
+ IL_CC_UL_LE = 58,
+ IL_CC_UL_GE = 59,
+ IL_CC_UL_EQ = 60,
+ IL_CC_UL_NE = 61,
+ IL_CC_UL_LT = 62,
+ IL_CC_UL_GT = 63,
+ COND_ERROR = 64
+};
+
+} // end namespace AMDILCC
+} // end namespace llvm
+#endif // AMDIL_H_
--- /dev/null
+//===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDIL7XXDevice.h"
+#include "AMDILDevice.h"
+
+using namespace llvm;
+
+AMDIL7XXDevice::AMDIL7XXDevice(AMDILSubtarget *ST) : AMDILDevice(ST)
+{
+ setCaps();
+ std::string name = mSTM->getDeviceName();
+ if (name == "rv710") {
+ mDeviceFlag = OCL_DEVICE_RV710;
+ } else if (name == "rv730") {
+ mDeviceFlag = OCL_DEVICE_RV730;
+ } else {
+ mDeviceFlag = OCL_DEVICE_RV770;
+ }
+}
+
+AMDIL7XXDevice::~AMDIL7XXDevice()
+{
+}
+
+void AMDIL7XXDevice::setCaps()
+{
+ mSWBits.set(AMDILDeviceInfo::LocalMem);
+}
+
+size_t AMDIL7XXDevice::getMaxLDSSize() const
+{
+ if (usesHardware(AMDILDeviceInfo::LocalMem)) {
+ return MAX_LDS_SIZE_700;
+ }
+ return 0;
+}
+
+size_t AMDIL7XXDevice::getWavefrontSize() const
+{
+ return AMDILDevice::HalfWavefrontSize;
+}
+
+uint32_t AMDIL7XXDevice::getGeneration() const
+{
+ return AMDILDeviceInfo::HD4XXX;
+}
+
+uint32_t AMDIL7XXDevice::getResourceID(uint32_t DeviceID) const
+{
+ switch (DeviceID) {
+ default:
+ assert(0 && "ID type passed in is unknown!");
+ break;
+ case GLOBAL_ID:
+ case CONSTANT_ID:
+ case RAW_UAV_ID:
+ case ARENA_UAV_ID:
+ break;
+ case LDS_ID:
+ if (usesHardware(AMDILDeviceInfo::LocalMem)) {
+ return DEFAULT_LDS_ID;
+ }
+ break;
+ case SCRATCH_ID:
+ if (usesHardware(AMDILDeviceInfo::PrivateMem)) {
+ return DEFAULT_SCRATCH_ID;
+ }
+ break;
+ case GDS_ID:
+ assert(0 && "GDS UAV ID is not supported on this chip");
+ if (usesHardware(AMDILDeviceInfo::RegionMem)) {
+ return DEFAULT_GDS_ID;
+ }
+ break;
+ };
+
+ return 0;
+}
+
+uint32_t AMDIL7XXDevice::getMaxNumUAVs() const
+{
+ return 1;
+}
+
+AMDIL770Device::AMDIL770Device(AMDILSubtarget *ST): AMDIL7XXDevice(ST)
+{
+ setCaps();
+}
+
+AMDIL770Device::~AMDIL770Device()
+{
+}
+
+void AMDIL770Device::setCaps()
+{
+ if (mSTM->isOverride(AMDILDeviceInfo::DoubleOps)) {
+ mSWBits.set(AMDILDeviceInfo::FMA);
+ mHWBits.set(AMDILDeviceInfo::DoubleOps);
+ }
+ mSWBits.set(AMDILDeviceInfo::BarrierDetect);
+ mHWBits.reset(AMDILDeviceInfo::LongOps);
+ mSWBits.set(AMDILDeviceInfo::LongOps);
+ mSWBits.set(AMDILDeviceInfo::LocalMem);
+}
+
+size_t AMDIL770Device::getWavefrontSize() const
+{
+ return AMDILDevice::WavefrontSize;
+}
+
+AMDIL710Device::AMDIL710Device(AMDILSubtarget *ST) : AMDIL7XXDevice(ST)
+{
+}
+
+AMDIL710Device::~AMDIL710Device()
+{
+}
+
+size_t AMDIL710Device::getWavefrontSize() const
+{
+ return AMDILDevice::QuarterWavefrontSize;
+}
--- /dev/null
+//==-- AMDIL7XXDevice.h - Define 7XX Device Device for AMDIL ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface for the subtarget data classes.
+//
+//===----------------------------------------------------------------------===//
+// This file will define the interface that each generation needs to
+// implement in order to correctly answer queries on the capabilities of the
+// specific hardware.
+//===----------------------------------------------------------------------===//
+#ifndef _AMDIL7XXDEVICEIMPL_H_
+#define _AMDIL7XXDEVICEIMPL_H_
+#include "AMDILDevice.h"
+#include "AMDILSubtarget.h"
+
+namespace llvm {
+class AMDILSubtarget;
+
+//===----------------------------------------------------------------------===//
+// 7XX generation of devices and their respective sub classes
+//===----------------------------------------------------------------------===//
+
+// The AMDIL7XXDevice class represents the generic 7XX device. All 7XX
+// devices are derived from this class. The AMDIL7XX device will only
+// support the minimal features that are required to be considered OpenCL 1.0
+// compliant and nothing more.
+class AMDIL7XXDevice : public AMDILDevice {
+public:
+ AMDIL7XXDevice(AMDILSubtarget *ST);
+ virtual ~AMDIL7XXDevice();
+ virtual size_t getMaxLDSSize() const;
+ virtual size_t getWavefrontSize() const;
+ virtual uint32_t getGeneration() const;
+ virtual uint32_t getResourceID(uint32_t DeviceID) const;
+ virtual uint32_t getMaxNumUAVs() const;
+
+protected:
+ virtual void setCaps();
+}; // AMDIL7XXDevice
+
+// The AMDIL770Device class represents the RV770 chip and it's
+// derivative cards. The difference between this device and the base
+// class is this device device adds support for double precision
+// and has a larger wavefront size.
+class AMDIL770Device : public AMDIL7XXDevice {
+public:
+ AMDIL770Device(AMDILSubtarget *ST);
+ virtual ~AMDIL770Device();
+ virtual size_t getWavefrontSize() const;
+private:
+ virtual void setCaps();
+}; // AMDIL770Device
+
+// The AMDIL710Device class derives from the 7XX base class, but this
+// class is a smaller derivative, so we need to overload some of the
+// functions in order to correctly specify this information.
+class AMDIL710Device : public AMDIL7XXDevice {
+public:
+ AMDIL710Device(AMDILSubtarget *ST);
+ virtual ~AMDIL710Device();
+ virtual size_t getWavefrontSize() const;
+}; // AMDIL710Device
+
+} // namespace llvm
+#endif // _AMDILDEVICEIMPL_H_
--- /dev/null
+//===------ AMDILAlgorithms.tpp - AMDIL Template Algorithms Header --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides templates algorithms that extend the STL algorithms, but
+// are useful for the AMDIL backend
+//
+//===----------------------------------------------------------------------===//
+
+// A template function that loops through the iterators and passes the second
+// argument along with each iterator to the function. If the function returns
+// true, then the current iterator is invalidated and it moves back, before
+// moving forward to the next iterator, otherwise it moves forward without
+// issue. This is based on the for_each STL function, but allows a reference to
+// the second argument
+template<class InputIterator, class Function, typename Arg>
+Function binaryForEach(InputIterator First, InputIterator Last, Function F,
+ Arg &Second)
+{
+ for ( ; First!=Last; ++First ) {
+ F(*First, Second);
+ }
+ return F;
+}
+
+template<class InputIterator, class Function, typename Arg>
+Function safeBinaryForEach(InputIterator First, InputIterator Last, Function F,
+ Arg &Second)
+{
+ for ( ; First!=Last; ++First ) {
+ if (F(*First, Second)) {
+ --First;
+ }
+ }
+ return F;
+}
+
+// A template function that has two levels of looping before calling the
+// function with the passed in argument. See binaryForEach for further
+// explanation
+template<class InputIterator, class Function, typename Arg>
+Function binaryNestedForEach(InputIterator First, InputIterator Last,
+ Function F, Arg &Second)
+{
+ for ( ; First != Last; ++First) {
+ binaryForEach(First->begin(), First->end(), F, Second);
+ }
+ return F;
+}
+template<class InputIterator, class Function, typename Arg>
+Function safeBinaryNestedForEach(InputIterator First, InputIterator Last,
+ Function F, Arg &Second)
+{
+ for ( ; First != Last; ++First) {
+ safeBinaryForEach(First->begin(), First->end(), F, Second);
+ }
+ return F;
+}
+
+// Unlike the STL, a pointer to the iterator itself is passed in with the 'safe'
+// versions of these functions This allows the function to handle situations
+// such as invalidated iterators
+template<class InputIterator, class Function>
+Function safeForEach(InputIterator First, InputIterator Last, Function F)
+{
+ for ( ; First!=Last; ++First ) F(&First)
+ ; // Do nothing.
+ return F;
+}
+
+// A template function that has two levels of looping before calling the
+// function with a pointer to the current iterator. See binaryForEach for
+// further explanation
+template<class InputIterator, class SecondIterator, class Function>
+Function safeNestedForEach(InputIterator First, InputIterator Last,
+ SecondIterator S, Function F)
+{
+ for ( ; First != Last; ++First) {
+ SecondIterator sf, sl;
+ for (sf = First->begin(), sl = First->end();
+ sf != sl; ) {
+ if (!F(&sf)) {
+ ++sf;
+ }
+ }
+ }
+ return F;
+}
--- /dev/null
+//===- AMDIL.td - AMDIL Target Machine -------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+// Dummy Instruction itineraries for pseudo instructions
+def ALU_NULL : FuncUnit;
+def NullALU : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// AMDIL Subtarget features.
+//===----------------------------------------------------------------------===//
+def FeatureFP64 : SubtargetFeature<"fp64",
+ "CapsOverride[AMDILDeviceInfo::DoubleOps]",
+ "true",
+ "Enable 64bit double precision operations">;
+def FeatureByteAddress : SubtargetFeature<"byte_addressable_store",
+ "CapsOverride[AMDILDeviceInfo::ByteStores]",
+ "true",
+ "Enable byte addressable stores">;
+def FeatureBarrierDetect : SubtargetFeature<"barrier_detect",
+ "CapsOverride[AMDILDeviceInfo::BarrierDetect]",
+ "true",
+ "Enable duplicate barrier detection(HD5XXX or later).">;
+def FeatureImages : SubtargetFeature<"images",
+ "CapsOverride[AMDILDeviceInfo::Images]",
+ "true",
+ "Enable image functions">;
+def FeatureMultiUAV : SubtargetFeature<"multi_uav",
+ "CapsOverride[AMDILDeviceInfo::MultiUAV]",
+ "true",
+ "Generate multiple UAV code(HD5XXX family or later)">;
+def FeatureMacroDB : SubtargetFeature<"macrodb",
+ "CapsOverride[AMDILDeviceInfo::MacroDB]",
+ "true",
+ "Use internal macrodb, instead of macrodb in driver">;
+def FeatureNoAlias : SubtargetFeature<"noalias",
+ "CapsOverride[AMDILDeviceInfo::NoAlias]",
+ "true",
+ "assert that all kernel argument pointers are not aliased">;
+def FeatureNoInline : SubtargetFeature<"no-inline",
+ "CapsOverride[AMDILDeviceInfo::NoInline]",
+ "true",
+ "specify whether to not inline functions">;
+
+def Feature64BitPtr : SubtargetFeature<"64BitPtr",
+ "mIs64bit",
+ "false",
+ "Specify if 64bit addressing should be used.">;
+
+def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
+ "mIs32on64bit",
+ "false",
+ "Specify if 64bit sized pointers with 32bit addressing should be used.">;
+def FeatureDebug : SubtargetFeature<"debug",
+ "CapsOverride[AMDILDeviceInfo::Debug]",
+ "true",
+ "Debug mode is enabled, so disable hardware accelerated address spaces.">;
+def FeatureDumpCode : SubtargetFeature <"DumpCode",
+ "mDumpCode",
+ "true",
+ "Dump MachineInstrs in the CodeEmitter">;
+
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+
+include "AMDILRegisterInfo.td"
+include "AMDILCallingConv.td"
+include "AMDILInstrInfo.td"
+
+def AMDILInstrInfo : InstrInfo {}
+
+//===----------------------------------------------------------------------===//
+// AMDIL processors supported.
+//===----------------------------------------------------------------------===//
+//include "Processors.td"
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+def AMDILAsmWriter : AsmWriter {
+ string AsmWriterClassName = "AsmPrinter";
+ int Variant = 0;
+}
+
+def AMDILAsmParser : AsmParser {
+ string AsmParserClassName = "AsmParser";
+ int Variant = 0;
+
+ string CommentDelimiter = ";";
+
+ string RegisterPrefix = "r";
+
+}
+
+
+def AMDIL : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = AMDILInstrInfo;
+ let AssemblyWriters = [AMDILAsmWriter];
+ let AssemblyParsers = [AMDILAsmParser];
+}
--- /dev/null
+//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+
+#define DEBUGME 0
+#define DEBUG_TYPE "structcfg"
+
+#include "AMDIL.h"
+#include "AMDILInstrInfo.h"
+#include "AMDILRegisterInfo.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define FirstNonDebugInstr(A) A->begin()
+using namespace llvm;
+
+// TODO: move-begin.
+
+//===----------------------------------------------------------------------===//
+//
+// Statistics for CFGStructurizer.
+//
+//===----------------------------------------------------------------------===//
+
+STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
+ "matched");
+STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
+ "matched");
+STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
+ "pattern matched");
+STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue "
+ "pattern matched");
+STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern "
+ "matched");
+STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
+STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
+
+//===----------------------------------------------------------------------===//
+//
+// Miscellaneous utility for CFGStructurizer.
+//
+//===----------------------------------------------------------------------===//
+namespace llvmCFGStruct
+{
+#define SHOWNEWINSTR(i) \
+ if (DEBUGME) errs() << "New instr: " << *i << "\n"
+
+#define SHOWNEWBLK(b, msg) \
+if (DEBUGME) { \
+ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+ errs() << "\n"; \
+}
+
+#define SHOWBLK_DETAIL(b, msg) \
+if (DEBUGME) { \
+ if (b) { \
+ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
+ b->print(errs()); \
+ errs() << "\n"; \
+ } \
+}
+
+#define INVALIDSCCNUM -1
+#define INVALIDREGNUM 0
+
+template<class LoopinfoT>
+void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
+ for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
+ iterEnd = LoopInfo.end();
+ iter != iterEnd; ++iter) {
+ (*iter)->print(OS, 0);
+ }
+}
+
+template<class NodeT>
+void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
+ size_t sz = Src.size();
+ for (size_t i = 0; i < sz/2; ++i) {
+ NodeT *t = Src[i];
+ Src[i] = Src[sz - i - 1];
+ Src[sz - i - 1] = t;
+ }
+}
+
+} //end namespace llvmCFGStruct
+
+
+//===----------------------------------------------------------------------===//
+//
+// MachinePostDominatorTree
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+/// PostDominatorTree Class - Concrete subclass of DominatorTree that is used
+/// to compute the a post-dominator tree.
+///
+struct MachinePostDominatorTree : public MachineFunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ DominatorTreeBase<MachineBasicBlock> *DT;
+ MachinePostDominatorTree() : MachineFunctionPass(ID)
+ {
+ DT = new DominatorTreeBase<MachineBasicBlock>(true); //true indicate
+ // postdominator
+ }
+
+ ~MachinePostDominatorTree();
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ inline const std::vector<MachineBasicBlock *> &getRoots() const {
+ return DT->getRoots();
+ }
+
+ inline MachineDomTreeNode *getRootNode() const {
+ return DT->getRootNode();
+ }
+
+ inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const {
+ return DT->getNode(BB);
+ }
+
+ inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const {
+ return DT->getNode(BB);
+ }
+
+ inline bool dominates(MachineDomTreeNode *A, MachineDomTreeNode *B) const {
+ return DT->dominates(A, B);
+ }
+
+ inline bool dominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
+ return DT->dominates(A, B);
+ }
+
+ inline bool
+ properlyDominates(const MachineDomTreeNode *A, MachineDomTreeNode *B) const {
+ return DT->properlyDominates(A, B);
+ }
+
+ inline bool
+ properlyDominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
+ return DT->properlyDominates(A, B);
+ }
+
+ inline MachineBasicBlock *
+ findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B) {
+ return DT->findNearestCommonDominator(A, B);
+ }
+
+ virtual void print(llvm::raw_ostream &OS, const Module *M = 0) const {
+ DT->print(OS);
+ }
+};
+} //end of namespace llvm
+
+char MachinePostDominatorTree::ID = 0;
+static RegisterPass<MachinePostDominatorTree>
+machinePostDominatorTreePass("machinepostdomtree",
+ "MachinePostDominator Tree Construction",
+ true, true);
+
+//const PassInfo *const llvm::MachinePostDominatorsID
+//= &machinePostDominatorTreePass;
+
+bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) {
+ DT->recalculate(F);
+ //DEBUG(DT->dump());
+ return false;
+}
+
+MachinePostDominatorTree::~MachinePostDominatorTree() {
+ delete DT;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// supporting data structure for CFGStructurizer
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct
+{
+template<class PassT>
+struct CFGStructTraits {
+};
+
+template <class InstrT>
+class BlockInformation {
+public:
+ bool isRetired;
+ int sccNum;
+ //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
+ //Instructions defining the corresponding successor.
+ BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
+};
+
+template <class BlockT, class InstrT, class RegiT>
+class LandInformation {
+public:
+ BlockT *landBlk;
+ std::set<RegiT> breakInitRegs; //Registers that need to "reg = 0", before
+ //WHILELOOP(thisloop) init before entering
+ //thisloop.
+ std::set<RegiT> contInitRegs; //Registers that need to "reg = 0", after
+ //WHILELOOP(thisloop) init after entering
+ //thisloop.
+ std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop
+ //land block, branch cond on this reg.
+ std::set<RegiT> breakOnRegs; //registers that need to "if (reg) break
+ //endif" after ENDLOOP(thisloop) break
+ //outerLoopOf(thisLoop).
+ std::set<RegiT> contOnRegs; //registers that need to "if (reg) continue
+ //endif" after ENDLOOP(thisloop) continue on
+ //outerLoopOf(thisLoop).
+ LandInformation() : landBlk(NULL) {}
+};
+
+} //end of namespace llvmCFGStruct
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructurizer
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct
+{
+// bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
+template<class PassT>
+class CFGStructurizer
+{
+public:
+ typedef enum {
+ Not_SinglePath = 0,
+ SinglePath_InPath = 1,
+ SinglePath_NotInPath = 2
+ } PathToKind;
+
+public:
+ typedef typename PassT::InstructionType InstrT;
+ typedef typename PassT::FunctionType FuncT;
+ typedef typename PassT::DominatortreeType DomTreeT;
+ typedef typename PassT::PostDominatortreeType PostDomTreeT;
+ typedef typename PassT::DomTreeNodeType DomTreeNodeT;
+ typedef typename PassT::LoopinfoType LoopInfoT;
+
+ typedef GraphTraits<FuncT *> FuncGTraits;
+ //typedef FuncGTraits::nodes_iterator BlockIterator;
+ typedef typename FuncT::iterator BlockIterator;
+
+ typedef typename FuncGTraits::NodeType BlockT;
+ typedef GraphTraits<BlockT *> BlockGTraits;
+ typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits;
+ //typedef BlockGTraits::succ_iterator InstructionIterator;
+ typedef typename BlockT::iterator InstrIterator;
+
+ typedef CFGStructTraits<PassT> CFGTraits;
+ typedef BlockInformation<InstrT> BlockInfo;
+ typedef std::map<BlockT *, BlockInfo *> BlockInfoMap;
+
+ typedef int RegiT;
+ typedef typename PassT::LoopType LoopT;
+ typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo;
+ typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
+ //landing info for loop break
+ typedef SmallVector<BlockT *, 32> BlockTSmallerVector;
+
+public:
+ CFGStructurizer();
+ ~CFGStructurizer();
+
+ /// Perform the CFG structurization
+ bool run(FuncT &Func, PassT &Pass, const AMDILRegisterInfo *tri);
+
+ /// Perform the CFG preparation
+ bool prepare(FuncT &Func, PassT &Pass, const AMDILRegisterInfo *tri);
+
+private:
+ void orderBlocks();
+ void printOrderedBlocks(llvm::raw_ostream &OS);
+ int patternMatch(BlockT *CurBlock);
+ int patternMatchGroup(BlockT *CurBlock);
+
+ int serialPatternMatch(BlockT *CurBlock);
+ int ifPatternMatch(BlockT *CurBlock);
+ int switchPatternMatch(BlockT *CurBlock);
+ int loopendPatternMatch(BlockT *CurBlock);
+ int loopPatternMatch(BlockT *CurBlock);
+
+ int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
+ int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
+ //int loopWithoutBreak(BlockT *);
+
+ void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
+ BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
+ void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
+ BlockT *ContBlock, LoopT *contLoop);
+ bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
+ int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock);
+ int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock);
+ int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock, BlockT **LandBlockPtr);
+ void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
+ BlockT *FalseBlock, BlockT *LandBlock,
+ bool Detail = false);
+ PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
+ bool AllowSideEntry = true);
+ BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
+ bool AllowSideEntry = true);
+ int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
+ void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
+
+ void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
+ BlockT *TrueBlock, BlockT *FalseBlock,
+ BlockT *LandBlock);
+ void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
+ void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
+ BlockT *ExitLandBlock, RegiT SetReg);
+ void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
+ RegiT SetReg);
+ BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
+ std::set<BlockT*> &ExitBlockSet,
+ BlockT *ExitLandBlk);
+ BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
+ BlockTSmallerVector &ExitingBlocks,
+ BlockTSmallerVector &ExitBlocks);
+ BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
+ void removeUnconditionalBranch(BlockT *SrcBlock);
+ void removeRedundantConditionalBranch(BlockT *SrcBlock);
+ void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
+
+ void removeSuccessor(BlockT *SrcBlock);
+ BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
+ BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
+
+ void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
+ InstrIterator InsertPos);
+
+ void recordSccnum(BlockT *SrcBlock, int SCCNum);
+ int getSCCNum(BlockT *srcBlk);
+
+ void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
+ bool isRetiredBlock(BlockT *SrcBlock);
+ bool isActiveLoophead(BlockT *CurBlock);
+ bool needMigrateBlock(BlockT *Block);
+
+ BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
+ BlockTSmallerVector &exitBlocks,
+ std::set<BlockT*> &ExitBlockSet);
+ void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
+ BlockT *getLoopLandBlock(LoopT *LoopRep);
+ LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
+
+ void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
+ void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
+
+ bool hasBackEdge(BlockT *curBlock);
+ unsigned getLoopDepth (LoopT *LoopRep);
+ int countActiveBlock(
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
+ BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
+ BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
+
+private:
+ DomTreeT *domTree;
+ PostDomTreeT *postDomTree;
+ LoopInfoT *loopInfo;
+ PassT *passRep;
+ FuncT *funcRep;
+
+ BlockInfoMap blockInfoMap;
+ LoopLandInfoMap loopLandInfoMap;
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
+ const AMDILRegisterInfo *TRI;
+
+}; //template class CFGStructurizer
+
+template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
+ : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
+}
+
+template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
+ for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
+ E = blockInfoMap.end(); I != E; ++I) {
+ delete I->second;
+ }
+}
+
+template<class PassT>
+bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
+ const AMDILRegisterInfo * tri) {
+ passRep = &pass;
+ funcRep = &func;
+ TRI = tri;
+
+ bool changed = false;
+ //func.RenumberBlocks();
+
+ //to do, if not reducible flow graph, make it so ???
+
+ if (DEBUGME) {
+ errs() << "AMDILCFGStructurizer::prepare\n";
+ //func.viewCFG();
+ //func.viewCFGOnly();
+ //func.dump();
+ }
+
+ //FIXME: gcc complains on this.
+ //domTree = &pass.getAnalysis<DomTreeT>();
+ //domTree = CFGTraits::getDominatorTree(pass);
+ //if (DEBUGME) {
+ // domTree->print(errs());
+ //}
+
+ //FIXME: gcc complains on this.
+ //domTree = &pass.getAnalysis<DomTreeT>();
+ //postDomTree = CFGTraits::getPostDominatorTree(pass);
+ //if (DEBUGME) {
+ // postDomTree->print(errs());
+ //}
+
+ //FIXME: gcc complains on this.
+ //loopInfo = &pass.getAnalysis<LoopInfoT>();
+ loopInfo = CFGTraits::getLoopInfo(pass);
+ if (DEBUGME) {
+ errs() << "LoopInfo:\n";
+ PrintLoopinfo(*loopInfo, errs());
+ }
+
+ orderBlocks();
+ if (DEBUGME) {
+ errs() << "Ordered blocks:\n";
+ printOrderedBlocks(errs());
+ }
+
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
+
+ for (typename LoopInfoT::iterator iter = loopInfo->begin(),
+ iterEnd = loopInfo->end();
+ iter != iterEnd; ++iter) {
+ LoopT* loopRep = (*iter);
+ BlockTSmallerVector exitingBlks;
+ loopRep->getExitingBlocks(exitingBlks);
+
+ if (exitingBlks.size() == 0) {
+ BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
+ if (dummyExitBlk != NULL)
+ retBlks.push_back(dummyExitBlk);
+ }
+ }
+
+ // Remove unconditional branch instr.
+ // Add dummy exit block iff there are multiple returns.
+
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
+ iterBlk != iterEndBlk;
+ ++iterBlk) {
+ BlockT *curBlk = *iterBlk;
+ removeUnconditionalBranch(curBlk);
+ removeRedundantConditionalBranch(curBlk);
+ if (CFGTraits::isReturnBlock(curBlk)) {
+ retBlks.push_back(curBlk);
+ }
+ assert(curBlk->succ_size() <= 2);
+ //assert(curBlk->size() > 0);
+ //removeEmptyBlock(curBlk) ??
+ } //for
+
+ if (retBlks.size() >= 2) {
+ addDummyExitBlock(retBlks);
+ changed = true;
+ }
+
+ return changed;
+} //CFGStructurizer::prepare
+
+template<class PassT>
+bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
+ const AMDILRegisterInfo * tri) {
+ passRep = &pass;
+ funcRep = &func;
+ TRI = tri;
+
+ //func.RenumberBlocks();
+
+ //Assume reducible CFG...
+ if (DEBUGME) {
+ errs() << "AMDILCFGStructurizer::run\n";
+ //errs() << func.getFunction()->getNameStr() << "\n";
+ func.viewCFG();
+ //func.viewCFGOnly();
+ //func.dump();
+ }
+
+#if 1
+ //FIXME: gcc complains on this.
+ //domTree = &pass.getAnalysis<DomTreeT>();
+ domTree = CFGTraits::getDominatorTree(pass);
+ if (DEBUGME) {
+ domTree->print(errs(), (const llvm::Module*)0);
+ }
+#endif
+
+ //FIXME: gcc complains on this.
+ //domTree = &pass.getAnalysis<DomTreeT>();
+ postDomTree = CFGTraits::getPostDominatorTree(pass);
+ if (DEBUGME) {
+ postDomTree->print(errs());
+ }
+
+ //FIXME: gcc complains on this.
+ //loopInfo = &pass.getAnalysis<LoopInfoT>();
+ loopInfo = CFGTraits::getLoopInfo(pass);
+ if (DEBUGME) {
+ errs() << "LoopInfo:\n";
+ PrintLoopinfo(*loopInfo, errs());
+ }
+
+ orderBlocks();
+//#define STRESSTEST
+#ifdef STRESSTEST
+ //Use the worse block ordering to test the algorithm.
+ ReverseVector(orderedBlks);
+#endif
+
+ if (DEBUGME) {
+ errs() << "Ordered blocks:\n";
+ printOrderedBlocks(errs());
+ }
+ int numIter = 0;
+ bool finish = false;
+ BlockT *curBlk;
+ bool makeProgress = false;
+ int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
+ orderedBlks.end());
+
+ do {
+ ++numIter;
+ if (DEBUGME) {
+ errs() << "numIter = " << numIter
+ << ", numRemaintedBlk = " << numRemainedBlk << "\n";
+ }
+
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlk = orderedBlks.begin();
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlkEnd = orderedBlks.end();
+
+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ sccBeginIter = iterBlk;
+ BlockT *sccBeginBlk = NULL;
+ int sccNumBlk = 0; // The number of active blocks, init to a
+ // maximum possible number.
+ int sccNumIter; // Number of iteration in this SCC.
+
+ while (iterBlk != iterBlkEnd) {
+ curBlk = *iterBlk;
+
+ if (sccBeginBlk == NULL) {
+ sccBeginIter = iterBlk;
+ sccBeginBlk = curBlk;
+ sccNumIter = 0;
+ sccNumBlk = numRemainedBlk; // Init to maximum possible number.
+ if (DEBUGME) {
+ errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
+ errs() << "\n";
+ }
+ }
+
+ if (!isRetiredBlock(curBlk)) {
+ patternMatch(curBlk);
+ }
+
+ ++iterBlk;
+
+ bool contNextScc = true;
+ if (iterBlk == iterBlkEnd
+ || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
+ // Just finish one scc.
+ ++sccNumIter;
+ int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
+ if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
+ if (DEBUGME) {
+ errs() << "Can't reduce SCC " << getSCCNum(curBlk)
+ << ", sccNumIter = " << sccNumIter;
+ errs() << "doesn't make any progress\n";
+ }
+ contNextScc = true;
+ } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
+ sccNumBlk = sccRemainedNumBlk;
+ iterBlk = sccBeginIter;
+ contNextScc = false;
+ if (DEBUGME) {
+ errs() << "repeat processing SCC" << getSCCNum(curBlk)
+ << "sccNumIter = " << sccNumIter << "\n";
+ func.viewCFG();
+ //func.viewCFGOnly();
+ }
+ } else {
+ // Finish the current scc.
+ contNextScc = true;
+ }
+ } else {
+ // Continue on next component in the current scc.
+ contNextScc = false;
+ }
+
+ if (contNextScc) {
+ sccBeginBlk = NULL;
+ }
+ } //while, "one iteration" over the function.
+
+ BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
+ if (entryBlk->succ_size() == 0) {
+ finish = true;
+ if (DEBUGME) {
+ errs() << "Reduce to one block\n";
+ }
+ } else {
+ int newnumRemainedBlk
+ = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
+ // consider cloned blocks ??
+ if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
+ makeProgress = true;
+ numRemainedBlk = newnumRemainedBlk;
+ } else {
+ makeProgress = false;
+ if (DEBUGME) {
+ errs() << "No progress\n";
+ }
+ }
+ }
+ } while (!finish && makeProgress);
+
+ // Misc wrap up to maintain the consistency of the Function representation.
+ CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
+
+ // Detach retired Block, release memory.
+ for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
+ iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
+ if ((*iterMap).second && (*iterMap).second->isRetired) {
+ assert(((*iterMap).first)->getNumber() != -1);
+ if (DEBUGME) {
+ errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
+ }
+ (*iterMap).first->eraseFromParent(); //Remove from the parent Function.
+ }
+ delete (*iterMap).second;
+ }
+ blockInfoMap.clear();
+
+ // clear loopLandInfoMap
+ for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
+ iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
+ delete (*iterMap).second;
+ }
+ loopLandInfoMap.clear();
+
+ if (DEBUGME) {
+ func.viewCFG();
+ //func.dump();
+ }
+
+ if (!finish) {
+ assert(!"IRREDUCIBL_CF");
+ }
+
+ return true;
+} //CFGStructurizer::run
+
+/// Print the ordered Blocks.
+///
+template<class PassT>
+void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
+ size_t i = 0;
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
+ iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
+ iterBlk != iterBlkEnd;
+ ++iterBlk, ++i) {
+ os << "BB" << (*iterBlk)->getNumber();
+ os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
+ if (i != 0 && i % 10 == 0) {
+ os << "\n";
+ } else {
+ os << " ";
+ }
+ }
+} //printOrderedBlocks
+
+/// Compute the reversed DFS post order of Blocks
+///
+template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
+ int sccNum = 0;
+ BlockT *bb;
+ for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep),
+ sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) {
+ std::vector<BlockT *> &sccNext = *sccIter;
+ for (typename std::vector<BlockT *>::const_iterator
+ blockIter = sccNext.begin(), blockEnd = sccNext.end();
+ blockIter != blockEnd; ++blockIter) {
+ bb = *blockIter;
+ orderedBlks.push_back(bb);
+ recordSccnum(bb, sccNum);
+ }
+ }
+
+ //walk through all the block in func to check for unreachable
+ for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep),
+ blockEnd1 = FuncGTraits::nodes_end(funcRep);
+ blockIter1 != blockEnd1; ++blockIter1) {
+ BlockT *bb = &(*blockIter1);
+ sccNum = getSCCNum(bb);
+ if (sccNum == INVALIDSCCNUM) {
+ errs() << "unreachable block BB" << bb->getNumber() << "\n";
+ }
+ } //end of for
+} //orderBlocks
+
+template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
+ int numMatch = 0;
+ int curMatch;
+
+ if (DEBUGME) {
+ errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
+ }
+
+ while ((curMatch = patternMatchGroup(curBlk)) > 0) {
+ numMatch += curMatch;
+ }
+
+ if (DEBUGME) {
+ errs() << "End patternMatch BB" << curBlk->getNumber()
+ << ", numMatch = " << numMatch << "\n";
+ }
+
+ return numMatch;
+} //patternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
+ int numMatch = 0;
+ numMatch += serialPatternMatch(curBlk);
+ numMatch += ifPatternMatch(curBlk);
+ //numMatch += switchPatternMatch(curBlk);
+ numMatch += loopendPatternMatch(curBlk);
+ numMatch += loopPatternMatch(curBlk);
+ return numMatch;
+}//patternMatchGroup
+
+template<class PassT>
+int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
+ if (curBlk->succ_size() != 1) {
+ return 0;
+ }
+
+ BlockT *childBlk = *curBlk->succ_begin();
+ if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) {
+ return 0;
+ }
+
+ mergeSerialBlock(curBlk, childBlk);
+ ++numSerialPatternMatch;
+ return 1;
+} //serialPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
+ //two edges
+ if (curBlk->succ_size() != 2) {
+ return 0;
+ }
+
+ if (hasBackEdge(curBlk)) {
+ return 0;
+ }
+
+ InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
+ if (branchInstr == NULL) {
+ return 0;
+ }
+
+ assert(CFGTraits::isCondBranch(branchInstr));
+
+ BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
+ BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
+ BlockT *landBlk;
+ int cloned = 0;
+
+ // TODO: Simplify
+ if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
+ && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
+ landBlk = *trueBlk->succ_begin();
+ } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
+ landBlk = NULL;
+ } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
+ landBlk = falseBlk;
+ falseBlk = NULL;
+ } else if (falseBlk->succ_size() == 1
+ && *falseBlk->succ_begin() == trueBlk) {
+ landBlk = trueBlk;
+ trueBlk = NULL;
+ } else if (falseBlk->succ_size() == 1
+ && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
+ landBlk = *falseBlk->succ_begin();
+ } else if (trueBlk->succ_size() == 1
+ && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
+ landBlk = *trueBlk->succ_begin();
+ } else {
+ return handleJumpintoIf(curBlk, trueBlk, falseBlk);
+ }
+
+ // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
+ // new BB created for landBlk==NULL may introduce new challenge to the
+ // reduction process.
+ if (landBlk != NULL &&
+ ((trueBlk && trueBlk->pred_size() > 1)
+ || (falseBlk && falseBlk->pred_size() > 1))) {
+ cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
+ }
+
+ if (trueBlk && trueBlk->pred_size() > 1) {
+ trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
+ ++cloned;
+ }
+
+ if (falseBlk && falseBlk->pred_size() > 1) {
+ falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
+ ++cloned;
+ }
+
+ mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
+
+ ++numIfPatternMatch;
+
+ numClonedBlock += cloned;
+
+ return 1 + cloned;
+} //ifPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
+ return 0;
+} //switchPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ typename std::vector<LoopT *> nestedLoops;
+ while (loopRep) {
+ nestedLoops.push_back(loopRep);
+ loopRep = loopRep->getParentLoop();
+ }
+
+ if (nestedLoops.size() == 0) {
+ return 0;
+ }
+
+ // Process nested loop outside->inside, so "continue" to a outside loop won't
+ // be mistaken as "break" of the current loop.
+ int num = 0;
+ for (typename std::vector<LoopT *>::reverse_iterator
+ iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend();
+ iter != iterEnd; ++iter) {
+ loopRep = *iter;
+
+ if (getLoopLandBlock(loopRep) != NULL) {
+ continue;
+ }
+
+ BlockT *loopHeader = loopRep->getHeader();
+
+ int numBreak = loopbreakPatternMatch(loopRep, loopHeader);
+
+ if (numBreak == -1) {
+ break;
+ }
+
+ int numCont = loopcontPatternMatch(loopRep, loopHeader);
+ num += numBreak + numCont;
+ }
+
+ return num;
+} //loopendPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) {
+ if (curBlk->succ_size() != 0) {
+ return 0;
+ }
+
+ int numLoop = 0;
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ while (loopRep && loopRep->getHeader() == curBlk) {
+ LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
+ if (loopLand) {
+ BlockT *landBlk = loopLand->landBlk;
+ assert(landBlk);
+ if (!isRetiredBlock(landBlk)) {
+ mergeLooplandBlock(curBlk, loopLand);
+ ++numLoop;
+ }
+ }
+ loopRep = loopRep->getParentLoop();
+ }
+
+ numLoopPatternMatch += numLoop;
+
+ return numLoop;
+} //loopPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
+ BlockT *loopHeader) {
+ BlockTSmallerVector exitingBlks;
+ loopRep->getExitingBlocks(exitingBlks);
+
+ if (DEBUGME) {
+ errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
+ }
+
+ if (exitingBlks.size() == 0) {
+ setLoopLandBlock(loopRep);
+ return 0;
+ }
+
+ // Compute the corresponding exitBlks and exit block set.
+ BlockTSmallerVector exitBlks;
+ std::set<BlockT *> exitBlkSet;
+ for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
+ iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
+ BlockT *exitingBlk = *iter;
+ BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
+ exitBlks.push_back(exitBlk);
+ exitBlkSet.insert(exitBlk); //non-duplicate insert
+ }
+
+ assert(exitBlkSet.size() > 0);
+ assert(exitBlks.size() == exitingBlks.size());
+
+ if (DEBUGME) {
+ errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
+ }
+
+ // Find exitLandBlk.
+ BlockT *exitLandBlk = NULL;
+ int numCloned = 0;
+ int numSerial = 0;
+
+ if (exitBlkSet.size() == 1)
+ {
+ exitLandBlk = *exitBlkSet.begin();
+ } else {
+ exitLandBlk = findNearestCommonPostDom(exitBlkSet);
+
+ if (exitLandBlk == NULL) {
+ return -1;
+ }
+
+ bool allInPath = true;
+ bool allNotInPath = true;
+ for (typename std::set<BlockT*>::const_iterator
+ iter = exitBlkSet.begin(),
+ iterEnd = exitBlkSet.end();
+ iter != iterEnd; ++iter) {
+ BlockT *exitBlk = *iter;
+
+ PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
+ if (DEBUGME) {
+ errs() << "BB" << exitBlk->getNumber()
+ << " to BB" << exitLandBlk->getNumber() << " PathToKind="
+ << pathKind << "\n";
+ }
+
+ allInPath = allInPath && (pathKind == SinglePath_InPath);
+ allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
+
+ if (!allInPath && !allNotInPath) {
+ if (DEBUGME) {
+ errs() << "singlePath check fail\n";
+ }
+ return -1;
+ }
+ } // check all exit blocks
+
+ if (allNotInPath) {
+#if 1
+
+ // TODO: Simplify, maybe separate function?
+ //funcRep->viewCFG();
+ LoopT *parentLoopRep = loopRep->getParentLoop();
+ BlockT *parentLoopHeader = NULL;
+ if (parentLoopRep)
+ parentLoopHeader = parentLoopRep->getHeader();
+
+ if (exitLandBlk == parentLoopHeader &&
+ (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
+ loopRep,
+ exitBlkSet,
+ exitLandBlk)) != NULL) {
+ if (DEBUGME) {
+ errs() << "relocateLoopcontBlock success\n";
+ }
+ } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
+ exitingBlks,
+ exitBlks)) != NULL) {
+ if (DEBUGME) {
+ errs() << "insertEndbranchBlock success\n";
+ }
+ } else {
+ if (DEBUGME) {
+ errs() << "loop exit fail\n";
+ }
+ return -1;
+ }
+#else
+ return -1;
+#endif
+ }
+
+ // Handle side entry to exit path.
+ exitBlks.clear();
+ exitBlkSet.clear();
+ for (typename BlockTSmallerVector::iterator iterExiting =
+ exitingBlks.begin(),
+ iterExitingEnd = exitingBlks.end();
+ iterExiting != iterExitingEnd; ++iterExiting) {
+ BlockT *exitingBlk = *iterExiting;
+ BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
+ BlockT *newExitBlk = exitBlk;
+
+ if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
+ newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
+ ++numCloned;
+ }
+
+ numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
+
+ exitBlks.push_back(newExitBlk);
+ exitBlkSet.insert(newExitBlk);
+ }
+
+ for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
+ iterExitEnd = exitBlks.end();
+ iterExit != iterExitEnd; ++iterExit) {
+ BlockT *exitBlk = *iterExit;
+ numSerial += serialPatternMatch(exitBlk);
+ }
+
+ for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
+ iterExitEnd = exitBlks.end();
+ iterExit != iterExitEnd; ++iterExit) {
+ BlockT *exitBlk = *iterExit;
+ if (exitBlk->pred_size() > 1) {
+ if (exitBlk != exitLandBlk) {
+ return -1;
+ }
+ } else {
+ if (exitBlk != exitLandBlk &&
+ (exitBlk->succ_size() != 1 ||
+ *exitBlk->succ_begin() != exitLandBlk)) {
+ return -1;
+ }
+ }
+ }
+ } // else
+
+ // LoopT *exitLandLoop = loopInfo->getLoopFor(exitLandBlk);
+ exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
+
+ // Fold break into the breaking block. Leverage across level breaks.
+ assert(exitingBlks.size() == exitBlks.size());
+ for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
+ iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
+ iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
+ BlockT *exitBlk = *iterExit;
+ BlockT *exitingBlk = *iterExiting;
+ assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
+ LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
+ handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
+ }
+
+ int numBreak = static_cast<int>(exitingBlks.size());
+ numLoopbreakPatternMatch += numBreak;
+ numClonedBlock += numCloned;
+ return numBreak + numSerial + numCloned;
+} //loopbreakPatternMatch
+
+template<class PassT>
+int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
+ BlockT *loopHeader) {
+ int numCont = 0;
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk;
+ for (typename InvBlockGTraits::ChildIteratorType iter =
+ InvBlockGTraits::child_begin(loopHeader),
+ iterEnd = InvBlockGTraits::child_end(loopHeader);
+ iter != iterEnd; ++iter) {
+ BlockT *curBlk = *iter;
+ if (loopRep->contains(curBlk)) {
+ handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk),
+ loopHeader, loopRep);
+ contBlk.push_back(curBlk);
+ ++numCont;
+ }
+ }
+
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator
+ iter = contBlk.begin(), iterEnd = contBlk.end();
+ iter != iterEnd; ++iter) {
+ (*iter)->removeSuccessor(loopHeader);
+ }
+
+ numLoopcontPatternMatch += numCont;
+
+ return numCont;
+} //loopcontPatternMatch
+
+
+template<class PassT>
+bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
+ BlockT *src2Blk) {
+ // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the
+ // same loop with LoopLandInfo without explicitly keeping track of
+ // loopContBlks and loopBreakBlks, this is a method to get the information.
+ //
+ if (src1Blk->succ_size() == 0) {
+ LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
+ if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+ if (theEntry != NULL) {
+ if (DEBUGME) {
+ errs() << "isLoopContBreakBlock yes src1 = BB"
+ << src1Blk->getNumber()
+ << " src2 = BB" << src2Blk->getNumber() << "\n";
+ }
+ return true;
+ }
+ }
+ }
+ return false;
+} //isSameloopDetachedContbreak
+
+template<class PassT>
+int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk) {
+ int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
+ if (num == 0) {
+ if (DEBUGME) {
+ errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
+ }
+ num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
+ }
+ return num;
+}
+
+template<class PassT>
+int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk) {
+ int num = 0;
+ BlockT *downBlk;
+
+ //trueBlk could be the common post dominator
+ downBlk = trueBlk;
+
+ if (DEBUGME) {
+ errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
+ << " true = BB" << trueBlk->getNumber()
+ << ", numSucc=" << trueBlk->succ_size()
+ << " false = BB" << falseBlk->getNumber() << "\n";
+ }
+
+ while (downBlk) {
+ if (DEBUGME) {
+ errs() << "check down = BB" << downBlk->getNumber();
+ }
+
+ if (//postDomTree->dominates(downBlk, falseBlk) &&
+ singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
+ if (DEBUGME) {
+ errs() << " working\n";
+ }
+
+ num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
+ num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
+
+ numClonedBlock += num;
+ num += serialPatternMatch(*headBlk->succ_begin());
+ num += serialPatternMatch(*(++headBlk->succ_begin()));
+ num += ifPatternMatch(headBlk);
+ assert(num > 0); //
+
+ break;
+ }
+ if (DEBUGME) {
+ errs() << " not working\n";
+ }
+ downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
+ } // walk down the postDomTree
+
+ return num;
+} //handleJumpintoIf
+
+template<class PassT>
+void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT *landBlk,
+ bool detail) {
+ errs() << "head = BB" << headBlk->getNumber()
+ << " size = " << headBlk->size();
+ if (detail) {
+ errs() << "\n";
+ headBlk->print(errs());
+ errs() << "\n";
+ }
+
+ if (trueBlk) {
+ errs() << ", true = BB" << trueBlk->getNumber() << " size = "
+ << trueBlk->size() << " numPred = " << trueBlk->pred_size();
+ if (detail) {
+ errs() << "\n";
+ trueBlk->print(errs());
+ errs() << "\n";
+ }
+ }
+ if (falseBlk) {
+ errs() << ", false = BB" << falseBlk->getNumber() << " size = "
+ << falseBlk->size() << " numPred = " << falseBlk->pred_size();
+ if (detail) {
+ errs() << "\n";
+ falseBlk->print(errs());
+ errs() << "\n";
+ }
+ }
+ if (landBlk) {
+ errs() << ", land = BB" << landBlk->getNumber() << " size = "
+ << landBlk->size() << " numPred = " << landBlk->pred_size();
+ if (detail) {
+ errs() << "\n";
+ landBlk->print(errs());
+ errs() << "\n";
+ }
+ }
+
+ errs() << "\n";
+} //showImproveSimpleJumpintoIf
+
+template<class PassT>
+int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT **plandBlk) {
+ bool migrateTrue = false;
+ bool migrateFalse = false;
+
+ BlockT *landBlk = *plandBlk;
+
+ assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
+ && (falseBlk == NULL || falseBlk->succ_size() <= 1));
+
+ if (trueBlk == falseBlk) {
+ return 0;
+ }
+
+#if 0
+ if (DEBUGME) {
+ errs() << "improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
+ }
+#endif
+
+ // unsigned landPredSize = landBlk ? landBlk->pred_size() : 0;
+ // May consider the # landBlk->pred_size() as it represents the number of
+ // assignment initReg = .. needed to insert.
+ migrateTrue = needMigrateBlock(trueBlk);
+ migrateFalse = needMigrateBlock(falseBlk);
+
+ if (!migrateTrue && !migrateFalse) {
+ return 0;
+ }
+
+ // If we need to migrate either trueBlk and falseBlk, migrate the rest that
+ // have more than one predecessors. without doing this, its predecessor
+ // rather than headBlk will have undefined value in initReg.
+ if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
+ migrateTrue = true;
+ }
+ if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
+ migrateFalse = true;
+ }
+
+ if (DEBUGME) {
+ errs() << "before improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
+ //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
+ }
+
+ // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
+ //
+ // new: headBlk => if () {initReg = 1; org trueBlk branch} else
+ // {initReg = 0; org falseBlk branch }
+ // => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
+ // => org landBlk
+ // if landBlk->pred_size() > 2, put the about if-else inside
+ // if (initReg !=2) {...}
+ //
+ // add initReg = initVal to headBlk
+
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+ unsigned initReg =
+ funcRep->getRegInfo().createVirtualRegister(I32RC);
+ if (!migrateTrue || !migrateFalse) {
+ int initVal = migrateTrue ? 0 : 1;
+ CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
+ }
+
+ int numNewBlk = 0;
+
+ if (landBlk == NULL) {
+ landBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(landBlk); //insert to function
+
+ if (trueBlk) {
+ trueBlk->addSuccessor(landBlk);
+ } else {
+ headBlk->addSuccessor(landBlk);
+ }
+
+ if (falseBlk) {
+ falseBlk->addSuccessor(landBlk);
+ } else {
+ headBlk->addSuccessor(landBlk);
+ }
+
+ numNewBlk ++;
+ }
+
+ bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
+
+ //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
+ typename BlockT::iterator insertPos =
+ CFGTraits::getInstrPos
+ (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep));
+
+ if (landBlkHasOtherPred) {
+ unsigned immReg =
+ funcRep->getRegInfo().createVirtualRegister(I32RC);
+ CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
+ unsigned cmpResReg =
+ funcRep->getRegInfo().createVirtualRegister(I32RC);
+
+ CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
+ initReg, immReg);
+ CFGTraits::insertCondBranchBefore(landBlk, insertPos,
+ AMDGPU::IF_LOGICALZ_i32, passRep,
+ cmpResReg, DebugLoc());
+ }
+
+ CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_LOGICALNZ_i32,
+ passRep, initReg, DebugLoc());
+
+ if (migrateTrue) {
+ migrateInstruction(trueBlk, landBlk, insertPos);
+ // need to uncondionally insert the assignment to ensure a path from its
+ // predecessor rather than headBlk has valid value in initReg if
+ // (initVal != 1).
+ CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
+ }
+ CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep);
+
+ if (migrateFalse) {
+ migrateInstruction(falseBlk, landBlk, insertPos);
+ // need to uncondionally insert the assignment to ensure a path from its
+ // predecessor rather than headBlk has valid value in initReg if
+ // (initVal != 0)
+ CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
+ }
+ //CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
+
+ if (landBlkHasOtherPred) {
+ // add endif
+ CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
+
+ // put initReg = 2 to other predecessors of landBlk
+ for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
+ predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
+ ++predIter) {
+ BlockT *curBlk = *predIter;
+ if (curBlk != trueBlk && curBlk != falseBlk) {
+ CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
+ }
+ } //for
+ }
+ if (DEBUGME) {
+ errs() << "result from improveSimpleJumpintoIf: ";
+ showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
+ //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
+ }
+
+ // update landBlk
+ *plandBlk = landBlk;
+
+ return numNewBlk;
+} //improveSimpleJumpintoIf
+
+template<class PassT>
+void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
+ LoopT *exitingLoop,
+ BlockT *exitBlk,
+ LoopT *exitLoop,
+ BlockT *landBlk) {
+ if (DEBUGME) {
+ errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
+ << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
+ }
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+
+ RegiT initReg = INVALIDREGNUM;
+ if (exitingLoop != exitLoop) {
+ initReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
+ assert(initReg != INVALIDREGNUM);
+ addLoopBreakInitReg(exitLoop, initReg);
+ while (exitingLoop != exitLoop && exitingLoop) {
+ addLoopBreakOnReg(exitingLoop, initReg);
+ exitingLoop = exitingLoop->getParentLoop();
+ }
+ assert(exitingLoop == exitLoop);
+ }
+
+ mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg);
+
+} //handleLoopbreak
+
+template<class PassT>
+void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
+ LoopT *contingLoop,
+ BlockT *contBlk,
+ LoopT *contLoop) {
+ if (DEBUGME) {
+ errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
+ << " header = BB" << contBlk->getNumber() << "\n";
+
+ errs() << "Trying to continue loop-depth = "
+ << getLoopDepth(contLoop)
+ << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
+ }
+
+ RegiT initReg = INVALIDREGNUM;
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+ if (contingLoop != contLoop) {
+ initReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
+ assert(initReg != INVALIDREGNUM);
+ addLoopContInitReg(contLoop, initReg);
+ while (contingLoop && contingLoop->getParentLoop() != contLoop) {
+ addLoopBreakOnReg(contingLoop, initReg); //not addLoopContOnReg
+ contingLoop = contingLoop->getParentLoop();
+ }
+ assert(contingLoop && contingLoop->getParentLoop() == contLoop);
+ addLoopContOnReg(contingLoop, initReg);
+ }
+
+ settleLoopcontBlock(contingBlk, contBlk, initReg);
+ //contingBlk->removeSuccessor(loopHeader);
+} //handleLoopcontBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
+ if (DEBUGME) {
+ errs() << "serialPattern BB" << dstBlk->getNumber()
+ << " <= BB" << srcBlk->getNumber() << "\n";
+ }
+ //removeUnconditionalBranch(dstBlk);
+ dstBlk->splice(dstBlk->end(), srcBlk, FirstNonDebugInstr(srcBlk), srcBlk->end());
+
+ dstBlk->removeSuccessor(srcBlk);
+ CFGTraits::cloneSuccessorList(dstBlk, srcBlk);
+
+ removeSuccessor(srcBlk);
+ retireBlock(dstBlk, srcBlk);
+} //mergeSerialBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
+ BlockT *curBlk,
+ BlockT *trueBlk,
+ BlockT *falseBlk,
+ BlockT *landBlk) {
+ if (DEBUGME) {
+ errs() << "ifPattern BB" << curBlk->getNumber();
+ errs() << "{ ";
+ if (trueBlk) {
+ errs() << "BB" << trueBlk->getNumber();
+ }
+ errs() << " } else ";
+ errs() << "{ ";
+ if (falseBlk) {
+ errs() << "BB" << falseBlk->getNumber();
+ }
+ errs() << " }\n ";
+ errs() << "landBlock: ";
+ if (landBlk == NULL) {
+ errs() << "NULL";
+ } else {
+ errs() << "BB" << landBlk->getNumber();
+ }
+ errs() << "\n";
+ }
+
+ int oldOpcode = branchInstr->getOpcode();
+ DebugLoc branchDL = branchInstr->getDebugLoc();
+
+// transform to
+// if cond
+// trueBlk
+// else
+// falseBlk
+// endif
+// landBlk
+
+ typename BlockT::iterator branchInstrPos =
+ CFGTraits::getInstrPos(curBlk, branchInstr);
+ CFGTraits::insertCondBranchBefore(branchInstrPos,
+ CFGTraits::getBranchNzeroOpcode(oldOpcode),
+ passRep,
+ branchDL);
+
+ if (trueBlk) {
+ curBlk->splice(branchInstrPos, trueBlk, FirstNonDebugInstr(trueBlk), trueBlk->end());
+ curBlk->removeSuccessor(trueBlk);
+ if (landBlk && trueBlk->succ_size()!=0) {
+ trueBlk->removeSuccessor(landBlk);
+ }
+ retireBlock(curBlk, trueBlk);
+ }
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep);
+
+ if (falseBlk) {
+ curBlk->splice(branchInstrPos, falseBlk, FirstNonDebugInstr(falseBlk),
+ falseBlk->end());
+ curBlk->removeSuccessor(falseBlk);
+ if (landBlk && falseBlk->succ_size() != 0) {
+ falseBlk->removeSuccessor(landBlk);
+ }
+ retireBlock(curBlk, falseBlk);
+ }
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
+
+ //curBlk->remove(branchInstrPos);
+ branchInstr->eraseFromParent();
+
+ if (landBlk && trueBlk && falseBlk) {
+ curBlk->addSuccessor(landBlk);
+ }
+
+} //mergeIfthenelseBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
+ LoopLandInfo *loopLand) {
+ BlockT *landBlk = loopLand->landBlk;
+
+ if (DEBUGME) {
+ errs() << "loopPattern header = BB" << dstBlk->getNumber()
+ << " land = BB" << landBlk->getNumber() << "\n";
+ }
+
+ // Loop contInitRegs are init at the beginning of the loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->contInitRegs.begin(),
+ iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+ }
+
+ /* we last inserterd the DebugLoc in the
+ * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk.
+ * search for the DebugLoc in the that statement.
+ * if not found, we have to insert the empty/default DebugLoc */
+ InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
+ DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
+
+ CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak);
+ // Loop breakInitRegs are init before entering the loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->breakInitRegs.begin(),
+ iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter)
+ {
+ CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+ }
+ // Loop endbranchInitRegs are init before entering the loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->endbranchInitRegs.begin(),
+ iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
+ }
+
+ /* we last inserterd the DebugLoc in the continue statement in the current dstBlk
+ * search for the DebugLoc in the continue statement.
+ * if not found, we have to insert the empty/default DebugLoc */
+ InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
+ DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
+
+ CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue);
+ // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this
+ // loop.
+ for (typename std::set<RegiT>::const_iterator iter =
+ loopLand->breakOnRegs.begin(),
+ iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::BREAK_LOGICALNZ_i32, passRep,
+ *iter);
+ }
+
+ // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this
+ // loop.
+ for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
+ iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
+ CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32,
+ passRep, *iter);
+ }
+
+ dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
+
+ for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
+ iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
+ dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of.
+ }
+
+ removeSuccessor(landBlk);
+ retireBlock(dstBlk, landBlk);
+} //mergeLooplandBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
+ BlockT *exitBlk,
+ BlockT *exitLandBlk,
+ RegiT setReg) {
+ if (DEBUGME) {
+ errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
+ << " exit = BB" << exitBlk->getNumber()
+ << " land = BB" << exitLandBlk->getNumber() << "\n";
+ }
+
+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
+ assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
+
+ DebugLoc DL = branchInstr->getDebugLoc();
+
+ BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
+ int oldOpcode = branchInstr->getOpcode();
+
+ // transform exitingBlk to
+ // if ( ) {
+ // exitBlk (if exitBlk != exitLandBlk)
+ // setReg = 1
+ // break
+ // }endif
+ // successor = {orgSuccessor(exitingBlk) - exitBlk}
+
+ typename BlockT::iterator branchInstrPos =
+ CFGTraits::getInstrPos(exitingBlk, branchInstr);
+
+ if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
+ //break_logical
+ int newOpcode =
+ (trueBranch == exitBlk) ? CFGTraits::getBreakNzeroOpcode(oldOpcode)
+ : CFGTraits::getBreakZeroOpcode(oldOpcode);
+ CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
+ } else {
+ int newOpcode =
+ (trueBranch == exitBlk) ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
+ : CFGTraits::getBranchZeroOpcode(oldOpcode);
+ CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
+ if (exitBlk != exitLandBlk) {
+ //splice is insert-before ...
+ exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
+ exitBlk->end());
+ }
+ if (setReg != INVALIDREGNUM) {
+ CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
+ }
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep);
+ CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
+ } //if_logical
+
+ //now branchInst can be erase safely
+ //exitingBlk->eraseFromParent(branchInstr);
+ branchInstr->eraseFromParent();
+
+ //now take care of successors, retire blocks
+ exitingBlk->removeSuccessor(exitBlk);
+ if (exitBlk != exitLandBlk) {
+ //splice is insert-before ...
+ exitBlk->removeSuccessor(exitLandBlk);
+ retireBlock(exitingBlk, exitBlk);
+ }
+
+} //mergeLoopbreakBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
+ BlockT *contBlk,
+ RegiT setReg) {
+ if (DEBUGME) {
+ errs() << "settleLoopcontBlock conting = BB"
+ << contingBlk->getNumber()
+ << ", cont = BB" << contBlk->getNumber() << "\n";
+ }
+
+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
+ if (branchInstr) {
+ assert(CFGTraits::isCondBranch(branchInstr));
+ typename BlockT::iterator branchInstrPos =
+ CFGTraits::getInstrPos(contingBlk, branchInstr);
+ BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
+ int oldOpcode = branchInstr->getOpcode();
+ DebugLoc DL = branchInstr->getDebugLoc();
+
+ // transform contingBlk to
+ // if () {
+ // move instr after branchInstr
+ // continue
+ // or
+ // setReg = 1
+ // break
+ // }endif
+ // successor = {orgSuccessor(contingBlk) - loopHeader}
+
+ bool useContinueLogical =
+ (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);
+
+ if (useContinueLogical == false)
+ {
+ int branchOpcode =
+ trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
+ : CFGTraits::getBranchZeroOpcode(oldOpcode);
+
+ CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
+
+ if (setReg != INVALIDREGNUM) {
+ CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
+ // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL);
+ } else {
+ // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL);
+ }
+
+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL);
+ } else {
+ int branchOpcode =
+ trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
+ : CFGTraits::getContinueZeroOpcode(oldOpcode);
+
+ CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
+ }
+
+ //contingBlk->eraseFromParent(branchInstr);
+ branchInstr->eraseFromParent();
+ } else {
+ /* if we've arrived here then we've already erased the branch instruction
+ * travel back up the basic block to see the last reference of our debug location
+ * we've just inserted that reference here so it should be representative */
+ if (setReg != INVALIDREGNUM) {
+ CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
+ // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
+ } else {
+ // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
+ CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
+ }
+ } //else
+
+} //settleLoopcontBlock
+
+// BBs in exitBlkSet are determined as in break-path for loopRep,
+// before we can put code for BBs as inside loop-body for loopRep
+// check whether those BBs are determined as cont-BB for parentLoopRep
+// earlier.
+// If so, generate a new BB newBlk
+// (1) set newBlk common successor of BBs in exitBlkSet
+// (2) change the continue-instr in BBs in exitBlkSet to break-instr
+// (3) generate continue-instr in newBlk
+//
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
+ LoopT *loopRep,
+ std::set<BlockT *> &exitBlkSet,
+ BlockT *exitLandBlk) {
+ std::set<BlockT *> endBlkSet;
+
+// BlockT *parentLoopHead = parentLoopRep->getHeader();
+
+
+ for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
+ iterEnd = exitBlkSet.end();
+ iter != iterEnd; ++iter) {
+ BlockT *exitBlk = *iter;
+ BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk);
+
+ if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL)
+ return NULL;
+
+ endBlkSet.insert(endBlk);
+ }
+
+ BlockT *newBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(newBlk); //insert to function
+ CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep);
+ SHOWNEWBLK(newBlk, "New continue block: ");
+
+ for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(),
+ iterEnd = endBlkSet.end();
+ iter != iterEnd; ++iter) {
+ BlockT *endBlk = *iter;
+ InstrT *contInstr = CFGTraits::getContinueInstr(endBlk);
+ if (contInstr) {
+ contInstr->eraseFromParent();
+ }
+ endBlk->addSuccessor(newBlk);
+ if (DEBUGME) {
+ errs() << "Add new continue Block to BB"
+ << endBlk->getNumber() << " successors\n";
+ }
+ }
+
+ return newBlk;
+} //relocateLoopcontBlock
+
+
+// LoopEndbranchBlock is a BB created by the CFGStructurizer to use as
+// LoopLandBlock. This BB branch on the loop endBranchInit register to the
+// pathes corresponding to the loop exiting branches.
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
+ BlockTSmallerVector &exitingBlks,
+ BlockTSmallerVector &exitBlks) {
+ const AMDILInstrInfo *tii =
+ static_cast<const AMDILInstrInfo *>(passRep->getTargetInstrInfo());
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+
+ RegiT endBranchReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
+ assert(endBranchReg >= 0);
+
+ // reg = 0 before entering the loop
+ addLoopEndbranchInitReg(loopRep, endBranchReg);
+
+ uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
+ assert(numBlks >=2 && numBlks == exitBlks.size());
+
+ BlockT *preExitingBlk = exitingBlks[0];
+ BlockT *preExitBlk = exitBlks[0];
+ BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(preBranchBlk); //insert to function
+ SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
+
+ BlockT *newLandBlk = preBranchBlk;
+
+ CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
+ newLandBlk);
+ preExitingBlk->removeSuccessor(preExitBlk);
+ preExitingBlk->addSuccessor(newLandBlk);
+
+ //it is redundant to add reg = 0 to exitingBlks[0]
+
+ // For 1..n th exiting path (the last iteration handles two pathes) create the
+ // branch to the previous path and the current path.
+ for (uint32_t i = 1; i < numBlks; ++i) {
+ BlockT *curExitingBlk = exitingBlks[i];
+ BlockT *curExitBlk = exitBlks[i];
+ BlockT *curBranchBlk;
+
+ if (i == numBlks - 1) {
+ curBranchBlk = curExitBlk;
+ } else {
+ curBranchBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(curBranchBlk); //insert to function
+ SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
+ }
+
+ // Add reg = i to exitingBlks[i].
+ CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
+ endBranchReg, i);
+
+ // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge
+ // (exitingBlks[i], newLandBlk).
+ CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
+ newLandBlk);
+ curExitingBlk->removeSuccessor(curExitBlk);
+ curExitingBlk->addSuccessor(newLandBlk);
+
+ // add to preBranchBlk the branch instruction:
+ // if (endBranchReg == preVal)
+ // preExitBlk
+ // else
+ // curBranchBlk
+ //
+ // preValReg = i - 1
+
+ DebugLoc DL;
+ RegiT preValReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
+
+ preBranchBlk->insert(preBranchBlk->begin(),
+ tii->getMovImmInstr(preBranchBlk->getParent(), preValReg,
+ i - 1));
+
+ // condResReg = (endBranchReg == preValReg)
+ RegiT condResReg = static_cast<int>
+ (funcRep->getRegInfo().createVirtualRegister(I32RC));
+ BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg)
+ .addReg(endBranchReg).addReg(preValReg);
+
+ BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32))
+ .addMBB(preExitBlk).addReg(condResReg);
+
+ preBranchBlk->addSuccessor(preExitBlk);
+ preBranchBlk->addSuccessor(curBranchBlk);
+
+ // Update preExitingBlk, preExitBlk, preBranchBlk.
+ preExitingBlk = curExitingBlk;
+ preExitBlk = curExitBlk;
+ preBranchBlk = curBranchBlk;
+
+ } //end for 1 .. n blocks
+
+ return newLandBlk;
+} //addLoopEndbranchBlock
+
+template<class PassT>
+typename CFGStructurizer<PassT>::PathToKind
+CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
+ bool allowSideEntry) {
+ assert(dstBlk);
+
+ if (srcBlk == dstBlk) {
+ return SinglePath_InPath;
+ }
+
+ while (srcBlk && srcBlk->succ_size() == 1) {
+ srcBlk = *srcBlk->succ_begin();
+ if (srcBlk == dstBlk) {
+ return SinglePath_InPath;
+ }
+
+ if (!allowSideEntry && srcBlk->pred_size() > 1) {
+ return Not_SinglePath;
+ }
+ }
+
+ if (srcBlk && srcBlk->succ_size()==0) {
+ return SinglePath_NotInPath;
+ }
+
+ return Not_SinglePath;
+} //singlePathTo
+
+// If there is a single path from srcBlk to dstBlk, return the last block before
+// dstBlk If there is a single path from srcBlk->end without dstBlk, return the
+// last block in the path Otherwise, return NULL
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
+ bool allowSideEntry) {
+ assert(dstBlk);
+
+ if (srcBlk == dstBlk) {
+ return srcBlk;
+ }
+
+ if (srcBlk->succ_size() == 0) {
+ return srcBlk;
+ }
+
+ while (srcBlk && srcBlk->succ_size() == 1) {
+ BlockT *preBlk = srcBlk;
+
+ srcBlk = *srcBlk->succ_begin();
+ if (srcBlk == NULL) {
+ return preBlk;
+ }
+
+ if (!allowSideEntry && srcBlk->pred_size() > 1) {
+ return NULL;
+ }
+ }
+
+ if (srcBlk && srcBlk->succ_size()==0) {
+ return srcBlk;
+ }
+
+ return NULL;
+
+} //singlePathEnd
+
+template<class PassT>
+int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
+ BlockT *dstBlk) {
+ int cloned = 0;
+ assert(preBlk->isSuccessor(srcBlk));
+ while (srcBlk && srcBlk != dstBlk) {
+ assert(srcBlk->succ_size() == 1);
+ if (srcBlk->pred_size() > 1) {
+ srcBlk = cloneBlockForPredecessor(srcBlk, preBlk);
+ ++cloned;
+ }
+
+ preBlk = srcBlk;
+ srcBlk = *srcBlk->succ_begin();
+ }
+
+ return cloned;
+} //cloneOnSideEntryTo
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
+ BlockT *predBlk) {
+ assert(predBlk->isSuccessor(curBlk) &&
+ "succBlk is not a prececessor of curBlk");
+
+ BlockT *cloneBlk = CFGTraits::clone(curBlk); //clone instructions
+ CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
+ //srcBlk, oldBlk, newBlk
+
+ predBlk->removeSuccessor(curBlk);
+ predBlk->addSuccessor(cloneBlk);
+
+ // add all successor to cloneBlk
+ CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
+
+ numClonedInstr += curBlk->size();
+
+ if (DEBUGME) {
+ errs() << "Cloned block: " << "BB"
+ << curBlk->getNumber() << "size " << curBlk->size() << "\n";
+ }
+
+ SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
+
+ return cloneBlk;
+} //cloneBlockForPredecessor
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
+ BlockT *exitingBlk) {
+ BlockT *exitBlk = NULL;
+
+ for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(),
+ iterSuccEnd = exitingBlk->succ_end();
+ iterSucc != iterSuccEnd; ++iterSucc) {
+ BlockT *curBlk = *iterSucc;
+ if (!loopRep->contains(curBlk)) {
+ assert(exitBlk == NULL);
+ exitBlk = curBlk;
+ }
+ }
+
+ assert(exitBlk != NULL);
+
+ return exitBlk;
+} //exitingBlock2ExitBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
+ BlockT *dstBlk,
+ InstrIterator insertPos) {
+ InstrIterator spliceEnd;
+ //look for the input branchinstr, not the AMDIL branchinstr
+ InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
+ if (branchInstr == NULL) {
+ if (DEBUGME) {
+ errs() << "migrateInstruction don't see branch instr\n" ;
+ }
+ spliceEnd = srcBlk->end();
+ } else {
+ if (DEBUGME) {
+ errs() << "migrateInstruction see branch instr\n" ;
+ branchInstr->dump();
+ }
+ spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
+ }
+ if (DEBUGME) {
+ errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
+ << "srcSize = " << srcBlk->size() << "\n";
+ }
+
+ //splice insert before insertPos
+ dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
+
+ if (DEBUGME) {
+ errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
+ << "srcSize = " << srcBlk->size() << "\n";
+ }
+} //migrateInstruction
+
+// normalizeInfiniteLoopExit change
+// B1:
+// uncond_br LoopHeader
+//
+// to
+// B1:
+// cond_br 1 LoopHeader dummyExit
+// and return the newly added dummy exit block
+//
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
+ BlockT *loopHeader;
+ BlockT *loopLatch;
+ loopHeader = LoopRep->getHeader();
+ loopLatch = LoopRep->getLoopLatch();
+ BlockT *dummyExitBlk = NULL;
+ const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
+ if (loopHeader!=NULL && loopLatch!=NULL) {
+ InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
+ if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
+ dummyExitBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(dummyExitBlk); //insert to function
+ SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
+
+ if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";
+
+ typename BlockT::iterator insertPos =
+ CFGTraits::getInstrPos(loopLatch, branchInstr);
+ unsigned immReg =
+ funcRep->getRegInfo().createVirtualRegister(I32RC);
+ CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
+ InstrT *newInstr =
+ CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep);
+ MachineInstrBuilder(newInstr).addMBB(loopHeader).addReg(immReg, false);
+
+ SHOWNEWINSTR(newInstr);
+
+ branchInstr->eraseFromParent();
+ loopLatch->addSuccessor(dummyExitBlk);
+ }
+ }
+
+ return dummyExitBlk;
+} //normalizeInfiniteLoopExit
+
+template<class PassT>
+void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
+ InstrT *branchInstr;
+
+ // I saw two unconditional branch in one basic block in example
+ // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
+ while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
+ && CFGTraits::isUncondBranch(branchInstr)) {
+ if (DEBUGME) {
+ errs() << "Removing unconditional branch instruction" ;
+ branchInstr->dump();
+ }
+ branchInstr->eraseFromParent();
+ }
+} //removeUnconditionalBranch
+
+template<class PassT>
+void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
+ if (srcBlk->succ_size() == 2) {
+ BlockT *blk1 = *srcBlk->succ_begin();
+ BlockT *blk2 = *(++srcBlk->succ_begin());
+
+ if (blk1 == blk2) {
+ InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
+ assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
+ if (DEBUGME) {
+ errs() << "Removing unneeded conditional branch instruction" ;
+ branchInstr->dump();
+ }
+ branchInstr->eraseFromParent();
+ SHOWNEWBLK(blk1, "Removing redundant successor");
+ srcBlk->removeSuccessor(blk1);
+ }
+ }
+} //removeRedundantConditionalBranch
+
+template<class PassT>
+void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
+ DEFAULT_VEC_SLOTS> &retBlks) {
+ BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(dummyExitBlk); //insert to function
+ CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep);
+
+ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter =
+ retBlks.begin(),
+ iterEnd = retBlks.end(); iter != iterEnd; ++iter) {
+ BlockT *curBlk = *iter;
+ InstrT *curInstr = CFGTraits::getReturnInstr(curBlk);
+ if (curInstr) {
+ curInstr->eraseFromParent();
+ }
+#if 0
+ if (curBlk->size()==0 && curBlk->pred_size() == 1) {
+ if (DEBUGME) {
+ errs() << "Replace empty block BB" << curBlk->getNumber()
+ << " with dummyExitBlock\n";
+ }
+ BlockT *predb = *curBlk->pred_begin();
+ predb->removeSuccessor(curBlk);
+ curBlk = predb;
+ } //handle empty curBlk
+#endif
+ curBlk->addSuccessor(dummyExitBlk);
+ if (DEBUGME) {
+ errs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
+ << " successors\n";
+ }
+ } //for
+
+ SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
+} //addDummyExitBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) {
+ while (srcBlk->succ_size()) {
+ srcBlk->removeSuccessor(*srcBlk->succ_begin());
+ }
+}
+
+template<class PassT>
+void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) {
+ BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
+
+ if (srcBlkInfo == NULL) {
+ srcBlkInfo = new BlockInfo();
+ }
+
+ srcBlkInfo->sccNum = sccNum;
+}
+
+template<class PassT>
+int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
+ BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
+ return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM;
+}
+
+template<class PassT>
+void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
+ if (DEBUGME) {
+ errs() << "Retiring BB" << srcBlk->getNumber() << "\n";
+ }
+
+ BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
+
+ if (srcBlkInfo == NULL) {
+ srcBlkInfo = new BlockInfo();
+ }
+
+ srcBlkInfo->isRetired = true;
+ //int i = srcBlk->succ_size();
+ //int j = srcBlk->pred_size();
+ assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
+ && "can't retire block yet");
+}
+
+template<class PassT>
+bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) {
+ BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
+ return (srcBlkInfo && srcBlkInfo->isRetired);
+}
+
+template<class PassT>
+bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) {
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ while (loopRep && loopRep->getHeader() == curBlk) {
+ LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
+
+ if(loopLand == NULL)
+ return true;
+
+ BlockT *landBlk = loopLand->landBlk;
+ assert(landBlk);
+ if (!isRetiredBlock(landBlk)) {
+ return true;
+ }
+
+ loopRep = loopRep->getParentLoop();
+ }
+
+ return false;
+} //isActiveLoophead
+
+template<class PassT>
+bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) {
+ const unsigned blockSizeThreshold = 30;
+ const unsigned cloneInstrThreshold = 100;
+
+ bool multiplePreds = blk && (blk->pred_size() > 1);
+
+ if(!multiplePreds)
+ return false;
+
+ unsigned blkSize = blk->size();
+ return ((blkSize > blockSizeThreshold)
+ && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold));
+} //needMigrateBlock
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
+ BlockTSmallerVector &exitBlks,
+ std::set<BlockT *> &exitBlkSet) {
+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks; //in exit path blocks
+
+ for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
+ predIterEnd = landBlk->pred_end();
+ predIter != predIterEnd; ++predIter) {
+ BlockT *curBlk = *predIter;
+ if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
+ inpathBlks.push_back(curBlk);
+ }
+ } //for
+
+ //if landBlk has predecessors that are not in the given loop,
+ //create a new block
+ BlockT *newLandBlk = landBlk;
+ if (inpathBlks.size() != landBlk->pred_size()) {
+ newLandBlk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(newLandBlk); //insert to function
+ newLandBlk->addSuccessor(landBlk);
+ for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter =
+ inpathBlks.begin(),
+ iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
+ BlockT *curBlk = *iter;
+ CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
+ //srcBlk, oldBlk, newBlk
+ curBlk->removeSuccessor(landBlk);
+ curBlk->addSuccessor(newLandBlk);
+ }
+ for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
+ if (exitBlks[i] == landBlk) {
+ exitBlks[i] = newLandBlk;
+ }
+ }
+ SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
+ }
+
+ setLoopLandBlock(loopRep, newLandBlk);
+
+ return newLandBlk;
+} // recordLoopbreakLand
+
+template<class PassT>
+void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ assert(theEntry->landBlk == NULL);
+
+ if (blk == NULL) {
+ blk = funcRep->CreateMachineBasicBlock();
+ funcRep->push_back(blk); //insert to function
+ SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: ");
+ }
+
+ theEntry->landBlk = blk;
+
+ if (DEBUGME) {
+ errs() << "setLoopLandBlock loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " landing-block = BB" << blk->getNumber() << "\n";
+ }
+} // setLoopLandBlock
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+
+ theEntry->breakOnRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopBreakOnReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopBreakOnReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->contOnRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopContOnReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopContOnReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->breakInitRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopBreakInitReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopBreakInitReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->contInitRegs.insert(regNum);
+
+ if (DEBUGME) {
+ errs() << "addLoopContInitReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopContInitReg
+
+template<class PassT>
+void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
+ RegiT regNum) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ if (theEntry == NULL) {
+ theEntry = new LoopLandInfo();
+ }
+ theEntry->endbranchInitRegs.insert(regNum);
+
+ if (DEBUGME)
+ {
+ errs() << "addLoopEndbranchInitReg loop-header = BB"
+ << loopRep->getHeader()->getNumber()
+ << " regNum = " << regNum << "\n";
+ }
+} // addLoopEndbranchInitReg
+
+template<class PassT>
+typename CFGStructurizer<PassT>::LoopLandInfo *
+CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ return theEntry;
+} // getLoopLandInfo
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
+ LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
+
+ return theEntry ? theEntry->landBlk : NULL;
+} // getLoopLandBlock
+
+
+template<class PassT>
+bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) {
+ LoopT *loopRep = loopInfo->getLoopFor(curBlk);
+ if (loopRep == NULL)
+ return false;
+
+ BlockT *loopHeader = loopRep->getHeader();
+
+ return curBlk->isSuccessor(loopHeader);
+
+} //hasBackEdge
+
+template<class PassT>
+unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) {
+ return loopRep ? loopRep->getLoopDepth() : 0;
+} //getLoopDepth
+
+template<class PassT>
+int CFGStructurizer<PassT>::countActiveBlock
+(typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart,
+ typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) {
+ int count = 0;
+ while (iterStart != iterEnd) {
+ if (!isRetiredBlock(*iterStart)) {
+ ++count;
+ }
+ ++iterStart;
+ }
+
+ return count;
+} //countActiveBlock
+
+// This is work around solution for findNearestCommonDominator not avaiable to
+// post dom a proper fix should go to Dominators.h.
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT*
+CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {
+
+ if (postDomTree->dominates(blk1, blk2)) {
+ return blk1;
+ }
+ if (postDomTree->dominates(blk2, blk1)) {
+ return blk2;
+ }
+
+ DomTreeNodeT *node1 = postDomTree->getNode(blk1);
+ DomTreeNodeT *node2 = postDomTree->getNode(blk2);
+
+ // Handle newly cloned node.
+ if (node1 == NULL && blk1->succ_size() == 1) {
+ return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
+ }
+ if (node2 == NULL && blk2->succ_size() == 1) {
+ return findNearestCommonPostDom(blk1, *blk2->succ_begin());
+ }
+
+ if (node1 == NULL || node2 == NULL) {
+ return NULL;
+ }
+
+ node1 = node1->getIDom();
+ while (node1) {
+ if (postDomTree->dominates(node1, node2)) {
+ return node1->getBlock();
+ }
+ node1 = node1->getIDom();
+ }
+
+ return NULL;
+}
+
+template<class PassT>
+typename CFGStructurizer<PassT>::BlockT *
+CFGStructurizer<PassT>::findNearestCommonPostDom
+(typename std::set<BlockT *> &blks) {
+ BlockT *commonDom;
+ typename std::set<BlockT *>::const_iterator iter = blks.begin();
+ typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
+ for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) {
+ BlockT *curBlk = *iter;
+ if (curBlk != commonDom) {
+ commonDom = findNearestCommonPostDom(curBlk, commonDom);
+ }
+ }
+
+ if (DEBUGME) {
+ errs() << "Common post dominator for exit blocks is ";
+ if (commonDom) {
+ errs() << "BB" << commonDom->getNumber() << "\n";
+ } else {
+ errs() << "NULL\n";
+ }
+ }
+
+ return commonDom;
+} //findNearestCommonPostDom
+
+} //end namespace llvm
+
+//todo: move-end
+
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructurizer for AMDIL
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm
+{
+class AMDILCFGStructurizer : public MachineFunctionPass
+{
+public:
+ typedef MachineInstr InstructionType;
+ typedef MachineFunction FunctionType;
+ typedef MachineBasicBlock BlockType;
+ typedef MachineLoopInfo LoopinfoType;
+ typedef MachineDominatorTree DominatortreeType;
+ typedef MachinePostDominatorTree PostDominatortreeType;
+ typedef MachineDomTreeNode DomTreeNodeType;
+ typedef MachineLoop LoopType;
+
+protected:
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+ const AMDILRegisterInfo *TRI;
+
+public:
+ AMDILCFGStructurizer(char &pid, TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
+ const TargetInstrInfo *getTargetInstrInfo() const;
+ //bool runOnMachineFunction(MachineFunction &F);
+
+private:
+
+}; //end of class AMDILCFGStructurizer
+
+//char AMDILCFGStructurizer::ID = 0;
+} //end of namespace llvm
+AMDILCFGStructurizer::AMDILCFGStructurizer(char &pid, TargetMachine &tm
+ AMDIL_OPT_LEVEL_DECL)
+: MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()),
+ TRI(static_cast<const AMDILRegisterInfo *>(tm.getRegisterInfo())
+ ) {
+}
+
+const TargetInstrInfo *AMDILCFGStructurizer::getTargetInstrInfo() const {
+ return TII;
+}
+//===----------------------------------------------------------------------===//
+//
+// CFGPrepare
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm
+{
+class AMDILCFGPrepare : public AMDILCFGStructurizer
+{
+public:
+ static char ID;
+
+public:
+ AMDILCFGPrepare(TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
+
+ virtual const char *getPassName() const;
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+private:
+
+}; //end of class AMDILCFGPrepare
+
+char AMDILCFGPrepare::ID = 0;
+} //end of namespace llvm
+
+AMDILCFGPrepare::AMDILCFGPrepare(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
+ : AMDILCFGStructurizer(ID, tm AMDIL_OPT_LEVEL_VAR)
+{
+}
+const char *AMDILCFGPrepare::getPassName() const {
+ return "AMD IL Control Flow Graph Preparation Pass";
+}
+
+void AMDILCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+}
+
+//===----------------------------------------------------------------------===//
+//
+// CFGPerform
+//
+//===----------------------------------------------------------------------===//
+
+
+using namespace llvmCFGStruct;
+
+namespace llvm
+{
+class AMDILCFGPerform : public AMDILCFGStructurizer
+{
+public:
+ static char ID;
+
+public:
+ AMDILCFGPerform(TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
+ virtual const char *getPassName() const;
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ bool runOnMachineFunction(MachineFunction &F);
+
+private:
+
+}; //end of class AMDILCFGPerform
+
+char AMDILCFGPerform::ID = 0;
+} //end of namespace llvm
+
+ AMDILCFGPerform::AMDILCFGPerform(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
+: AMDILCFGStructurizer(ID, tm AMDIL_OPT_LEVEL_VAR)
+{
+}
+
+const char *AMDILCFGPerform::getPassName() const {
+ return "AMD IL Control Flow Graph structurizer Pass";
+}
+
+void AMDILCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+}
+
+//===----------------------------------------------------------------------===//
+//
+// CFGStructTraits<AMDILCFGStructurizer>
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvmCFGStruct
+{
+// this class is tailor to the AMDIL backend
+template<>
+struct CFGStructTraits<AMDILCFGStructurizer>
+{
+ typedef int RegiT;
+
+ static int getBreakNzeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::BREAK_LOGICALNZ);
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+ static int getBreakZeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::BREAK_LOGICALZ);
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+ static int getBranchNzeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALNZ);
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+ static int getBranchZeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALZ);
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+ static int getContinueNzeroOpcode(int oldOpcode)
+ {
+ switch(oldOpcode) {
+ ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::CONTINUE_LOGICALNZ);
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+ static int getContinueZeroOpcode(int oldOpcode) {
+ switch(oldOpcode) {
+ ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::CONTINUE_LOGICALZ);
+ default:
+ assert(0 && "internal error");
+ };
+ return -1;
+ }
+
+// the explicitly represented branch target is the true branch target
+#define getExplicitBranch getTrueBranch
+#define setExplicitBranch setTrueBranch
+
+ static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
+ return instr->getOperand(0).getMBB();
+ }
+
+ static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
+ instr->getOperand(0).setMBB(blk);
+ }
+
+ static MachineBasicBlock *
+ getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
+ assert(blk->succ_size() == 2);
+ MachineBasicBlock *trueBranch = getTrueBranch(instr);
+ MachineBasicBlock::succ_iterator iter = blk->succ_begin();
+ MachineBasicBlock::succ_iterator iterNext = iter;
+ ++iterNext;
+
+ return (*iter == trueBranch) ? *iterNext : *iter;
+ }
+
+ static bool isCondBranch(MachineInstr *instr) {
+ switch (instr->getOpcode()) {
+ ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
+ break;
+ default:
+ return false;
+ }
+ return true;
+ }
+
+ static bool isUncondBranch(MachineInstr *instr) {
+ switch (instr->getOpcode()) {
+ case AMDGPU::BRANCH:
+ break;
+ default:
+ return false;
+ }
+ return true;
+ }
+
+ static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
+ //get DebugLoc from the first MachineBasicBlock instruction with debug info
+ DebugLoc DL;
+ for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
+ MachineInstr *instr = &(*iter);
+ if (instr->getDebugLoc().isUnknown() == false) {
+ DL = instr->getDebugLoc();
+ }
+ }
+ return DL;
+ }
+
+ static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
+ MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+ MachineInstr *instr = &*iter;
+ if (instr && (isCondBranch(instr) || isUncondBranch(instr))) {
+ return instr;
+ }
+ return NULL;
+ }
+
+ // The correct naming for this is getPossibleLoopendBlockBranchInstr.
+ //
+ // BB with backward-edge could have move instructions after the branch
+ // instruction. Such move instruction "belong to" the loop backward-edge.
+ //
+ static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
+ const AMDILInstrInfo * TII = static_cast<const AMDILInstrInfo *>(
+ blk->getParent()->getTarget().getInstrInfo());
+
+ for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(),
+ iterEnd = blk->rend(); iter != iterEnd; ++iter) {
+ // FIXME: Simplify
+ MachineInstr *instr = &*iter;
+ if (instr) {
+ if (isCondBranch(instr) || isUncondBranch(instr)) {
+ return instr;
+ } else if (!TII->isMov(instr->getOpcode())) {
+ break;
+ }
+ }
+ }
+ return NULL;
+ }
+
+ static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
+ MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+ if (iter != blk->rend()) {
+ MachineInstr *instr = &(*iter);
+ if (instr->getOpcode() == AMDGPU::RETURN) {
+ return instr;
+ }
+ }
+ return NULL;
+ }
+
+ static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
+ MachineBasicBlock::reverse_iterator iter = blk->rbegin();
+ if (iter != blk->rend()) {
+ MachineInstr *instr = &(*iter);
+ if (instr->getOpcode() == AMDGPU::CONTINUE) {
+ return instr;
+ }
+ }
+ return NULL;
+ }
+
+ static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
+ for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) {
+ MachineInstr *instr = &(*iter);
+ if ((instr->getOpcode() == AMDGPU::BREAK_LOGICALNZ_i32) || (instr->getOpcode() == AMDGPU::BREAK_LOGICALZ_i32)) {
+ return instr;
+ }
+ }
+ return NULL;
+ }
+
+ static bool isReturnBlock(MachineBasicBlock *blk) {
+ MachineInstr *instr = getReturnInstr(blk);
+ bool isReturn = (blk->succ_size() == 0);
+ if (instr) {
+ assert(isReturn);
+ } else if (isReturn) {
+ if (DEBUGME) {
+ errs() << "BB" << blk->getNumber()
+ <<" is return block without RETURN instr\n";
+ }
+ }
+
+ return isReturn;
+ }
+
+ static MachineBasicBlock::iterator
+ getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
+ assert(instr->getParent() == blk && "instruction doesn't belong to block");
+ MachineBasicBlock::iterator iter = blk->begin();
+ MachineBasicBlock::iterator iterEnd = blk->end();
+ while (&(*iter) != instr && iter != iterEnd) {
+ ++iter;
+ }
+
+ assert(iter != iterEnd);
+ return iter;
+ }//getInstrPos
+
+ static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
+ AMDILCFGStructurizer *passRep) {
+ return insertInstrBefore(blk,newOpcode,passRep,DebugLoc());
+ } //insertInstrBefore
+
+ static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
+ AMDILCFGStructurizer *passRep, DebugLoc DL) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
+
+ MachineBasicBlock::iterator res;
+ if (blk->begin() != blk->end()) {
+ blk->insert(blk->begin(), newInstr);
+ } else {
+ blk->push_back(newInstr);
+ }
+
+ SHOWNEWINSTR(newInstr);
+
+ return newInstr;
+ } //insertInstrBefore
+
+ static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
+ AMDILCFGStructurizer *passRep) {
+ insertInstrEnd(blk,newOpcode,passRep,DebugLoc());
+ } //insertInstrEnd
+
+ static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
+ AMDILCFGStructurizer *passRep, DebugLoc DL) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineInstr *newInstr = blk->getParent()
+ ->CreateMachineInstr(tii->get(newOpcode), DL);
+
+ blk->push_back(newInstr);
+ //assume the instruction doesn't take any reg operand ...
+
+ SHOWNEWINSTR(newInstr);
+ } //insertInstrEnd
+
+ static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
+ int newOpcode,
+ AMDILCFGStructurizer *passRep) {
+ MachineInstr *oldInstr = &(*instrPos);
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineBasicBlock *blk = oldInstr->getParent();
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
+ DebugLoc());
+
+ blk->insert(instrPos, newInstr);
+ //assume the instruction doesn't take any reg operand ...
+
+ SHOWNEWINSTR(newInstr);
+ return newInstr;
+ } //insertInstrBefore
+
+ static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
+ int newOpcode,
+ AMDILCFGStructurizer *passRep,
+ DebugLoc DL) {
+ MachineInstr *oldInstr = &(*instrPos);
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineBasicBlock *blk = oldInstr->getParent();
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
+ DL);
+
+ blk->insert(instrPos, newInstr);
+ MachineInstrBuilder(newInstr).addReg(oldInstr->getOperand(1).getReg(),
+ false);
+
+ SHOWNEWINSTR(newInstr);
+ //erase later oldInstr->eraseFromParent();
+ } //insertCondBranchBefore
+
+ static void insertCondBranchBefore(MachineBasicBlock *blk,
+ MachineBasicBlock::iterator insertPos,
+ int newOpcode,
+ AMDILCFGStructurizer *passRep,
+ RegiT regNum,
+ DebugLoc DL) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
+
+ //insert before
+ blk->insert(insertPos, newInstr);
+ MachineInstrBuilder(newInstr).addReg(regNum, false);
+
+ SHOWNEWINSTR(newInstr);
+ } //insertCondBranchBefore
+
+ static void insertCondBranchEnd(MachineBasicBlock *blk,
+ int newOpcode,
+ AMDILCFGStructurizer *passRep,
+ RegiT regNum) {
+ const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
+
+ blk->push_back(newInstr);
+ MachineInstrBuilder(newInstr).addReg(regNum, false);
+
+ SHOWNEWINSTR(newInstr);
+ } //insertCondBranchEnd
+
+
+ static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
+ AMDILCFGStructurizer *passRep,
+ RegiT regNum, int regVal) {
+ MachineInstr *oldInstr = &(*instrPos);
+ const AMDILInstrInfo *tii =
+ static_cast<const AMDILInstrInfo *>(passRep->getTargetInstrInfo());
+ MachineBasicBlock *blk = oldInstr->getParent();
+ MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
+ regVal);
+ blk->insert(instrPos, newInstr);
+
+ SHOWNEWINSTR(newInstr);
+ } //insertAssignInstrBefore
+
+ static void insertAssignInstrBefore(MachineBasicBlock *blk,
+ AMDILCFGStructurizer *passRep,
+ RegiT regNum, int regVal) {
+ const AMDILInstrInfo *tii =
+ static_cast<const AMDILInstrInfo *>(passRep->getTargetInstrInfo());
+
+ MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
+ regVal);
+ if (blk->begin() != blk->end()) {
+ blk->insert(blk->begin(), newInstr);
+ } else {
+ blk->push_back(newInstr);
+ }
+
+ SHOWNEWINSTR(newInstr);
+
+ } //insertInstrBefore
+
+ static void insertCompareInstrBefore(MachineBasicBlock *blk,
+ MachineBasicBlock::iterator instrPos,
+ AMDILCFGStructurizer *passRep,
+ RegiT dstReg, RegiT src1Reg,
+ RegiT src2Reg) {
+ const AMDILInstrInfo *tii =
+ static_cast<const AMDILInstrInfo *>(passRep->getTargetInstrInfo());
+ MachineInstr *newInstr =
+ blk->getParent()->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc());
+
+ MachineInstrBuilder(newInstr).addReg(dstReg, RegState::Define); //set target
+ MachineInstrBuilder(newInstr).addReg(src1Reg); //set src value
+ MachineInstrBuilder(newInstr).addReg(src2Reg); //set src value
+
+ blk->insert(instrPos, newInstr);
+ SHOWNEWINSTR(newInstr);
+
+ } //insertCompareInstrBefore
+
+ static void cloneSuccessorList(MachineBasicBlock *dstBlk,
+ MachineBasicBlock *srcBlk) {
+ for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
+ iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) {
+ dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of
+ }
+ } //cloneSuccessorList
+
+ static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
+ MachineFunction *func = srcBlk->getParent();
+ MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
+ func->push_back(newBlk); //insert to function
+ //newBlk->setNumber(srcBlk->getNumber());
+ for (MachineBasicBlock::iterator iter = srcBlk->begin(),
+ iterEnd = srcBlk->end();
+ iter != iterEnd; ++iter) {
+ MachineInstr *instr = func->CloneMachineInstr(iter);
+ newBlk->push_back(instr);
+ }
+ return newBlk;
+ }
+
+ //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because
+ //the AMDIL instruction is not recognized as terminator fix this and retire
+ //this routine
+ static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
+ MachineBasicBlock *oldBlk,
+ MachineBasicBlock *newBlk) {
+ MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
+ if (branchInstr && isCondBranch(branchInstr) &&
+ getExplicitBranch(branchInstr) == oldBlk) {
+ setExplicitBranch(branchInstr, newBlk);
+ }
+ }
+
+ static void wrapup(MachineBasicBlock *entryBlk) {
+ assert((!entryBlk->getParent()->getJumpTableInfo()
+ || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
+ && "found a jump table");
+
+ //collect continue right before endloop
+ SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr;
+ MachineBasicBlock::iterator pre = entryBlk->begin();
+ MachineBasicBlock::iterator iterEnd = entryBlk->end();
+ MachineBasicBlock::iterator iter = pre;
+ while (iter != iterEnd) {
+ if (pre->getOpcode() == AMDGPU::CONTINUE
+ && iter->getOpcode() == AMDGPU::ENDLOOP) {
+ contInstr.push_back(pre);
+ }
+ pre = iter;
+ ++iter;
+ } //end while
+
+ //delete continue right before endloop
+ for (unsigned i = 0; i < contInstr.size(); ++i) {
+ contInstr[i]->eraseFromParent();
+ }
+
+ // TODO to fix up jump table so later phase won't be confused. if
+ // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
+ // there isn't such an interface yet. alternatively, replace all the other
+ // blocks in the jump table with the entryBlk //}
+
+ } //wrapup
+
+ static MachineDominatorTree *getDominatorTree(AMDILCFGStructurizer &pass) {
+ return &pass.getAnalysis<MachineDominatorTree>();
+ }
+
+ static MachinePostDominatorTree*
+ getPostDominatorTree(AMDILCFGStructurizer &pass) {
+ return &pass.getAnalysis<MachinePostDominatorTree>();
+ }
+
+ static MachineLoopInfo *getLoopInfo(AMDILCFGStructurizer &pass) {
+ return &pass.getAnalysis<MachineLoopInfo>();
+ }
+}; // template class CFGStructTraits
+} //end of namespace llvm
+
+// createAMDILCFGPreparationPass- Returns a pass
+FunctionPass *llvm::createAMDILCFGPreparationPass(TargetMachine &tm
+ AMDIL_OPT_LEVEL_DECL) {
+ return new AMDILCFGPrepare(tm AMDIL_OPT_LEVEL_VAR);
+}
+
+bool AMDILCFGPrepare::runOnMachineFunction(MachineFunction &func) {
+ return llvmCFGStruct::CFGStructurizer<AMDILCFGStructurizer>().prepare(func,
+ *this,
+ TRI);
+}
+
+// createAMDILCFGStructurizerPass- Returns a pass
+FunctionPass *llvm::createAMDILCFGStructurizerPass(TargetMachine &tm
+ AMDIL_OPT_LEVEL_DECL) {
+ return new AMDILCFGPerform(tm AMDIL_OPT_LEVEL_VAR);
+}
+
+bool AMDILCFGPerform::runOnMachineFunction(MachineFunction &func) {
+ return llvmCFGStruct::CFGStructurizer<AMDILCFGStructurizer>().run(func,
+ *this,
+ TRI);
+}
+
+//end of file newline goes below
+
--- /dev/null
+//===- AMDILCallingConv.td - Calling Conventions AMDIL -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the AMDIL architectures.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// AMDIL 32-bit C return-value convention.
+def RetCC_AMDIL32 : CallingConv<[
+ // Since IL has no return values, all values can be emulated on the stack
+ // The stack can then be mapped to a number of sequential virtual registers
+ // in IL
+
+ // Integer and FP scalar values get put on the stack at 16-byte alignment
+ // but with a size of 4 bytes
+ CCIfType<[i32, f32], CCAssignToReg<
+ [
+ R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20
+]> >, CCAssignToStack<16, 16>]>;
+
+// AMDIL 32-bit C Calling convention.
+def CC_AMDIL32 : CallingConv<[
+ // Since IL has parameter values, all values can be emulated on the stack
+ // The stack can then be mapped to a number of sequential virtual registers
+ // in IL
+ // Integer and FP scalar values get put on the stack at 16-byte alignment
+ // but with a size of 4 bytes
+ // Integer and FP scalar values get put on the stack at 16-byte alignment
+ // but with a size of 4 bytes
+ CCIfType<[i32, f32], CCAssignToReg<
+[R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20
+]> >, CCAssignToStack<16, 16>]>;
--- /dev/null
+//===-- AMDILCodeEmitter.h - AMDIL Code Emitter interface -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// CodeEmitter interface for R600 and SI codegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDILCODEEMITTER_H
+#define AMDILCODEEMITTER_H
+
+namespace llvm {
+
+ class AMDILCodeEmitter {
+ public:
+ uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
+ virtual uint64_t getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const { return 0; }
+ virtual unsigned GPR4AlignEncode(const MachineInstr &MI,
+ unsigned OpNo) const {
+ return 0;
+ }
+ virtual unsigned GPR2AlignEncode(const MachineInstr &MI,
+ unsigned OpNo) const {
+ return 0;
+ }
+ virtual uint64_t VOPPostEncode(const MachineInstr &MI,
+ uint64_t Value) const {
+ return Value;
+ }
+ virtual uint64_t i32LiteralEncode(const MachineInstr &MI,
+ unsigned OpNo) const {
+ return 0;
+ }
+ virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
+ const {
+ return 0;
+ }
+ };
+
+} // End namespace llvm
+
+#endif // AMDILCODEEMITTER_H
--- /dev/null
+//===-- AMDILDevice.cpp - Base class for AMDIL Devices --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILDevice.h"
+#include "AMDILSubtarget.h"
+
+using namespace llvm;
+// Default implementation for all of the classes.
+AMDILDevice::AMDILDevice(AMDILSubtarget *ST) : mSTM(ST)
+{
+ mHWBits.resize(AMDILDeviceInfo::MaxNumberCapabilities);
+ mSWBits.resize(AMDILDeviceInfo::MaxNumberCapabilities);
+ setCaps();
+ mDeviceFlag = OCL_DEVICE_ALL;
+}
+
+AMDILDevice::~AMDILDevice()
+{
+ mHWBits.clear();
+ mSWBits.clear();
+}
+
+size_t AMDILDevice::getMaxGDSSize() const
+{
+ return 0;
+}
+
+uint32_t
+AMDILDevice::getDeviceFlag() const
+{
+ return mDeviceFlag;
+}
+
+size_t AMDILDevice::getMaxNumCBs() const
+{
+ if (usesHardware(AMDILDeviceInfo::ConstantMem)) {
+ return HW_MAX_NUM_CB;
+ }
+
+ return 0;
+}
+
+size_t AMDILDevice::getMaxCBSize() const
+{
+ if (usesHardware(AMDILDeviceInfo::ConstantMem)) {
+ return MAX_CB_SIZE;
+ }
+
+ return 0;
+}
+
+size_t AMDILDevice::getMaxScratchSize() const
+{
+ return 65536;
+}
+
+uint32_t AMDILDevice::getStackAlignment() const
+{
+ return 16;
+}
+
+void AMDILDevice::setCaps()
+{
+ mSWBits.set(AMDILDeviceInfo::HalfOps);
+ mSWBits.set(AMDILDeviceInfo::ByteOps);
+ mSWBits.set(AMDILDeviceInfo::ShortOps);
+ mSWBits.set(AMDILDeviceInfo::HW64BitDivMod);
+ if (mSTM->isOverride(AMDILDeviceInfo::NoInline)) {
+ mSWBits.set(AMDILDeviceInfo::NoInline);
+ }
+ if (mSTM->isOverride(AMDILDeviceInfo::MacroDB)) {
+ mSWBits.set(AMDILDeviceInfo::MacroDB);
+ }
+ if (mSTM->isOverride(AMDILDeviceInfo::Debug)) {
+ mSWBits.set(AMDILDeviceInfo::ConstantMem);
+ } else {
+ mHWBits.set(AMDILDeviceInfo::ConstantMem);
+ }
+ if (mSTM->isOverride(AMDILDeviceInfo::Debug)) {
+ mSWBits.set(AMDILDeviceInfo::PrivateMem);
+ } else {
+ mHWBits.set(AMDILDeviceInfo::PrivateMem);
+ }
+ if (mSTM->isOverride(AMDILDeviceInfo::BarrierDetect)) {
+ mSWBits.set(AMDILDeviceInfo::BarrierDetect);
+ }
+ mSWBits.set(AMDILDeviceInfo::ByteLDSOps);
+ mSWBits.set(AMDILDeviceInfo::LongOps);
+}
+
+AMDILDeviceInfo::ExecutionMode
+AMDILDevice::getExecutionMode(AMDILDeviceInfo::Caps Caps) const
+{
+ if (mHWBits[Caps]) {
+ assert(!mSWBits[Caps] && "Cannot set both SW and HW caps");
+ return AMDILDeviceInfo::Hardware;
+ }
+
+ if (mSWBits[Caps]) {
+ assert(!mHWBits[Caps] && "Cannot set both SW and HW caps");
+ return AMDILDeviceInfo::Software;
+ }
+
+ return AMDILDeviceInfo::Unsupported;
+
+}
+
+bool AMDILDevice::isSupported(AMDILDeviceInfo::Caps Mode) const
+{
+ return getExecutionMode(Mode) != AMDILDeviceInfo::Unsupported;
+}
+
+bool AMDILDevice::usesHardware(AMDILDeviceInfo::Caps Mode) const
+{
+ return getExecutionMode(Mode) == AMDILDeviceInfo::Hardware;
+}
+
+bool AMDILDevice::usesSoftware(AMDILDeviceInfo::Caps Mode) const
+{
+ return getExecutionMode(Mode) == AMDILDeviceInfo::Software;
+}
+
+std::string
+AMDILDevice::getDataLayout() const
+{
+ return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
+ "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
+ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
+ "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
+ "-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+ "-n8:16:32:64");
+}
--- /dev/null
+//===---- AMDILDevice.h - Define Device Data for AMDIL -----*- C++ -*------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface for the subtarget data classes.
+//
+//===----------------------------------------------------------------------===//
+// This file will define the interface that each generation needs to
+// implement in order to correctly answer queries on the capabilities of the
+// specific hardware.
+//===----------------------------------------------------------------------===//
+#ifndef _AMDILDEVICEIMPL_H_
+#define _AMDILDEVICEIMPL_H_
+#include "AMDIL.h"
+#include "llvm/ADT/BitVector.h"
+
+namespace llvm {
+ class AMDILSubtarget;
+ class MCStreamer;
+//===----------------------------------------------------------------------===//
+// Interface for data that is specific to a single device
+//===----------------------------------------------------------------------===//
+class AMDILDevice {
+public:
+ AMDILDevice(AMDILSubtarget *ST);
+ virtual ~AMDILDevice();
+
+ // Enum values for the various memory types.
+ enum {
+ RAW_UAV_ID = 0,
+ ARENA_UAV_ID = 1,
+ LDS_ID = 2,
+ GDS_ID = 3,
+ SCRATCH_ID = 4,
+ CONSTANT_ID = 5,
+ GLOBAL_ID = 6,
+ MAX_IDS = 7
+ } IO_TYPE_IDS;
+
+ // Returns the max LDS size that the hardware supports. Size is in
+ // bytes.
+ virtual size_t getMaxLDSSize() const = 0;
+
+ // Returns the max GDS size that the hardware supports if the GDS is
+ // supported by the hardware. Size is in bytes.
+ virtual size_t getMaxGDSSize() const;
+
+ // Returns the max number of hardware constant address spaces that
+ // are supported by this device.
+ virtual size_t getMaxNumCBs() const;
+
+ // Returns the max number of bytes a single hardware constant buffer
+ // can support. Size is in bytes.
+ virtual size_t getMaxCBSize() const;
+
+ // Returns the max number of bytes allowed by the hardware scratch
+ // buffer. Size is in bytes.
+ virtual size_t getMaxScratchSize() const;
+
+ // Get the flag that corresponds to the device.
+ virtual uint32_t getDeviceFlag() const;
+
+ // Returns the number of work-items that exist in a single hardware
+ // wavefront.
+ virtual size_t getWavefrontSize() const = 0;
+
+ // Get the generational name of this specific device.
+ virtual uint32_t getGeneration() const = 0;
+
+ // Get the stack alignment of this specific device.
+ virtual uint32_t getStackAlignment() const;
+
+ // Get the resource ID for this specific device.
+ virtual uint32_t getResourceID(uint32_t DeviceID) const = 0;
+
+ // Get the max number of UAV's for this device.
+ virtual uint32_t getMaxNumUAVs() const = 0;
+
+
+ // API utilizing more detailed capabilities of each family of
+ // cards. If a capability is supported, then either usesHardware or
+ // usesSoftware returned true. If usesHardware returned true, then
+ // usesSoftware must return false for the same capability. Hardware
+ // execution means that the feature is done natively by the hardware
+ // and is not emulated by the softare. Software execution means
+ // that the feature could be done in the hardware, but there is
+ // software that emulates it with possibly using the hardware for
+ // support since the hardware does not fully comply with OpenCL
+ // specs.
+ bool isSupported(AMDILDeviceInfo::Caps Mode) const;
+ bool usesHardware(AMDILDeviceInfo::Caps Mode) const;
+ bool usesSoftware(AMDILDeviceInfo::Caps Mode) const;
+ virtual std::string getDataLayout() const;
+ static const unsigned int MAX_LDS_SIZE_700 = 16384;
+ static const unsigned int MAX_LDS_SIZE_800 = 32768;
+ static const unsigned int WavefrontSize = 64;
+ static const unsigned int HalfWavefrontSize = 32;
+ static const unsigned int QuarterWavefrontSize = 16;
+protected:
+ virtual void setCaps();
+ llvm::BitVector mHWBits;
+ llvm::BitVector mSWBits;
+ AMDILSubtarget *mSTM;
+ uint32_t mDeviceFlag;
+private:
+ AMDILDeviceInfo::ExecutionMode
+ getExecutionMode(AMDILDeviceInfo::Caps Caps) const;
+}; // AMDILDevice
+
+} // namespace llvm
+#endif // _AMDILDEVICEIMPL_H_
--- /dev/null
+//===-- AMDILDeviceInfo.cpp - AMDILDeviceInfo class -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Function that creates DeviceInfo from a device name and other information.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILDevices.h"
+#include "AMDILSubtarget.h"
+
+using namespace llvm;
+namespace llvm {
+namespace AMDILDeviceInfo {
+ AMDILDevice*
+getDeviceFromName(const std::string &deviceName, AMDILSubtarget *ptr, bool is64bit, bool is64on32bit)
+{
+ if (deviceName.c_str()[2] == '7') {
+ switch (deviceName.c_str()[3]) {
+ case '1':
+ return new AMDIL710Device(ptr);
+ case '7':
+ return new AMDIL770Device(ptr);
+ default:
+ return new AMDIL7XXDevice(ptr);
+ };
+ } else if (deviceName == "cypress") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDILCypressDevice(ptr);
+ } else if (deviceName == "juniper") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDILEvergreenDevice(ptr);
+ } else if (deviceName == "redwood") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDILRedwoodDevice(ptr);
+ } else if (deviceName == "cedar") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDILCedarDevice(ptr);
+ } else if (deviceName == "barts"
+ || deviceName == "turks") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDILNIDevice(ptr);
+ } else if (deviceName == "cayman") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDILCaymanDevice(ptr);
+ } else if (deviceName == "caicos") {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDILNIDevice(ptr);
+ } else if (deviceName == "SI") {
+ return new AMDILSIDevice(ptr);
+ } else {
+#if DEBUG
+ assert(!is64bit && "This device does not support 64bit pointers!");
+ assert(!is64on32bit && "This device does not support 64bit"
+ " on 32bit pointers!");
+#endif
+ return new AMDIL7XXDevice(ptr);
+ }
+}
+} // End namespace AMDILDeviceInfo
+} // End namespace llvm
--- /dev/null
+//===-- AMDILDeviceInfo.h - Constants for describing devices --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#ifndef _AMDILDEVICEINFO_H_
+#define _AMDILDEVICEINFO_H_
+
+
+#include <string>
+
+namespace llvm
+{
+ class AMDILDevice;
+ class AMDILSubtarget;
+ namespace AMDILDeviceInfo
+ {
+ // Each Capabilities can be executed using a hardware instruction,
+ // emulated with a sequence of software instructions, or not
+ // supported at all.
+ enum ExecutionMode {
+ Unsupported = 0, // Unsupported feature on the card(Default value)
+ Software, // This is the execution mode that is set if the
+ // feature is emulated in software
+ Hardware // This execution mode is set if the feature exists
+ // natively in hardware
+ };
+
+ // Any changes to this needs to have a corresponding update to the
+ // twiki page GPUMetadataABI
+ enum Caps {
+ HalfOps = 0x1, // Half float is supported or not.
+ DoubleOps = 0x2, // Double is supported or not.
+ ByteOps = 0x3, // Byte(char) is support or not.
+ ShortOps = 0x4, // Short is supported or not.
+ LongOps = 0x5, // Long is supported or not.
+ Images = 0x6, // Images are supported or not.
+ ByteStores = 0x7, // ByteStores available(!HD4XXX).
+ ConstantMem = 0x8, // Constant/CB memory.
+ LocalMem = 0x9, // Local/LDS memory.
+ PrivateMem = 0xA, // Scratch/Private/Stack memory.
+ RegionMem = 0xB, // OCL GDS Memory Extension.
+ FMA = 0xC, // Use HW FMA or SW FMA.
+ ArenaSegment = 0xD, // Use for Arena UAV per pointer 12-1023.
+ MultiUAV = 0xE, // Use for UAV per Pointer 0-7.
+ Reserved0 = 0xF, // ReservedFlag
+ NoAlias = 0x10, // Cached loads.
+ Signed24BitOps = 0x11, // Peephole Optimization.
+ // Debug mode implies that no hardware features or optimizations
+ // are performned and that all memory access go through a single
+ // uav(Arena on HD5XXX/HD6XXX and Raw on HD4XXX).
+ Debug = 0x12, // Debug mode is enabled.
+ CachedMem = 0x13, // Cached mem is available or not.
+ BarrierDetect = 0x14, // Detect duplicate barriers.
+ Reserved1 = 0x15, // Reserved flag
+ ByteLDSOps = 0x16, // Flag to specify if byte LDS ops are available.
+ ArenaVectors = 0x17, // Flag to specify if vector loads from arena work.
+ TmrReg = 0x18, // Flag to specify if Tmr register is supported.
+ NoInline = 0x19, // Flag to specify that no inlining should occur.
+ MacroDB = 0x1A, // Flag to specify that backend handles macrodb.
+ HW64BitDivMod = 0x1B, // Flag for backend to generate 64bit div/mod.
+ ArenaUAV = 0x1C, // Flag to specify that arena uav is supported.
+ PrivateUAV = 0x1D, // Flag to specify that private memory uses uav's.
+ // If more capabilities are required, then
+ // this number needs to be increased.
+ // All capabilities must come before this
+ // number.
+ MaxNumberCapabilities = 0x20
+ };
+ // These have to be in order with the older generations
+ // having the lower number enumerations.
+ enum Generation {
+ HD4XXX = 0, // 7XX based devices.
+ HD5XXX, // Evergreen based devices.
+ HD6XXX, // NI/Evergreen+ based devices.
+ HD7XXX,
+ HDTEST, // Experimental feature testing device.
+ HDNUMGEN
+ };
+
+
+ AMDILDevice*
+ getDeviceFromName(const std::string &name, AMDILSubtarget *ptr, bool is64bit = false, bool is64on32bit = false);
+ } // namespace AMDILDeviceInfo
+} // namespace llvm
+#endif // _AMDILDEVICEINFO_H_
--- /dev/null
+//===-- AMDILDevices.h - Consolidate AMDIL Device headers -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#ifndef __AMDIL_DEVICES_H_
+#define __AMDIL_DEVICES_H_
+// Include all of the device specific header files
+// This file is for Internal use only!
+#include "AMDIL7XXDevice.h"
+#include "AMDILDevice.h"
+#include "AMDILEvergreenDevice.h"
+#include "AMDILNIDevice.h"
+#include "AMDILSIDevice.h"
+
+#endif // _AMDIL_DEVICES_H_
--- /dev/null
+//===-- AMDILEnumeratedTypes.td - IL Type definitions --*- tablegen -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+// ILEnumreatedTypes.td - The IL Enumerated Types
+//===--------------------------------------------------------------------===//
+
+// Section 5.1 IL Shader
+class ILShader<bits<8> val> {
+ bits<8> Value = val;
+}
+// Table 5-1
+def IL_SHADER_PIXEL : ILShader<0>;
+def IL_SHADER_COMPUTE : ILShader<1>;
+
+// Section 5.2 IL RegType
+class ILRegType<bits<6> val> {
+ bits<6> Value = val;
+}
+// Table 5-2
+def IL_REGTYPE_TEMP : ILRegType<0>;
+def IL_REGTYPE_WINCOORD : ILRegType<1>;
+def IL_REGTYPE_CONST_BUF : ILRegType<2>;
+def IL_REGTYPE_LITERAL : ILRegType<3>;
+def IL_REGTYPE_ITEMP : ILRegType<4>;
+def IL_REGTYPE_GLOBAL : ILRegType<5>;
+
+// Section 5.3 IL Component Select
+class ILComponentSelect<bits<3> val, string text> {
+ bits<3> Value = val;
+ string Text = text;
+}
+// Table 5-3
+def IL_COMPSEL_X : ILComponentSelect<0, "x">;
+def IL_COMPSEL_Y : ILComponentSelect<1, "y">;
+def IL_COMPSEL_Z : ILComponentSelect<2, "z">;
+def IL_COMPSEL_W : ILComponentSelect<3, "w">;
+def IL_COMPSEL_0 : ILComponentSelect<4, "0">;
+def IL_COMPSEL_1 : ILComponentSelect<5, "1">;
+
+// Section 5.4 IL Mod Dst Comp
+class ILModDstComp<bits<2> val, string text> {
+ bits<2> Value = val;
+ string Text = text;
+}
+// Table 5-4
+def IL_MODCOMP_NOWRITE : ILModDstComp<0, "_">;
+def IL_MODCOMP_WRITE_X : ILModDstComp<1, "x">;
+def IL_MODCOMP_WRITE_y : ILModDstComp<1, "y">;
+def IL_MODCOMP_WRITE_z : ILModDstComp<1, "z">;
+def IL_MODCOMP_WRITE_w : ILModDstComp<1, "w">;
+def IL_MODCOMP_0 : ILModDstComp<2, "0">;
+def IL_MODCOMP_1 : ILModDstComp<3, "1">;
+
+// Section 5.5 IL Import Usage
+class ILImportUsage<bits<1> val, string usage> {
+ bits<1> Value = val;
+ string Text = usage;
+}
+// Table 5-5
+def IL_IMPORTUSAGE_WINCOORD : ILImportUsage<0, "_usage(wincoord)">;
+
+// Section 5.6 Il Shift Scale
+class ILShiftScale<bits<4> val, string scale> {
+ bits<4> Value = val;
+ string Text = scale;
+}
+
+// Table 5-6
+def IL_SHIFT_NONE : ILShiftScale<0, "">;
+def IL_SHIFT_X2 : ILShiftScale<1, "_x2">;
+def IL_SHIFT_X4 : ILShiftScale<2, "_x4">;
+def IL_SHIFT_X8 : ILShiftScale<3, "_x8">;
+def IL_SHIFT_D2 : ILShiftScale<4, "_d2">;
+def IL_SHIFT_D4 : ILShiftScale<5, "_d4">;
+def IL_SHIFT_D8 : ILShiftScale<6, "_d8">;
+
+// Section 5.7 IL Divide Component
+class ILDivComp<bits<3> val, string divcomp> {
+ bits<3> Value = val;
+ string Text = divcomp;
+}
+
+// Table 5-7
+def IL_DIVCOMP_NONE : ILDivComp<0, "_divcomp(none)">;
+def IL_DIVCOMP_Y : ILDivComp<1, "_divcomp(y)">;
+def IL_DIVCOMP_Z : ILDivComp<2, "_divcomp(z)">;
+def IL_DIVCOMP_W : ILDivComp<3, "_divcomp(w)">;
+//def IL_DIVCOMP_UNKNOWN : ILDivComp<4, "_divcomp(unknown)">;
+
+// Section 5.8 IL Relational Op
+class ILRelOp<bits<3> val, string op> {
+ bits<3> Value = val;
+ string Text = op;
+}
+
+// Table 5-8
+def IL_RELOP_EQ : ILRelOp<0, "_relop(eq)">;
+def IL_RELOP_NE : ILRelOp<1, "_relop(ne)">;
+def IL_RELOP_GT : ILRelOp<2, "_relop(gt)">;
+def IL_RELOP_GE : ILRelOp<3, "_relop(ge)">;
+def IL_RELOP_LT : ILRelOp<4, "_relop(lt)">;
+def IL_RELOP_LE : ILRelOp<5, "_relop(le)">;
+
+// Section 5.9 IL Zero Op
+class ILZeroOp<bits<3> val, string behavior> {
+ bits<3> Value = val;
+ string Text = behavior;
+}
+
+// Table 5-9
+def IL_ZEROOP_FLTMAX : ILZeroOp<0, "_zeroop(fltmax)">;
+def IL_ZEROOP_0 : ILZeroOp<1, "_zeroop(zero)">;
+def IL_ZEROOP_INFINITY : ILZeroOp<2, "_zeroop(infinity)">;
+def IL_ZEROOP_INF_ELSE_MAX : ILZeroOp<3, "_zeroop(inf_else_max)">;
+
+// Section 5.10 IL Cmp Value
+class ILCmpValue<bits<3> val, string num> {
+ bits<3> Value = val;
+ string Text = num;
+}
+
+// Table 5-10
+def IL_CMPVAL_0_0 : ILCmpValue<0, "0.0">;
+def IL_CMPVAL_0_5 : ILCmpValue<1, "0.5">;
+def IL_CMPVAL_1_0 : ILCmpValue<2, "1.0">;
+def IL_CMPVAL_NEG_0_5 : ILCmpValue<3, "-0.5">;
+def IL_CMPVAL_NEG_1_0 : ILCmpValue<4, "-1.0">;
+
+// Section 5.11 IL Addressing
+class ILAddressing<bits<3> val> {
+ bits<3> Value = val;
+}
+
+// Table 5-11
+def IL_ADDR_ABSOLUTE : ILAddressing<0>;
+def IL_ADDR_RELATIVE : ILAddressing<1>;
+def IL_ADDR_REG_RELATIVE : ILAddressing<2>;
+
+// Section 5.11 IL Element Format
+class ILElementFormat<bits<5> val> {
+ bits<5> Value = val;
+}
+
+// Table 5-11
+def IL_ELEMENTFORMAT_UNKNOWN : ILElementFormat<0>;
+def IL_ELEMENTFORMAT_SNORM : ILElementFormat<1>;
+def IL_ELEMENTFORMAT_UNORM : ILElementFormat<2>;
+def IL_ELEMENTFORMAT_SINT : ILElementFormat<3>;
+def IL_ELEMENTFORMAT_UINT : ILElementFormat<4>;
+def IL_ELEMENTFORMAT_FLOAT : ILElementFormat<5>;
+def IL_ELEMENTFORMAT_SRGB : ILElementFormat<6>;
+def IL_ELEMENTFORMAT_MIXED : ILElementFormat<7>;
+def IL_ELEMENTFORMAT_Last : ILElementFormat<8>;
+
+// Section 5.12 IL Op Code
+class ILOpCode<bits<16> val = -1, string cmd> {
+ bits<16> Value = val;
+ string Text = cmd;
+}
+
+// Table 5-12
+def IL_DCL_CONST_BUFFER : ILOpCode<0, "dcl_cb">;
+def IL_DCL_INDEXED_TEMP_ARRAY : ILOpCode<1, "dcl_index_temp_array">;
+def IL_DCL_INPUT : ILOpCode<2, "dcl_input">;
+def IL_DCL_LITERAL : ILOpCode<3, "dcl_literal">;
+def IL_DCL_OUTPUT : ILOpCode<4, "dcl_output">;
+def IL_DCL_RESOURCE : ILOpCode<5, "dcl_resource">;
+def IL_OP_ABS : ILOpCode<6, "abs">;
+def IL_OP_ADD : ILOpCode<7, "add">;
+def IL_OP_AND : ILOpCode<8, "iand">;
+def IL_OP_BREAK : ILOpCode<9, "break">;
+def IL_OP_BREAK_LOGICALNZ : ILOpCode<10, "break_logicalnz">;
+def IL_OP_BREAK_LOGICALZ : ILOpCode<11, "break_logicalz">;
+def IL_OP_BREAKC : ILOpCode<12, "breakc">;
+def IL_OP_CALL : ILOpCode<13, "call">;
+def IL_OP_CALL_LOGICALNZ : ILOpCode<14, "call_logicalnz">;
+def IL_OP_CALL_LOGICALZ : ILOpCode<15, "call_logicalz">;
+def IL_OP_CASE : ILOpCode<16, "case">;
+def IL_OP_CLG : ILOpCode<17, "clg">;
+def IL_OP_CMOV : ILOpCode<18, "cmov">;
+def IL_OP_CMOV_LOGICAL : ILOpCode<19, "cmov_logical">;
+def IL_OP_CMP : ILOpCode<20, "cmp">;
+def IL_OP_CONTINUE : ILOpCode<21, "continue">;
+def IL_OP_CONTINUE_LOGICALNZ : ILOpCode<22, "continue_logicalnz">;
+def IL_OP_CONTINUE_LOGICALZ : ILOpCode<23, "continue_logicalz">;
+def IL_OP_CONTINUEC : ILOpCode<24, "continuec">;
+def IL_OP_COS : ILOpCode<25, "cos">;
+def IL_OP_COS_VEC : ILOpCode<26, "cos_vec">;
+def IL_OP_D_2_F : ILOpCode<27, "d2f">;
+def IL_OP_D_ADD : ILOpCode<28, "dadd">;
+def IL_OP_D_EQ : ILOpCode<29, "deq">;
+def IL_OP_D_FRC : ILOpCode<30, "dfrac">;
+def IL_OP_D_FREXP : ILOpCode<31, "dfrexp">;
+def IL_OP_D_GE : ILOpCode<32, "dge">;
+def IL_OP_D_LDEXP : ILOpCode<33, "dldexp">;
+def IL_OP_D_LT : ILOpCode<34, "dlt">;
+def IL_OP_D_MAD : ILOpCode<35, "dmad">;
+def IL_OP_D_MUL : ILOpCode<36, "dmul">;
+def IL_OP_D_NE : ILOpCode<37, "dne">;
+def IL_OP_DEFAULT : ILOpCode<38, "default">;
+def IL_OP_DISCARD_LOGICALNZ : ILOpCode<39, "discard_logicalnz">;
+def IL_OP_DISCARD_LOGICALZ : ILOpCode<40, "discard_logicalz">;
+def IL_OP_DIV : ILOpCode<41, "div_zeroop(infinity)">;
+def IL_OP_DP2 : ILOpCode<42, "dp2">;
+def IL_OP_DP3 : ILOpCode<43, "dp3">;
+def IL_OP_DP4 : ILOpCode<44, "dp4">;
+def IL_OP_ELSE : ILOpCode<45, "else">;
+def IL_OP_END : ILOpCode<46, "end">;
+def IL_OP_ENDFUNC : ILOpCode<47, "endfunc">;
+def IL_OP_ENDIF : ILOpCode<48, "endif">;
+def IL_OP_ENDLOOP : ILOpCode<49, "endloop">;
+def IL_OP_ENDMAIN : ILOpCode<50, "endmain">;
+def IL_OP_ENDSWITCH : ILOpCode<51, "endswitch">;
+def IL_OP_EQ : ILOpCode<52, "eq">;
+def IL_OP_EXP : ILOpCode<53, "exp">;
+def IL_OP_EXP_VEC : ILOpCode<54, "exp_vec">;
+def IL_OP_F_2_D : ILOpCode<55, "f2d">;
+def IL_OP_FLR : ILOpCode<56, "flr">;
+def IL_OP_FRC : ILOpCode<57, "frc">;
+def IL_OP_FTOI : ILOpCode<58, "ftoi">;
+def IL_OP_FTOU : ILOpCode<59, "ftou">;
+def IL_OP_FUNC : ILOpCode<60, "func">;
+def IL_OP_GE : ILOpCode<61, "ge">;
+def IL_OP_I_ADD : ILOpCode<62, "iadd">;
+def IL_OP_I_EQ : ILOpCode<63, "ieq">;
+def IL_OP_I_GE : ILOpCode<64, "ige">;
+def IL_OP_I_LT : ILOpCode<65, "ilt">;
+def IL_OP_I_MAD : ILOpCode<66, "imad">;
+def IL_OP_I_MAX : ILOpCode<67, "imax">;
+def IL_OP_I_MIN : ILOpCode<68, "imin">;
+def IL_OP_I_MUL : ILOpCode<69, "imul">;
+def IL_OP_I_MUL_HIGH : ILOpCode<70, "imul_high">;
+def IL_OP_I_NE : ILOpCode<71, "ine">;
+def IL_OP_I_NEGATE : ILOpCode<72, "inegate">;
+def IL_OP_I_NOT : ILOpCode<73, "inot">;
+def IL_OP_I_OR : ILOpCode<74, "ior">;
+def IL_OP_I_SHL : ILOpCode<75, "ishl">;
+def IL_OP_I_SHR : ILOpCode<76, "ishr">;
+def IL_OP_I_XOR : ILOpCode<77, "ixor">;
+def IL_OP_IF_LOGICALNZ : ILOpCode<78, "if_logicalnz">;
+def IL_OP_IF_LOGICALZ : ILOpCode<79, "if_logicalz">;
+def IL_OP_IFC : ILOpCode<80, "ifc">;
+def IL_OP_ITOF : ILOpCode<81, "itof">;
+def IL_OP_LN : ILOpCode<82, "ln">;
+def IL_OP_LOG : ILOpCode<83, "log">;
+def IL_OP_LOG_VEC : ILOpCode<84, "log_vec">;
+def IL_OP_LOOP : ILOpCode<85, "loop">;
+def IL_OP_LT : ILOpCode<86, "lt">;
+def IL_OP_MAD : ILOpCode<87, "mad_ieee">;
+def IL_OP_MAX : ILOpCode<88, "max_ieee">;
+def IL_OP_MIN : ILOpCode<89, "min_ieee">;
+def IL_OP_MOD : ILOpCode<90, "mod_ieee">;
+def IL_OP_MOV : ILOpCode<91, "mov">;
+def IL_OP_MUL_IEEE : ILOpCode<92, "mul_ieee">;
+def IL_OP_NE : ILOpCode<93, "ne">;
+def IL_OP_NRM : ILOpCode<94, "nrm_nrm4_zeroop(zero)">;
+def IL_OP_POW : ILOpCode<95, "pow">;
+def IL_OP_RCP : ILOpCode<96, "rcp">;
+def IL_OP_RET : ILOpCode<97, "ret">;
+def IL_OP_RET_DYN : ILOpCode<98, "ret_dyn">;
+def IL_OP_RET_LOGICALNZ : ILOpCode<99, "ret_logicalnz">;
+def IL_OP_RET_LOGICALZ : ILOpCode<100, "ret_logicalz">;
+def IL_OP_RND : ILOpCode<101, "rnd">;
+def IL_OP_ROUND_NEAR : ILOpCode<102, "round_nearest">;
+def IL_OP_ROUND_NEG_INF : ILOpCode<103, "round_neginf">;
+def IL_OP_ROUND_POS_INF : ILOpCode<104, "round_plusinf">;
+def IL_OP_ROUND_ZERO : ILOpCode<105, "round_z">;
+def IL_OP_RSQ : ILOpCode<106, "rsq">;
+def IL_OP_RSQ_VEC : ILOpCode<107, "rsq_vec">;
+def IL_OP_SAMPLE : ILOpCode<108, "sample">;
+def IL_OP_SAMPLE_L : ILOpCode<109, "sample_l">;
+def IL_OP_SET : ILOpCode<110, "set">;
+def IL_OP_SGN : ILOpCode<111, "sgn">;
+def IL_OP_SIN : ILOpCode<112, "sin">;
+def IL_OP_SIN_VEC : ILOpCode<113, "sin_vec">;
+def IL_OP_SUB : ILOpCode<114, "sub">;
+def IL_OP_SWITCH : ILOpCode<115, "switch">;
+def IL_OP_TRC : ILOpCode<116, "trc">;
+def IL_OP_U_DIV : ILOpCode<117, "udiv">;
+def IL_OP_U_GE : ILOpCode<118, "uge">;
+def IL_OP_U_LT : ILOpCode<119, "ult">;
+def IL_OP_U_MAD : ILOpCode<120, "umad">;
+def IL_OP_U_MAX : ILOpCode<121, "umax">;
+def IL_OP_U_MIN : ILOpCode<122, "umin">;
+def IL_OP_U_MOD : ILOpCode<123, "umod">;
+def IL_OP_U_MUL : ILOpCode<124, "umul">;
+def IL_OP_U_MUL_HIGH : ILOpCode<125, "umul_high">;
+def IL_OP_U_SHR : ILOpCode<126, "ushr">;
+def IL_OP_UTOF : ILOpCode<127, "utof">;
+def IL_OP_WHILE : ILOpCode<128, "whileloop">;
+// SC IL instructions that are not in CAL IL
+def IL_OP_ACOS : ILOpCode<129, "acos">;
+def IL_OP_ASIN : ILOpCode<130, "asin">;
+def IL_OP_EXN : ILOpCode<131, "exn">;
+def IL_OP_UBIT_REVERSE : ILOpCode<132, "ubit_reverse">;
+def IL_OP_UBIT_EXTRACT : ILOpCode<133, "ubit_extract">;
+def IL_OP_IBIT_EXTRACT : ILOpCode<134, "ibit_extract">;
+def IL_OP_SQRT : ILOpCode<135, "sqrt">;
+def IL_OP_SQRT_VEC : ILOpCode<136, "sqrt_vec">;
+def IL_OP_ATAN : ILOpCode<137, "atan">;
+def IL_OP_TAN : ILOpCode<137, "tan">;
+def IL_OP_D_DIV : ILOpCode<138, "ddiv">;
+def IL_OP_F_NEG : ILOpCode<139, "mov">;
+def IL_OP_GT : ILOpCode<140, "gt">;
+def IL_OP_LE : ILOpCode<141, "lt">;
+def IL_OP_DIST : ILOpCode<142, "dist">;
+def IL_OP_LEN : ILOpCode<143, "len">;
+def IL_OP_MACRO : ILOpCode<144, "mcall">;
+def IL_OP_INTR : ILOpCode<145, "call">;
+def IL_OP_I_FFB_HI : ILOpCode<146, "ffb_hi">;
+def IL_OP_I_FFB_LO : ILOpCode<147, "ffb_lo">;
+def IL_OP_BARRIER : ILOpCode<148, "fence_threads_memory_lds">;
+def IL_OP_BARRIER_LOCAL : ILOpCode<149, "fence_threads_lds">;
+def IL_OP_BARRIER_GLOBAL : ILOpCode<150, "fence_threads_memory">;
+def IL_OP_FENCE : ILOpCode<151, "fence_lds_memory">;
+def IL_OP_FENCE_READ_ONLY : ILOpCode<152, "fence_lds_mem_read_only">;
+def IL_OP_FENCE_WRITE_ONLY : ILOpCode<153, "fence_lds_mem_write_only">;
+def IL_PSEUDO_INST : ILOpCode<154, ";Pseudo Op">;
+def IL_OP_UNPACK_0 : ILOpCode<155, "unpack0">;
+def IL_OP_UNPACK_1 : ILOpCode<156, "unpack1">;
+def IL_OP_UNPACK_2 : ILOpCode<157, "unpack2">;
+def IL_OP_UNPACK_3 : ILOpCode<158, "unpack3">;
+def IL_OP_PI_REDUCE : ILOpCode<159, "pireduce">;
+def IL_OP_IBIT_COUNT : ILOpCode<160, "icbits">;
+def IL_OP_I_FFB_SGN : ILOpCode<161, "ffb_shi">;
+def IL_OP_F2U4 : ILOpCode<162, "f_2_u4">;
+def IL_OP_BIT_ALIGN : ILOpCode<163, "bitalign">;
+def IL_OP_BYTE_ALIGN : ILOpCode<164, "bytealign">;
+def IL_OP_U4_LERP : ILOpCode<165, "u4lerp">;
+def IL_OP_SAD : ILOpCode<166, "sad">;
+def IL_OP_SAD_HI : ILOpCode<167, "sadhi">;
+def IL_OP_SAD4 : ILOpCode<168, "sad4">;
+def IL_OP_UBIT_INSERT : ILOpCode<169, "ubit_insert">;
+def IL_OP_I_CARRY : ILOpCode<170, "icarry">;
+def IL_OP_I_BORROW : ILOpCode<171, "iborrow">;
+def IL_OP_U_MAD24 : ILOpCode<172, "umad24">;
+def IL_OP_U_MUL24 : ILOpCode<173, "umul24">;
+def IL_OP_I_MAD24 : ILOpCode<174, "imad24">;
+def IL_OP_I_MUL24 : ILOpCode<175, "imul24">;
+def IL_OP_CLAMP : ILOpCode<176, "clamp">;
+def IL_OP_LERP : ILOpCode<177, "lrp">;
+def IL_OP_FMA : ILOpCode<178, "fma">;
+def IL_OP_D_MIN : ILOpCode<179, "dmin">;
+def IL_OP_D_MAX : ILOpCode<180, "dmax">;
+def IL_OP_D_SQRT : ILOpCode<181, "dsqrt">;
+def IL_OP_DP2_ADD : ILOpCode<182, "dp2add">;
+def IL_OP_F16_TO_F32 : ILOpCode<183, "f162f">;
+def IL_OP_F32_TO_F16 : ILOpCode<184, "f2f16">;
+def IL_REG_LOCAL_ID_FLAT : ILOpCode<185, "vTidInGrpFlat">;
+def IL_REG_LOCAL_ID : ILOpCode<186, "vTidInGrp">;
+def IL_REG_GLOBAL_ID_FLAT : ILOpCode<187, "vAbsTidFlag">;
+def IL_REG_GLOBAL_ID : ILOpCode<188, "vAbsTid">;
+def IL_REG_GROUP_ID_FLAT : ILOpCode<189, "vThreadGrpIDFlat">;
+def IL_REG_GROUP_ID : ILOpCode<190, "vThreadGrpID">;
+def IL_OP_D_RCP : ILOpCode<191, "drcp_zeroop(infinity)">;
+def IL_OP_D_RSQ : ILOpCode<192, "drsq_zeroop(infinity)">;
+def IL_OP_D_MOV : ILOpCode<193, "dmov">;
+def IL_OP_D_MOVC : ILOpCode<194, "dmovc">;
+def IL_OP_NOP : ILOpCode<195, "nop">;
+def IL_OP_UAV_ADD : ILOpCode<196, "uav_add">;
+def IL_OP_UAV_AND : ILOpCode<197, "uav_and">;
+def IL_OP_UAV_MAX : ILOpCode<198, "uav_max">;
+def IL_OP_UAV_MIN : ILOpCode<199, "uav_min">;
+def IL_OP_UAV_OR : ILOpCode<200, "uav_or">;
+def IL_OP_UAV_RSUB : ILOpCode<201, "uav_rsub">;
+def IL_OP_UAV_SUB : ILOpCode<202, "uav_sub">;
+def IL_OP_UAV_UMAX : ILOpCode<203, "uav_umax">;
+def IL_OP_UAV_UMIN : ILOpCode<204, "uav_umin">;
+def IL_OP_UAV_XOR : ILOpCode<205, "uav_xor">;
+def IL_OP_UAV_INC : ILOpCode<206, "uav_uinc">;
+def IL_OP_UAV_DEC : ILOpCode<207, "uav_udec">;
+def IL_OP_UAV_CMP : ILOpCode<208, "uav_cmp">;
+def IL_OP_UAV_READ_ADD : ILOpCode<209, "uav_read_add">;
+def IL_OP_UAV_READ_AND : ILOpCode<210, "uav_read_and">;
+def IL_OP_UAV_READ_MAX : ILOpCode<211, "uav_read_max">;
+def IL_OP_UAV_READ_MIN : ILOpCode<212, "uav_read_min">;
+def IL_OP_UAV_READ_OR : ILOpCode<213, "uav_read_or">;
+def IL_OP_UAV_READ_RSUB : ILOpCode<214, "uav_read_rsub">;
+def IL_OP_UAV_READ_SUB : ILOpCode<215, "uav_read_sub">;
+def IL_OP_UAV_READ_UMAX : ILOpCode<216, "uav_read_umax">;
+def IL_OP_UAV_READ_UMIN : ILOpCode<217, "uav_read_umin">;
+def IL_OP_UAV_READ_XOR : ILOpCode<218, "uav_read_xor">;
+def IL_OP_UAV_READ_INC : ILOpCode<219, "uav_read_uinc">;
+def IL_OP_UAV_READ_DEC : ILOpCode<220, "uav_read_udec">;
+def IL_OP_UAV_READ_XCHG : ILOpCode<221, "uav_read_xchg">;
+def IL_OP_UAV_READ_CMPXCHG : ILOpCode<222, "uav_read_cmp_xchg">;
+def IL_OP_LDS_ADD : ILOpCode<223, "lds_add">;
+def IL_OP_LDS_AND : ILOpCode<224, "lds_and">;
+def IL_OP_LDS_MAX : ILOpCode<225, "lds_max">;
+def IL_OP_LDS_MIN : ILOpCode<226, "lds_min">;
+def IL_OP_LDS_OR : ILOpCode<227, "lds_or">;
+def IL_OP_LDS_RSUB : ILOpCode<228, "lds_rsub">;
+def IL_OP_LDS_SUB : ILOpCode<229, "lds_sub">;
+def IL_OP_LDS_UMAX : ILOpCode<230, "lds_umax">;
+def IL_OP_LDS_UMIN : ILOpCode<231, "lds_umin">;
+def IL_OP_LDS_XOR : ILOpCode<232, "lds_xor">;
+def IL_OP_LDS_INC : ILOpCode<233, "lds_inc">;
+def IL_OP_LDS_DEC : ILOpCode<234, "lds_dec">;
+def IL_OP_LDS_CMP : ILOpCode<235, "lds_cmp">;
+def IL_OP_LDS_READ_ADD : ILOpCode<236, "lds_read_add">;
+def IL_OP_LDS_READ_AND : ILOpCode<237, "lds_read_and">;
+def IL_OP_LDS_READ_MAX : ILOpCode<238, "lds_read_max">;
+def IL_OP_LDS_READ_MIN : ILOpCode<239, "lds_read_min">;
+def IL_OP_LDS_READ_OR : ILOpCode<240, "lds_read_or">;
+def IL_OP_LDS_READ_RSUB : ILOpCode<241, "lds_read_rsub">;
+def IL_OP_LDS_READ_SUB : ILOpCode<242, "lds_read_sub">;
+def IL_OP_LDS_READ_UMAX : ILOpCode<243, "lds_read_umax">;
+def IL_OP_LDS_READ_UMIN : ILOpCode<244, "lds_read_umin">;
+def IL_OP_LDS_READ_XOR : ILOpCode<245, "lds_read_xor">;
+def IL_OP_LDS_READ_INC : ILOpCode<246, "lds_read_inc">;
+def IL_OP_LDS_READ_DEC : ILOpCode<247, "lds_read_dec">;
+def IL_OP_LDS_READ_XCHG : ILOpCode<248, "lds_read_xchg">;
+def IL_OP_LDS_READ_CMPXCHG : ILOpCode<249, "lds_read_cmp_xchg">;
+def IL_OP_GDS_ADD : ILOpCode<250, "gds_add">;
+def IL_OP_GDS_AND : ILOpCode<251, "gds_and">;
+def IL_OP_GDS_MAX : ILOpCode<252, "gds_max">;
+def IL_OP_GDS_MIN : ILOpCode<253, "gds_min">;
+def IL_OP_GDS_OR : ILOpCode<254, "gds_or">;
+def IL_OP_GDS_RSUB : ILOpCode<255, "gds_rsub">;
+def IL_OP_GDS_SUB : ILOpCode<256, "gds_sub">;
+def IL_OP_GDS_UMAX : ILOpCode<257, "gds_umax">;
+def IL_OP_GDS_UMIN : ILOpCode<258, "gds_umin">;
+def IL_OP_GDS_MSKOR : ILOpCode<259, "gds_mskor">;
+def IL_OP_GDS_XOR : ILOpCode<260, "gds_xor">;
+def IL_OP_GDS_INC : ILOpCode<261, "gds_inc">;
+def IL_OP_GDS_DEC : ILOpCode<262, "gds_dec">;
+def IL_OP_GDS_CMP : ILOpCode<263, "gds_cmp">;
+def IL_OP_GDS_READ_ADD : ILOpCode<264, "gds_read_add">;
+def IL_OP_GDS_READ_AND : ILOpCode<265, "gds_read_and">;
+def IL_OP_GDS_READ_MAX : ILOpCode<266, "gds_read_max">;
+def IL_OP_GDS_READ_MIN : ILOpCode<267, "gds_read_min">;
+def IL_OP_GDS_READ_OR : ILOpCode<268, "gds_read_or">;
+def IL_OP_GDS_READ_RSUB : ILOpCode<269, "gds_read_rsub">;
+def IL_OP_GDS_READ_SUB : ILOpCode<270, "gds_read_sub">;
+def IL_OP_GDS_READ_UMAX : ILOpCode<271, "gds_read_umax">;
+def IL_OP_GDS_READ_UMIN : ILOpCode<272, "gds_read_umin">;
+def IL_OP_GDS_READ_MSKOR : ILOpCode<273, "gds_read_mskor">;
+def IL_OP_GDS_READ_XOR : ILOpCode<274, "gds_read_xor">;
+def IL_OP_GDS_READ_INC : ILOpCode<275, "gds_read_inc">;
+def IL_OP_GDS_READ_DEC : ILOpCode<276, "gds_read_dec">;
+def IL_OP_GDS_READ_XCHG : ILOpCode<277, "gds_read_xchg">;
+def IL_OP_GDS_READ_CMPXCHG : ILOpCode<278, "gds_read_cmp_xchg">;
+def IL_OP_APPEND_BUF_ALLOC : ILOpCode<279, "append_buf_alloc">;
+def IL_OP_APPEND_BUF_CONSUME : ILOpCode<280, "append_buf_consume">;
+def IL_OP_I64_ADD : ILOpCode<281, "i64add">;
+def IL_OP_I64_MAX : ILOpCode<282, "i64max">;
+def IL_OP_U64_MAX : ILOpCode<283, "u64max">;
+def IL_OP_I64_MIN : ILOpCode<284, "i64min">;
+def IL_OP_U64_MIN : ILOpCode<285, "u64min">;
+def IL_OP_I64_NEGATE : ILOpCode<286, "i64negate">;
+def IL_OP_I64_SHL : ILOpCode<287, "i64shl">;
+def IL_OP_I64_SHR : ILOpCode<288, "i64shr">;
+def IL_OP_U64_SHR : ILOpCode<289, "u64shr">;
+def IL_OP_I64_EQ : ILOpCode<290, "i64eq">;
+def IL_OP_I64_GE : ILOpCode<291, "i64ge">;
+def IL_OP_U64_GE : ILOpCode<292, "u64ge">;
+def IL_OP_I64_LT : ILOpCode<293, "i64lt">;
+def IL_OP_U64_LT : ILOpCode<294, "u64lt">;
+def IL_OP_I64_NE : ILOpCode<295, "i64ne">;
+def IL_OP_U_MULHI24 : ILOpCode<296, "umul24_high">;
+def IL_OP_I_MULHI24 : ILOpCode<297, "imul24_high">;
+def IL_OP_GDS_LOAD : ILOpCode<298, "gds_load">;
+def IL_OP_GDS_STORE : ILOpCode<299, "gds_store">;
+def IL_OP_LDS_LOAD : ILOpCode<300, "lds_load">;
+def IL_OP_LDS_LOAD_VEC : ILOpCode<301, "lds_load_vec">;
+def IL_OP_LDS_LOAD_BYTE : ILOpCode<302, "lds_load_byte">;
+def IL_OP_LDS_LOAD_UBYTE : ILOpCode<303, "lds_load_ubyte">;
+def IL_OP_LDS_LOAD_SHORT : ILOpCode<304, "lds_load_short">;
+def IL_OP_LDS_LOAD_USHORT : ILOpCode<305, "lds_load_ushort">;
+def IL_OP_LDS_STORE : ILOpCode<306, "lds_store">;
+def IL_OP_LDS_STORE_VEC : ILOpCode<307, "lds_store_vec">;
+def IL_OP_LDS_STORE_BYTE : ILOpCode<308, "lds_store_byte">;
+def IL_OP_LDS_STORE_SHORT : ILOpCode<309, "lds_store_short">;
+def IL_OP_RAW_UAV_LOAD : ILOpCode<310, "uav_raw_load">;
+def IL_OP_RAW_UAV_STORE : ILOpCode<311, "uav_raw_store">;
+def IL_OP_ARENA_UAV_LOAD : ILOpCode<312, "uav_arena_load">;
+def IL_OP_ARENA_UAV_STORE : ILOpCode<313, "uav_arena_store">;
+def IL_OP_LDS_MSKOR : ILOpCode<314, "lds_mskor">;
+def IL_OP_LDS_READ_MSKOR : ILOpCode<315, "lds_read_mskor">;
+def IL_OP_UAV_BYTE_LOAD : ILOpCode<316, "uav_byte_load">;
+def IL_OP_UAV_UBYTE_LOAD : ILOpCode<317, "uav_ubyte_load">;
+def IL_OP_UAV_SHORT_LOAD : ILOpCode<318, "uav_short_load">;
+def IL_OP_UAV_USHORT_LOAD : ILOpCode<319, "uav_ushort_load">;
+def IL_OP_UAV_BYTE_STORE : ILOpCode<320, "uav_byte_store">;
+def IL_OP_UAV_SHORT_STORE : ILOpCode<320, "uav_short_store">;
+def IL_OP_UAV_STORE : ILOpCode<321, "uav_store">;
+def IL_OP_UAV_LOAD : ILOpCode<322, "uav_load">;
+def IL_OP_MUL : ILOpCode<323, "mul">;
+def IL_OP_DIV_INF : ILOpCode<324, "div_zeroop(infinity)">;
+def IL_OP_DIV_FLTMAX : ILOpCode<325, "div_zeroop(fltmax)">;
+def IL_OP_DIV_ZERO : ILOpCode<326, "div_zeroop(zero)">;
+def IL_OP_DIV_INFELSEMAX : ILOpCode<327, "div_zeroop(inf_else_max)">;
+def IL_OP_FTOI_FLR : ILOpCode<328, "ftoi_flr">;
+def IL_OP_FTOI_RPI : ILOpCode<329, "ftoi_rpi">;
+def IL_OP_F32_TO_F16_NEAR : ILOpCode<330, "f2f16_near">;
+def IL_OP_F32_TO_F16_NEG_INF : ILOpCode<331, "f2f16_neg_inf">;
+def IL_OP_F32_TO_F16_PLUS_INF : ILOpCode<332, "f2f16_plus_inf">;
+def IL_OP_I64_MUL : ILOpCode<333, "i64mul">;
+def IL_OP_U64_MUL : ILOpCode<334, "u64mul">;
+def IL_OP_CU_ID : ILOpCode<355, "cu_id">;
+def IL_OP_WAVE_ID : ILOpCode<356, "wave_id">;
+def IL_OP_I64_SUB : ILOpCode<357, "i64sub">;
+def IL_OP_I64_DIV : ILOpCode<358, "i64div">;
+def IL_OP_U64_DIV : ILOpCode<359, "u64div">;
+def IL_OP_I64_MOD : ILOpCode<360, "i64mod">;
+def IL_OP_U64_MOD : ILOpCode<361, "u64mod">;
+def IL_DCL_GWS_THREAD_COUNT : ILOpCode<362, "dcl_gws_thread_count">;
+def IL_DCL_SEMAPHORE : ILOpCode<363, "dcl_semaphore">;
+def IL_OP_SEMAPHORE_INIT : ILOpCode<364, "init_semaphore">;
+def IL_OP_SEMAPHORE_WAIT : ILOpCode<365, "semaphore_wait">;
+def IL_OP_SEMAPHORE_SIGNAL : ILOpCode<366, "semaphore_signal">;
+def IL_OP_BARRIER_REGION : ILOpCode<377, "fence_threads_gds">;
+def IL_OP_BFI : ILOpCode<394, "bfi">;
+def IL_OP_BFM : ILOpCode<395, "bfm">;
+def IL_DBG_STRING : ILOpCode<396, "dbg_string">;
+def IL_DBG_LINE : ILOpCode<397, "dbg_line">;
+def IL_DBG_TEMPLOC : ILOpCode<398, "dbg_temploc">;
--- /dev/null
+//===-- AMDILEvergreenDevice.cpp - Device Info for Evergreen --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+#include "AMDILEvergreenDevice.h"
+
+using namespace llvm;
+
+AMDILEvergreenDevice::AMDILEvergreenDevice(AMDILSubtarget *ST)
+: AMDILDevice(ST) {
+ setCaps();
+ std::string name = ST->getDeviceName();
+ if (name == "cedar") {
+ mDeviceFlag = OCL_DEVICE_CEDAR;
+ } else if (name == "redwood") {
+ mDeviceFlag = OCL_DEVICE_REDWOOD;
+ } else if (name == "cypress") {
+ mDeviceFlag = OCL_DEVICE_CYPRESS;
+ } else {
+ mDeviceFlag = OCL_DEVICE_JUNIPER;
+ }
+}
+
+AMDILEvergreenDevice::~AMDILEvergreenDevice() {
+}
+
+size_t AMDILEvergreenDevice::getMaxLDSSize() const {
+ if (usesHardware(AMDILDeviceInfo::LocalMem)) {
+ return MAX_LDS_SIZE_800;
+ } else {
+ return 0;
+ }
+}
+size_t AMDILEvergreenDevice::getMaxGDSSize() const {
+ if (usesHardware(AMDILDeviceInfo::RegionMem)) {
+ return MAX_LDS_SIZE_800;
+ } else {
+ return 0;
+ }
+}
+uint32_t AMDILEvergreenDevice::getMaxNumUAVs() const {
+ return 12;
+}
+
+uint32_t AMDILEvergreenDevice::getResourceID(uint32_t id) const {
+ switch(id) {
+ default:
+ assert(0 && "ID type passed in is unknown!");
+ break;
+ case CONSTANT_ID:
+ case RAW_UAV_ID:
+ if (mSTM->calVersion() >= CAL_VERSION_GLOBAL_RETURN_BUFFER) {
+ return GLOBAL_RETURN_RAW_UAV_ID;
+ } else {
+ return DEFAULT_RAW_UAV_ID;
+ }
+ case GLOBAL_ID:
+ case ARENA_UAV_ID:
+ return DEFAULT_ARENA_UAV_ID;
+ case LDS_ID:
+ if (usesHardware(AMDILDeviceInfo::LocalMem)) {
+ return DEFAULT_LDS_ID;
+ } else {
+ return DEFAULT_ARENA_UAV_ID;
+ }
+ case GDS_ID:
+ if (usesHardware(AMDILDeviceInfo::RegionMem)) {
+ return DEFAULT_GDS_ID;
+ } else {
+ return DEFAULT_ARENA_UAV_ID;
+ }
+ case SCRATCH_ID:
+ if (usesHardware(AMDILDeviceInfo::PrivateMem)) {
+ return DEFAULT_SCRATCH_ID;
+ } else {
+ return DEFAULT_ARENA_UAV_ID;
+ }
+ };
+ return 0;
+}
+
+size_t AMDILEvergreenDevice::getWavefrontSize() const {
+ return AMDILDevice::WavefrontSize;
+}
+
+uint32_t AMDILEvergreenDevice::getGeneration() const {
+ return AMDILDeviceInfo::HD5XXX;
+}
+
+void AMDILEvergreenDevice::setCaps() {
+ mSWBits.set(AMDILDeviceInfo::ArenaSegment);
+ mHWBits.set(AMDILDeviceInfo::ArenaUAV);
+ if (mSTM->calVersion() >= CAL_VERSION_SC_140) {
+ mHWBits.set(AMDILDeviceInfo::HW64BitDivMod);
+ mSWBits.reset(AMDILDeviceInfo::HW64BitDivMod);
+ }
+ mSWBits.set(AMDILDeviceInfo::Signed24BitOps);
+ if (mSTM->isOverride(AMDILDeviceInfo::ByteStores)) {
+ mHWBits.set(AMDILDeviceInfo::ByteStores);
+ }
+ if (mSTM->isOverride(AMDILDeviceInfo::Debug)) {
+ mSWBits.set(AMDILDeviceInfo::LocalMem);
+ mSWBits.set(AMDILDeviceInfo::RegionMem);
+ } else {
+ mHWBits.set(AMDILDeviceInfo::LocalMem);
+ mHWBits.set(AMDILDeviceInfo::RegionMem);
+ }
+ mHWBits.set(AMDILDeviceInfo::Images);
+ if (mSTM->isOverride(AMDILDeviceInfo::NoAlias)) {
+ mHWBits.set(AMDILDeviceInfo::NoAlias);
+ }
+ if (mSTM->calVersion() > CAL_VERSION_GLOBAL_RETURN_BUFFER) {
+ mHWBits.set(AMDILDeviceInfo::CachedMem);
+ }
+ if (mSTM->isOverride(AMDILDeviceInfo::MultiUAV)) {
+ mHWBits.set(AMDILDeviceInfo::MultiUAV);
+ }
+ if (mSTM->calVersion() > CAL_VERSION_SC_136) {
+ mHWBits.set(AMDILDeviceInfo::ByteLDSOps);
+ mSWBits.reset(AMDILDeviceInfo::ByteLDSOps);
+ mHWBits.set(AMDILDeviceInfo::ArenaVectors);
+ } else {
+ mSWBits.set(AMDILDeviceInfo::ArenaVectors);
+ }
+ if (mSTM->calVersion() > CAL_VERSION_SC_137) {
+ mHWBits.set(AMDILDeviceInfo::LongOps);
+ mSWBits.reset(AMDILDeviceInfo::LongOps);
+ }
+ mHWBits.set(AMDILDeviceInfo::TmrReg);
+}
+
+AMDILCypressDevice::AMDILCypressDevice(AMDILSubtarget *ST)
+ : AMDILEvergreenDevice(ST) {
+ setCaps();
+}
+
+AMDILCypressDevice::~AMDILCypressDevice() {
+}
+
+void AMDILCypressDevice::setCaps() {
+ if (mSTM->isOverride(AMDILDeviceInfo::DoubleOps)) {
+ mHWBits.set(AMDILDeviceInfo::DoubleOps);
+ mHWBits.set(AMDILDeviceInfo::FMA);
+ }
+}
+
+
+AMDILCedarDevice::AMDILCedarDevice(AMDILSubtarget *ST)
+ : AMDILEvergreenDevice(ST) {
+ setCaps();
+}
+
+AMDILCedarDevice::~AMDILCedarDevice() {
+}
+
+void AMDILCedarDevice::setCaps() {
+ mSWBits.set(AMDILDeviceInfo::FMA);
+}
+
+size_t AMDILCedarDevice::getWavefrontSize() const {
+ return AMDILDevice::QuarterWavefrontSize;
+}
+
+AMDILRedwoodDevice::AMDILRedwoodDevice(AMDILSubtarget *ST)
+ : AMDILEvergreenDevice(ST) {
+ setCaps();
+}
+
+AMDILRedwoodDevice::~AMDILRedwoodDevice()
+{
+}
+
+void AMDILRedwoodDevice::setCaps() {
+ mSWBits.set(AMDILDeviceInfo::FMA);
+}
+
+size_t AMDILRedwoodDevice::getWavefrontSize() const {
+ return AMDILDevice::HalfWavefrontSize;
+}
--- /dev/null
+//==- AMDILEvergreenDevice.h - Define Evergreen Device for AMDIL -*- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface for the subtarget data classes.
+//
+//===----------------------------------------------------------------------===//
+// This file will define the interface that each generation needs to
+// implement in order to correctly answer queries on the capabilities of the
+// specific hardware.
+//===----------------------------------------------------------------------===//
+#ifndef _AMDILEVERGREENDEVICE_H_
+#define _AMDILEVERGREENDEVICE_H_
+#include "AMDILDevice.h"
+#include "AMDILSubtarget.h"
+
+namespace llvm {
+ class AMDILSubtarget;
+//===----------------------------------------------------------------------===//
+// Evergreen generation of devices and their respective sub classes
+//===----------------------------------------------------------------------===//
+
+
+// The AMDILEvergreenDevice is the base device class for all of the Evergreen
+// series of cards. This class contains information required to differentiate
+// the Evergreen device from the generic AMDILDevice. This device represents
+// that capabilities of the 'Juniper' cards, also known as the HD57XX.
+class AMDILEvergreenDevice : public AMDILDevice {
+public:
+ AMDILEvergreenDevice(AMDILSubtarget *ST);
+ virtual ~AMDILEvergreenDevice();
+ virtual size_t getMaxLDSSize() const;
+ virtual size_t getMaxGDSSize() const;
+ virtual size_t getWavefrontSize() const;
+ virtual uint32_t getGeneration() const;
+ virtual uint32_t getMaxNumUAVs() const;
+ virtual uint32_t getResourceID(uint32_t) const;
+protected:
+ virtual void setCaps();
+}; // AMDILEvergreenDevice
+
+// The AMDILCypressDevice is similiar to the AMDILEvergreenDevice, except it has
+// support for double precision operations. This device is used to represent
+// both the Cypress and Hemlock cards, which are commercially known as HD58XX
+// and HD59XX cards.
+class AMDILCypressDevice : public AMDILEvergreenDevice {
+public:
+ AMDILCypressDevice(AMDILSubtarget *ST);
+ virtual ~AMDILCypressDevice();
+private:
+ virtual void setCaps();
+}; // AMDILCypressDevice
+
+
+// The AMDILCedarDevice is the class that represents all of the 'Cedar' based
+// devices. This class differs from the base AMDILEvergreenDevice in that the
+// device is a ~quarter of the 'Juniper'. These are commercially known as the
+// HD54XX and HD53XX series of cards.
+class AMDILCedarDevice : public AMDILEvergreenDevice {
+public:
+ AMDILCedarDevice(AMDILSubtarget *ST);
+ virtual ~AMDILCedarDevice();
+ virtual size_t getWavefrontSize() const;
+private:
+ virtual void setCaps();
+}; // AMDILCedarDevice
+
+// The AMDILRedwoodDevice is the class the represents all of the 'Redwood' based
+// devices. This class differs from the base class, in that these devices are
+// considered about half of a 'Juniper' device. These are commercially known as
+// the HD55XX and HD56XX series of cards.
+class AMDILRedwoodDevice : public AMDILEvergreenDevice {
+public:
+ AMDILRedwoodDevice(AMDILSubtarget *ST);
+ virtual ~AMDILRedwoodDevice();
+ virtual size_t getWavefrontSize() const;
+private:
+ virtual void setCaps();
+}; // AMDILRedwoodDevice
+
+} // namespace llvm
+#endif // _AMDILEVERGREENDEVICE_H_
--- /dev/null
+//==- AMDILFormats.td - AMDIL Instruction Formats ----*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+//===--------------------------------------------------------------------===//
+include "AMDILTokenDesc.td"
+
+//===--------------------------------------------------------------------===//
+// The parent IL instruction class that inherits the Instruction class. This
+// class sets the corresponding namespace, the out and input dag lists the
+// pattern to match to and the string to print out for the assembly printer.
+//===--------------------------------------------------------------------===//
+class ILFormat<ILOpCode op, dag outs, dag ins, string asmstr, list<dag> pattern>
+: Instruction {
+
+ let Namespace = "AMDGPU";
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ ILOpCode operation = op;
+ let Pattern = pattern;
+ let AsmString = !strconcat(asmstr, "\n");
+ let isPseudo = 1;
+ let Itinerary = NullALU;
+ bit hasIEEEFlag = 0;
+ bit hasZeroOpFlag = 0;
+}
+
+//===--------------------------------------------------------------------===//
+// Class that has one input parameters and one output parameter.
+// The basic pattern for this class is "Opcode Dst, Src0" and
+// handles the unary math operators.
+// It sets the binary token ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
+// if the addressing is register relative for input and output register 0.
+//===--------------------------------------------------------------------===//
+class OneInOneOut<ILOpCode op, dag outs, dag ins,
+ string asmstr, list<dag> pattern>
+ : ILFormat<op, outs, ins, asmstr, pattern>
+{
+ ILDst dst_reg;
+ ILDstMod dst_mod;
+ ILRelAddr dst_rel;
+ ILSrc dst_reg_rel;
+ ILSrcMod dst_reg_rel_mod;
+ ILSrc src0_reg;
+ ILSrcMod src0_mod;
+ ILRelAddr src0_rel;
+ ILSrc src0_reg_rel;
+ ILSrcMod src0_reg_rel_mod;
+}
+
+//===--------------------------------------------------------------------===//
+// This class is similiar to the UnaryOp class, however, there is no
+// result value to assign.
+//===--------------------------------------------------------------------===//
+class UnaryOpNoRet<ILOpCode op, dag outs, dag ins,
+ string asmstr, list<dag> pattern>
+ : ILFormat<op, outs, ins, asmstr, pattern>
+{
+ ILSrc src0_reg;
+ ILSrcMod src0_mod;
+ ILRelAddr src0_rel;
+ ILSrc src0_reg_rel;
+ ILSrcMod src0_reg_rel_mod;
+}
+
+//===--------------------------------------------------------------------===//
+// Set of classes that have two input parameters and one output parameter.
+// The basic pattern for this class is "Opcode Dst, Src0, Src1" and
+// handles the binary math operators and comparison operations.
+// It sets the binary token ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
+// if the addressing is register relative for input register 1.
+//===--------------------------------------------------------------------===//
+class TwoInOneOut<ILOpCode op, dag outs, dag ins,
+ string asmstr, list<dag> pattern>
+ : OneInOneOut<op, outs, ins, asmstr, pattern>
+{
+ ILSrc src1_reg;
+ ILSrcMod src1_mod;
+ ILRelAddr src1_rel;
+ ILSrc src1_reg_rel;
+ ILSrcMod src1_reg_rel_mod;
+}
+
+//===--------------------------------------------------------------------===//
+// Similiar to the UnaryOpNoRet class, but takes as arguments two input
+// operands. Used mainly for barrier instructions on PC platform.
+//===--------------------------------------------------------------------===//
+class BinaryOpNoRet<ILOpCode op, dag outs, dag ins,
+ string asmstr, list<dag> pattern>
+ : UnaryOpNoRet<op, outs, ins, asmstr, pattern>
+{
+ ILSrc src1_reg;
+ ILSrcMod src1_mod;
+ ILRelAddr src1_rel;
+ ILSrc src1_reg_rel;
+ ILSrcMod src1_reg_rel_mod;
+}
+
+//===--------------------------------------------------------------------===//
+// Set of classes that have three input parameters and one output parameter.
+// The basic pattern for this class is "Opcode Dst, Src0, Src1, Src2" and
+// handles the mad and conditional mov instruction.
+// It sets the binary token ILSrc, ILSrcMod, ILRelAddr and ILSrc and ILSrcMod
+// if the addressing is register relative.
+// This class is the parent class of TernaryOp
+//===--------------------------------------------------------------------===//
+class ThreeInOneOut<ILOpCode op, dag outs, dag ins,
+ string asmstr, list<dag> pattern>
+ : TwoInOneOut<op, outs, ins, asmstr, pattern> {
+ ILSrc src2_reg;
+ ILSrcMod src2_mod;
+ ILRelAddr src2_rel;
+ ILSrc src2_reg_rel;
+ ILSrcMod src2_reg_rel_mod;
+ }
+
+//===--------------------------------------------------------------------===//
+// Intrinsic classes
+// Generic versions of the above classes but for Target specific intrinsics
+// instead of SDNode patterns.
+//===--------------------------------------------------------------------===//
+let TargetPrefix = "AMDIL", isTarget = 1 in {
+ class VoidIntLong :
+ Intrinsic<[llvm_i64_ty], [], []>;
+ class VoidIntInt :
+ Intrinsic<[llvm_i32_ty], [], []>;
+ class VoidIntBool :
+ Intrinsic<[llvm_i32_ty], [], []>;
+ class UnaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ class UnaryIntFloat :
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ class ConvertIntFTOI :
+ Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
+ class ConvertIntITOF :
+ Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>;
+ class UnaryIntNoRetInt :
+ Intrinsic<[], [llvm_anyint_ty], []>;
+ class UnaryIntNoRetFloat :
+ Intrinsic<[], [llvm_anyfloat_ty], []>;
+ class BinaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ class BinaryIntFloat :
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ class BinaryIntNoRetInt :
+ Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>;
+ class BinaryIntNoRetFloat :
+ Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], []>;
+ class TernaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ class TernaryIntFloat :
+ Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ class QuaternaryIntInt :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ class UnaryAtomicInt :
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class BinaryAtomicInt :
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class TernaryAtomicInt :
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
+ class UnaryAtomicIntNoRet :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class BinaryAtomicIntNoRet :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+ class TernaryAtomicIntNoRet :
+ Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
+}
--- /dev/null
+//===----------------------- AMDILFrameLowering.cpp -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// Interface to describe a layout of a stack frame on a AMDIL target machine
+//
+//===----------------------------------------------------------------------===//
+#include "AMDILFrameLowering.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+
+using namespace llvm;
+AMDILFrameLowering::AMDILFrameLowering(StackDirection D, unsigned StackAl,
+ int LAO, unsigned TransAl)
+ : TargetFrameLowering(D, StackAl, LAO, TransAl)
+{
+}
+
+AMDILFrameLowering::~AMDILFrameLowering()
+{
+}
+
+/// getFrameIndexOffset - Returns the displacement from the frame register to
+/// the stack frame of the specified index.
+int AMDILFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->getObjectOffset(FI);
+}
+
+const TargetFrameLowering::SpillSlot *
+AMDILFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const
+{
+ NumEntries = 0;
+ return 0;
+}
+void
+AMDILFrameLowering::emitPrologue(MachineFunction &MF) const
+{
+}
+void
+AMDILFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
+{
+}
+bool
+AMDILFrameLowering::hasFP(const MachineFunction &MF) const
+{
+ return false;
+}
--- /dev/null
+//===--------------------- AMDILFrameLowering.h -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface to describe a layout of a stack frame on a AMDIL target machine
+//
+//===----------------------------------------------------------------------===//
+#ifndef _AMDILFRAME_LOWERING_H_
+#define _AMDILFRAME_LOWERING_H_
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+/// Information about the stack frame layout on the AMDIL targets. It holds
+/// the direction of the stack growth, the known stack alignment on entry to
+/// each function, and the offset to the locals area.
+/// See TargetFrameInfo for more comments.
+
+namespace llvm {
+ class AMDILFrameLowering : public TargetFrameLowering {
+ public:
+ AMDILFrameLowering(StackDirection D, unsigned StackAl, int LAO, unsigned
+ TransAl = 1);
+ virtual ~AMDILFrameLowering();
+ virtual int getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const;
+ virtual const SpillSlot *
+ getCalleeSavedSpillSlots(unsigned &NumEntries) const;
+ virtual void emitPrologue(MachineFunction &MF) const;
+ virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ virtual bool hasFP(const MachineFunction &MF) const;
+ }; // class AMDILFrameLowering
+} // namespace llvm
+#endif // _AMDILFRAME_LOWERING_H_
--- /dev/null
+//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the AMDIL target.
+//
+//===----------------------------------------------------------------------===//
+#include "AMDGPUISelLowering.h" // For AMDGPUISD
+#include "AMDILDevices.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/ADT/ValueMap.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Compiler.h"
+#include <list>
+#include <queue>
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// AMDILDAGToDAGISel - AMDIL specific code to select AMDIL machine instructions
+// //for SelectionDAG operations.
+//
+namespace {
+class AMDILDAGToDAGISel : public SelectionDAGISel {
+ // Subtarget - Keep a pointer to the AMDIL Subtarget around so that we can
+ // make the right decision when generating code for different targets.
+ const AMDILSubtarget &Subtarget;
+public:
+ AMDILDAGToDAGISel(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
+ virtual ~AMDILDAGToDAGISel();
+
+ SDNode *Select(SDNode *N);
+ virtual const char *getPassName() const;
+
+private:
+ inline SDValue getSmallIPtrImm(unsigned Imm);
+
+ // Complex pattern selectors
+ bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
+ bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
+ bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
+
+ static bool checkType(const Value *ptr, unsigned int addrspace);
+ static const Value *getBasePointerValue(const Value *V);
+
+ static bool isGlobalStore(const StoreSDNode *N);
+ static bool isPrivateStore(const StoreSDNode *N);
+ static bool isLocalStore(const StoreSDNode *N);
+ static bool isRegionStore(const StoreSDNode *N);
+
+ static bool isCPLoad(const LoadSDNode *N);
+ static bool isConstantLoad(const LoadSDNode *N, int cbID);
+ static bool isGlobalLoad(const LoadSDNode *N);
+ static bool isPrivateLoad(const LoadSDNode *N);
+ static bool isLocalLoad(const LoadSDNode *N);
+ static bool isRegionLoad(const LoadSDNode *N);
+
+ bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset);
+ bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
+ bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
+
+ // Include the pieces autogenerated from the target description.
+#include "AMDGPUGenDAGISel.inc"
+};
+} // end anonymous namespace
+
+// createAMDILISelDag - This pass converts a legalized DAG into a AMDIL-specific
+// DAG, ready for instruction scheduling.
+//
+FunctionPass *llvm::createAMDILISelDag(TargetMachine &TM
+ AMDIL_OPT_LEVEL_DECL) {
+ return new AMDILDAGToDAGISel(TM AMDIL_OPT_LEVEL_VAR);
+}
+
+AMDILDAGToDAGISel::AMDILDAGToDAGISel(TargetMachine &TM
+ AMDIL_OPT_LEVEL_DECL)
+ : SelectionDAGISel(TM AMDIL_OPT_LEVEL_VAR), Subtarget(TM.getSubtarget<AMDILSubtarget>())
+{
+}
+
+AMDILDAGToDAGISel::~AMDILDAGToDAGISel() {
+}
+
+SDValue AMDILDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+}
+
+bool AMDILDAGToDAGISel::SelectADDRParam(
+ SDValue Addr, SDValue& R1, SDValue& R2) {
+
+ if (Addr.getOpcode() == ISD::FrameIndex) {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ }
+ } else if (Addr.getOpcode() == ISD::ADD) {
+ R1 = Addr.getOperand(0);
+ R2 = Addr.getOperand(1);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i32);
+ }
+ return true;
+}
+
+bool AMDILDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress) {
+ return false;
+ }
+ return SelectADDRParam(Addr, R1, R2);
+}
+
+
+bool AMDILDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress) {
+ return false;
+ }
+
+ if (Addr.getOpcode() == ISD::FrameIndex) {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
+ R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ }
+ } else if (Addr.getOpcode() == ISD::ADD) {
+ R1 = Addr.getOperand(0);
+ R2 = Addr.getOperand(1);
+ } else {
+ R1 = Addr;
+ R2 = CurDAG->getTargetConstant(0, MVT::i64);
+ }
+ return true;
+}
+
+SDNode *AMDILDAGToDAGISel::Select(SDNode *N) {
+ unsigned int Opc = N->getOpcode();
+ if (N->isMachineOpcode()) {
+ return NULL; // Already selected.
+ }
+ switch (Opc) {
+ default: break;
+ case ISD::FrameIndex:
+ {
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
+ unsigned int FI = FIN->getIndex();
+ EVT OpVT = N->getValueType(0);
+ unsigned int NewOpc = AMDGPU::COPY;
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
+ return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI);
+ }
+ }
+ break;
+ }
+ return SelectCode(N);
+}
+
+bool AMDILDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
+ if (!ptr) {
+ return false;
+ }
+ Type *ptrType = ptr->getType();
+ return dyn_cast<PointerType>(ptrType)->getAddressSpace() == addrspace;
+}
+
+const Value * AMDILDAGToDAGISel::getBasePointerValue(const Value *V)
+{
+ if (!V) {
+ return NULL;
+ }
+ const Value *ret = NULL;
+ ValueMap<const Value *, bool> ValueBitMap;
+ std::queue<const Value *, std::list<const Value *> > ValueQueue;
+ ValueQueue.push(V);
+ while (!ValueQueue.empty()) {
+ V = ValueQueue.front();
+ if (ValueBitMap.find(V) == ValueBitMap.end()) {
+ ValueBitMap[V] = true;
+ if (dyn_cast<Argument>(V) && dyn_cast<PointerType>(V->getType())) {
+ ret = V;
+ break;
+ } else if (dyn_cast<GlobalVariable>(V)) {
+ ret = V;
+ break;
+ } else if (dyn_cast<Constant>(V)) {
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(V);
+ if (CE) {
+ ValueQueue.push(CE->getOperand(0));
+ }
+ } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ ret = AI;
+ break;
+ } else if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ uint32_t numOps = I->getNumOperands();
+ for (uint32_t x = 0; x < numOps; ++x) {
+ ValueQueue.push(I->getOperand(x));
+ }
+ } else {
+ // assert(0 && "Found a Value that we didn't know how to handle!");
+ }
+ }
+ ValueQueue.pop();
+ }
+ return ret;
+}
+
+bool AMDILDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
+ return checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS);
+}
+
+bool AMDILDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
+ return (!checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS));
+}
+
+bool AMDILDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
+ return checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS);
+}
+
+bool AMDILDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
+ return checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS);
+}
+
+bool AMDILDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) {
+ if (checkType(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)) {
+ return true;
+ }
+ MachineMemOperand *MMO = N->getMemOperand();
+ const Value *V = MMO->getValue();
+ const Value *BV = getBasePointerValue(V);
+ if (MMO
+ && MMO->getValue()
+ && ((V && dyn_cast<GlobalValue>(V))
+ || (BV && dyn_cast<GlobalValue>(
+ getBasePointerValue(MMO->getValue()))))) {
+ return checkType(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS);
+ } else {
+ return false;
+ }
+}
+
+bool AMDILDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) {
+ return checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS);
+}
+
+bool AMDILDAGToDAGISel::isLocalLoad(const LoadSDNode *N) {
+ return checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS);
+}
+
+bool AMDILDAGToDAGISel::isRegionLoad(const LoadSDNode *N) {
+ return checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS);
+}
+
+bool AMDILDAGToDAGISel::isCPLoad(const LoadSDNode *N) {
+ MachineMemOperand *MMO = N->getMemOperand();
+ if (checkType(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) {
+ if (MMO) {
+ const Value *V = MMO->getValue();
+ const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V);
+ if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+bool AMDILDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) {
+ if (checkType(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS)) {
+ // Check to make sure we are not a constant pool load or a constant load
+ // that is marked as a private load
+ if (isCPLoad(N) || isConstantLoad(N, -1)) {
+ return false;
+ }
+ }
+ if (!checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDILAS::PARAM_D_ADDRESS)
+ && !checkType(N->getSrcValue(), AMDILAS::PARAM_I_ADDRESS))
+ {
+ return true;
+ }
+ return false;
+}
+
+const char *AMDILDAGToDAGISel::getPassName() const {
+ return "AMDIL DAG->DAG Pattern Instruction Selection";
+}
+
+#ifdef DEBUGTMP
+#undef INT64_C
+#endif
+#undef DEBUGTMP
+
+///==== AMDGPU Functions ====///
+
+bool AMDILDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base,
+ SDValue& Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress) {
+ return false;
+ }
+
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ bool Match = false;
+
+ // Find the base ptr and the offset
+ for (unsigned i = 0; i < Addr.getNumOperands(); i++) {
+ SDValue Arg = Addr.getOperand(i);
+ ConstantSDNode * OffsetNode = dyn_cast<ConstantSDNode>(Arg);
+ // This arg isn't a constant so it must be the base PTR.
+ if (!OffsetNode) {
+ Base = Addr.getOperand(i);
+ continue;
+ }
+ // Check if the constant argument fits in 8-bits. The offset is in bytes
+ // so we need to convert it to dwords.
+ if (isInt<8>(OffsetNode->getZExtValue() >> 2)) {
+ Match = true;
+ Offset = CurDAG->getTargetConstant(OffsetNode->getZExtValue() >> 2,
+ MVT::i32);
+ }
+ }
+ return Match;
+ }
+
+ // Default case, no offset
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+bool AMDILDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
+ SDValue &Offset)
+{
+ ConstantSDNode * IMMOffset;
+
+ if (Addr.getOpcode() == ISD::ADD
+ && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ && isInt<16>(IMMOffset->getZExtValue())) {
+
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
+ return true;
+ // If the pointer address is constant, we can move it to the offset field.
+ } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
+ && isInt<16>(IMMOffset->getZExtValue())) {
+ Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ CurDAG->getEntryNode().getDebugLoc(),
+ AMDGPU::ZERO, MVT::i32);
+ Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
+ return true;
+ }
+
+ // Default case, no offset
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+bool AMDILDAGToDAGISel::SelectADDRReg(SDValue Addr, SDValue& Base,
+ SDValue& Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress ||
+ Addr.getOpcode() != ISD::ADD) {
+ return false;
+ }
+
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+
+ return false;
+}
--- /dev/null
+//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file implements the interfaces that AMDIL uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILISelLowering.h"
+#include "AMDILDevices.h"
+#include "AMDILIntrinsicInfo.h"
+#include "AMDILRegisterInfo.h"
+#include "AMDILSubtarget.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+#define ISDBITCAST ISD::BITCAST
+#define MVTGLUE MVT::Glue
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+#include "AMDGPUGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation Help Functions Begin
+//===----------------------------------------------------------------------===//
+ static SDValue
+getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType)
+{
+ DebugLoc DL = Src.getDebugLoc();
+ EVT svt = Src.getValueType().getScalarType();
+ EVT dvt = Dst.getValueType().getScalarType();
+ if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
+ if (dvt.bitsGT(svt)) {
+ Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
+ } else if (svt.bitsLT(svt)) {
+ Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
+ DAG.getConstant(1, MVT::i32));
+ }
+ } else if (svt.isInteger() && dvt.isInteger()) {
+ if (!svt.bitsEq(dvt)) {
+ Src = DAG.getSExtOrTrunc(Src, DL, dvt);
+ }
+ } else if (svt.isInteger()) {
+ unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP;
+ if (!svt.bitsEq(dvt)) {
+ if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
+ Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
+ } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
+ Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
+ } else {
+ assert(0 && "We only support 32 and 64bit fp types");
+ }
+ }
+ Src = DAG.getNode(opcode, DL, dvt, Src);
+ } else if (dvt.isInteger()) {
+ unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT;
+ if (svt.getSimpleVT().SimpleTy == MVT::f32) {
+ Src = DAG.getNode(opcode, DL, MVT::i32, Src);
+ } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
+ Src = DAG.getNode(opcode, DL, MVT::i64, Src);
+ } else {
+ assert(0 && "We only support 32 and 64bit fp types");
+ }
+ Src = DAG.getSExtOrTrunc(Src, DL, dvt);
+ }
+ return Src;
+}
+// CondCCodeToCC - Convert a DAG condition code to a AMDIL CC
+// condition.
+ static AMDILCC::CondCodes
+CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
+{
+ switch (CC) {
+ default:
+ {
+ errs()<<"Condition Code: "<< (unsigned int)CC<<"\n";
+ assert(0 && "Unknown condition code!");
+ }
+ case ISD::SETO:
+ switch(type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_O;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_O;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETUO:
+ switch(type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_UO;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_UO;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETGT:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_I_GT;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_GT;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_GT;
+ case MVT::i64:
+ return AMDILCC::IL_CC_L_GT;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETGE:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_I_GE;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_GE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_GE;
+ case MVT::i64:
+ return AMDILCC::IL_CC_L_GE;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETLT:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_I_LT;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_LT;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_LT;
+ case MVT::i64:
+ return AMDILCC::IL_CC_L_LT;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETLE:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_I_LE;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_LE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_LE;
+ case MVT::i64:
+ return AMDILCC::IL_CC_L_LE;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETNE:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_I_NE;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_NE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_NE;
+ case MVT::i64:
+ return AMDILCC::IL_CC_L_NE;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETEQ:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_I_EQ;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_EQ;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_EQ;
+ case MVT::i64:
+ return AMDILCC::IL_CC_L_EQ;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETUGT:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_U_GT;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_UGT;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_UGT;
+ case MVT::i64:
+ return AMDILCC::IL_CC_UL_GT;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETUGE:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_U_GE;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_UGE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_UGE;
+ case MVT::i64:
+ return AMDILCC::IL_CC_UL_GE;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETULT:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_U_LT;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_ULT;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_ULT;
+ case MVT::i64:
+ return AMDILCC::IL_CC_UL_LT;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETULE:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_U_LE;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_ULE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_ULE;
+ case MVT::i64:
+ return AMDILCC::IL_CC_UL_LE;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETUNE:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_U_NE;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_UNE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_UNE;
+ case MVT::i64:
+ return AMDILCC::IL_CC_UL_NE;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETUEQ:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_U_EQ;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_UEQ;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_UEQ;
+ case MVT::i64:
+ return AMDILCC::IL_CC_UL_EQ;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETOGT:
+ switch (type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_OGT;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_OGT;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETOGE:
+ switch (type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_OGE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_OGE;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETOLT:
+ switch (type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_OLT;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_OLT;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETOLE:
+ switch (type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_OLE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_OLE;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETONE:
+ switch (type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_ONE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_ONE;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETOEQ:
+ switch (type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_OEQ;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_OEQ;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ };
+}
+
+SDValue
+AMDILTargetLowering::LowerMemArgument(
+ SDValue Chain,
+ CallingConv::ID CallConv,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ MachineFrameInfo *MFI,
+ unsigned i) const
+{
+ // Create the nodes corresponding to a load from this parameter slot.
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+
+ bool AlwaysUseMutable = (CallConv==CallingConv::Fast) &&
+ getTargetMachine().Options.GuaranteedTailCallOpt;
+ bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
+
+ // FIXME: For now, all byval parameter objects are marked mutable. This can
+ // be changed with more analysis.
+ // In case of tail call optimization mark all arguments mutable. Since they
+ // could be overwritten by lowering of arguments in case of a tail call.
+ int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+ VA.getLocMemOffset(), isImmutable);
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+
+ if (Flags.isByVal())
+ return FIN;
+ return DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, false, 0);
+}
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation Help Functions End
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Class Implementation Begins
+//===----------------------------------------------------------------------===//
+ AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
+: TargetLowering(TM, new TargetLoweringObjectFileELF())
+{
+ int types[] =
+ {
+ (int)MVT::i8,
+ (int)MVT::i16,
+ (int)MVT::i32,
+ (int)MVT::f32,
+ (int)MVT::f64,
+ (int)MVT::i64,
+ (int)MVT::v2i8,
+ (int)MVT::v4i8,
+ (int)MVT::v2i16,
+ (int)MVT::v4i16,
+ (int)MVT::v4f32,
+ (int)MVT::v4i32,
+ (int)MVT::v2f32,
+ (int)MVT::v2i32,
+ (int)MVT::v2f64,
+ (int)MVT::v2i64
+ };
+
+ int IntTypes[] =
+ {
+ (int)MVT::i8,
+ (int)MVT::i16,
+ (int)MVT::i32,
+ (int)MVT::i64
+ };
+
+ int FloatTypes[] =
+ {
+ (int)MVT::f32,
+ (int)MVT::f64
+ };
+
+ int VectorTypes[] =
+ {
+ (int)MVT::v2i8,
+ (int)MVT::v4i8,
+ (int)MVT::v2i16,
+ (int)MVT::v4i16,
+ (int)MVT::v4f32,
+ (int)MVT::v4i32,
+ (int)MVT::v2f32,
+ (int)MVT::v2i32,
+ (int)MVT::v2f64,
+ (int)MVT::v2i64
+ };
+ size_t numTypes = sizeof(types) / sizeof(*types);
+ size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
+ size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
+ size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
+
+ const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
+ // These are the current register classes that are
+ // supported
+
+ for (unsigned int x = 0; x < numTypes; ++x) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
+
+ //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
+ // We cannot sextinreg, expand to shifts
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
+ setOperationAction(ISD::SUBE, VT, Expand);
+ setOperationAction(ISD::SUBC, VT, Expand);
+ setOperationAction(ISD::ADDE, VT, Expand);
+ setOperationAction(ISD::ADDC, VT, Expand);
+ setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::BRCOND, VT, Custom);
+ setOperationAction(ISD::BR_CC, VT, Custom);
+ setOperationAction(ISD::BR_JT, VT, Expand);
+ setOperationAction(ISD::BRIND, VT, Expand);
+ // TODO: Implement custom UREM/SREM routines
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::GlobalAddress, VT, Custom);
+ setOperationAction(ISD::JumpTable, VT, Custom);
+ setOperationAction(ISD::ConstantPool, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Custom);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ if (VT != MVT::i64 && VT != MVT::v2i64) {
+ setOperationAction(ISD::SDIV, VT, Custom);
+ }
+ }
+ for (unsigned int x = 0; x < numFloatTypes; ++x) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
+
+ // IL does not have these operations for floating point types
+ setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
+ setOperationAction(ISD::SETOLT, VT, Expand);
+ setOperationAction(ISD::SETOGE, VT, Expand);
+ setOperationAction(ISD::SETOGT, VT, Expand);
+ setOperationAction(ISD::SETOLE, VT, Expand);
+ setOperationAction(ISD::SETULT, VT, Expand);
+ setOperationAction(ISD::SETUGE, VT, Expand);
+ setOperationAction(ISD::SETUGT, VT, Expand);
+ setOperationAction(ISD::SETULE, VT, Expand);
+ }
+
+ for (unsigned int x = 0; x < numIntTypes; ++x) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
+
+ // GPU also does not have divrem function for signed or unsigned
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+
+ // GPU does not have [S|U]MUL_LOHI functions as a single instruction
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+
+ // GPU doesn't have a rotl, rotr, or byteswap instruction
+ setOperationAction(ISD::ROTR, VT, Expand);
+ setOperationAction(ISD::BSWAP, VT, Expand);
+
+ // GPU doesn't have any counting operators
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ }
+
+ for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
+ {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
+
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ // setOperationAction(ISD::VSETCC, VT, Expand);
+ setOperationAction(ISD::SETCC, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction(ISD::SELECT, VT, Expand);
+
+ }
+ if (STM.device()->isSupported(AMDILDeviceInfo::LongOps)) {
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
+ setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+ setOperationAction(ISD::SREM, MVT::v2i64, Expand);
+ setOperationAction(ISD::Constant , MVT::i64 , Legal);
+ setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
+ }
+ if (STM.device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
+ // we support loading/storing v2f64 but not operations on the type
+ setOperationAction(ISD::FADD, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
+ setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+ setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
+ // We want to expand vector conversions into their scalar
+ // counterparts.
+ setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::FABS, MVT::f64, Expand);
+ setOperationAction(ISD::FABS, MVT::v2f64, Expand);
+ }
+ // TODO: Fix the UDIV24 algorithm so it works for these
+ // types correctly. This needs vector comparisons
+ // for this to work correctly.
+ setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
+ setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
+ setOperationAction(ISD::SUBC, MVT::Other, Expand);
+ setOperationAction(ISD::ADDE, MVT::Other, Expand);
+ setOperationAction(ISD::ADDC, MVT::Other, Expand);
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+ setOperationAction(ISD::BR_CC, MVT::Other, Custom);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ setOperationAction(ISD::SETCC, MVT::Other, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
+
+ setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
+ // Use the default implementation.
+ setOperationAction(ISD::VAARG , MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
+ setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
+ setOperationAction(ISD::Constant , MVT::i32 , Legal);
+ setOperationAction(ISD::TRAP , MVT::Other , Legal);
+
+ setStackPointerRegisterToSaveRestore(AMDGPU::SP);
+ setSchedulingPreference(Sched::RegPressure);
+ setPow2DivIsCheap(false);
+ setPrefLoopAlignment(16);
+ setSelectIsExpensive(true);
+ setJumpIsExpensive(true);
+
+ maxStoresPerMemcpy = 4096;
+ maxStoresPerMemmove = 4096;
+ maxStoresPerMemset = 4096;
+
+#undef numTypes
+#undef numIntTypes
+#undef numVectorTypes
+#undef numFloatTypes
+}
+
+const char *
+AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
+{
+ switch (Opcode) {
+ default: return 0;
+ case AMDILISD::CMOVLOG: return "AMDILISD::CMOVLOG";
+ case AMDILISD::MAD: return "AMDILISD::MAD";
+ case AMDILISD::CALL: return "AMDILISD::CALL";
+ case AMDILISD::SELECT_CC: return "AMDILISD::SELECT_CC";
+ case AMDILISD::UMUL: return "AMDILISD::UMUL";
+ case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF";
+ case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
+ case AMDILISD::CMP: return "AMDILISD::CMP";
+ case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT";
+ case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE";
+ case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT";
+ case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE";
+ case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ";
+ case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE";
+ case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
+ case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
+
+ };
+}
+bool
+AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I, unsigned Intrinsic) const
+{
+ return false;
+}
+
+// The backend supports 32 and 64 bit floating point immediates
+bool
+AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
+{
+ if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
+ || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
+ return true;
+ } else {
+ return false;
+ }
+}
+
+bool
+AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
+{
+ if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
+ || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
+ return false;
+ } else {
+ return true;
+ }
+}
+
+
+// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
+// be zero. Op is expected to be a target specific node. Used by DAG
+// combiner.
+
+void
+AMDILTargetLowering::computeMaskedBitsForTargetNode(
+ const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const
+{
+ APInt KnownZero2;
+ APInt KnownOne2;
+ KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
+ switch (Op.getOpcode()) {
+ default: break;
+ case AMDILISD::SELECT_CC:
+ DAG.ComputeMaskedBits(
+ Op.getOperand(1),
+ KnownZero,
+ KnownOne,
+ Depth + 1
+ );
+ DAG.ComputeMaskedBits(
+ Op.getOperand(0),
+ KnownZero2,
+ KnownOne2
+ );
+ assert((KnownZero & KnownOne) == 0
+ && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0
+ && "Bits known to be one AND zero?");
+ // Only known if known in both the LHS and RHS
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ };
+}
+
+// This is the function that determines which calling convention should
+// be used. Currently there is only one calling convention
+CCAssignFn*
+AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const
+{
+ //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ return CC_AMDIL32;
+}
+
+// LowerCallResult - Lower the result values of an ISD::CALL into the
+// appropriate copies out of appropriate physical registers. This assumes that
+// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
+// being lowered. The returns a SDNode with the same number of values as the
+// ISD::CALL.
+SDValue
+AMDILTargetLowering::LowerCallResult(
+ SDValue Chain,
+ SDValue InFlag,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const
+{
+ // Assign locations to each value returned by this call
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ EVT CopyVT = RVLocs[i].getValVT();
+ if (RVLocs[i].isRegLoc()) {
+ Chain = DAG.getCopyFromReg(
+ Chain,
+ dl,
+ RVLocs[i].getLocReg(),
+ CopyVT,
+ InFlag
+ ).getValue(1);
+ SDValue Val = Chain.getValue(0);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Val);
+ }
+ }
+
+ return Chain;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Hooks
+//===----------------------------------------------------------------------===//
+
+// Recursively assign SDNodeOrdering to any unordered nodes
+// This is necessary to maintain source ordering of instructions
+// under -O0 to avoid odd-looking "skipping around" issues.
+ static const SDValue
+Ordered( SelectionDAG &DAG, unsigned order, const SDValue New )
+{
+ if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) {
+ DAG.AssignOrdering( New.getNode(), order );
+ for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i)
+ Ordered( DAG, order, New.getOperand(i) );
+ }
+ return New;
+}
+
+#define LOWER(A) \
+ case ISD:: A: \
+return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )
+
+SDValue
+AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
+{
+ switch (Op.getOpcode()) {
+ default:
+ Op.getNode()->dump();
+ assert(0 && "Custom lowering code for this"
+ "instruction is not implemented yet!");
+ break;
+ LOWER(GlobalAddress);
+ LOWER(JumpTable);
+ LOWER(ConstantPool);
+ LOWER(ExternalSymbol);
+ LOWER(SDIV);
+ LOWER(SREM);
+ LOWER(BUILD_VECTOR);
+ LOWER(SELECT);
+ LOWER(SETCC);
+ LOWER(SIGN_EXTEND_INREG);
+ LOWER(DYNAMIC_STACKALLOC);
+ LOWER(BRCOND);
+ LOWER(BR_CC);
+ }
+ return Op;
+}
+
+#undef LOWER
+
+SDValue
+AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue DST = Op;
+ const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
+ const GlobalValue *G = GADN->getGlobal();
+ DebugLoc DL = Op.getDebugLoc();
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
+ if (!GV) {
+ DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
+ } else {
+ if (GV->hasInitializer()) {
+ const Constant *C = dyn_cast<Constant>(GV->getInitializer());
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+ DST = DAG.getConstant(CI->getValue(), Op.getValueType());
+ } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
+ DST = DAG.getConstantFP(CF->getValueAPF(),
+ Op.getValueType());
+ } else if (dyn_cast<ConstantAggregateZero>(C)) {
+ EVT VT = Op.getValueType();
+ if (VT.isInteger()) {
+ DST = DAG.getConstant(0, VT);
+ } else {
+ DST = DAG.getConstantFP(0, VT);
+ }
+ } else {
+ assert(!"lowering this type of Global Address "
+ "not implemented yet!");
+ C->dump();
+ DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
+ }
+ } else {
+ DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
+ }
+ }
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
+{
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
+ return Result;
+}
+SDValue
+AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
+{
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ EVT PtrVT = Op.getValueType();
+ SDValue Result;
+ if (CP->isMachineConstantPoolEntry()) {
+ Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
+ CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
+ } else {
+ Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
+ CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
+ }
+ return Result;
+}
+
+SDValue
+AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
+{
+ const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
+ SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32);
+ return Result;
+}
+
+/// LowerFORMAL_ARGUMENTS - transform physical registers into
+/// virtual registers and generate load operations for
+/// arguments places on the stack.
+/// TODO: isVarArg, hasStructRet, isMemReg
+ SDValue
+AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+const
+{
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ //const Function *Fn = MF.getFunction();
+ //MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CallingConv::ID CC = MF.getFunction()->getCallingConv();
+ //bool hasStructRet = MF.getFunction()->hasStructRetAttr();
+
+ CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // When more calling conventions are added, they need to be chosen here
+ CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
+ SDValue StackPtr;
+
+ //unsigned int FirstStackArgLoc = 0;
+
+ for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isRegLoc()) {
+ EVT RegVT = VA.getLocVT();
+ const TargetRegisterClass *RC = getRegClassFor(
+ RegVT.getSimpleVT().SimpleTy);
+
+ unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ SDValue ArgValue = DAG.getCopyFromReg(
+ Chain,
+ dl,
+ Reg,
+ RegVT);
+ // If this is an 8 or 16-bit value, it is really passed
+ // promoted to 32 bits. Insert an assert[sz]ext to capture
+ // this, then truncate to the right size.
+
+ if (VA.getLocInfo() == CCValAssign::SExt) {
+ ArgValue = DAG.getNode(
+ ISD::AssertSext,
+ dl,
+ RegVT,
+ ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ } else if (VA.getLocInfo() == CCValAssign::ZExt) {
+ ArgValue = DAG.getNode(
+ ISD::AssertZext,
+ dl,
+ RegVT,
+ ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ }
+ if (VA.getLocInfo() != CCValAssign::Full) {
+ ArgValue = DAG.getNode(
+ ISD::TRUNCATE,
+ dl,
+ VA.getValVT(),
+ ArgValue);
+ }
+ // Add the value to the list of arguments
+ // to be passed in registers
+ InVals.push_back(ArgValue);
+ if (isVarArg) {
+ assert(0 && "Variable arguments are not yet supported");
+ // See MipsISelLowering.cpp for ideas on how to implement
+ }
+ } else if(VA.isMemLoc()) {
+ InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
+ dl, DAG, VA, MFI, i));
+ } else {
+ assert(0 && "found a Value Assign that is "
+ "neither a register or a memory location");
+ }
+ }
+ /*if (hasStructRet) {
+ assert(0 && "Has struct return is not yet implemented");
+ // See MipsISelLowering.cpp for ideas on how to implement
+ }*/
+
+ if (isVarArg) {
+ assert(0 && "Variable arguments are not yet supported");
+ // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
+ }
+ // This needs to be changed to non-zero if the return function needs
+ // to pop bytes
+ return Chain;
+}
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
+/// by "Src" to address "Dst" with size and alignment information specified by
+/// the specific parameter attribute. The copy will be passed as a byval
+/// function parameter.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+ ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
+ assert(0 && "MemCopy does not exist yet");
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+
+ return DAG.getMemcpy(Chain,
+ Src.getDebugLoc(),
+ Dst, Src, SizeNode, Flags.getByValAlign(),
+ /*IsVol=*/false, /*AlwaysInline=*/true,
+ MachinePointerInfo(), MachinePointerInfo());
+}
+
+SDValue
+AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
+ SDValue StackPtr, SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) const
+{
+ unsigned int LocMemOffset = VA.getLocMemOffset();
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ PtrOff = DAG.getNode(ISD::ADD,
+ dl,
+ getPointerTy(), StackPtr, PtrOff);
+ if (Flags.isByVal()) {
+ PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
+ } else {
+ PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo::getStack(LocMemOffset),
+ false, false, 0);
+ }
+ return PtrOff;
+}
+/// LowerCAL - functions arguments are copied from virtual
+/// regs to (physical regs)/(stack frame), CALLSEQ_START and
+/// CALLSEQ_END are emitted.
+/// TODO: isVarArg, isTailCall, hasStructRet
+SDValue
+AMDILTargetLowering::LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const
+
+#if 0
+ SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
+ bool& isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+#endif
+{
+ CLI.IsTailCall = false;
+ MachineFunction& MF = CLI.DAG.getMachineFunction();
+ // FIXME: DO we need to handle fast calling conventions and tail call
+ // optimizations?? X86/PPC ISelLowering
+ /*bool hasStructRet = (TheCall->getNumArgs())
+ ? TheCall->getArgFlags(0).device()->isSRet()
+ : false;*/
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Analyze operands of the call, assigning locations to each operand
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CLI.CallConv, CLI.IsVarArg, CLI.DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *CLI.DAG.getContext());
+ // Analyize the calling operands, but need to change
+ // if we have more than one calling convetion
+ CCInfo.AnalyzeCallOperands(CLI.Outs, CCAssignFnForNode(CLI.CallConv));
+
+ unsigned int NumBytes = CCInfo.getNextStackOffset();
+ if (CLI.IsTailCall) {
+ assert(CLI.IsTailCall && "Tail Call not handled yet!");
+ // See X86/PPC ISelLowering
+ }
+
+ CLI.Chain = CLI.DAG.getCALLSEQ_START(CLI.Chain,
+ CLI.DAG.getIntPtrConstant(NumBytes, true));
+
+ SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+ SDValue StackPtr;
+ //unsigned int FirstStacArgLoc = 0;
+ //int LastArgStackLoc = 0;
+
+ // Walk the register/memloc assignments, insert copies/loads
+ for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
+ // Arguments start after the 5 first operands of ISD::CALL
+ SDValue Arg = CLI.OutVals[i];
+ //Promote the value if needed
+ switch(VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::SExt:
+ Arg = CLI.DAG.getNode(ISD::SIGN_EXTEND,
+ CLI.DL,
+ VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = CLI.DAG.getNode(ISD::ZERO_EXTEND,
+ CLI.DL,
+ VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = CLI.DAG.getNode(ISD::ANY_EXTEND,
+ CLI.DL,
+ VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else if (VA.isMemLoc()) {
+ // Create the frame index object for this incoming parameter
+ int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+ VA.getLocMemOffset(), true);
+ SDValue PtrOff = CLI.DAG.getFrameIndex(FI,getPointerTy());
+
+ // emit ISD::STORE whichs stores the
+ // parameter value to a stack Location
+ MemOpChains.push_back(CLI.DAG.getStore(CLI.Chain, CLI.DL, Arg, PtrOff,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
+ } else {
+ assert(0 && "Not a Reg/Mem Loc, major error!");
+ }
+ }
+ if (!MemOpChains.empty()) {
+ CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor,
+ CLI.DL,
+ MVT::Other,
+ &MemOpChains[0],
+ MemOpChains.size());
+ }
+ SDValue InFlag;
+ if (!CLI.IsTailCall) {
+ for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
+ CLI.Chain = CLI.DAG.getCopyToReg(CLI.Chain,
+ CLI.DL,
+ RegsToPass[i].first,
+ RegsToPass[i].second,
+ InFlag);
+ InFlag = CLI.Chain.getValue(1);
+ }
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
+ // every direct call is) turn it into a TargetGlobalAddress/
+ // TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(CLI.Callee)) {
+ CLI.Callee = CLI.DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, getPointerTy());
+ }
+ else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(CLI.Callee)) {
+ CLI.Callee = CLI.DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+ }
+ else if (CLI.IsTailCall) {
+ assert(0 && "Tail calls are not handled yet");
+ // see X86 ISelLowering for ideas on implementation: 1708
+ }
+
+ SDVTList NodeTys = CLI.DAG.getVTList(MVT::Other, MVTGLUE);
+ SmallVector<SDValue, 8> Ops;
+
+ if (CLI.IsTailCall) {
+ assert(0 && "Tail calls are not handled yet");
+ // see X86 ISelLowering for ideas on implementation: 1721
+ }
+ // If this is a direct call, pass the chain and the callee
+ if (CLI.Callee.getNode()) {
+ Ops.push_back(CLI.Chain);
+ Ops.push_back(CLI.Callee);
+ }
+
+ if (CLI.IsTailCall) {
+ assert(0 && "Tail calls are not handled yet");
+ // see X86 ISelLowering for ideas on implementation: 1739
+ }
+
+ // Add argument registers to the end of the list so that they are known
+ // live into the call
+ for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Ops.push_back(CLI.DAG.getRegister(
+ RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+ }
+ if (InFlag.getNode()) {
+ Ops.push_back(InFlag);
+ }
+
+ // Emit Tail Call
+ if (CLI.IsTailCall) {
+ assert(0 && "Tail calls are not handled yet");
+ // see X86 ISelLowering for ideas on implementation: 1762
+ }
+
+ CLI.Chain = CLI.DAG.getNode(AMDILISD::CALL,
+ CLI.DL,
+ NodeTys, &Ops[0], Ops.size());
+ InFlag = CLI.Chain.getValue(1);
+
+ // Create the CALLSEQ_END node
+ CLI.Chain = CLI.DAG.getCALLSEQ_END(
+ CLI.Chain,
+ CLI.DAG.getIntPtrConstant(NumBytes, true),
+ CLI.DAG.getIntPtrConstant(0, true),
+ InFlag);
+ InFlag = CLI.Chain.getValue(1);
+ // Handle result values, copying them out of physregs into vregs that
+ // we return
+ return LowerCallResult(CLI.Chain, InFlag, CLI.CallConv, CLI.IsVarArg, CLI.Ins, CLI.DL, CLI.DAG,
+ InVals);
+}
+
+SDValue
+AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
+{
+ EVT OVT = Op.getValueType();
+ SDValue DST;
+ if (OVT.getScalarType() == MVT::i64) {
+ DST = LowerSDIV64(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i32) {
+ DST = LowerSDIV32(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i16
+ || OVT.getScalarType() == MVT::i8) {
+ DST = LowerSDIV24(Op, DAG);
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
+{
+ EVT OVT = Op.getValueType();
+ SDValue DST;
+ if (OVT.getScalarType() == MVT::i64) {
+ DST = LowerSREM64(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i32) {
+ DST = LowerSREM32(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i16) {
+ DST = LowerSREM16(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i8) {
+ DST = LowerSREM8(Op, DAG);
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
+{
+ EVT VT = Op.getValueType();
+ SDValue Nodes1;
+ SDValue second;
+ SDValue third;
+ SDValue fourth;
+ DebugLoc DL = Op.getDebugLoc();
+ Nodes1 = DAG.getNode(AMDILISD::VBUILD,
+ DL,
+ VT, Op.getOperand(0));
+#if 0
+ bool allEqual = true;
+ for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
+ if (Op.getOperand(0) != Op.getOperand(x)) {
+ allEqual = false;
+ break;
+ }
+ }
+ if (allEqual) {
+ return Nodes1;
+ }
+#endif
+ switch(Op.getNumOperands()) {
+ default:
+ case 1:
+ break;
+ case 4:
+ fourth = Op.getOperand(3);
+ if (fourth.getOpcode() != ISD::UNDEF) {
+ Nodes1 = DAG.getNode(
+ ISD::INSERT_VECTOR_ELT,
+ DL,
+ Op.getValueType(),
+ Nodes1,
+ fourth,
+ DAG.getConstant(7, MVT::i32));
+ }
+ case 3:
+ third = Op.getOperand(2);
+ if (third.getOpcode() != ISD::UNDEF) {
+ Nodes1 = DAG.getNode(
+ ISD::INSERT_VECTOR_ELT,
+ DL,
+ Op.getValueType(),
+ Nodes1,
+ third,
+ DAG.getConstant(6, MVT::i32));
+ }
+ case 2:
+ second = Op.getOperand(1);
+ if (second.getOpcode() != ISD::UNDEF) {
+ Nodes1 = DAG.getNode(
+ ISD::INSERT_VECTOR_ELT,
+ DL,
+ Op.getValueType(),
+ Nodes1,
+ second,
+ DAG.getConstant(5, MVT::i32));
+ }
+ break;
+ };
+ return Nodes1;
+}
+
+SDValue
+AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Cond = Op.getOperand(0);
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+ DebugLoc DL = Op.getDebugLoc();
+ Cond = getConversionNode(DAG, Cond, Op, true);
+ Cond = DAG.getNode(AMDILISD::CMOVLOG,
+ DL,
+ Op.getValueType(), Cond, LHS, RHS);
+ return Cond;
+}
+SDValue
+AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Cond;
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue CC = Op.getOperand(2);
+ DebugLoc DL = Op.getDebugLoc();
+ ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+ unsigned int AMDILCC = CondCCodeToCC(
+ SetCCOpcode,
+ LHS.getValueType().getSimpleVT().SimpleTy);
+ assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
+ Cond = DAG.getNode(
+ ISD::SELECT_CC,
+ Op.getDebugLoc(),
+ LHS.getValueType(),
+ LHS, RHS,
+ DAG.getConstant(-1, MVT::i32),
+ DAG.getConstant(0, MVT::i32),
+ CC);
+ Cond = getConversionNode(DAG, Cond, Op, true);
+ Cond = DAG.getNode(
+ ISD::AND,
+ DL,
+ Cond.getValueType(),
+ DAG.getConstant(1, Cond.getValueType()),
+ Cond);
+ return Cond;
+}
+
+SDValue
+AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Data = Op.getOperand(0);
+ VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
+ DebugLoc DL = Op.getDebugLoc();
+ EVT DVT = Data.getValueType();
+ EVT BVT = BaseType->getVT();
+ unsigned baseBits = BVT.getScalarType().getSizeInBits();
+ unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
+ unsigned shiftBits = srcBits - baseBits;
+ if (srcBits < 32) {
+ // If the op is less than 32 bits, then it needs to extend to 32bits
+ // so it can properly keep the upper bits valid.
+ EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
+ Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
+ shiftBits = 32 - baseBits;
+ DVT = IVT;
+ }
+ SDValue Shift = DAG.getConstant(shiftBits, DVT);
+ // Shift left by 'Shift' bits.
+ Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
+ // Signed shift Right by 'Shift' bits.
+ Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
+ if (srcBits < 32) {
+ // Once the sign extension is done, the op needs to be converted to
+ // its original type.
+ Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
+ }
+ return Data;
+}
+EVT
+AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
+{
+ int iSize = (size * numEle);
+ int vEle = (iSize >> ((size == 64) ? 6 : 5));
+ if (!vEle) {
+ vEle = 1;
+ }
+ if (size == 64) {
+ if (vEle == 1) {
+ return EVT(MVT::i64);
+ } else {
+ return EVT(MVT::getVectorVT(MVT::i64, vEle));
+ }
+ } else {
+ if (vEle == 1) {
+ return EVT(MVT::i32);
+ } else {
+ return EVT(MVT::getVectorVT(MVT::i32, vEle));
+ }
+ }
+}
+
+SDValue
+AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const
+{
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ unsigned int SPReg = AMDGPU::SP;
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue SP = DAG.getCopyFromReg(Chain,
+ DL,
+ SPReg, MVT::i32);
+ SDValue NewSP = DAG.getNode(ISD::ADD,
+ DL,
+ MVT::i32, SP, Size);
+ Chain = DAG.getCopyToReg(SP.getValue(1),
+ DL,
+ SPReg, NewSP);
+ SDValue Ops[2] = {NewSP, Chain};
+ Chain = DAG.getMergeValues(Ops, 2 ,DL);
+ return Chain;
+}
+SDValue
+AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Chain = Op.getOperand(0);
+ SDValue Cond = Op.getOperand(1);
+ SDValue Jump = Op.getOperand(2);
+ SDValue Result;
+ Result = DAG.getNode(
+ AMDILISD::BRANCH_COND,
+ Op.getDebugLoc(),
+ Op.getValueType(),
+ Chain, Jump, Cond);
+ return Result;
+}
+
+SDValue
+AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Chain = Op.getOperand(0);
+ SDValue CC = Op.getOperand(1);
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue JumpT = Op.getOperand(4);
+ SDValue CmpValue;
+ SDValue Result;
+ CmpValue = DAG.getNode(
+ ISD::SELECT_CC,
+ Op.getDebugLoc(),
+ LHS.getValueType(),
+ LHS, RHS,
+ DAG.getConstant(-1, MVT::i32),
+ DAG.getConstant(0, MVT::i32),
+ CC);
+ Result = DAG.getNode(
+ AMDILISD::BRANCH_COND,
+ CmpValue.getDebugLoc(),
+ MVT::Other, Chain,
+ JumpT, CmpValue);
+ return Result;
+}
+
+// LowerRET - Lower an ISD::RET node.
+SDValue
+AMDILTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG)
+const
+{
+ //MachineFunction& MF = DAG.getMachineFunction();
+ // CCValAssign - represent the assignment of the return value
+ // to a location
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ // Analyze return values of ISD::RET
+ CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32);
+ // If this is the first return lowered for this function, add
+ // the regs to the liveout set for the function
+ MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+ for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
+ if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) {
+ MRI.addLiveOut(RVLocs[i].getLocReg());
+ }
+ }
+ // FIXME: implement this when tail call is implemented
+ // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
+ // both x86 and ppc implement this in ISelLowering
+
+ // Regular return here
+ SDValue Flag;
+ SmallVector<SDValue, 6> RetOps;
+ RetOps.push_back(Chain);
+ RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
+ for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
+ CCValAssign &VA = RVLocs[i];
+ SDValue ValToCopy = OutVals[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ // ISD::Ret => ret chain, (regnum1, val1), ...
+ // So i * 2 + 1 index only the regnums
+ Chain = DAG.getCopyToReg(Chain,
+ dl,
+ VA.getLocReg(),
+ ValToCopy,
+ Flag);
+ // guarantee that all emitted copies are stuck together
+ // avoiding something bad
+ Flag = Chain.getValue(1);
+ }
+ /*if (MF.getFunction()->hasStructRetAttr()) {
+ assert(0 && "Struct returns are not yet implemented!");
+ // Both MIPS and X86 have this
+ }*/
+ RetOps[0] = Chain;
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
+ Flag = DAG.getNode(AMDILISD::RET_FLAG,
+ dl,
+ MVT::Other, &RetOps[0], RetOps.size());
+ return Flag;
+}
+
+unsigned int
+AMDILTargetLowering::getFunctionAlignment(const Function *) const
+{
+ return 0;
+}
+
+SDValue
+AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ MVT INTTY;
+ MVT FLTTY;
+ if (!OVT.isVector()) {
+ INTTY = MVT::i32;
+ FLTTY = MVT::f32;
+ } else if (OVT.getVectorNumElements() == 2) {
+ INTTY = MVT::v2i32;
+ FLTTY = MVT::v2f32;
+ } else if (OVT.getVectorNumElements() == 4) {
+ INTTY = MVT::v4i32;
+ FLTTY = MVT::v4f32;
+ }
+ unsigned bitsize = OVT.getScalarType().getSizeInBits();
+ // char|short jq = ia ^ ib;
+ SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
+
+ // jq = jq >> (bitsize - 2)
+ jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
+
+ // jq = jq | 0x1
+ jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
+
+ // jq = (int)jq
+ jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
+
+ // int ia = (int)LHS;
+ SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
+
+ // int ib, (int)RHS;
+ SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
+
+ // float fa = (float)ia;
+ SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
+
+ // float fb = (float)ib;
+ SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
+
+ // float fq = native_divide(fa, fb);
+ SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
+
+ // fq = trunc(fq);
+ fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
+
+ // float fqneg = -fq;
+ SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
+
+ // float fr = mad(fqneg, fb, fa);
+ SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa);
+
+ // int iq = (int)fq;
+ SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
+
+ // fr = fabs(fr);
+ fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
+
+ // fb = fabs(fb);
+ fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
+
+ // int cv = fr >= fb;
+ SDValue cv;
+ if (INTTY == MVT::i32) {
+ cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
+ } else {
+ cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
+ }
+ // jq = (cv ? jq : 0);
+ jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq,
+ DAG.getConstant(0, OVT));
+ // dst = iq + jq;
+ iq = DAG.getSExtOrTrunc(iq, DL, OVT);
+ iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
+ return iq;
+}
+
+SDValue
+AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // The LowerSDIV32 function generates equivalent to the following IL.
+ // mov r0, LHS
+ // mov r1, RHS
+ // ilt r10, r0, 0
+ // ilt r11, r1, 0
+ // iadd r0, r0, r10
+ // iadd r1, r1, r11
+ // ixor r0, r0, r10
+ // ixor r1, r1, r11
+ // udiv r0, r0, r1
+ // ixor r10, r10, r11
+ // iadd r0, r0, r10
+ // ixor DST, r0, r10
+
+ // mov r0, LHS
+ SDValue r0 = LHS;
+
+ // mov r1, RHS
+ SDValue r1 = RHS;
+
+ // ilt r10, r0, 0
+ SDValue r10 = DAG.getSelectCC(DL,
+ r0, DAG.getConstant(0, OVT),
+ DAG.getConstant(-1, MVT::i32),
+ DAG.getConstant(0, MVT::i32),
+ ISD::SETLT);
+
+ // ilt r11, r1, 0
+ SDValue r11 = DAG.getSelectCC(DL,
+ r1, DAG.getConstant(0, OVT),
+ DAG.getConstant(-1, MVT::i32),
+ DAG.getConstant(0, MVT::i32),
+ ISD::SETLT);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // iadd r1, r1, r11
+ r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+ // ixor r0, r0, r10
+ r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+ // ixor r1, r1, r11
+ r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+ // udiv r0, r0, r1
+ r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
+
+ // ixor r10, r10, r11
+ r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // ixor DST, r0, r10
+ SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
+{
+ return SDValue(Op.getNode(), 0);
+}
+
+SDValue
+AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2i8) {
+ INTTY = MVT::v2i32;
+ } else if (OVT == MVT::v4i8) {
+ INTTY = MVT::v4i32;
+ }
+ SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
+ SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
+ LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
+ LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
+ return LHS;
+}
+
+SDValue
+AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2i16) {
+ INTTY = MVT::v2i32;
+ } else if (OVT == MVT::v4i16) {
+ INTTY = MVT::v4i32;
+ }
+ SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
+ SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
+ LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
+ LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
+ return LHS;
+}
+
+SDValue
+AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // The LowerSREM32 function generates equivalent to the following IL.
+ // mov r0, LHS
+ // mov r1, RHS
+ // ilt r10, r0, 0
+ // ilt r11, r1, 0
+ // iadd r0, r0, r10
+ // iadd r1, r1, r11
+ // ixor r0, r0, r10
+ // ixor r1, r1, r11
+ // udiv r20, r0, r1
+ // umul r20, r20, r1
+ // sub r0, r0, r20
+ // iadd r0, r0, r10
+ // ixor DST, r0, r10
+
+ // mov r0, LHS
+ SDValue r0 = LHS;
+
+ // mov r1, RHS
+ SDValue r1 = RHS;
+
+ // ilt r10, r0, 0
+ SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
+ DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+ r0, DAG.getConstant(0, OVT));
+
+ // ilt r11, r1, 0
+ SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
+ DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+ r1, DAG.getConstant(0, OVT));
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // iadd r1, r1, r11
+ r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+ // ixor r0, r0, r10
+ r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+ // ixor r1, r1, r11
+ r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+ // udiv r20, r0, r1
+ SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
+
+ // umul r20, r20, r1
+ r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1);
+
+ // sub r0, r0, r20
+ r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // ixor DST, r0, r10
+ SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
+{
+ return SDValue(Op.getNode(), 0);
+}
--- /dev/null
+//===-- AMDILISelLowering.h - AMDIL DAG Lowering Interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that AMDIL uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AMDIL_ISELLOWERING_H_
+#define AMDIL_ISELLOWERING_H_
+#include "AMDIL.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm
+{
+ namespace AMDILISD
+ {
+ enum
+ {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ CMOVLOG, // 32bit FP Conditional move logical instruction
+ MAD, // 32bit Fused Multiply Add instruction
+ VBUILD, // scalar to vector mov instruction
+ CALL, // Function call based on a single integer
+ SELECT_CC, // Select the correct conditional instruction
+ UMUL, // 32bit unsigned multiplication
+ DIV_INF, // Divide with infinity returned on zero divisor
+ CMP,
+ IL_CC_I_GT,
+ IL_CC_I_LT,
+ IL_CC_I_GE,
+ IL_CC_I_LE,
+ IL_CC_I_EQ,
+ IL_CC_I_NE,
+ RET_FLAG,
+ BRANCH_COND,
+ LAST_ISD_NUMBER
+ };
+ } // AMDILISD
+
+ class MachineBasicBlock;
+ class MachineInstr;
+ class DebugLoc;
+ class TargetInstrInfo;
+
+ class AMDILTargetLowering : public TargetLowering
+ {
+ public:
+ AMDILTargetLowering(TargetMachine &TM);
+
+ virtual SDValue
+ LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ /// computeMaskedBitsForTargetNode - Determine which of
+ /// the bits specified
+ /// in Mask are known to be either zero or one and return them in
+ /// the
+ /// KnownZero/KnownOne bitsets.
+ virtual void
+ computeMaskedBitsForTargetNode(
+ const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0
+ ) const;
+
+ virtual bool
+ getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I, unsigned Intrinsic) const;
+ virtual const char*
+ getTargetNodeName(
+ unsigned Opcode
+ ) const;
+ // We want to mark f32/f64 floating point values as
+ // legal
+ bool
+ isFPImmLegal(const APFloat &Imm, EVT VT) const;
+ // We don't want to shrink f64/f32 constants because
+ // they both take up the same amount of space and
+ // we don't want to use a f2d instruction.
+ bool ShouldShrinkFPConstant(EVT VT) const;
+
+ /// getFunctionAlignment - Return the Log2 alignment of this
+ /// function.
+ virtual unsigned int
+ getFunctionAlignment(const Function *F) const;
+
+ private:
+ CCAssignFn*
+ CCAssignFnForNode(unsigned int CC) const;
+
+ SDValue LowerCallResult(SDValue Chain,
+ SDValue InFlag,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerMemArgument(SDValue Chain,
+ CallingConv::ID CallConv,
+ const SmallVectorImpl<ISD::InputArg> &ArgInfo,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA, MachineFrameInfo *MFI,
+ unsigned i) const;
+
+ SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
+ SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerSREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerSREM8(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerSREM16(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue
+ LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
+ SDValue
+ LowerSDI