+++ /dev/null
-//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef AMDGPU_H
-#define AMDGPU_H
-
-#include "AMDGPUTargetMachine.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
-
-// Forward declarations; only pointers/references to these types are used by
-// the prototypes below.
-class FunctionPass;
-class AMDGPUTargetMachine;
-
-// Factory functions for the backend's passes. Only the prototypes are given
-// here; every function returns a FunctionPass pointer.
-
-// R600 Passes
-FunctionPass* createR600KernelParametersPass(const TargetData* TD);
-FunctionPass *createR600CodeEmitterPass(formatted_raw_ostream &OS);
-
-// SI Passes
-FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
-FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
-
-// Passes common to R600 and SI
-FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
-
-} // End namespace llvm
-
-#endif // AMDGPU_H
+++ /dev/null
-//===-- AMDIL.td - AMDIL Tablegen files --*- tablegen -*-------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-
-// Include AMDIL TD files
-include "AMDILBase.td"
-include "AMDILVersion.td"
-
-// Include AMDGPU TD files
-include "R600Schedule.td"
-include "SISchedule.td"
-include "Processors.td"
-include "AMDGPUInstrInfo.td"
-include "AMDGPUIntrinsics.td"
-include "AMDGPURegisterInfo.td"
-include "AMDGPUInstructions.td"
+++ /dev/null
-//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass lowers AMDIL machine instructions to the appropriate hardware
-// instructions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPUInstrInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-
-#include <stdio.h>
-using namespace llvm;
-
-namespace {
-
-/// Machine-function pass that hands every machine instruction to
-/// AMDGPUInstrInfo::convertToISA().
-class AMDGPUConvertToISAPass : public MachineFunctionPass {
-  // Pass identifier handed to the MachineFunctionPass constructor.
-  static char ID;
-  // Target machine that supplies the instruction info used while running.
-  TargetMachine &TM;
-
-public:
-  AMDGPUConvertToISAPass(TargetMachine &tm)
-    : MachineFunctionPass(ID), TM(tm) {}
-
-  virtual bool runOnMachineFunction(MachineFunction &MF);
-
-  virtual const char *getPassName() const {
-    return "AMDGPU Convert to ISA";
-  }
-};
-
-} // End anonymous namespace
-
-// Out-of-class definition of the pass's static ID member.
-char AMDGPUConvertToISAPass::ID = 0;
-
-/// Returns a newly allocated AMDGPUConvertToISAPass bound to \p tm.
-FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
-  FunctionPass *ConvertPass = new AMDGPUConvertToISAPass(tm);
-  return ConvertPass;
-}
-
-/// Visits every instruction of every basic block in \p MF and passes it,
-/// together with the debug location found for it, to
-/// AMDGPUInstrInfo::convertToISA(). Always returns false.
-bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF)
-{
-  const AMDGPUInstrInfo *InstrInfo =
-    static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo());
-
-  for (MachineFunction::iterator Block = MF.begin(), BlockEnd = MF.end();
-       Block != BlockEnd; ++Block) {
-    // The end iterator is captured once per block, before any conversion.
-    for (MachineBasicBlock::iterator Inst = Block->begin(),
-                                     InstEnd = Block->end();
-         Inst != InstEnd; ++Inst) {
-      InstrInfo->convertToISA(*Inst, MF, Block->findDebugLoc(Inst));
-    }
-  }
-  return false;
-}
+++ /dev/null
-//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This is the parent TargetLowering class for hardware code gen targets.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUISelLowering.h"
-#include "AMDILIntrinsicInfo.h"
-#include "AMDGPUUtil.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-
-using namespace llvm;
-
-/// Registers the operation actions shared by the AMDGPU hardware targets,
-/// after the AMDILTargetLowering base constructor has run.
-AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
-  AMDILTargetLowering(TM)
-{
-  // Some intrinsics are custom lowered (see LowerINTRINSIC_WO_CHAIN).
-  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
-
-  // SELECT_CC is custom lowered for both f32 and i32.
-  const MVT::SimpleValueType SelectCCTypes[] = { MVT::f32, MVT::i32 };
-  for (unsigned i = 0; i != sizeof(SelectCCTypes) / sizeof(SelectCCTypes[0]);
-       ++i) {
-    setOperationAction(ISD::SELECT_CC, SelectCCTypes[i], Custom);
-  }
-
-  // Library functions.  These default to Expand, but we have instructions
-  // for them.
-  const unsigned LegalF32Ops[] = { ISD::FCEIL, ISD::FEXP2, ISD::FRINT };
-  for (unsigned i = 0; i != sizeof(LegalF32Ops) / sizeof(LegalF32Ops[0]);
-       ++i) {
-    setOperationAction(LegalF32Ops[i], MVT::f32, Legal);
-  }
-
-  // Unsigned i32 division: UDIV and UREM are expanded, UDIVREM is custom
-  // lowered (see LowerUDIVREM).
-  setOperationAction(ISD::UDIV, MVT::i32, Expand);
-  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
-  setOperationAction(ISD::UREM, MVT::i32, Expand);
-}
-
-/// Dispatches custom lowering by opcode; anything not handled here is
-/// forwarded to AMDILTargetLowering::LowerOperation().
-SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
-    const
-{
-  const unsigned Opcode = Op.getOpcode();
-
-  if (Opcode == ISD::INTRINSIC_WO_CHAIN)
-    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
-  if (Opcode == ISD::SELECT_CC)
-    return LowerSELECT_CC(Op, DAG);
-  if (Opcode == ISD::UDIVREM)
-    return LowerUDIVREM(Op, DAG);
-
-  return AMDILTargetLowering::LowerOperation(Op, DAG);
-}
-
-/// Rewrites selected intrinsics into DAG nodes.  Operand 0 of \p Op carries
-/// the intrinsic ID; the intrinsic's own arguments start at operand 1.
-/// Intrinsics that are not listed are returned unchanged.
-SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
-                                                      SelectionDAG &DAG) const
-{
-  const unsigned IntrID =
-    cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
-  DebugLoc Loc = Op.getDebugLoc();
-  EVT ResultVT = Op.getValueType();
-
-  switch (IntrID) {
-  // Intrinsics with a dedicated lowering helper.
-  case AMDGPUIntrinsic::AMDIL_abs:
-    return LowerIntrinsicIABS(Op, DAG);
-  case AMDGPUIntrinsic::AMDGPU_lrp:
-    return LowerIntrinsicLRP(Op, DAG);
-
-  // One-argument intrinsics.
-  case AMDGPUIntrinsic::AMDIL_exp:
-    return DAG.getNode(ISD::FEXP2, Loc, ResultVT, Op.getOperand(1));
-  case AMDGPUIntrinsic::AMDIL_fabs:
-    return DAG.getNode(ISD::FABS, Loc, ResultVT, Op.getOperand(1));
-  case AMDGPUIntrinsic::AMDIL_fraction:
-    return DAG.getNode(AMDGPUISD::FRACT, Loc, ResultVT, Op.getOperand(1));
-  case AMDGPUIntrinsic::AMDIL_round_nearest:
-    return DAG.getNode(ISD::FRINT, Loc, ResultVT, Op.getOperand(1));
-  case AMDGPUIntrinsic::AMDIL_round_posinf:
-    return DAG.getNode(ISD::FCEIL, Loc, ResultVT, Op.getOperand(1));
-
-  // Two-argument min/max intrinsics.
-  case AMDGPUIntrinsic::AMDIL_max:
-    return DAG.getNode(AMDGPUISD::FMAX, Loc, ResultVT, Op.getOperand(1),
-                       Op.getOperand(2));
-  case AMDGPUIntrinsic::AMDGPU_imax:
-    return DAG.getNode(AMDGPUISD::SMAX, Loc, ResultVT, Op.getOperand(1),
-                       Op.getOperand(2));
-  case AMDGPUIntrinsic::AMDGPU_umax:
-    return DAG.getNode(AMDGPUISD::UMAX, Loc, ResultVT, Op.getOperand(1),
-                       Op.getOperand(2));
-  case AMDGPUIntrinsic::AMDIL_min:
-    return DAG.getNode(AMDGPUISD::FMIN, Loc, ResultVT, Op.getOperand(1),
-                       Op.getOperand(2));
-  case AMDGPUIntrinsic::AMDGPU_imin:
-    return DAG.getNode(AMDGPUISD::SMIN, Loc, ResultVT, Op.getOperand(1),
-                       Op.getOperand(2));
-  case AMDGPUIntrinsic::AMDGPU_umin:
-    return DAG.getNode(AMDGPUISD::UMIN, Loc, ResultVT, Op.getOperand(1),
-                       Op.getOperand(2));
-
-  // Three-argument multiply-add.
-  case AMDGPUIntrinsic::AMDIL_mad:
-    return DAG.getNode(AMDILISD::MAD, Loc, ResultVT, Op.getOperand(1),
-                       Op.getOperand(2), Op.getOperand(3));
-
-  default:
-    break;
-  }
-
-  // Not an intrinsic handled here: leave the node as it is.
-  return Op;
-}
-
-/// Integer absolute value: IABS(a) = SMAX(sub(0, a), a), where a is
-/// operand 1 of \p Op.
-SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
-                                                 SelectionDAG &DAG) const
-{
-  DebugLoc Loc = Op.getDebugLoc();
-  EVT ResultVT = Op.getValueType();
-
-  // Negated = 0 - a
-  SDValue Zero = DAG.getConstant(0, ResultVT);
-  SDValue Negated = DAG.getNode(ISD::SUB, Loc, ResultVT, Zero,
-                                Op.getOperand(1));
-
-  return DAG.getNode(AMDGPUISD::SMAX, Loc, ResultVT, Negated,
-                     Op.getOperand(1));
-}
-
-/// Linear Interpolation
-/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
-///
-/// a, b and c are operands 1, 2 and 3 of \p Op (operand 0 is the intrinsic
-/// ID).  Every node, including the constant 1.0, is created with the value
-/// type of \p Op so that the operand types of FSUB always agree.
-SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
-                                                SelectionDAG &DAG) const
-{
-  DebugLoc DL = Op.getDebugLoc();
-  EVT VT = Op.getValueType();
-
-  // OneSubA = 1 - a
-  SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
-                                DAG.getConstantFP(1.0f, VT),
-                                Op.getOperand(1));
-  // OneSubAC = (1 - a) * c
-  SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
-                                 Op.getOperand(3));
-  // Result = a * b + (1 - a) * c
-  return DAG.getNode(AMDILISD::MAD, DL, VT, Op.getOperand(1),
-                     Op.getOperand(2),
-                     OneSubAC);
-}
-
-/// Custom lowering for SELECT_CC.
-///
-/// Operands of \p Op: 0 = LHS, 1 = RHS, 2 = value selected when the
-/// comparison holds, 3 = value selected otherwise, 4 = condition code.
-/// If the compared operands have a different type than the result they are
-/// first converted to the result type.  A SELECT_CC whose true/false values
-/// are already the hardware TRUE/FALSE constants is re-emitted as is; any
-/// other SELECT_CC is split into a SELECT_CC producing the hardware
-/// TRUE/FALSE constants followed by a SELECT on that result.
-SDValue AMDGPUTargetLowering::LowerSELECT_CC(SDValue Op,
- SelectionDAG &DAG) const
-{
- DebugLoc DL = Op.getDebugLoc();
- EVT VT = Op.getValueType();
-
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- SDValue True = Op.getOperand(2);
- SDValue False = Op.getOperand(3);
- SDValue CC = Op.getOperand(4);
- ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
- // Temp is only used by the disabled (#if 0) block below.
- SDValue Temp;
-
- // LHS and RHS are guaranteed to be the same value type
- EVT CompareVT = LHS.getValueType();
-
- // We need all the operands of SELECT_CC to have the same value type, so if
- // necessary we need to convert LHS and RHS to be the same type True and
- // False. True and False are guaranteed to have the same type as this
- // SELECT_CC node.
-
- if (CompareVT != VT) {
- ISD::NodeType ConversionOp = ISD::DELETED_NODE;
- if (VT == MVT::f32 && CompareVT == MVT::i32) {
- if (isUnsignedIntSetCC(CCOpcode)) {
- ConversionOp = ISD::UINT_TO_FP;
- } else {
- ConversionOp = ISD::SINT_TO_FP;
- }
- } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
- ConversionOp = ISD::FP_TO_SINT;
- } else {
- // I don't think there will be any other type pairings.
- // NOTE(review): when asserts are compiled out, ConversionOp stays
- // ISD::DELETED_NODE and is still used below.
- assert(!"Unhandled operand type parings in SELECT_CC");
- }
- // XXX Check the value of LHS and RHS and avoid creating sequences like
- // (FTOI (ITOF))
- LHS = DAG.getNode(ConversionOp, DL, VT, LHS);
- RHS = DAG.getNode(ConversionOp, DL, VT, RHS);
- }
-
- // If True is a hardware TRUE value and False is a hardware FALSE value
- // we can handle this with a native instruction (SET* instructions).
- if ((isHWTrueValue(True) && isHWFalseValue(False))) {
- return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
- }
-
- // XXX If True is a hardware FALSE value and False is a hardware TRUE value,
- // we can handle this with a native instruction, but we need to swap true
- // and false and change the conditional.  Not implemented yet: the branch
- // below is intentionally empty.
- if (isHWTrueValue(False) && isHWFalseValue(True)) {
- }
-
- // XXX Check if we can lower this to a SELECT or if it is supported by a native
- // operation. (The code below does this but we don't have the Instruction
- // selection patterns to do this yet.)
-#if 0
- if (isZero(LHS) || isZero(RHS)) {
- SDValue Cond = (isZero(LHS) ? RHS : LHS);
- bool SwapTF = false;
- switch (CCOpcode) {
- case ISD::SETOEQ:
- case ISD::SETUEQ:
- case ISD::SETEQ:
- SwapTF = true;
- // Fall through
- case ISD::SETONE:
- case ISD::SETUNE:
- case ISD::SETNE:
- // We can lower to select
- if (SwapTF) {
- Temp = True;
- True = False;
- False = Temp;
- }
- // CNDE
- return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
- default:
- // Supported by a native operation (CNDGE, CNDGT)
- return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
- }
- }
-#endif
-
- // If we make it this far it means we have no native instructions to handle
- // this SELECT_CC, so we must lower it.
- SDValue HWTrue, HWFalse;
-
- // Hardware TRUE/FALSE constants: 1.0f/0.0f for f32, -1/0 for i32.
- if (VT == MVT::f32) {
- HWTrue = DAG.getConstantFP(1.0f, VT);
- HWFalse = DAG.getConstantFP(0.0f, VT);
- } else if (VT == MVT::i32) {
- HWTrue = DAG.getConstant(-1, VT);
- HWFalse = DAG.getConstant(0, VT);
- }
- else {
- // NOTE(review): when asserts are compiled out, HWTrue/HWFalse stay
- // default-constructed and are still used below.
- assert(!"Unhandled value type in LowerSELECT_CC");
- }
-
- // Lower this unsupported SELECT_CC into a combination of two supported
- // operations: a SELECT_CC yielding the hardware TRUE/FALSE value, and a
- // SELECT on that value.
- SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, HWTrue, HWFalse, CC);
-
- return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
-}
-
-
-/// Custom lowering for UDIVREM (registered as Custom for i32 in the
-/// constructor).
-///
-/// Operand 0 of \p Op is the numerator and operand 1 the denominator.  The
-/// quotient is estimated from the URECIP node (2^32 / Den + e), the estimate
-/// is corrected for the rounding error e, and the quotient/remainder are then
-/// adjusted by one step depending on how the first remainder compares with
-/// Den and with zero.
-///
-/// \returns a MERGE_VALUES node whose value 0 is the quotient and whose
-/// value 1 is the remainder, matching the two results of UDIVREM.  (The
-/// pointer-based SelectionDAG::ReplaceAllUsesWith overload expects one
-/// replacement per result of the replaced node, so it must not be handed the
-/// address of a single SDValue for this two-result node.)
-SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
-                                           SelectionDAG &DAG) const
-{
-  DebugLoc DL = Op.getDebugLoc();
-  EVT VT = Op.getValueType();
-
-  SDValue Num = Op.getOperand(0);
-  SDValue Den = Op.getOperand(1);
-
-  // RCP = URECIP(Den) = 2^32 / Den + e
-  // e is rounding error.
-  SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
-
-  // RCP_LO = umulo(RCP, Den)
-  // NOTE(review): UMULO is created with a single result type here; confirm
-  // that the instruction selection patterns expect this form.
-  SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
-
-  // RCP_HI = mulhu (RCP, Den)
-  SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
-
-  // NEG_RCP_LO = -RCP_LO
-  SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
-                                   RCP_LO);
-
-  // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
-  SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
-                                       NEG_RCP_LO, RCP_LO,
-                                       ISD::SETEQ);
-  // Calculate the rounding error from the URECIP instruction
-  // E = mulhu(ABS_RCP_LO, RCP)
-  SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
-
-  // RCP_A_E = RCP + E
-  SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
-
-  // RCP_S_E = RCP - E
-  SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
-
-  // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_S_E)
-  SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
-                                 RCP_A_E, RCP_S_E,
-                                 ISD::SETEQ);
-  // Quotient = mulhu(Tmp0, Num)
-  SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
-
-  // Num_S_Remainder = Quotient * Den
-  SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
-
-  // Remainder = Num - Num_S_Remainder
-  SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
-
-  // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
-  // NOTE(review): this and the next comparison are signed (SETGE) although
-  // the division is unsigned; verify the behaviour for operands with the
-  // top bit set.
-  SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
-                                             DAG.getConstant(-1, VT),
-                                             DAG.getConstant(0, VT),
-                                             ISD::SETGE);
-  // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
-  SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
-                                              DAG.getConstant(0, VT),
-                                              DAG.getConstant(-1, VT),
-                                              DAG.getConstant(0, VT),
-                                              ISD::SETGE);
-  // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
-  SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
-                             Remainder_GE_Zero);
-
-  // Calculate Division result:
-
-  // Quotient_A_One = Quotient + 1
-  SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
-                                       DAG.getConstant(1, VT));
-
-  // Quotient_S_One = Quotient - 1
-  SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
-                                       DAG.getConstant(1, VT));
-
-  // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
-  SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
-                                Quotient, Quotient_A_One, ISD::SETEQ);
-
-  // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
-  Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
-                        Quotient_S_One, Div, ISD::SETEQ);
-
-  // Calculate Rem result:
-
-  // Remainder_S_Den = Remainder - Den
-  SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
-
-  // Remainder_A_Den = Remainder + Den
-  SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
-
-  // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
-  SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
-                                Remainder, Remainder_S_Den, ISD::SETEQ);
-
-  // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
-  Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
-                        Remainder_A_Den, Rem, ISD::SETEQ);
-
-  // Return both results at once: value 0 = quotient, value 1 = remainder.
-  SDValue Ops[2] = { Div, Rem };
-  return DAG.getMergeValues(Ops, 2, DL);
-}
-
-//===----------------------------------------------------------------------===//
-// Helper functions
-//===----------------------------------------------------------------------===//
-
-/// Returns true when \p Op is the floating-point constant 1.0 or an integer
-/// constant with all bits set; false for anything else.
-bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const
-{
-  ConstantFPSDNode *FPConst = dyn_cast<ConstantFPSDNode>(Op);
-  if (FPConst)
-    return FPConst->isExactlyValue(1.0);
-
-  ConstantSDNode *IntConst = dyn_cast<ConstantSDNode>(Op);
-  return IntConst ? IntConst->isAllOnesValue() : false;
-}
-
-/// Returns true when \p Op is a floating-point constant whose value is zero
-/// or an integer constant equal to zero; false for anything else.
-bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const
-{
-  ConstantFPSDNode *FPConst = dyn_cast<ConstantFPSDNode>(Op);
-  if (FPConst)
-    return FPConst->getValueAPF().isZero();
-
-  ConstantSDNode *IntConst = dyn_cast<ConstantSDNode>(Op);
-  return IntConst ? IntConst->isNullValue() : false;
-}
-
-/// Forwards to AMDGPU::utilAddLiveIn(), passing the register of operand 0 of
-/// \p MI as the last argument.
-void AMDGPUTargetLowering::addLiveIn(MachineInstr * MI,
-    MachineFunction * MF, MachineRegisterInfo & MRI,
-    const TargetInstrInfo * TII, unsigned reg) const
-{
-  const unsigned DstReg = MI->getOperand(0).getReg();
-  AMDGPU::utilAddLiveIn(MF, MRI, TII, reg, DstReg);
-}
-
-// Expands to a case label for AMDGPUISD::<node> that returns the
-// enumerator's name as a string literal.
-#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
-
-/// Returns the name of an AMDGPUISD node.  Opcodes not listed here are
-/// forwarded to AMDILTargetLowering::getTargetNodeName().
-const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const
-{
-  switch (Opcode) {
-  default: return AMDILTargetLowering::getTargetNodeName(Opcode);
-
-  // One entry per opcode declared in the AMDGPUISD enum.
-  NODE_NAME_CASE(BITALIGN)
-  NODE_NAME_CASE(FRACT)
-  NODE_NAME_CASE(FMAX)
-  NODE_NAME_CASE(SMAX)
-  NODE_NAME_CASE(UMAX)
-  NODE_NAME_CASE(FMIN)
-  NODE_NAME_CASE(SMIN)
-  NODE_NAME_CASE(UMIN)
-  NODE_NAME_CASE(URECIP)
-  }
-}
+++ /dev/null
-//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the interface definition of the TargetLowering class
-// that is common to all AMD GPUs.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef AMDGPUISELLOWERING_H
-#define AMDGPUISELLOWERING_H
-
-#include "AMDILISelLowering.h"
-
-namespace llvm {
-
-/// TargetLowering class common to all AMD GPUs; extends AMDILTargetLowering
-/// with custom lowering for some intrinsics, SELECT_CC and UDIVREM.
-class AMDGPUTargetLowering : public AMDILTargetLowering
-{
-private:
- // Custom lowering routines dispatched from LowerOperation().
- SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
-
-protected:
-
- /// addLiveIn - This function adds reg to the live in list of the entry block
- /// and emits a copy from reg to MI.getOperand(0).
- ///
- /// Some registers are loaded with values before the program
- /// begins to execute. The loading of these values is modeled with pseudo
- /// instructions which are lowered using this function.
- void addLiveIn(MachineInstr * MI, MachineFunction * MF,
- MachineRegisterInfo & MRI, const TargetInstrInfo * TII,
- unsigned reg) const;
-
- /// isHWTrueValue - True for the FP constant 1.0 or an all-ones integer
- /// constant.
- bool isHWTrueValue(SDValue Op) const;
- /// isHWFalseValue - True for an FP constant equal to zero or the integer
- /// constant 0.
- bool isHWFalseValue(SDValue Op) const;
-
-public:
- AMDGPUTargetLowering(TargetMachine &TM);
-
- /// LowerOperation - Dispatches on the opcode of Op to the custom lowering
- /// routines; other opcodes go to AMDILTargetLowering::LowerOperation().
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
- /// LowerIntrinsicIABS - IABS(a) = SMAX(sub(0, a), a)
- SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
- /// LowerIntrinsicLRP - LRP(a, b, c) = muladd(a, b, (1 - a) * c)
- SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
- /// getTargetNodeName - Returns the name of an AMDGPUISD opcode.
- virtual const char* getTargetNodeName(unsigned Opcode) const;
-
-};
-
-// Target-specific DAG node opcodes shared by the AMDGPU targets.
-namespace AMDGPUISD
-{
-
-// The enumerators continue the numbering after
-// AMDILISD::LAST_ISD_NUMBER; their order determines their values.
-enum
-{
- AMDGPU_FIRST = AMDILISD::LAST_ISD_NUMBER,
- BITALIGN,
- FRACT,
- FMAX,
- SMAX,
- UMAX,
- FMIN,
- SMIN,
- UMIN,
- URECIP,
- LAST_AMDGPU_ISD_NUMBER
-};
-
-
-} // End namespace AMDGPUISD
-
-} // End namespace llvm
-
-#endif // AMDGPUISELLOWERING_H
+++ /dev/null
-//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the implementation of the TargetInstrInfo class that is
-// common to all AMD GPUs.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUInstrInfo.h"
-#include "AMDGPURegisterInfo.h"
-#include "AMDGPUTargetMachine.h"
-#include "AMDIL.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-
-using namespace llvm;
-
-/// Forwards the target machine to the AMDILInstrInfo base class; adds no
-/// state of its own.
-AMDGPUInstrInfo::AMDGPUInstrInfo(AMDGPUTargetMachine &tm) :
-  AMDILInstrInfo(tm)
-{
-}
-
-/// For every virtual register that \p MI defines, replaces the register's
-/// class with the class returned by AMDGPURegisterInfo::getISARegClass().
-/// \p DL is not used by this implementation.
-void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
-    DebugLoc DL) const
-{
-  MachineRegisterInfo &RegInfo = MF.getRegInfo();
-  const AMDGPURegisterInfo &TRI = getRegisterInfo();
-
-  for (unsigned OpIdx = 0; OpIdx < MI.getNumOperands(); OpIdx++) {
-    MachineOperand &Operand = MI.getOperand(OpIdx);
-
-    // Only register definitions are converted.
-    if (!Operand.isReg() || !Operand.isDef())
-      continue;
-    // Only virtual registers are given a new class.
-    if (!TargetRegisterInfo::isVirtualRegister(Operand.getReg()))
-      continue;
-
-    const TargetRegisterClass *AMDILClass =
-      RegInfo.getRegClass(Operand.getReg());
-    const TargetRegisterClass *ISAClass = TRI.getISARegClass(AMDILClass);
-
-    assert(ISAClass);
-
-    RegInfo.setRegClass(Operand.getReg(), ISAClass);
-  }
-}
+++ /dev/null
-//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the definition of a TargetInstrInfo class that is common
-// to all AMD GPUs.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef AMDGPUINSTRUCTIONINFO_H_
-#define AMDGPUINSTRUCTIONINFO_H_
-
-#include "AMDGPURegisterInfo.h"
-#include "AMDILInstrInfo.h"
-
-#include <map>
-
-namespace llvm {
-
-class AMDGPUTargetMachine;
-class MachineFunction;
-class MachineInstr;
-class MachineInstrBuilder;
-
-/// TargetInstrInfo class common to all AMD GPUs, layered on AMDILInstrInfo.
-/// Abstract: subclasses must provide getRegisterInfo().
-class AMDGPUInstrInfo : public AMDILInstrInfo {
-
-public:
- explicit AMDGPUInstrInfo(AMDGPUTargetMachine &tm);
-
- /// getRegisterInfo - Returns the register info of the concrete target.
- virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
-
- /// convertToISA - Convert the AMDIL MachineInstr to a supported ISA
- /// MachineInstr
- virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
- DebugLoc DL) const;
-
-};
-
-} // End llvm namespace
-
-#endif // AMDGPUINSTRUCTIONINFO_H_
+++ /dev/null
-//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains DAG node definitions for the AMDGPU target.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// AMDGPU DAG Profiles
-//===----------------------------------------------------------------------===//
-
-// Type profile with one result and three operands: operands 1 and 2 have the
-// same type as the result, and both the result and operand 3 are integers.
-def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
- SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
-]>;
-
-//===----------------------------------------------------------------------===//
-// AMDGPU DAG Nodes
-//
-
-// out = (((a << 32) | b) >> c)
-//
-// Can be used to optimize rotl:
-// rotl(a, b) = bitalign(a, a, 32 - b)
-def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;
-
-// out = a - floor(a)
-def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
-
-// out = max(a, b) a and b are floats
-def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
- [SDNPCommutative, SDNPAssociative]
->;
-
-// out = max(a, b) a and b are signed ints
-def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
- [SDNPCommutative, SDNPAssociative]
->;
-
-// out = max(a, b) a and b are unsigned ints
-def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
- [SDNPCommutative, SDNPAssociative]
->;
-
-// out = min(a, b) a and b are floats
-def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp,
- [SDNPCommutative, SDNPAssociative]
->;
-
-// out = min(a, b) a and b are signed ints
-def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
- [SDNPCommutative, SDNPAssociative]
->;
-
-// out = min(a, b) a and b are unsigned ints
-def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
- [SDNPCommutative, SDNPAssociative]
->;
-
-// urecip - This operation is a helper for integer division, it returns the
-// result of 1 / a as a fractional unsigned integer.
-// out = (2^32 / a) + e
-// e is rounding error
-def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
+++ /dev/null
-//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains instruction defs that are common to all hw codegen
-// targets.
-//
-//===----------------------------------------------------------------------===//
-
-// Base class for all AMDGPU instructions.  Stores the operand lists, asm
-// string and selection pattern passed in, uses the NullALU itinerary, and
-// encodes two extra fields into TSFlags: Gen in bits 42-40 and AMDILOp in
-// bits 63-48.
-class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
- field bits<16> AMDILOp = 0;
- field bits<3> Gen = 0;
-
- let Namespace = "AMDGPU";
- let OutOperandList = outs;
- let InOperandList = ins;
- let AsmString = asm;
- let Pattern = pattern;
- let Itinerary = NullALU;
- let TSFlags{42-40} = Gen;
- let TSFlags{63-48} = AMDILOp;
-}
-
-// AMDGPUInst with an additional 32-bit Inst field that defaults to all ones.
-class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
- : AMDGPUInst<outs, ins, asm, pattern> {
-
- field bits<32> Inst = 0xffffffff;
-
-}
-
-// Named constants stored as IEEE-754 single-precision bit patterns:
-// 0x40c90fdb is 2*pi, 0x40490fdb is pi and 0x3e22f983 is 1/(2*pi).
-class Constants {
-int TWO_PI = 0x40c90fdb;
-int PI = 0x40490fdb;
-int TWO_PI_INV = 0x3e22f983;
-}
-def CONST : Constants;
-
-// Matches a floating-point immediate whose value is zero.
-def FP_ZERO : PatLeaf <
- (fpimm),
- [{return N->getValueAPF().isZero();}]
->;
-
-// Matches a floating-point immediate that is exactly 1.0.
-def FP_ONE : PatLeaf <
- (fpimm),
- [{return N->isExactlyValue(1.0);}]
->;
-
-// Pseudo instructions that are expanded by a custom inserter.  Each class is
-// parameterized by the register class of its source and destination.
-let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in {
-
-// Selected for int_AMDIL_clamp when the bounds are the constants 0.0 and 1.0.
-class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
- (outs rc:$dst),
- (ins rc:$src0),
- "CLAMP $dst, $src0",
- [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
->;
-
-// Selected for the fabs node.
-class FABS <RegisterClass rc> : AMDGPUShaderInst <
- (outs rc:$dst),
- (ins rc:$src0),
- "FABS $dst, $src0",
- [(set rc:$dst, (fabs rc:$src0))]
->;
-
-// Selected for the fneg node.
-class FNEG <RegisterClass rc> : AMDGPUShaderInst <
- (outs rc:$dst),
- (ins rc:$src0),
- "FNEG $dst, $src0",
- [(set rc:$dst, (fneg rc:$src0))]
->;
-
-} // End isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1
-
-/* Generic helper patterns for intrinsics */
-/* -------------------------------------- */
-
-/* pow(src0, src1) is selected as exp_ieee(mul(src1, log_ieee(src0))), using
- * the instructions supplied as class arguments. */
-class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul,
- RegisterClass rc> : Pat <
- (int_AMDGPU_pow rc:$src0, rc:$src1),
- (exp_ieee (mul rc:$src1, (log_ieee rc:$src0)))
->;
-
-/* Other helper patterns */
-/* --------------------- */
-
-/* Extract element pattern: extracting element sub_idx of a vector becomes an
- * EXTRACT_SUBREG of sub-register sub_reg. */
-class Extract_Element <ValueType sub_type, ValueType vec_type,
- RegisterClass vec_class, int sub_idx,
- SubRegIndex sub_reg>: Pat<
- (sub_type (vector_extract (vec_type vec_class:$src), sub_idx)),
- (EXTRACT_SUBREG vec_class:$src, sub_reg)
->;
-
-/* Insert element pattern: inserting at element sub_idx of a vector becomes an
- * INSERT_SUBREG into sub-register sub_reg. */
-class Insert_Element <ValueType elem_type, ValueType vec_type,
- RegisterClass elem_class, RegisterClass vec_class,
- int sub_idx, SubRegIndex sub_reg> : Pat <
-
- (vec_type (vector_insert (vec_type vec_class:$vec),
- (elem_type elem_class:$elem), sub_idx)),
- (INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg)
->;
-
-// Vector Build pattern: IL_vbuild of a scalar becomes an INSERT_SUBREG of that
-// scalar into sub-register sel_x of an IMPLICIT_DEF vector.
-class Vector_Build <ValueType vecType, RegisterClass elemClass> : Pat <
- (IL_vbuild elemClass:$src),
- (INSERT_SUBREG (vecType (IMPLICIT_DEF)), elemClass:$src, sel_x)
->;
-
-// bitconvert pattern: a bitconvert between types st and dt held in register
-// class rc selects to the source register itself.
-class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
- (dt (bitconvert (st rc:$src0))),
- (dt rc:$src0)
->;
-
-include "R600Instructions.td"
-
-include "SIInstrInfo.td"
-
+++ /dev/null
-//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines intrinsics that are used by all hw codegen targets.
-//
-//===----------------------------------------------------------------------===//
-
-// Intrinsics with the "AMDGPU" target prefix.  Each definition lists its
-// result types, its argument types and its properties, in that order.
-let TargetPrefix = "AMDGPU", isTarget = 1 in {
-
- // NOTE(review): reserve_reg and store_output return nothing yet are marked
- // IntrNoMem - confirm that they cannot be removed as dead code.
- def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
-
- def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_cos : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
- def int_AMDGPU_floor : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- // kill and kilp are the only intrinsics in this group with an empty
- // property list.
- def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
- def int_AMDGPU_kilp : Intrinsic<[], [], []>;
- def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_sin : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_ssg : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-}
-
-// Intrinsics with the "TGSI" target prefix.
-let TargetPrefix = "TGSI", isTarget = 1 in {
-
- // Takes three floats and returns one; declared with an empty property list.
- def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[]>;
-}
-
-include "SIIntrinsics.td"
+++ /dev/null
-//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Parent TargetRegisterInfo class common to all hw codegen targets.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPURegisterInfo.h"
-#include "AMDGPUTargetMachine.h"
-
-using namespace llvm;
-
-/// Passes both arguments on to the AMDILRegisterInfo base class and keeps
-/// references to them in TM and TII.
-AMDGPURegisterInfo::AMDGPURegisterInfo(AMDGPUTargetMachine &tm,
-                                       const TargetInstrInfo &tii) :
-  AMDILRegisterInfo(tm, tii), TM(tm), TII(tii)
-{
-}
+++ /dev/null
-//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the TargetRegisterInfo interface that is implemented
-// by all hw codegen targets.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef AMDGPUREGISTERINFO_H_
-#define AMDGPUREGISTERINFO_H_
-
-#include "AMDILRegisterInfo.h"
-
-namespace llvm {
-
-class AMDGPUTargetMachine;
-class TargetInstrInfo;
-
-// Abstract register-info interface for the hw codegen targets. It extends
-// AMDILRegisterInfo and leaves getReservedRegs() and getISARegClass() pure
-// virtual, so only subclasses that implement both can be instantiated.
-struct AMDGPURegisterInfo : public AMDILRegisterInfo
-{
- // References handed to the constructor.
- AMDGPUTargetMachine &TM;
- const TargetInstrInfo &TII;
-
- AMDGPURegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
-
- // Pure virtual here; each concrete register-info class supplies it.
- virtual BitVector getReservedRegs(const MachineFunction &MF) const = 0;
-
- /// getISARegClass - rc is an AMDIL reg class. This function returns the
- /// ISA reg class that is equivalent to the given AMDIL reg class.
- virtual const TargetRegisterClass *
- getISARegClass(const TargetRegisterClass * rc) const = 0;
-};
-
-} // End namespace llvm
-
-#endif // AMDGPUREGISTERINFO_H_
+++ /dev/null
-//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Tablegen register definitions common to all hw codegen targets.
-//
-//===----------------------------------------------------------------------===//
-
-// Four sub-register indices (sel_x, sel_y, sel_z, sel_w), all placed in the
-// "AMDGPU" namespace.
-let Namespace = "AMDGPU" in {
- def sel_x : SubRegIndex;
- def sel_y : SubRegIndex;
- def sel_z : SubRegIndex;
- def sel_w : SubRegIndex;
-}
-
-include "R600RegisterInfo.td"
-include "SIRegisterInfo.td"
+++ /dev/null
-//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDIL ---*- C++ -*-====//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// This file declares the AMDGPU specific subclass of TargetSubtarget.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef _AMDGPUSUBTARGET_H_
-#define _AMDGPUSUBTARGET_H_
-#include "AMDILSubtarget.h"
-
-namespace llvm {
-
-// AMDIL subtarget that additionally holds the instruction itinerary data
-// looked up for the CPU name given at construction.
-class AMDGPUSubtarget : public AMDILSubtarget
-{
-  InstrItineraryData InstrItins;
-
-public:
-  // The base class is constructed first, so the itinerary lookup below runs
-  // on a fully initialised AMDILSubtarget.
-  AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS)
-      : AMDILSubtarget(TT, CPU, FS),
-        InstrItins(getInstrItineraryForCPU(CPU)) {}
-
-  // Returns the itinerary data captured by the constructor.
-  const InstrItineraryData &getInstrItineraryData() const
-  {
-    return InstrItins;
-  }
-};
-
-} // End namespace llvm
-
-#endif // _AMDGPUSUBTARGET_H_
+++ /dev/null
-//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// The AMDGPU target machine contains all of the hardware specific information
-// needed to emit code for R600 and SI GPUs.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUTargetMachine.h"
-#include "AMDGPU.h"
-#include "R600ISelLowering.h"
-#include "R600InstrInfo.h"
-#include "SIISelLowering.h"
-#include "SIInstrInfo.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/PassManager.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_os_ostream.h"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/Scalar.h"
-
-using namespace llvm;
-
-// C-linkage entry point that registers AMDGPUTargetMachine for
-// TheAMDGPUTarget.
-extern "C" void LLVMInitializeAMDGPUTarget() {
-  // Constructing the helper object performs the registration.
-  RegisterTargetMachine<AMDGPUTargetMachine> Registrar(TheAMDGPUTarget);
-}
-
-// Builds the target machine: initialises the subtarget-derived members, then
-// picks the R600 or SI instruction-info / lowering pair based on the device
-// generation reported by the subtarget.
-AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
-                                         StringRef CPU, StringRef FS,
-                                         TargetOptions Options,
-                                         Reloc::Model RM, CodeModel::Model CM,
-                                         CodeGenOpt::Level OptLevel)
-    : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
-      Subtarget(TT, CPU, FS),
-      DataLayout(Subtarget.getDataLayout()),
-      FrameLowering(TargetFrameLowering::StackGrowsUp,
-                    Subtarget.device()->getStackAlignment(), 0),
-      IntrinsicInfo(this),
-      InstrItins(&Subtarget.getInstrItineraryData()),
-      mDump(false)
-{
-  const bool usesR600Backend =
-      Subtarget.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX;
-
-  // TLInfo uses InstrInfo so it must be initialized after.
-  if (usesR600Backend) {
-    InstrInfo = new R600InstrInfo(*this);
-    TLInfo = new R600TargetLowering(*this);
-  } else {
-    InstrInfo = new SIInstrInfo(*this);
-    TLInfo = new SITargetLowering(*this);
-  }
-}
-
-// Releases the instruction-info and lowering objects that the constructor
-// allocates with new; nothing else in this class frees them.
-AMDGPUTargetMachine::~AMDGPUTargetMachine()
-{
-  // TLInfo is created after InstrInfo (it uses it), so destroy it first.
-  delete TLInfo;
-  delete InstrInfo;
-}
-
-// Adds the common code-generation passes and then the target's code emitter.
-//
-// Returns false on success. The unsupported-device path calls abort().
-// StartAfter and StopAfter are accepted for interface compatibility but are
-// not forwarded to the base-class call.
-bool AMDGPUTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
-                                              formatted_raw_ostream &Out,
-                                              CodeGenFileType FileType,
-                                              bool DisableVerify,
-                                              AnalysisID StartAfter,
-                                              AnalysisID StopAfter) {
-  // XXX: Hack here addPassesToEmitFile will fail, but this is Ok since we are
-  // only using it to access addPassesToGenerateCode()
-  bool fail = LLVMTargetMachine::addPassesToEmitFile(PM, Out, FileType,
-                                                     DisableVerify);
-  assert(fail);
-  // assert() compiles away under NDEBUG; keep 'fail' referenced so release
-  // builds do not warn about an unused variable.
-  (void)fail;
-
-  const AMDILSubtarget &STM = getSubtarget<AMDILSubtarget>();
-  std::string gpu = STM.getDeviceName();
-  if (gpu == "SI") {
-    PM.add(createSICodeEmitterPass(Out));
-  } else if (Subtarget.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
-    PM.add(createR600CodeEmitterPass(Out));
-  } else {
-    // Neither emitter applies to this device.
-    abort();
-    return true;
-  }
-  PM.add(createGCInfoDeleter());
-
-  return false;
-}
-
-namespace {
-// File-local pass configuration for the AMDGPU target. It overrides the
-// TargetPassConfig hooks declared below; their definitions follow later in
-// this file.
-class AMDGPUPassConfig : public TargetPassConfig {
-public:
- AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
-
- // Returns the owning target machine as an AMDGPUTargetMachine.
- AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
- return getTM<AMDGPUTargetMachine>();
- }
-
- // Pipeline hooks overridden for this target.
- virtual bool addPreISel();
- virtual bool addInstSelector();
- virtual bool addPreRegAlloc();
- virtual bool addPostRegAlloc();
- virtual bool addPreSched2();
- virtual bool addPreEmitPass();
-};
-} // End of anonymous namespace
-
-// Allocates the AMDGPU pass configuration bound to this target machine and
-// the given pass manager.
-TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
-  TargetPassConfig *Config = new AMDGPUPassConfig(this, PM);
-  return Config;
-}
-
-// Adds the R600 kernel-parameters pass for devices up to and including the
-// HD6XXX generation; adds nothing for later generations. Always returns false.
-bool AMDGPUPassConfig::addPreISel() {
-  const AMDILSubtarget &Subtarget = TM->getSubtarget<AMDILSubtarget>();
-  if (Subtarget.device()->getGeneration() > AMDILDeviceInfo::HD6XXX)
-    return false;
-
-  addPass(createR600KernelParametersPass(
-      getAMDGPUTargetMachine().getTargetData()));
-  return false;
-}
-
-// Adds the AMDIL peephole pass followed by the AMDIL instruction selector.
-// Always returns false.
-bool AMDGPUPassConfig::addInstSelector()
-{
-  addPass(createAMDILPeepholeOpt(*TM));
-  addPass(createAMDILISelDag(getAMDGPUTargetMachine()));
-
-  return false;
-}
-
-// For generations after HD6XXX, adds the SI interpolation-register
-// assignment pass; then, for every generation, adds the convert-to-ISA pass.
-// Always returns false.
-bool AMDGPUPassConfig::addPreRegAlloc()
-{
-  const AMDILSubtarget &Subtarget = TM->getSubtarget<AMDILSubtarget>();
-  const bool isPostHD6XXX =
-      Subtarget.device()->getGeneration() > AMDILDeviceInfo::HD6XXX;
-
-  if (isPostHD6XXX)
-    addPass(createSIAssignInterpRegsPass(*TM));
-
-  addPass(createAMDGPUConvertToISAPass(*TM));
-  return false;
-}
-
-// No passes are added after register allocation.
-bool AMDGPUPassConfig::addPostRegAlloc()
-{
-  return false;
-}
-
-// No passes are added at the pre-sched2 hook.
-bool AMDGPUPassConfig::addPreSched2()
-{
-  return false;
-}
-
-// Adds the AMDIL CFG preparation pass and then the CFG structurizer pass.
-// Always returns false.
-bool AMDGPUPassConfig::addPreEmitPass()
-{
-  addPass(createAMDILCFGPreparationPass(*TM));
-  addPass(createAMDILCFGStructurizerPass(*TM));
-  return false;
-}
-
+++ /dev/null
-//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// The AMDGPU TargetMachine interface definition for hw codegen targets.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef AMDGPU_TARGET_MACHINE_H
-#define AMDGPU_TARGET_MACHINE_H
-
-#include "AMDGPUInstrInfo.h"
-#include "AMDGPUSubtarget.h"
-#include "AMDILFrameLowering.h"
-#include "AMDILIntrinsicInfo.h"
-#include "R600ISelLowering.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/Target/TargetData.h"
-
-namespace llvm {
-
-MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT);
-
-// Target machine shared by the hw codegen targets. It holds the subtarget,
-// data layout, frame lowering and intrinsic info by value, and the
-// instruction info and target lowering through pointers.
-class AMDGPUTargetMachine : public LLVMTargetMachine {
-
-  AMDGPUSubtarget Subtarget;
-  const TargetData DataLayout;
-  AMDILFrameLowering FrameLowering;
-  AMDILIntrinsicInfo IntrinsicInfo;
-  const AMDGPUInstrInfo * InstrInfo;
-  AMDGPUTargetLowering * TLInfo;
-  const InstrItineraryData* InstrItins;
-  bool mDump;
-
-public:
-  // Parameter names follow the out-of-line definition: the CPU string comes
-  // before the feature string (FS).
-  AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef CPU,
-                      StringRef FS,
-                      TargetOptions Options,
-                      Reloc::Model RM, CodeModel::Model CM,
-                      CodeGenOpt::Level OL);
-  ~AMDGPUTargetMachine();
-
-  // Accessors returning pointers to the members above.
-  virtual const AMDILFrameLowering* getFrameLowering() const {
-    return &FrameLowering;
-  }
-  virtual const AMDILIntrinsicInfo* getIntrinsicInfo() const {
-    return &IntrinsicInfo;
-  }
-  virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;}
-  virtual const AMDGPUSubtarget *getSubtargetImpl() const {return &Subtarget; }
-  // The register info is owned by the instruction info object.
-  virtual const AMDGPURegisterInfo *getRegisterInfo() const {
-    return &InstrInfo->getRegisterInfo();
-  }
-  virtual AMDGPUTargetLowering * getTargetLowering() const {
-    return TLInfo;
-  }
-  virtual const InstrItineraryData* getInstrItineraryData() const {
-    return InstrItins;
-  }
-  virtual const TargetData* getTargetData() const { return &DataLayout; }
-
-  // Defined out of line in AMDGPUTargetMachine.cpp.
-  virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
-  virtual bool addPassesToEmitFile(PassManagerBase &PM,
-                                   formatted_raw_ostream &Out,
-                                   CodeGenFileType FileType,
-                                   bool DisableVerify,
-                                   AnalysisID StartAfter = 0,
-                                   AnalysisID StopAfter = 0);
-};
-
-} // End namespace llvm
-
-#endif // AMDGPU_TARGET_MACHINE_H
+++ /dev/null
-//===-- AMDGPUUtil.cpp - AMDGPU Utility functions -------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Common utility functions used by hw codegen targets
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUUtil.h"
-#include "AMDGPURegisterInfo.h"
-#include "AMDIL.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-
-using namespace llvm;
-
-// Some instructions act as place holders to emulate operations that the GPU
-// hardware does automatically. This function can be used to check if
-// an opcode falls into this category.
-// Returns true for the five opcodes listed below and false for every other
-// opcode.
-bool AMDGPU::isPlaceHolderOpcode(unsigned opcode)
-{
-  switch (opcode) {
-  case AMDGPU::RETURN:
-  case AMDGPU::LOAD_INPUT:
-  case AMDGPU::LAST:
-  case AMDGPU::MASK_WRITE:
-  case AMDGPU::RESERVE_REG:
-    return true;
-  default:
-    break;
-  }
-  return false;
-}
-
-// Returns true when opcode is one of the COS, MULLIT/MUL_LIT, EXP_IEEE or
-// LOG opcodes listed below; false otherwise.
-bool AMDGPU::isTransOp(unsigned opcode)
-{
-  switch (opcode) {
-  case AMDGPU::COS_r600:
-  case AMDGPU::COS_eg:
-  case AMDGPU::MULLIT:
-  case AMDGPU::MUL_LIT_r600:
-  case AMDGPU::MUL_LIT_eg:
-  case AMDGPU::EXP_IEEE_r600:
-  case AMDGPU::EXP_IEEE_eg:
-  case AMDGPU::LOG_CLAMPED_r600:
-  case AMDGPU::LOG_IEEE_r600:
-  case AMDGPU::LOG_CLAMPED_eg:
-  case AMDGPU::LOG_IEEE_eg:
-    return true;
-  default:
-    break;
-  }
-  return false;
-}
-
-// Returns true when opcode is one of the TEX_* opcodes listed below; false
-// otherwise.
-bool AMDGPU::isTexOp(unsigned opcode)
-{
-  switch (opcode) {
-  case AMDGPU::TEX_LD:
-  case AMDGPU::TEX_GET_TEXTURE_RESINFO:
-  case AMDGPU::TEX_SAMPLE:
-  case AMDGPU::TEX_SAMPLE_C:
-  case AMDGPU::TEX_SAMPLE_L:
-  case AMDGPU::TEX_SAMPLE_C_L:
-  case AMDGPU::TEX_SAMPLE_LB:
-  case AMDGPU::TEX_SAMPLE_C_LB:
-  case AMDGPU::TEX_SAMPLE_G:
-  case AMDGPU::TEX_SAMPLE_C_G:
-  case AMDGPU::TEX_GET_GRADIENTS_H:
-  case AMDGPU::TEX_GET_GRADIENTS_V:
-  case AMDGPU::TEX_SET_GRADIENTS_H:
-  case AMDGPU::TEX_SET_GRADIENTS_V:
-    return true;
-  default:
-    break;
-  }
-  return false;
-}
-
-// Returns true only for the two DOT4 opcodes.
-bool AMDGPU::isReductionOp(unsigned opcode)
-{
-  return opcode == AMDGPU::DOT4_r600 || opcode == AMDGPU::DOT4_eg;
-}
-
-// Returns true only for the two CUBE opcodes.
-bool AMDGPU::isCubeOp(unsigned opcode)
-{
-  return opcode == AMDGPU::CUBE_r600 || opcode == AMDGPU::CUBE_eg;
-}
-
-
-// Returns true when opcode is one of the BREAK/CONTINUE/IF/ELSE/ENDIF/
-// ENDLOOP/WHILELOOP opcodes listed below; false otherwise.
-bool AMDGPU::isFCOp(unsigned opcode)
-{
-  switch (opcode) {
-  case AMDGPU::BREAK_LOGICALZ_f32:
-  case AMDGPU::BREAK_LOGICALNZ_i32:
-  case AMDGPU::BREAK_LOGICALZ_i32:
-  case AMDGPU::BREAK_LOGICALNZ_f32:
-  case AMDGPU::CONTINUE_LOGICALNZ_f32:
-  case AMDGPU::IF_LOGICALNZ_i32:
-  case AMDGPU::IF_LOGICALZ_f32:
-  case AMDGPU::ELSE:
-  case AMDGPU::ENDIF:
-  case AMDGPU::ENDLOOP:
-  case AMDGPU::IF_LOGICALNZ_f32:
-  case AMDGPU::WHILELOOP:
-    return true;
-  default:
-    break;
-  }
-  return false;
-}
-
-// Makes physReg a live-in of the function and ties it to virtReg.
-//
-// If physReg is already live-in, every use of virtReg is replaced with the
-// virtual register already associated with it. Otherwise physReg is recorded
-// as live-in (function-wide and on the first block) and a COPY from physReg
-// to virtReg is inserted at the start of the first block.
-void AMDGPU::utilAddLiveIn(MachineFunction * MF,
-                           MachineRegisterInfo & MRI,
-                           const TargetInstrInfo * TII,
-                           unsigned physReg, unsigned virtReg)
-{
-  if (MRI.isLiveIn(physReg)) {
-    MRI.replaceRegWith(virtReg, MRI.getLiveInVirtReg(physReg));
-    return;
-  }
-
-  MRI.addLiveIn(physReg, virtReg);
-  MF->front().addLiveIn(physReg);
-  BuildMI(MF->front(), MF->front().begin(), DebugLoc(),
-          TII->get(TargetOpcode::COPY), virtReg)
-      .addReg(physReg);
-}
+++ /dev/null
-//===-- AMDGPUUtil.h - AMDGPU Utility function declarations -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Declarations for utility functions common to all hw codegen targets.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef AMDGPU_UTIL_H
-#define AMDGPU_UTIL_H
-
-namespace llvm {
-
-class MachineFunction;
-class MachineRegisterInfo;
-class TargetInstrInfo;
-
-// Utility declarations shared by the hw codegen targets; the definitions
-// live in AMDGPUUtil.cpp.
-namespace AMDGPU {
-
-// Opcode classification predicates. Each returns true when the opcode is in
-// the fixed list hard-coded in its definition.
-bool isPlaceHolderOpcode(unsigned opcode);
-
-bool isTransOp(unsigned opcode);
-bool isTexOp(unsigned opcode);
-bool isReductionOp(unsigned opcode);
-bool isCubeOp(unsigned opcode);
-bool isFCOp(unsigned opcode);
-
-// XXX: Move these to AMDGPUInstrInfo.h
-// Single-bit flag masks. Being preprocessor macros, they are not scoped by
-// the enclosing namespaces.
-#define MO_FLAG_CLAMP (1 << 0)
-#define MO_FLAG_NEG (1 << 1)
-#define MO_FLAG_ABS (1 << 2)
-#define MO_FLAG_MASK (1 << 3)
-
-// Registers physReg as a function live-in associated with virtReg.
-void utilAddLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI,
- const TargetInstrInfo * TII, unsigned physReg, unsigned virtReg);
-
-} // End namespace AMDGPU
-
-} // End namespace llvm
-
-#endif // AMDGPU_UTIL_H
+++ /dev/null
-//===-- AMDIL.h - Top-level interface for AMDIL representation --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// This file contains the entry points for global functions defined in the LLVM
-// AMDIL back-end.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef AMDIL_H_
-#define AMDIL_H_
-
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Target/TargetMachine.h"
-
-#define AMDIL_MAJOR_VERSION 2
-#define AMDIL_MINOR_VERSION 0
-#define AMDIL_REVISION_NUMBER 74
-#define ARENA_SEGMENT_RESERVED_UAVS 12
-#define DEFAULT_ARENA_UAV_ID 8
-#define DEFAULT_RAW_UAV_ID 7
-#define GLOBAL_RETURN_RAW_UAV_ID 11
-#define HW_MAX_NUM_CB 8
-#define MAX_NUM_UNIQUE_UAVS 8
-#define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8
-#define OPENCL_MAX_READ_IMAGES 128
-#define OPENCL_MAX_WRITE_IMAGES 8
-#define OPENCL_MAX_SAMPLERS 16
-
-// The next two values can never be zero, as zero is the ID that is
-// used to assert against.
-#define DEFAULT_LDS_ID 1
-#define DEFAULT_GDS_ID 1
-#define DEFAULT_SCRATCH_ID 1
-#define DEFAULT_VEC_SLOTS 8
-
-// SC->CAL version matchings.
-#define CAL_VERSION_SC_150 1700
-#define CAL_VERSION_SC_149 1700
-#define CAL_VERSION_SC_148 1525
-#define CAL_VERSION_SC_147 1525
-#define CAL_VERSION_SC_146 1525
-#define CAL_VERSION_SC_145 1451
-#define CAL_VERSION_SC_144 1451
-#define CAL_VERSION_SC_143 1441
-#define CAL_VERSION_SC_142 1441
-#define CAL_VERSION_SC_141 1420
-#define CAL_VERSION_SC_140 1400
-#define CAL_VERSION_SC_139 1387
-#define CAL_VERSION_SC_138 1387
-#define CAL_APPEND_BUFFER_SUPPORT 1340
-#define CAL_VERSION_SC_137 1331
-#define CAL_VERSION_SC_136 982
-#define CAL_VERSION_SC_135 950
-#define CAL_VERSION_GLOBAL_RETURN_BUFFER 990
-
-#define OCL_DEVICE_RV710 0x0001
-#define OCL_DEVICE_RV730 0x0002
-#define OCL_DEVICE_RV770 0x0004
-#define OCL_DEVICE_CEDAR 0x0008
-#define OCL_DEVICE_REDWOOD 0x0010
-#define OCL_DEVICE_JUNIPER 0x0020
-#define OCL_DEVICE_CYPRESS 0x0040
-#define OCL_DEVICE_CAICOS 0x0080
-#define OCL_DEVICE_TURKS 0x0100
-#define OCL_DEVICE_BARTS 0x0200
-#define OCL_DEVICE_CAYMAN 0x0400
-#define OCL_DEVICE_ALL 0x3FFF
-
-/// The number of function ID's that are reserved for
-/// internal compiler usage.
-const unsigned int RESERVED_FUNCS = 1024;
-
-#define AMDIL_OPT_LEVEL_DECL
-#define AMDIL_OPT_LEVEL_VAR
-#define AMDIL_OPT_LEVEL_VAR_NO_COMMA
-
-// Forward declarations and pass-factory entry points for the AMDIL back-end.
-// AMDIL_OPT_LEVEL_DECL is defined as empty earlier in this header, so each
-// factory below takes only the TargetMachine reference.
-namespace llvm {
-class AMDILInstrPrinter;
-class FunctionPass;
-class MCAsmInfo;
-class raw_ostream;
-class Target;
-class TargetMachine;
-
-/// Instruction selection passes.
-FunctionPass*
- createAMDILISelDag(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
-FunctionPass*
- createAMDILPeepholeOpt(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
-
-/// Pre emit passes.
-FunctionPass*
- createAMDILCFGPreparationPass(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
-FunctionPass*
- createAMDILCFGStructurizerPass(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
-
-/// Target objects defined elsewhere.
-extern Target TheAMDILTarget;
-extern Target TheAMDGPUTarget;
-} // end namespace llvm;
-
-#define GET_REGINFO_ENUM
-#include "AMDGPUGenRegisterInfo.inc"
-#define GET_INSTRINFO_ENUM
-#include "AMDGPUGenInstrInfo.inc"
-
-/// Include device information enumerations
-#include "AMDILDeviceInfo.h"
-
-namespace llvm {
-/// OpenCL uses address spaces to differentiate between
-/// various memory regions on the hardware. On the CPU
-/// all of the address spaces point to the same memory,
-/// however on the GPU, each address space points to
-/// a separate piece of memory that is unique from other
-/// memory locations.
-namespace AMDILAS {
-// Numeric identifiers for the address spaces. LAST_ADDRESS is one past the
-// highest assigned value.
-enum AddressSpaces {
- PRIVATE_ADDRESS = 0, // Address space for private memory.
- GLOBAL_ADDRESS = 1, // Address space for global memory (RAT0, VTX0).
- CONSTANT_ADDRESS = 2, // Address space for constant memory.
- LOCAL_ADDRESS = 3, // Address space for local memory.
- REGION_ADDRESS = 4, // Address space for region memory.
- ADDRESS_NONE = 5, // Address space for unknown memory.
- PARAM_D_ADDRESS = 6, // Address space for direct addressable parameter memory (CONST0)
- PARAM_I_ADDRESS = 7, // Address space for indirect addressable parameter memory (VTX1)
- USER_SGPR_ADDRESS = 8, // Address space for USER_SGPRS on SI
- LAST_ADDRESS = 9
-};
-
-// This union/struct combination is an easy way to read out the
-// exact bits that are needed.
-// The bitfields add up to 16 bits and are declared in opposite order in the
-// two branches of the __BIG_ENDIAN__ conditional.
-typedef union ResourceRec {
- struct {
-#ifdef __BIG_ENDIAN__
- unsigned short isImage : 1; // Reserved for future use/llvm.
- unsigned short ResourceID : 10; // Flag to specify the resource ID for
- // the op.
- unsigned short HardwareInst : 1; // Flag to specify that this instruction
- // is a hardware instruction.
- unsigned short ConflictPtr : 1; // Flag to specify that the pointer has a
- // conflict.
- unsigned short ByteStore : 1; // Flag to specify if the op is a byte
- // store op.
- unsigned short PointerPath : 1; // Flag to specify if the op is on the
- // pointer path.
- unsigned short CacheableRead : 1; // Flag to specify if the read is
- // cacheable.
-#else
- unsigned short CacheableRead : 1; // Flag to specify if the read is
- // cacheable.
- unsigned short PointerPath : 1; // Flag to specify if the op is on the
- // pointer path.
- unsigned short ByteStore : 1; // Flag to specify if the op is byte
- // store op.
- unsigned short ConflictPtr : 1; // Flag to specify that the pointer has
- // a conflict.
- unsigned short HardwareInst : 1; // Flag to specify that this instruction
- // is a hardware instruction.
- unsigned short ResourceID : 10; // Flag to specify the resource ID for
- // the op.
- unsigned short isImage : 1; // Reserved for future use.
-#endif
- } bits;
- // All of the fields above viewed as one unsigned short.
- unsigned short u16all;
-} InstrResEnc;
-
-} // namespace AMDILAS
-
-// Enums corresponding to AMDIL condition codes for IL. These
-// values must be kept in sync with the ones in the .td file.
-namespace AMDILCC {
-// Condition codes numbered consecutively from 0; COND_ERROR (64) is the
-// value after the last real code.
-enum CondCodes {
- // AMDIL specific condition codes. These correspond to the IL_CC_*
- // in AMDILInstrInfo.td and must be kept in the same order.
- IL_CC_D_EQ = 0, // DEQ instruction.
- IL_CC_D_GE = 1, // DGE instruction.
- IL_CC_D_LT = 2, // DLT instruction.
- IL_CC_D_NE = 3, // DNE instruction.
- IL_CC_F_EQ = 4, // EQ instruction.
- IL_CC_F_GE = 5, // GE instruction.
- IL_CC_F_LT = 6, // LT instruction.
- IL_CC_F_NE = 7, // NE instruction.
- IL_CC_I_EQ = 8, // IEQ instruction.
- IL_CC_I_GE = 9, // IGE instruction.
- IL_CC_I_LT = 10, // ILT instruction.
- IL_CC_I_NE = 11, // INE instruction.
- IL_CC_U_GE = 12, // UGE instruction.
- IL_CC_U_LT = 13, // ULT instruction.
- // Pseudo IL Comparison instructions here.
- IL_CC_F_GT = 14, // GT instruction.
- IL_CC_U_GT = 15,
- IL_CC_I_GT = 16,
- IL_CC_D_GT = 17,
- IL_CC_F_LE = 18, // LE instruction
- IL_CC_U_LE = 19,
- IL_CC_I_LE = 20,
- IL_CC_D_LE = 21,
- IL_CC_F_UNE = 22,
- IL_CC_F_UEQ = 23,
- IL_CC_F_ULT = 24,
- IL_CC_F_UGT = 25,
- IL_CC_F_ULE = 26,
- IL_CC_F_UGE = 27,
- IL_CC_F_ONE = 28,
- IL_CC_F_OEQ = 29,
- IL_CC_F_OLT = 30,
- IL_CC_F_OGT = 31,
- IL_CC_F_OLE = 32,
- IL_CC_F_OGE = 33,
- IL_CC_D_UNE = 34,
- IL_CC_D_UEQ = 35,
- IL_CC_D_ULT = 36,
- IL_CC_D_UGT = 37,
- IL_CC_D_ULE = 38,
- IL_CC_D_UGE = 39,
- IL_CC_D_ONE = 40,
- IL_CC_D_OEQ = 41,
- IL_CC_D_OLT = 42,
- IL_CC_D_OGT = 43,
- IL_CC_D_OLE = 44,
- IL_CC_D_OGE = 45,
- IL_CC_U_EQ = 46,
- IL_CC_U_NE = 47,
- IL_CC_F_O = 48,
- IL_CC_D_O = 49,
- IL_CC_F_UO = 50,
- IL_CC_D_UO = 51,
- IL_CC_L_LE = 52,
- IL_CC_L_GE = 53,
- IL_CC_L_EQ = 54,
- IL_CC_L_NE = 55,
- IL_CC_L_LT = 56,
- IL_CC_L_GT = 57,
- IL_CC_UL_LE = 58,
- IL_CC_UL_GE = 59,
- IL_CC_UL_EQ = 60,
- IL_CC_UL_NE = 61,
- IL_CC_UL_LT = 62,
- IL_CC_UL_GT = 63,
- COND_ERROR = 64
-};
-
-} // end namespace AMDILCC
-} // end namespace llvm
-#endif // AMDIL_H_
+++ /dev/null
-//===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-#include "AMDIL7XXDevice.h"
-#include "AMDILDevice.h"
-
-using namespace llvm;
-
-// Sets the capability bits, then derives the device flag from the
-// subtarget's device name: "rv710" and "rv730" get their own flags and every
-// other name is treated as RV770.
-AMDIL7XXDevice::AMDIL7XXDevice(AMDILSubtarget *ST) : AMDILDevice(ST)
-{
-  setCaps();
-  std::string deviceName = mSTM->getDeviceName();
-  mDeviceFlag = (deviceName == "rv710") ? OCL_DEVICE_RV710
-              : (deviceName == "rv730") ? OCL_DEVICE_RV730
-                                        : OCL_DEVICE_RV770;
-}
-
-// Nothing to release.
-AMDIL7XXDevice::~AMDIL7XXDevice() {}
-
-// Sets the LocalMem bit in the software capability set.
-void AMDIL7XXDevice::setCaps() {
-  mSWBits.set(AMDILDeviceInfo::LocalMem);
-}
-
-// Returns MAX_LDS_SIZE_700 when local memory is handled in hardware, and 0
-// otherwise.
-size_t AMDIL7XXDevice::getMaxLDSSize() const {
-  if (!usesHardware(AMDILDeviceInfo::LocalMem))
-    return 0;
-  return MAX_LDS_SIZE_700;
-}
-
-// The generic 7XX device reports the half wavefront size.
-size_t AMDIL7XXDevice::getWavefrontSize() const {
-  return AMDILDevice::HalfWavefrontSize;
-}
-
-// All 7XX devices report the HD4XXX generation.
-uint32_t AMDIL7XXDevice::getGeneration() const {
-  return AMDILDeviceInfo::HD4XXX;
-}
-
-// Maps a device ID kind to its default resource ID.
-//
-// Returns the default LDS / scratch / GDS ID when the matching memory type is
-// handled in hardware, and 0 in every other case. GDS_ID and unknown IDs
-// trip an assertion first; with assertions compiled out they fall through to
-// the logic below.
-uint32_t AMDIL7XXDevice::getResourceID(uint32_t DeviceID) const
-{
-  uint32_t resourceID = 0;
-
-  switch (DeviceID) {
-  case LDS_ID:
-    if (usesHardware(AMDILDeviceInfo::LocalMem))
-      resourceID = DEFAULT_LDS_ID;
-    break;
-  case SCRATCH_ID:
-    if (usesHardware(AMDILDeviceInfo::PrivateMem))
-      resourceID = DEFAULT_SCRATCH_ID;
-    break;
-  case GDS_ID:
-    assert(0 && "GDS UAV ID is not supported on this chip");
-    if (usesHardware(AMDILDeviceInfo::RegionMem))
-      resourceID = DEFAULT_GDS_ID;
-    break;
-  case GLOBAL_ID:
-  case CONSTANT_ID:
-  case RAW_UAV_ID:
-  case ARENA_UAV_ID:
-    break;
-  default:
-    assert(0 && "ID type passed in is unknown!");
-    break;
-  }
-
-  return resourceID;
-}
-
-// 7XX devices report a single UAV.
-uint32_t AMDIL7XXDevice::getMaxNumUAVs() const {
-  return 1;
-}
-
-// Runs the 7XX base constructor, then applies the 770-specific capability
-// bits.
-AMDIL770Device::AMDIL770Device(AMDILSubtarget *ST) : AMDIL7XXDevice(ST) {
-  setCaps();
-}
-
-// Nothing to release.
-AMDIL770Device::~AMDIL770Device() {}
-
-// Capability bits for the 770: when the DoubleOps override is active, FMA is
-// set in software and DoubleOps in hardware. BarrierDetect, LongOps and
-// LocalMem are always set in software, and LongOps is cleared in hardware.
-void AMDIL770Device::setCaps() {
-  const bool doubleOpsOverridden = mSTM->isOverride(AMDILDeviceInfo::DoubleOps);
-  if (doubleOpsOverridden) {
-    mSWBits.set(AMDILDeviceInfo::FMA);
-    mHWBits.set(AMDILDeviceInfo::DoubleOps);
-  }
-
-  mSWBits.set(AMDILDeviceInfo::BarrierDetect);
-  mHWBits.reset(AMDILDeviceInfo::LongOps);
-  mSWBits.set(AMDILDeviceInfo::LongOps);
-  mSWBits.set(AMDILDeviceInfo::LocalMem);
-}
-
-// The 770 reports the full wavefront size.
-size_t AMDIL770Device::getWavefrontSize() const {
-  return AMDILDevice::WavefrontSize;
-}
-
-// Everything is handled by the 7XX base constructor.
-AMDIL710Device::AMDIL710Device(AMDILSubtarget *ST) : AMDIL7XXDevice(ST) {}
-
-// Nothing to release.
-AMDIL710Device::~AMDIL710Device() {}
-
-// The 710 reports the quarter wavefront size.
-size_t AMDIL710Device::getWavefrontSize() const {
-  return AMDILDevice::QuarterWavefrontSize;
-}
+++ /dev/null
-//==-- AMDIL7XXDevice.h - Define 7XX Device for AMDIL ----------*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// Interface for the subtarget data classes.
-//
-//===----------------------------------------------------------------------===//
-// This file will define the interface that each generation needs to
-// implement in order to correctly answer queries on the capabilities of the
-// specific hardware.
-//===----------------------------------------------------------------------===//
-#ifndef _AMDIL7XXDEVICEIMPL_H_
-#define _AMDIL7XXDEVICEIMPL_H_
-#include "AMDILDevice.h"
-#include "AMDILSubtarget.h"
-
-namespace llvm {
-class AMDILSubtarget;
-
-//===----------------------------------------------------------------------===//
-// 7XX generation of devices and their respective sub classes
-//===----------------------------------------------------------------------===//
-
-// The AMDIL7XXDevice class represents the generic 7XX device. All 7XX
-// devices are derived from this class. The AMDIL7XX device will only
-// support the minimal features that are required to be considered OpenCL 1.0
-// compliant and nothing more.
-class AMDIL7XXDevice : public AMDILDevice {
-public:
- AMDIL7XXDevice(AMDILSubtarget *ST);
- virtual ~AMDIL7XXDevice();
- // Device queries overridden for the 7XX generation.
- virtual size_t getMaxLDSSize() const;
- virtual size_t getWavefrontSize() const;
- virtual uint32_t getGeneration() const;
- virtual uint32_t getResourceID(uint32_t DeviceID) const;
- virtual uint32_t getMaxNumUAVs() const;
-
-protected:
- // Sets this device's capability bits; called from the constructor.
- virtual void setCaps();
-}; // AMDIL7XXDevice
-
-// The AMDIL770Device class represents the RV770 chip and its
-// derivative cards. The difference between this device and the base
-// class is that this device adds support for double precision
-// and has a larger wavefront size.
-class AMDIL770Device : public AMDIL7XXDevice {
-public:
- AMDIL770Device(AMDILSubtarget *ST);
- virtual ~AMDIL770Device();
- virtual size_t getWavefrontSize() const;
-private:
- // Applies the 770-specific capability bits; called from the constructor.
- virtual void setCaps();
-}; // AMDIL770Device
-
-// The AMDIL710Device class derives from the 7XX base class, but this
-// class is a smaller derivative, so we need to override some of the
-// functions in order to correctly specify this information.
-class AMDIL710Device : public AMDIL7XXDevice {
-public:
- AMDIL710Device(AMDILSubtarget *ST);
- virtual ~AMDIL710Device();
- // Only the wavefront size differs from the 7XX base class.
- virtual size_t getWavefrontSize() const;
-}; // AMDIL710Device
-
-} // namespace llvm
-#endif // _AMDIL7XXDEVICEIMPL_H_
+++ /dev/null
-//===------ AMDILAlgorithms.tpp - AMDIL Template Algorithms Header --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides templates algorithms that extend the STL algorithms, but
-// are useful for the AMDIL backend
-//
-//===----------------------------------------------------------------------===//
-
-// Calls F(*it, Second) for every iterator it in [First, Last), in order, and
-// returns F. This mirrors the STL for_each, except that an extra argument is
-// passed to F by reference on every call. The return value of F is ignored.
-template<class InputIterator, class Function, typename Arg>
-Function binaryForEach(InputIterator First, InputIterator Last, Function F,
-                       Arg &Second)
-{
-  while (First != Last) {
-    F(*First, Second);
-    ++First;
-  }
-  return F;
-}
-
-// Like binaryForEach, but F reports whether the current position must be
-// visited again. When F(*First, Second) returns true, the loop calls F again
-// at the same position; when it returns false, the loop advances to the next
-// position.
-//
-// The iterator is never decremented, so a true result on the first element
-// no longer steps before the start of the range, and the function works with
-// iterators that do not support operator--.
-template<class InputIterator, class Function, typename Arg>
-Function safeBinaryForEach(InputIterator First, InputIterator Last, Function F,
-                           Arg &Second)
-{
-  while (First != Last) {
-    if (!F(*First, Second)) {
-      ++First;
-    }
-  }
-  return F;
-}
-
-// Two-level variant of binaryForEach: for every element of [First, Last),
-// runs binaryForEach over that element's own begin()/end() range, passing F
-// and Second through. Returns F.
-template<class InputIterator, class Function, typename Arg>
-Function binaryNestedForEach(InputIterator First, InputIterator Last,
-                             Function F, Arg &Second)
-{
-  while (First != Last) {
-    binaryForEach(First->begin(), First->end(), F, Second);
-    ++First;
-  }
-  return F;
-}
-// Two-level variant of safeBinaryForEach: for every element of
-// [First, Last), runs safeBinaryForEach over that element's own
-// begin()/end() range, passing F and Second through. Returns F.
-template<class InputIterator, class Function, typename Arg>
-Function safeBinaryNestedForEach(InputIterator First, InputIterator Last,
-                                 Function F, Arg &Second)
-{
-  while (First != Last) {
-    safeBinaryForEach(First->begin(), First->end(), F, Second);
-    ++First;
-  }
-  return F;
-}
-
-// Calls F with a pointer to the loop iterator itself (not the element) for
-// every position in [First, Last), then returns F. Receiving the iterator by
-// pointer lets F adjust it, for example after it has invalidated the current
-// position. The loop still increments the iterator after every call.
-template<class InputIterator, class Function>
-Function safeForEach(InputIterator First, InputIterator Last, Function F)
-{
-  while (First != Last) {
-    F(&First);
-    ++First;
-  }
-  return F;
-}
-
-// Two-level loop: for every element of [First, Last), walks that element's
-// own begin()/end() range and calls F with a pointer to the inner iterator.
-// The inner iterator is advanced only when F returns false; when F returns
-// true the loop calls F again without advancing, so F must move the iterator
-// itself in that case. The end iterator of each inner range is read once,
-// before that range is walked. S is used only to deduce the inner iterator
-// type; its value is never read. Returns F.
-template<class InputIterator, class SecondIterator, class Function>
-Function safeNestedForEach(InputIterator First, InputIterator Last,
-                           SecondIterator S, Function F)
-{
-  while (First != Last) {
-    SecondIterator Inner, InnerEnd;
-    Inner = First->begin();
-    InnerEnd = First->end();
-    while (Inner != InnerEnd) {
-      if (F(&Inner)) {
-        continue;
-      }
-      ++Inner;
-    }
-    ++First;
-  }
-  return F;
-}
+++ /dev/null
-//===- AMDIL.td - AMDIL Target Machine -------------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// Target-independent interfaces which we are implementing
-//===----------------------------------------------------------------------===//
-
-include "llvm/Target/Target.td"
-
-// Dummy Instruction itineraries for pseudo instructions
-def ALU_NULL : FuncUnit;
-def NullALU : InstrItinClass;
-
-//===----------------------------------------------------------------------===//
-// AMDIL Subtarget features.
-//===----------------------------------------------------------------------===//
-// Each SubtargetFeature below is written as
-//   <feature name, subtarget attribute, value assigned when enabled, help text>.
-// Most of them assign "true" to a CapsOverride entry indexed by an
-// AMDILDeviceInfo enumerator.
-def FeatureFP64 : SubtargetFeature<"fp64",
- "CapsOverride[AMDILDeviceInfo::DoubleOps]",
- "true",
- "Enable 64bit double precision operations">;
-def FeatureByteAddress : SubtargetFeature<"byte_addressable_store",
- "CapsOverride[AMDILDeviceInfo::ByteStores]",
- "true",
- "Enable byte addressable stores">;
-def FeatureBarrierDetect : SubtargetFeature<"barrier_detect",
- "CapsOverride[AMDILDeviceInfo::BarrierDetect]",
- "true",
- "Enable duplicate barrier detection(HD5XXX or later).">;
-def FeatureImages : SubtargetFeature<"images",
- "CapsOverride[AMDILDeviceInfo::Images]",
- "true",
- "Enable image functions">;
-def FeatureMultiUAV : SubtargetFeature<"multi_uav",
- "CapsOverride[AMDILDeviceInfo::MultiUAV]",
- "true",
- "Generate multiple UAV code(HD5XXX family or later)">;
-def FeatureMacroDB : SubtargetFeature<"macrodb",
- "CapsOverride[AMDILDeviceInfo::MacroDB]",
- "true",
- "Use internal macrodb, instead of macrodb in driver">;
-def FeatureNoAlias : SubtargetFeature<"noalias",
- "CapsOverride[AMDILDeviceInfo::NoAlias]",
- "true",
- "assert that all kernel argument pointers are not aliased">;
-def FeatureNoInline : SubtargetFeature<"no-inline",
- "CapsOverride[AMDILDeviceInfo::NoInline]",
- "true",
- "specify whether to not inline functions">;
-
-// The two pointer-size features write plain subtarget members rather than
-// CapsOverride entries.
-// NOTE(review): both assign "false" when the feature is enabled -- confirm
-// that this polarity is intended.
-def Feature64BitPtr : SubtargetFeature<"64BitPtr",
- "mIs64bit",
- "false",
- "Specify if 64bit addressing should be used.">;
-
-// NOTE(review): the def is named 32on64 while the feature string is
-// "64on32BitPtr"; confirm which spelling users are expected to pass.
-def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
- "mIs32on64bit",
- "false",
- "Specify if 64bit sized pointers with 32bit addressing should be used.">;
-def FeatureDebug : SubtargetFeature<"debug",
- "CapsOverride[AMDILDeviceInfo::Debug]",
- "true",
- "Debug mode is enabled, so disable hardware accelerated address spaces.">;
-// Sets the mDumpCode subtarget member instead of a CapsOverride entry.
-def FeatureDumpCode : SubtargetFeature <"DumpCode",
- "mDumpCode",
- "true",
- "Dump MachineInstrs in the CodeEmitter">;
-
-
-//===----------------------------------------------------------------------===//
-// Register File, Calling Conv, Instruction Descriptions
-//===----------------------------------------------------------------------===//
-
-
-include "AMDILRegisterInfo.td"
-include "AMDILCallingConv.td"
-include "AMDILInstrInfo.td"
-
-def AMDILInstrInfo : InstrInfo {}
-
-//===----------------------------------------------------------------------===//
-// AMDIL processors supported.
-//===----------------------------------------------------------------------===//
-//include "Processors.td"
-
-//===----------------------------------------------------------------------===//
-// Declare the target which we are implementing
-//===----------------------------------------------------------------------===//
-// Assembly writer description; names the writer class "AsmPrinter" and uses
-// variant 0. Referenced from the AMDIL Target definition below.
-def AMDILAsmWriter : AsmWriter {
- string AsmWriterClassName = "AsmPrinter";
- int Variant = 0;
-}
-
-// Assembly parser description: parser class "AsmParser", variant 0, ';' as the
-// comment delimiter and 'r' as the register prefix. Referenced from the AMDIL
-// Target definition below.
-def AMDILAsmParser : AsmParser {
- string AsmParserClassName = "AsmParser";
- int Variant = 0;
-
- string CommentDelimiter = ";";
-
- string RegisterPrefix = "r";
-
-}
-
-
-// Top-level target record: ties together the instruction set description and
-// the assembly writer/parser records defined above.
-def AMDIL : Target {
- // Pull in Instruction Info:
- let InstructionSet = AMDILInstrInfo;
- let AssemblyWriters = [AMDILAsmWriter];
- let AssemblyParsers = [AMDILAsmParser];
-}
+++ /dev/null
-//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-
-#define DEBUGME 0
-#define DEBUG_TYPE "structcfg"
-
-#include "AMDIL.h"
-#include "AMDILInstrInfo.h"
-#include "AMDILRegisterInfo.h"
-#include "AMDILUtilityFunctions.h"
-#include "llvm/ADT/SCCIterator.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/DominatorInternals.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-
-#define FirstNonDebugInstr(A) A->begin()
-using namespace llvm;
-
-// TODO: move-begin.
-
-//===----------------------------------------------------------------------===//
-//
-// Statistics for CFGStructurizer.
-//
-//===----------------------------------------------------------------------===//
-
-// Counters reported through LLVM's STATISTIC mechanism under DEBUG_TYPE.
-// The pattern matchers bump their counter on every successful match
-// (serialPatternMatch, ifPatternMatch and loopPatternMatch do so in this
-// file); ifPatternMatch also adds its clone count to numClonedBlock.
-STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
- "matched");
-STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
- "matched");
-STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
- "pattern matched");
-STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue "
- "pattern matched");
-STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern "
- "matched");
-STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
-STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
-
-//===----------------------------------------------------------------------===//
-//
-// Miscellaneous utility for CFGStructurizer.
-//
-//===----------------------------------------------------------------------===//
-namespace llvmCFGStruct
-{
-// Tracing helpers. Each one expands to an 'if (DEBUGME)' statement, so they
-// print nothing unless DEBUGME is non-zero.
-
-// Prints a newly created instruction.
-#define SHOWNEWINSTR(i) \
-  if (DEBUGME) errs() << "New instr: " << *i << "\n"
-
-// Prints msg followed by the number and size of block b.
-#define SHOWNEWBLK(b, msg) \
-if (DEBUGME) { \
-  errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
-  errs() << "\n"; \
-}
-
-// Like SHOWNEWBLK, but tolerates a null block and also dumps its contents.
-#define SHOWBLK_DETAIL(b, msg) \
-if (DEBUGME) { \
-  if (b) { \
-    errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
-    b->print(errs()); \
-    errs() << "\n"; \
-  } \
-}
-
-// Sentinel values for "no SCC number recorded" and "no register".
-#define INVALIDSCCNUM -1
-#define INVALIDREGNUM 0
-
-/// Calls print(OS, 0) on every element the loop-info container iterates over.
-template<class LoopinfoT>
-void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
-  typename LoopinfoT::iterator Cur = LoopInfo.begin();
-  typename LoopinfoT::iterator Stop = LoopInfo.end();
-  while (Cur != Stop) {
-    (*Cur)->print(OS, 0);
-    ++Cur;
-  }
-}
-
-/// Reverses Src in place by swapping elements pairwise from both ends.
-template<class NodeT>
-void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
-  if (Src.empty())
-    return;
-
-  size_t Lo = 0;
-  size_t Hi = Src.size() - 1;
-  while (Lo < Hi) {
-    NodeT *Saved = Src[Lo];
-    Src[Lo] = Src[Hi];
-    Src[Hi] = Saved;
-    ++Lo;
-    --Hi;
-  }
-}
-
-} //end namespace llvmCFGStruct
-
-
-//===----------------------------------------------------------------------===//
-//
-// MachinePostDominatorTree
-//
-//===----------------------------------------------------------------------===//
-
-namespace llvm {
-
-/// MachinePostDominatorTree - A MachineFunctionPass that owns a
-/// DominatorTreeBase<MachineBasicBlock> built in post-dominator mode and
-/// forwards the usual dominator-tree queries to it.
-///
-struct MachinePostDominatorTree : public MachineFunctionPass {
-  static char ID; // Pass identification, replacement for typeid
-  DominatorTreeBase<MachineBasicBlock> *DT; // Allocated here, freed in dtor.
-
-  // Passing 'true' requests a post-dominator tree.
-  MachinePostDominatorTree()
-    : MachineFunctionPass(ID),
-      DT(new DominatorTreeBase<MachineBasicBlock>(true)) {}
-
-  ~MachinePostDominatorTree();
-
-  // Recomputes the tree for the given function (defined out of line).
-  virtual bool runOnMachineFunction(MachineFunction &MF);
-
-  // The pass declares that it preserves every other analysis.
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-    AU.setPreservesAll();
-    MachineFunctionPass::getAnalysisUsage(AU);
-  }
-
-  // Everything below simply delegates to the wrapped tree.
-
-  const std::vector<MachineBasicBlock *> &getRoots() const {
-    return DT->getRoots();
-  }
-
-  MachineDomTreeNode *getRootNode() const {
-    return DT->getRootNode();
-  }
-
-  MachineDomTreeNode *operator[](MachineBasicBlock *BB) const {
-    return DT->getNode(BB);
-  }
-
-  MachineDomTreeNode *getNode(MachineBasicBlock *BB) const {
-    return DT->getNode(BB);
-  }
-
-  bool dominates(MachineDomTreeNode *A, MachineDomTreeNode *B) const {
-    return DT->dominates(A, B);
-  }
-
-  bool dominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
-    return DT->dominates(A, B);
-  }
-
-  bool
-  properlyDominates(const MachineDomTreeNode *A, MachineDomTreeNode *B) const {
-    return DT->properlyDominates(A, B);
-  }
-
-  bool
-  properlyDominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
-    return DT->properlyDominates(A, B);
-  }
-
-  MachineBasicBlock *
-  findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B) {
-    return DT->findNearestCommonDominator(A, B);
-  }
-
-  // The Module argument is ignored; only the tree is printed.
-  virtual void print(llvm::raw_ostream &OS, const Module *M = 0) const {
-    DT->print(OS);
-  }
-};
-} //end of namespace llvm
-
-// Definition of the pass ID declared in MachinePostDominatorTree; the pass
-// constructor hands its address to MachineFunctionPass.
-char MachinePostDominatorTree::ID = 0;
-// Registers the pass under the command-line name "machinepostdomtree".
-// NOTE(review): the two trailing 'true' arguments are presumably
-// RegisterPass's CFGOnly and is_analysis flags -- confirm against the LLVM
-// version this builds with.
-static RegisterPass<MachinePostDominatorTree>
-machinePostDominatorTreePass("machinepostdomtree",
- "MachinePostDominator Tree Construction",
- true, true);
-
-//const PassInfo *const llvm::MachinePostDominatorsID
-//= &machinePostDominatorTreePass;
-
-/// Rebuilds the wrapped tree for \p MF. Returns false because the machine
-/// function itself is never modified here.
-bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &MF) {
-  DT->recalculate(MF);
-  // A DEBUG(DT->dump()) call used to live here and is intentionally disabled.
-  return false;
-}
-
-// Releases the DominatorTreeBase that the constructor allocated with new.
-MachinePostDominatorTree::~MachinePostDominatorTree() {
- delete DT;
-}
-
-//===----------------------------------------------------------------------===//
-//
-// supporting data structure for CFGStructurizer
-//
-//===----------------------------------------------------------------------===//
-
-namespace llvmCFGStruct
-{
-// Traits hook: the primary template is empty. CFGStructurizer reaches its
-// pass-specific helpers through CFGStructTraits<PassT>, so a specialization
-// must supply them.
-template<class PassT>
-struct CFGStructTraits {
-};
-
-// Per-block bookkeeping kept by the structurizer: whether the block has been
-// retired and which SCC it was assigned to (INVALIDSCCNUM until recorded).
-template <class InstrT>
-class BlockInformation {
-public:
- bool isRetired;
- int sccNum;
- //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
- //Instructions defining the corresponding successor.
- BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
-};
-
-// Per-loop bookkeeping: the loop's landing block plus the register sets that
-// drive break/continue handling around the loop. landBlk starts out NULL.
-template <class BlockT, class InstrT, class RegiT>
-class LandInformation {
-public:
- BlockT *landBlk;
- std::set<RegiT> breakInitRegs; //Registers that need "reg = 0" emitted
- //before WHILELOOP(thisloop), i.e. initialized
- //before entering this loop.
- std::set<RegiT> contInitRegs; //Registers that need "reg = 0" emitted
- //after WHILELOOP(thisloop), i.e. initialized
- //after entering this loop.
- std::set<RegiT> endbranchInitRegs; //Initialized before entering this loop;
- //the loop land block branches on them.
- std::set<RegiT> breakOnRegs; //Registers that need "if (reg) break endif"
- //after ENDLOOP(thisloop); the break applies to
- //outerLoopOf(thisLoop).
- std::set<RegiT> contOnRegs; //Registers that need "if (reg) continue
- //endif" after ENDLOOP(thisloop); the continue
- //applies to outerLoopOf(thisLoop).
- LandInformation() : landBlk(NULL) {}
-};
-
-} //end of namespace llvmCFGStruct
-
-//===----------------------------------------------------------------------===//
-//
-// CFGStructurizer
-//
-//===----------------------------------------------------------------------===//
-
-namespace llvmCFGStruct
-{
-// bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
-//
-// CFGStructurizer - Template driver parameterized on a pass type PassT, which
-// supplies the instruction, function, dominator-tree, loop and loop-info types
-// through nested typedefs. prepare() and run() are the public entry points;
-// everything private is a pattern matcher, a CFG-editing helper or bookkeeping
-// used by them.
-template<class PassT>
-class CFGStructurizer
-{
-public:
- // Result type of singlePathTo().
- typedef enum {
- Not_SinglePath = 0,
- SinglePath_InPath = 1,
- SinglePath_NotInPath = 2
- } PathToKind;
-
-public:
- // Types pulled from the pass.
- typedef typename PassT::InstructionType InstrT;
- typedef typename PassT::FunctionType FuncT;
- typedef typename PassT::DominatortreeType DomTreeT;
- typedef typename PassT::PostDominatortreeType PostDomTreeT;
- typedef typename PassT::DomTreeNodeType DomTreeNodeT;
- typedef typename PassT::LoopinfoType LoopInfoT;
-
- // Graph views of the function and of its blocks.
- typedef GraphTraits<FuncT *> FuncGTraits;
- //typedef FuncGTraits::nodes_iterator BlockIterator;
- typedef typename FuncT::iterator BlockIterator;
-
- typedef typename FuncGTraits::NodeType BlockT;
- typedef GraphTraits<BlockT *> BlockGTraits;
- typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits;
- //typedef BlockGTraits::succ_iterator InstructionIterator;
- typedef typename BlockT::iterator InstrIterator;
-
- // Per-block bookkeeping, keyed by block pointer.
- typedef CFGStructTraits<PassT> CFGTraits;
- typedef BlockInformation<InstrT> BlockInfo;
- typedef std::map<BlockT *, BlockInfo *> BlockInfoMap;
-
- // Per-loop landing bookkeeping, keyed by loop pointer.
- typedef int RegiT;
- typedef typename PassT::LoopType LoopT;
- typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo;
- typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
- //landing info for loop break
- typedef SmallVector<BlockT *, 32> BlockTSmallerVector;
-
-public:
- CFGStructurizer();
- ~CFGStructurizer();
-
- /// Perform the CFG structurization
- bool run(FuncT &Func, PassT &Pass, const AMDILRegisterInfo *tri);
-
- /// Perform the CFG preparation
- bool prepare(FuncT &Func, PassT &Pass, const AMDILRegisterInfo *tri);
-
-private:
- // Block ordering and the pattern matchers. Each matcher returns the number
- // of reductions it performed on CurBlock.
- void orderBlocks();
- void printOrderedBlocks(llvm::raw_ostream &OS);
- int patternMatch(BlockT *CurBlock);
- int patternMatchGroup(BlockT *CurBlock);
-
- int serialPatternMatch(BlockT *CurBlock);
- int ifPatternMatch(BlockT *CurBlock);
- int switchPatternMatch(BlockT *CurBlock);
- int loopendPatternMatch(BlockT *CurBlock);
- int loopPatternMatch(BlockT *CurBlock);
-
- int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
- int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
- //int loopWithoutBreak(BlockT *);
-
- // Helpers for loop break/continue handling and for branches that jump into
- // an if region.
- void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
- BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
- void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
- BlockT *ContBlock, LoopT *contLoop);
- bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
- int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
- BlockT *FalseBlock);
- int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
- BlockT *FalseBlock);
- int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
- BlockT *FalseBlock, BlockT **LandBlockPtr);
- void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
- BlockT *FalseBlock, BlockT *LandBlock,
- bool Detail = false);
- PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
- bool AllowSideEntry = true);
- BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
- bool AllowSideEntry = true);
- int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
- void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
-
- // Block merging / CFG rewriting primitives used once a pattern matched.
- void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
- BlockT *TrueBlock, BlockT *FalseBlock,
- BlockT *LandBlock);
- void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
- void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
- BlockT *ExitLandBlock, RegiT SetReg);
- void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
- RegiT SetReg);
- BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
- std::set<BlockT*> &ExitBlockSet,
- BlockT *ExitLandBlk);
- BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
- BlockTSmallerVector &ExitingBlocks,
- BlockTSmallerVector &ExitBlocks);
- BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
- void removeUnconditionalBranch(BlockT *SrcBlock);
- void removeRedundantConditionalBranch(BlockT *SrcBlock);
- void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
-
- void removeSuccessor(BlockT *SrcBlock);
- BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
- BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
-
- void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
- InstrIterator InsertPos);
-
- // Accessors for the per-block bookkeeping in blockInfoMap.
- void recordSccnum(BlockT *SrcBlock, int SCCNum);
- int getSCCNum(BlockT *srcBlk);
-
- void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
- bool isRetiredBlock(BlockT *SrcBlock);
- bool isActiveLoophead(BlockT *CurBlock);
- bool needMigrateBlock(BlockT *Block);
-
- // Accessors for the per-loop bookkeeping in loopLandInfoMap.
- BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
- BlockTSmallerVector &exitBlocks,
- std::set<BlockT*> &ExitBlockSet);
- void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
- BlockT *getLoopLandBlock(LoopT *LoopRep);
- LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
-
- void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
- void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
- void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
- void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
- void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
-
- // Miscellaneous CFG queries.
- bool hasBackEdge(BlockT *curBlock);
- unsigned getLoopDepth (LoopT *LoopRep);
- int countActiveBlock(
- typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
- typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
- BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
- BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
-
-private:
- // Analyses and context captured by prepare()/run(); the constructor only
- // nulls the first three.
- DomTreeT *domTree;
- PostDomTreeT *postDomTree;
- LoopInfoT *loopInfo;
- PassT *passRep;
- FuncT *funcRep;
-
- // Values of blockInfoMap are deleted by run() and by the destructor; values
- // of loopLandInfoMap are deleted by run().
- BlockInfoMap blockInfoMap;
- LoopLandInfoMap loopLandInfoMap;
- // Filled by orderBlocks(), one SCC after another.
- SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
- const AMDILRegisterInfo *TRI;
-
-}; //template class CFGStructurizer
-
-/// Starts with every pointer member null. passRep, funcRep and TRI are
-/// assigned by prepare()/run(), but nulling them here means an object that is
-/// inspected before either call never exposes indeterminate pointers.
-template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
-  : domTree(NULL), postDomTree(NULL), loopInfo(NULL), passRep(NULL),
-    funcRep(NULL), TRI(NULL) {
-}
-
-/// Frees whatever bookkeeping this object still owns: every BlockInfo left in
-/// blockInfoMap and every LoopLandInfo left in loopLandInfoMap. run() deletes
-/// and clears both maps itself, so after a completed run() both loops below
-/// see empty maps.
-template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
-  for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
-       E = blockInfoMap.end(); I != E; ++I) {
-    delete I->second;
-  }
-
-  // In the code visible here these entries are otherwise only released by
-  // run(); without this loop they would leak on any other path.
-  for (typename LoopLandInfoMap::iterator I = loopLandInfoMap.begin(),
-       E = loopLandInfoMap.end(); I != E; ++I) {
-    delete I->second;
-  }
-}
-
-/// Normalizes the CFG of \p func before structurization.
-///
-/// Steps, in order: fetch loop info, order the blocks, give every loop that
-/// has no exiting block a dummy exit via normalizeInfiniteLoopExit(), strip
-/// unconditional and redundant conditional branches from every ordered block,
-/// and finally funnel all collected return blocks into one dummy exit block
-/// when there are two or more of them.
-///
-/// \returns true only when addDummyExitBlock() was invoked.
-/// NOTE(review): the branch-removal helpers and normalizeInfiniteLoopExit()
-/// look like they edit the function too, yet do not influence the result --
-/// confirm that callers do not rely on the return value to detect changes.
-template<class PassT>
-bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
-                                     const AMDILRegisterInfo * tri) {
-  typedef SmallVector<BlockT *, DEFAULT_VEC_SLOTS> BlockVecT;
-
-  passRep = &pass;
-  funcRep = &func;
-  TRI = tri;
-
-  if (DEBUGME) {
-    errs() << "AMDILCFGStructurizer::prepare\n";
-  }
-
-  // Neither dominator tree is fetched during preparation; only loop info is
-  // needed. It is obtained through CFGTraits because the direct
-  // pass.getAnalysis<LoopInfoT>() form was rejected by gcc.
-  loopInfo = CFGTraits::getLoopInfo(pass);
-  if (DEBUGME) {
-    errs() << "LoopInfo:\n";
-    PrintLoopinfo(*loopInfo, errs());
-  }
-
-  orderBlocks();
-  if (DEBUGME) {
-    errs() << "Ordered blocks:\n";
-    printOrderedBlocks(errs());
-  }
-
-  BlockVecT returnBlocks;
-
-  // Loops without any exiting block get a dummy exit, which is then treated
-  // like a return block.
-  typename LoopInfoT::iterator loopIt = loopInfo->begin();
-  typename LoopInfoT::iterator loopStop = loopInfo->end();
-  for ( ; loopIt != loopStop; ++loopIt) {
-    LoopT *curLoop = *loopIt;
-    BlockTSmallerVector exiting;
-    curLoop->getExitingBlocks(exiting);
-    if (!exiting.empty())
-      continue;
-
-    BlockT *dummyExit = normalizeInfiniteLoopExit(curLoop);
-    if (dummyExit)
-      returnBlocks.push_back(dummyExit);
-  }
-
-  // Strip unconditional / redundant conditional branches and collect the
-  // return blocks.
-  typename BlockVecT::const_iterator blkIt = orderedBlks.begin();
-  typename BlockVecT::const_iterator blkStop = orderedBlks.end();
-  for ( ; blkIt != blkStop; ++blkIt) {
-    BlockT *blk = *blkIt;
-    removeUnconditionalBranch(blk);
-    removeRedundantConditionalBranch(blk);
-    if (CFGTraits::isReturnBlock(blk))
-      returnBlocks.push_back(blk);
-    assert(blk->succ_size() <= 2);
-  }
-
-  // A dummy exit block is added iff there are multiple returns.
-  if (returnBlocks.size() < 2)
-    return false;
-
-  addDummyExitBlock(returnBlocks);
-  return true;
-} //CFGStructurizer::prepare
-
-/// Runs the structurization loop over \p func.
-///
-/// Fetches the dominator tree, post-dominator tree and loop info through
-/// CFGTraits, orders the blocks SCC by SCC, then repeatedly sweeps over the
-/// ordered blocks applying patternMatch() to every block that is not retired.
-/// Sweeping stops when the entry block has no successors left or when a full
-/// sweep fails to reduce the number of active blocks. Afterwards retired
-/// blocks are erased from the function and the per-block and per-loop
-/// bookkeeping is freed.
-///
-/// Always returns true. If the CFG could not be reduced to a single block the
-/// function trips assert(!"IRREDUCIBL_CF") in builds with assertions enabled.
-template<class PassT>
-bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
- const AMDILRegisterInfo * tri) {
- passRep = &pass;
- funcRep = &func;
- TRI = tri;
-
- //func.RenumberBlocks();
-
- //Assume reducible CFG...
- if (DEBUGME) {
- errs() << "AMDILCFGStructurizer::run\n";
- //errs() << func.getFunction()->getNameStr() << "\n";
- func.viewCFG();
- //func.viewCFGOnly();
- //func.dump();
- }
-
-#if 1
- //FIXME: gcc complains on this.
- //domTree = &pass.getAnalysis<DomTreeT>();
- domTree = CFGTraits::getDominatorTree(pass);
- if (DEBUGME) {
- domTree->print(errs(), (const llvm::Module*)0);
- }
-#endif
-
- //FIXME: gcc complains on this.
- //domTree = &pass.getAnalysis<DomTreeT>();
- postDomTree = CFGTraits::getPostDominatorTree(pass);
- if (DEBUGME) {
- postDomTree->print(errs());
- }
-
- //FIXME: gcc complains on this.
- //loopInfo = &pass.getAnalysis<LoopInfoT>();
- loopInfo = CFGTraits::getLoopInfo(pass);
- if (DEBUGME) {
- errs() << "LoopInfo:\n";
- PrintLoopinfo(*loopInfo, errs());
- }
-
- orderBlocks();
-//#define STRESSTEST
-#ifdef STRESSTEST
- //Use the worse block ordering to test the algorithm.
- ReverseVector(orderedBlks);
-#endif
-
- if (DEBUGME) {
- errs() << "Ordered blocks:\n";
- printOrderedBlocks(errs());
- }
- int numIter = 0;
- bool finish = false;
- BlockT *curBlk;
- bool makeProgress = false;
- int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
- orderedBlks.end());
-
- // Each pass of this do/while is one full sweep over orderedBlks.
- do {
- ++numIter;
- if (DEBUGME) {
- errs() << "numIter = " << numIter
- << ", numRemaintedBlk = " << numRemainedBlk << "\n";
- }
-
- typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
- iterBlk = orderedBlks.begin();
- typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
- iterBlkEnd = orderedBlks.end();
-
- // State for the SCC currently being processed. sccBeginBlk == NULL means
- // "the next block starts a new SCC".
- typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
- sccBeginIter = iterBlk;
- BlockT *sccBeginBlk = NULL;
- int sccNumBlk = 0; // Active-block count to beat for the current SCC;
- // reset to numRemainedBlk when a new SCC starts.
- int sccNumIter; // Number of iteration in this SCC.
-
- while (iterBlk != iterBlkEnd) {
- curBlk = *iterBlk;
-
- if (sccBeginBlk == NULL) {
- sccBeginIter = iterBlk;
- sccBeginBlk = curBlk;
- sccNumIter = 0;
- sccNumBlk = numRemainedBlk; // Init to maximum possible number.
- if (DEBUGME) {
- errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
- errs() << "\n";
- }
- }
-
- if (!isRetiredBlock(curBlk)) {
- patternMatch(curBlk);
- }
-
- ++iterBlk;
-
- // Decide whether to move on to the next SCC or to revisit this one.
- bool contNextScc = true;
- if (iterBlk == iterBlkEnd
- || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
- // Just finish one scc.
- ++sccNumIter;
- int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
- if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
- // More than one block left and no fewer than last time: give up on
- // this SCC for the current sweep.
- if (DEBUGME) {
- errs() << "Can't reduce SCC " << getSCCNum(curBlk)
- << ", sccNumIter = " << sccNumIter;
- errs() << "doesn't make any progress\n";
- }
- contNextScc = true;
- } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
- // Progress was made but the SCC is not down to one block yet: rewind
- // to the start of the SCC and go over it again.
- sccNumBlk = sccRemainedNumBlk;
- iterBlk = sccBeginIter;
- contNextScc = false;
- if (DEBUGME) {
- errs() << "repeat processing SCC" << getSCCNum(curBlk)
- << "sccNumIter = " << sccNumIter << "\n";
- func.viewCFG();
- //func.viewCFGOnly();
- }
- } else {
- // Finish the current scc.
- contNextScc = true;
- }
- } else {
- // Continue on next component in the current scc.
- contNextScc = false;
- }
-
- if (contNextScc) {
- sccBeginBlk = NULL;
- }
- } //while, "one iteration" over the function.
-
- // Done once the entry block has no successors; otherwise keep sweeping only
- // while the number of active blocks keeps shrinking (or has reached one).
- BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
- if (entryBlk->succ_size() == 0) {
- finish = true;
- if (DEBUGME) {
- errs() << "Reduce to one block\n";
- }
- } else {
- int newnumRemainedBlk
- = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
- // consider cloned blocks ??
- if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
- makeProgress = true;
- numRemainedBlk = newnumRemainedBlk;
- } else {
- makeProgress = false;
- if (DEBUGME) {
- errs() << "No progress\n";
- }
- }
- }
- } while (!finish && makeProgress);
-
- // Misc wrap up to maintain the consistency of the Function representation.
- CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
-
- // Detach retired Block, release memory.
- for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
- iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
- if ((*iterMap).second && (*iterMap).second->isRetired) {
- assert(((*iterMap).first)->getNumber() != -1);
- if (DEBUGME) {
- errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
- }
- (*iterMap).first->eraseFromParent(); //Remove from the parent Function.
- }
- delete (*iterMap).second;
- }
- blockInfoMap.clear();
-
- // clear loopLandInfoMap
- for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
- iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
- delete (*iterMap).second;
- }
- loopLandInfoMap.clear();
-
- if (DEBUGME) {
- func.viewCFG();
- //func.dump();
- }
-
- // Reaching here without 'finish' means the sweeps stopped making progress.
- if (!finish) {
- assert(!"IRREDUCIBL_CF");
- }
-
- return true;
-} //CFGStructurizer::run
-
-/// Writes every entry of orderedBlks to \p os as "BB<number>(<scc>,<size>)".
-/// Entries are separated by a space, except that a newline is emitted after
-/// each entry whose index is a non-zero multiple of ten.
-///
-template<class PassT>
-void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
-  const size_t count = orderedBlks.size();
-  for (size_t idx = 0; idx != count; ++idx) {
-    BlockT *blk = orderedBlks[idx];
-    os << "BB" << blk->getNumber();
-    os << "(" << getSCCNum(blk) << "," << blk->size() << ")";
-
-    const bool endOfRow = (idx != 0) && (idx % 10 == 0);
-    os << (endOfRow ? "\n" : " ");
-  }
-} //printOrderedBlocks
-
-/// Fills orderedBlks by walking the function's strongly connected components
-/// in scc_iterator order, appending every block of each SCC and recording the
-/// SCC's running index for it. Afterwards every block of the function that
-/// did not receive an SCC number is reported on errs() as unreachable.
-///
-template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
-  int sccIndex = 0;
-  scc_iterator<FuncT *> sccIt = scc_begin(funcRep);
-  scc_iterator<FuncT *> sccStop = scc_end(funcRep);
-  for ( ; sccIt != sccStop; ++sccIt, ++sccIndex) {
-    std::vector<BlockT *> &members = *sccIt;
-    typename std::vector<BlockT *>::const_iterator memIt = members.begin();
-    typename std::vector<BlockT *>::const_iterator memStop = members.end();
-    while (memIt != memStop) {
-      BlockT *member = *memIt;
-      orderedBlks.push_back(member);
-      recordSccnum(member, sccIndex);
-      ++memIt;
-    }
-  }
-
-  // Any block still lacking an SCC number was never visited above.
-  BlockIterator blkIt = FuncGTraits::nodes_begin(funcRep);
-  BlockIterator blkStop = FuncGTraits::nodes_end(funcRep);
-  for ( ; blkIt != blkStop; ++blkIt) {
-    BlockT *blk = &(*blkIt);
-    if (getSCCNum(blk) != INVALIDSCCNUM)
-      continue;
-    errs() << "unreachable block BB" << blk->getNumber() << "\n";
-  }
-} //orderBlocks
-
-/// Applies patternMatchGroup() to \p curBlk over and over until a round
-/// reports no match, and returns the sum of all reported matches.
-template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
-  if (DEBUGME) {
-    errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
-  }
-
-  int total = 0;
-  for (;;) {
-    const int matched = patternMatchGroup(curBlk);
-    if (matched <= 0)
-      break;
-    total += matched;
-  }
-
-  if (DEBUGME) {
-    errs() << "End patternMatch BB" << curBlk->getNumber()
-           << ", numMatch = " << total << "\n";
-  }
-
-  return total;
-} //patternMatch
-
-/// Runs one round of the matchers on \p curBlk -- serial, if, loop-end, loop,
-/// in that order -- and returns the sum of their results. The switch matcher
-/// is deliberately not part of the round.
-template<class PassT>
-int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
-  const int serialHits = serialPatternMatch(curBlk);
-  const int ifHits = ifPatternMatch(curBlk);
-  const int loopendHits = loopendPatternMatch(curBlk);
-  const int loopHits = loopPatternMatch(curBlk);
-  return serialHits + ifHits + loopendHits + loopHits;
-}//patternMatchGroup
-
-/// Merges \p curBlk with its successor when curBlk has exactly one successor,
-/// that successor has exactly one predecessor, and the successor is not an
-/// active loop header. Returns 1 when a merge happened, 0 otherwise.
-template<class PassT>
-int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
-  if (curBlk->succ_size() == 1) {
-    BlockT *onlySucc = *curBlk->succ_begin();
-    const bool mergeable =
-      onlySucc->pred_size() == 1 && !isActiveLoophead(onlySucc);
-    if (mergeable) {
-      mergeSerialBlock(curBlk, onlySucc);
-      ++numSerialPatternMatch;
-      return 1;
-    }
-  }
-  return 0;
-} //serialPatternMatch
-
-/// Tries to collapse an if / if-else region rooted at \p curBlk.
-///
-/// Requires exactly two successors, no back edge out of curBlk and a branch
-/// instruction reported by CFGTraits::getNormalBlockBranchInstr(). The shape
-/// of the two arms determines the landing block; shapes that fit none of the
-/// simple cases are handed to handleJumpintoIf(), whose result is returned
-/// directly. Arms that have more than one predecessor are cloned for curBlk
-/// before the region is merged.
-///
-/// \returns 0 when the preconditions fail, otherwise 1 plus the number of
-/// clones made (or whatever handleJumpintoIf() returned).
-template<class PassT>
-int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
- //two edges
- if (curBlk->succ_size() != 2) {
- return 0;
- }
-
- if (hasBackEdge(curBlk)) {
- return 0;
- }
-
- InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
- if (branchInstr == NULL) {
- return 0;
- }
-
- assert(CFGTraits::isCondBranch(branchInstr));
-
- BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
- BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
- BlockT *landBlk;
- int cloned = 0;
-
- // TODO: Simplify
- // Classify the region. In order:
- //  1. both arms have one successor and it is the same block (diamond);
- //  2. neither arm has a successor (no landing block);
- //  3. the true arm falls into the false block (if-then, false arm dropped);
- //  4. the false arm falls into the true block (mirror of 3);
- //  5./6. isSameloopDetachedContbreak() accepts the pair, so the landing
- //        block is the successor of the arm that has exactly one;
- //  7. anything else goes to handleJumpintoIf().
- if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
- && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
- landBlk = *trueBlk->succ_begin();
- } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
- landBlk = NULL;
- } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
- landBlk = falseBlk;
- falseBlk = NULL;
- } else if (falseBlk->succ_size() == 1
- && *falseBlk->succ_begin() == trueBlk) {
- landBlk = trueBlk;
- trueBlk = NULL;
- } else if (falseBlk->succ_size() == 1
- && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
- landBlk = *falseBlk->succ_begin();
- } else if (trueBlk->succ_size() == 1
- && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
- landBlk = *trueBlk->succ_begin();
- } else {
- return handleJumpintoIf(curBlk, trueBlk, falseBlk);
- }
-
- // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
- // new BB created for landBlk==NULL may introduce new challenge to the
- // reduction process.
- if (landBlk != NULL &&
- ((trueBlk && trueBlk->pred_size() > 1)
- || (falseBlk && falseBlk->pred_size() > 1))) {
- cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
- }
-
- // An arm that still has other predecessors is cloned so that curBlk gets a
- // private copy to merge.
- if (trueBlk && trueBlk->pred_size() > 1) {
- trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
- ++cloned;
- }
-
- if (falseBlk && falseBlk->pred_size() > 1) {
- falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
- ++cloned;
- }
-
- mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
-
- ++numIfPatternMatch;
-
- numClonedBlock += cloned;
-
- return 1 + cloned;
-} //ifPatternMatch
-
-/// Placeholder: no switch pattern is recognised, so this always reports zero
-/// matches and ignores \p curBlk. The call in patternMatchGroup() is commented
-/// out.
-template<class PassT>
-int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
- return 0;
-} //switchPatternMatch
-
-/// Runs the loop-break and loop-continue matchers for every loop that
-/// contains \p curBlk, outermost loop first. Loops that already have a land
-/// block are skipped. Processing stops as soon as loopbreakPatternMatch()
-/// returns -1.
-///
-/// \returns the sum of the break and continue match counts of the loops that
-/// were fully processed; 0 when curBlk is in no loop.
-template<class PassT>
-int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
-  // Collect the loop nest from the innermost loop outwards.
-  std::vector<LoopT *> loopChain;
-  for (LoopT *lp = loopInfo->getLoopFor(curBlk); lp; lp = lp->getParentLoop()) {
-    loopChain.push_back(lp);
-  }
-
-  // Process nested loop outside->inside, so "continue" to a outside loop won't
-  // be mistaken as "break" of the current loop.
-  int total = 0;
-  for (size_t remaining = loopChain.size(); remaining > 0; --remaining) {
-    LoopT *lp = loopChain[remaining - 1];
-
-    if (getLoopLandBlock(lp) != NULL)
-      continue;
-
-    BlockT *header = lp->getHeader();
-
-    const int breakHits = loopbreakPatternMatch(lp, header);
-    if (breakHits == -1)
-      break;
-
-    const int contHits = loopcontPatternMatch(lp, header);
-    total += breakHits + contHits;
-  }
-
-  return total;
-} //loopendPatternMatch
-
-/// For a block with no successors, walks outwards through the loops that have
-/// \p curBlk as their header and merges each loop's land block into curBlk,
-/// provided land info exists for the loop and its land block is not retired.
-///
-/// \returns the number of merges performed; the same amount is added to the
-/// numLoopPatternMatch statistic.
-template<class PassT>
-int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) {
-  if (curBlk->succ_size() != 0) {
-    return 0;
-  }
-
-  int merged = 0;
-  for (LoopT *lp = loopInfo->getLoopFor(curBlk);
-       lp && lp->getHeader() == curBlk;
-       lp = lp->getParentLoop()) {
-    LoopLandInfo *info = getLoopLandInfo(lp);
-    if (!info)
-      continue;
-
-    BlockT *land = info->landBlk;
-    assert(land);
-    if (isRetiredBlock(land))
-      continue;
-
-    mergeLooplandBlock(curBlk, info);
-    ++merged;
-  }
-
-  numLoopPatternMatch += merged;
-
-  return merged;
-} //loopPatternMatch
-
-// Rewrites the exits ("breaks") of loopRep.
-//
-// Steps, in order:
-//  1. Collect the loop's exiting blocks. With none, setLoopLandBlock(loopRep)
-//     is called and 0 is returned.
-//  2. Map each exiting block to its exit block (exitingBlock2ExitBlock).
-//  3. Choose exitLandBlk: the only exit block, or otherwise the nearest common
-//     post-dominator of all exit blocks, possibly replaced by a block made by
-//     relocateLoopcontBlock() / addLoopEndbranchBlock().
-//  4. Clone blocks that have side entries on the exit paths and run
-//     serialPatternMatch() on the exit blocks.
-//  5. Record the landing block and call handleLoopbreak() for every
-//     (exiting block, exit block) pair.
-//
-// Returns -1 when the exits cannot be handled, otherwise
-// numBreak + numSerial + numCloned. Also updates numLoopbreakPatternMatch and
-// numClonedBlock. The loopHeader parameter is not referenced in the body.
-template<class PassT>
-int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
- BlockT *loopHeader) {
- BlockTSmallerVector exitingBlks;
- loopRep->getExitingBlocks(exitingBlks);
-
- if (DEBUGME) {
- errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
- }
-
- // No exiting block: nothing to break out of.
- if (exitingBlks.size() == 0) {
- setLoopLandBlock(loopRep);
- return 0;
- }
-
- // Compute the corresponding exitBlks and exit block set.
- // exitBlks is kept parallel to exitingBlks (same index = same edge);
- // exitBlkSet holds the distinct exit blocks.
- BlockTSmallerVector exitBlks;
- std::set<BlockT *> exitBlkSet;
- for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
- iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
- BlockT *exitingBlk = *iter;
- BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
- exitBlks.push_back(exitBlk);
- exitBlkSet.insert(exitBlk); //non-duplicate insert
- }
-
- assert(exitBlkSet.size() > 0);
- assert(exitBlks.size() == exitingBlks.size());
-
- if (DEBUGME) {
- errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
- }
-
- // Find exitLandBlk.
- BlockT *exitLandBlk = NULL;
- int numCloned = 0;
- int numSerial = 0;
-
- if (exitBlkSet.size() == 1)
- {
- exitLandBlk = *exitBlkSet.begin();
- } else {
- exitLandBlk = findNearestCommonPostDom(exitBlkSet);
-
- if (exitLandBlk == NULL) {
- return -1;
- }
-
- // Every exit block must relate to exitLandBlk in the same way: either all
- // of them reach it on a single path, or all of them end on a single path
- // that does not contain it. Any mix (or a non-single path) is rejected.
- bool allInPath = true;
- bool allNotInPath = true;
- for (typename std::set<BlockT*>::const_iterator
- iter = exitBlkSet.begin(),
- iterEnd = exitBlkSet.end();
- iter != iterEnd; ++iter) {
- BlockT *exitBlk = *iter;
-
- PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
- if (DEBUGME) {
- errs() << "BB" << exitBlk->getNumber()
- << " to BB" << exitLandBlk->getNumber() << " PathToKind="
- << pathKind << "\n";
- }
-
- allInPath = allInPath && (pathKind == SinglePath_InPath);
- allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
-
- if (!allInPath && !allNotInPath) {
- if (DEBUGME) {
- errs() << "singlePath check fail\n";
- }
- return -1;
- }
- } // check all exit blocks
-
- if (allNotInPath) {
-#if 1
-
- // TODO: Simplify, maybe separate function?
- //funcRep->viewCFG();
- LoopT *parentLoopRep = loopRep->getParentLoop();
- BlockT *parentLoopHeader = NULL;
- if (parentLoopRep)
- parentLoopHeader = parentLoopRep->getHeader();
-
- // Note: both conditions below assign exitLandBlk as a side effect. The
- // first alternative is tried only when the landing block is the parent
- // loop's header; if it is skipped or yields NULL, the second is tried.
- if (exitLandBlk == parentLoopHeader &&
- (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
- loopRep,
- exitBlkSet,
- exitLandBlk)) != NULL) {
- if (DEBUGME) {
- errs() << "relocateLoopcontBlock success\n";
- }
- } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
- exitingBlks,
- exitBlks)) != NULL) {
- if (DEBUGME) {
- errs() << "insertEndbranchBlock success\n";
- }
- } else {
- if (DEBUGME) {
- errs() << "loop exit fail\n";
- }
- return -1;
- }
-#else
- return -1;
-#endif
- }
-
- // Handle side entry to exit path.
- // The exit lists are rebuilt from scratch because the CFG may have been
- // changed above and cloning below can replace exit blocks.
- exitBlks.clear();
- exitBlkSet.clear();
- for (typename BlockTSmallerVector::iterator iterExiting =
- exitingBlks.begin(),
- iterExitingEnd = exitingBlks.end();
- iterExiting != iterExitingEnd; ++iterExiting) {
- BlockT *exitingBlk = *iterExiting;
- BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
- BlockT *newExitBlk = exitBlk;
-
- // An exit block shared with other predecessors gets a private copy for
- // this exiting block.
- if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
- newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
- ++numCloned;
- }
-
- numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
-
- exitBlks.push_back(newExitBlk);
- exitBlkSet.insert(newExitBlk);
- }
-
- for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
- iterExitEnd = exitBlks.end();
- iterExit != iterExitEnd; ++iterExit) {
- BlockT *exitBlk = *iterExit;
- numSerial += serialPatternMatch(exitBlk);
- }
-
- // Verify the resulting shape: every exit block is either exitLandBlk
- // itself, or has at most one predecessor and exitLandBlk as its only
- // successor. Anything else is reported as failure.
- for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
- iterExitEnd = exitBlks.end();
- iterExit != iterExitEnd; ++iterExit) {
- BlockT *exitBlk = *iterExit;
- if (exitBlk->pred_size() > 1) {
- if (exitBlk != exitLandBlk) {
- return -1;
- }
- } else {
- if (exitBlk != exitLandBlk &&
- (exitBlk->succ_size() != 1 ||
- *exitBlk->succ_begin() != exitLandBlk)) {
- return -1;
- }
- }
- }
- } // else
-
- // LoopT *exitLandLoop = loopInfo->getLoopFor(exitLandBlk);
- // recordLoopLandBlock() returns the landing block to use from here on.
- exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
-
- // Fold break into the breaking block. Leverage across level breaks.
- // exitBlks and exitingBlks are walked in lock step (parallel vectors).
- assert(exitingBlks.size() == exitBlks.size());
- for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
- iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
- iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
- BlockT *exitBlk = *iterExit;
- BlockT *exitingBlk = *iterExiting;
- assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
- LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
- handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
- }
-
- int numBreak = static_cast<int>(exitingBlks.size());
- numLoopbreakPatternMatch += numBreak;
- numClonedBlock += numCloned;
- return numBreak + numSerial + numCloned;
-} //loopbreakPatternMatch
-
-// Handle every "continue" edge of loopRep, i.e. every predecessor of
-// loopHeader that is contained in loopRep.
-//
-// Each such predecessor is passed to handleLoopcontBlock(); only after all of
-// them have been visited are their edges to loopHeader removed, so the
-// predecessor list is not modified while it is being iterated. Returns the
-// number of predecessors handled and adds it to numLoopcontPatternMatch.
-template<class PassT>
-int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
-                                                 BlockT *loopHeader) {
-  typedef SmallVector<BlockT *, DEFAULT_VEC_SLOTS> ContBlockList;
-  typedef typename InvBlockGTraits::ChildIteratorType PredIter;
-
-  ContBlockList contSources;
-
-  // Pass 1: rewrite each in-loop predecessor of the header.
-  const PredIter predEnd = InvBlockGTraits::child_end(loopHeader);
-  for (PredIter predIt = InvBlockGTraits::child_begin(loopHeader);
-       predIt != predEnd; ++predIt) {
-    BlockT *predBlk = *predIt;
-    if (!loopRep->contains(predBlk))
-      continue;
-
-    handleLoopcontBlock(predBlk, loopInfo->getLoopFor(predBlk),
-                        loopHeader, loopRep);
-    contSources.push_back(predBlk);
-  }
-
-  // Pass 2: drop the now-redundant edges back to the header.
-  for (unsigned idx = 0, count = contSources.size(); idx != count; ++idx)
-    contSources[idx]->removeSuccessor(loopHeader);
-
-  const int numCont = static_cast<int>(contSources.size());
-  numLoopcontPatternMatch += numCont;
-  return numCont;
-} //loopcontPatternMatch
-
-
-// Returns true iff src1Blk has no successors, src1Blk and src2Blk belong to
-// the same (non-NULL) loop, and that loop has a non-NULL entry in
-// loopLandInfoMap. This recovers the "detached continue/break block"
-// information without tracking loopContBlks / loopBreakBlks explicitly.
-template<class PassT>
-bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
-                                                         BlockT *src2Blk) {
-  if (src1Blk->succ_size() != 0)
-    return false;
-
-  LoopT *sharedLoop = loopInfo->getLoopFor(src1Blk);
-  if (sharedLoop == NULL || sharedLoop != loopInfo->getLoopFor(src2Blk))
-    return false;
-
-  // Subscript access, exactly as before (same lookup on the map).
-  LoopLandInfo *&landEntry = loopLandInfoMap[sharedLoop];
-  if (landEntry == NULL)
-    return false;
-
-  if (DEBUGME) {
-    errs() << "isLoopContBreakBlock yes src1 = BB"
-           << src1Blk->getNumber()
-           << " src2 = BB" << src2Blk->getNumber() << "\n";
-  }
-  return true;
-} //isSameloopDetachedContbreak
-
-// Try handleJumpintoIfImp() with the branches as given; if that changes
-// nothing (returns 0), retry once with trueBlk and falseBlk swapped.
-// Returns the result of the last attempt made.
-template<class PassT>
-int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
-                                             BlockT *trueBlk,
-                                             BlockT *falseBlk) {
-  const int firstTry = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
-  if (firstTry != 0)
-    return firstTry;
-
-  if (DEBUGME) {
-    errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
-  }
-  return handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
-}
-
-// Starting at trueBlk, walk down the chain of single-successor blocks looking
-// for the first block ("down block") that falseBlk reaches on a single path.
-// When one is found, side entries on both branches are cloned away, the two
-// successors of headBlk are serial-matched and headBlk is if-matched.
-//
-// Returns the accumulated count of changes (clones plus pattern matches), or
-// 0 when no suitable down block exists.
-template<class PassT>
-int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
-                                                BlockT *trueBlk,
-                                                BlockT *falseBlk) {
-  int num = 0;
-
-  if (DEBUGME) {
-    errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
-           << " true = BB" << trueBlk->getNumber()
-           << ", numSucc=" << trueBlk->succ_size()
-           << " false = BB" << falseBlk->getNumber() << "\n";
-  }
-
-  // trueBlk itself could be the common post dominator, so start there.
-  for (BlockT *candidate = trueBlk; candidate != NULL;
-       candidate = (candidate->succ_size() == 1) ? (*candidate->succ_begin())
-                                                 : NULL) {
-    if (DEBUGME) {
-      errs() << "check down = BB" << candidate->getNumber();
-    }
-
-    if (singlePathTo(falseBlk, candidate) != SinglePath_InPath) {
-      if (DEBUGME) {
-        errs() << " not working\n";
-      }
-      continue;
-    }
-
-    if (DEBUGME) {
-      errs() << " working\n";
-    }
-
-    num += cloneOnSideEntryTo(headBlk, trueBlk, candidate);
-    num += cloneOnSideEntryTo(headBlk, falseBlk, candidate);
-
-    // At this point num only counts cloned blocks.
-    numClonedBlock += num;
-    num += serialPatternMatch(*headBlk->succ_begin());
-    num += serialPatternMatch(*(++headBlk->succ_begin()));
-    num += ifPatternMatch(headBlk);
-    assert(num > 0);
-
-    break;
-  } // walk down the single-successor chain
-
-  return num;
-} //handleJumpintoIf
-
-// Debug helper: print to errs() the block number and size of headBlk and, for
-// each of trueBlk / falseBlk / landBlk that is non-NULL, its block number,
-// size and predecessor count. With detail set, each printed block is also
-// dumped in full. headBlk must be non-NULL.
-template<class PassT>
-void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
-                                                         BlockT *trueBlk,
-                                                         BlockT *falseBlk,
-                                                         BlockT *landBlk,
-                                                         bool detail) {
-  errs() << "head = BB" << headBlk->getNumber()
-         << " size = " << headBlk->size();
-  if (detail) {
-    errs() << "\n";
-    headBlk->print(errs());
-    errs() << "\n";
-  }
-
-  // The three optional blocks are reported in an identical format, differing
-  // only in the label that introduces them.
-  BlockT *const optionalBlks[3] = { trueBlk, falseBlk, landBlk };
-  const char *const labels[3] = { ", true = BB", ", false = BB",
-                                  ", land = BB" };
-
-  for (unsigned idx = 0; idx != 3; ++idx) {
-    BlockT *blk = optionalBlks[idx];
-    if (!blk)
-      continue;
-
-    errs() << labels[idx] << blk->getNumber() << " size = "
-           << blk->size() << " numPred = " << blk->pred_size();
-    if (detail) {
-      errs() << "\n";
-      blk->print(errs());
-      errs() << "\n";
-    }
-  }
-
-  errs() << "\n";
-} //showImproveSimpleJumpintoIf
-
-// Moves the instructions of trueBlk and/or falseBlk into the landing block,
-// guarded there by a fresh i32 virtual register (initReg) that records which
-// branch was taken.
-//
-// headBlk   - block holding the conditional branch.
-// trueBlk   - taken-branch block, may be NULL; at most one successor.
-// falseBlk  - not-taken-branch block, may be NULL; at most one successor.
-// plandBlk  - in/out: landing block. When *plandBlk is NULL on entry and a
-//             migration is needed, a new block is created, appended to the
-//             function and stored back through this pointer.
-//
-// Returns the number of newly created blocks (0 or 1). Returns 0 without
-// touching anything when trueBlk == falseBlk or when needMigrateBlock()
-// reports that neither block needs migrating.
-template<class PassT>
-int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
- BlockT *trueBlk,
- BlockT *falseBlk,
- BlockT **plandBlk) {
- bool migrateTrue = false;
- bool migrateFalse = false;
-
- BlockT *landBlk = *plandBlk;
-
- assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
- && (falseBlk == NULL || falseBlk->succ_size() <= 1));
-
- if (trueBlk == falseBlk) {
- return 0;
- }
-
-#if 0
- if (DEBUGME) {
- errs() << "improveSimpleJumpintoIf: ";
- showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
- }
-#endif
-
- // unsigned landPredSize = landBlk ? landBlk->pred_size() : 0;
- // May consider the # landBlk->pred_size() as it represents the number of
- // assignment initReg = .. needed to insert.
- migrateTrue = needMigrateBlock(trueBlk);
- migrateFalse = needMigrateBlock(falseBlk);
-
- if (!migrateTrue && !migrateFalse) {
- return 0;
- }
-
- // If we need to migrate either trueBlk or falseBlk, also migrate the other
- // one if it has more than one predecessor. Without doing this, its
- // predecessors other than headBlk would leave initReg undefined.
- if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
- migrateTrue = true;
- }
- if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
- migrateFalse = true;
- }
-
- if (DEBUGME) {
- errs() << "before improveSimpleJumpintoIf: ";
- showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
- //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
- }
-
- // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
- //
- // new: headBlk => if () {initReg = 1; org trueBlk branch} else
- // {initReg = 0; org falseBlk branch }
- // => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
- // => org landBlk
- // if landBlk->pred_size() > 2, put the above if-else inside
- // if (initReg !=2) {...}
- //
- // add initReg = initVal to headBlk
-
- const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
- unsigned initReg =
- funcRep->getRegInfo().createVirtualRegister(I32RC);
- // Only one side is migrated: headBlk presets initReg to the value that
- // selects the side that is NOT migrated (0 when the true side migrates,
- // 1 otherwise); the migrated side overwrites it further below.
- if (!migrateTrue || !migrateFalse) {
- int initVal = migrateTrue ? 0 : 1;
- CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
- }
-
- int numNewBlk = 0;
-
- // No landing block yet: create one and make it the successor of each
- // branch block, or of headBlk for a branch side that has no block.
- if (landBlk == NULL) {
- landBlk = funcRep->CreateMachineBasicBlock();
- funcRep->push_back(landBlk); //insert to function
-
- if (trueBlk) {
- trueBlk->addSuccessor(landBlk);
- } else {
- headBlk->addSuccessor(landBlk);
- }
-
- if (falseBlk) {
- falseBlk->addSuccessor(landBlk);
- } else {
- headBlk->addSuccessor(landBlk);
- }
-
- numNewBlk ++;
- }
-
- // More than two predecessors means landBlk is also entered from blocks
- // other than the two branch sides; those paths get initReg = 2 below.
- bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
-
- //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
- // Everything below is inserted before this ENDIF, in program order.
- typename BlockT::iterator insertPos =
- CFGTraits::getInstrPos
- (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep));
-
- // Outer guard for the extra-predecessor case: compare initReg with 2 and
- // open an IF_LOGICALZ on the comparison result.
- if (landBlkHasOtherPred) {
- unsigned immReg =
- funcRep->getRegInfo().createVirtualRegister(I32RC);
- CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
- unsigned cmpResReg =
- funcRep->getRegInfo().createVirtualRegister(I32RC);
-
- CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
- initReg, immReg);
- CFGTraits::insertCondBranchBefore(landBlk, insertPos,
- AMDGPU::IF_LOGICALZ_i32, passRep,
- cmpResReg, DebugLoc());
- }
-
- CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_LOGICALNZ_i32,
- passRep, initReg, DebugLoc());
-
- if (migrateTrue) {
- migrateInstruction(trueBlk, landBlk, insertPos);
- // need to unconditionally insert the assignment to ensure a path from its
- // predecessor rather than headBlk has valid value in initReg if
- // (initVal != 1).
- CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
- }
- CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep);
-
- if (migrateFalse) {
- migrateInstruction(falseBlk, landBlk, insertPos);
- // need to unconditionally insert the assignment to ensure a path from its
- // predecessor rather than headBlk has valid value in initReg if
- // (initVal != 0)
- CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
- }
- //CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
-
- if (landBlkHasOtherPred) {
- // add endif
- CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
-
- // put initReg = 2 to other predecessors of landBlk
- for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
- predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
- ++predIter) {
- BlockT *curBlk = *predIter;
- if (curBlk != trueBlk && curBlk != falseBlk) {
- CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
- }
- } //for
- }
- if (DEBUGME) {
- errs() << "result from improveSimpleJumpintoIf: ";
- showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
- //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
- }
-
- // update landBlk
- *plandBlk = landBlk;
-
- return numNewBlk;
-} //improveSimpleJumpintoIf
-
-// Turn the exit edge exitingBlk -> exitBlk into a structured break of
-// exitLoop that lands in landBlk.
-//
-// When exitingBlk sits in a loop other than exitLoop, a fresh i32 virtual
-// register is created, registered as a break-init register of exitLoop and
-// as a break-on register of every loop from exitingLoop outward up to (but
-// not including) exitLoop. The register (or INVALIDREGNUM when both loops are
-// the same) is then handed to mergeLoopbreakBlock().
-template<class PassT>
-void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
-                                             LoopT *exitingLoop,
-                                             BlockT *exitBlk,
-                                             LoopT *exitLoop,
-                                             BlockT *landBlk) {
-  if (DEBUGME) {
-    errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
-           << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
-  }
-  const TargetRegisterClass *I32RC =
-    TRI->getCFGStructurizerRegClass(MVT::i32);
-
-  RegiT breakReg = INVALIDREGNUM;
-  if (exitingLoop != exitLoop) {
-    breakReg = static_cast<int>
-      (funcRep->getRegInfo().createVirtualRegister(I32RC));
-    assert(breakReg != INVALIDREGNUM);
-    addLoopBreakInitReg(exitLoop, breakReg);
-
-    // Climb from the innermost loop to exitLoop, tagging each level.
-    LoopT *walkLoop = exitingLoop;
-    for (; walkLoop != exitLoop && walkLoop;
-         walkLoop = walkLoop->getParentLoop()) {
-      addLoopBreakOnReg(walkLoop, breakReg);
-    }
-    assert(walkLoop == exitLoop);
-  }
-
-  mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, breakReg);
-} //handleLoopbreak
-
-// Turn the back edge contingBlk -> contBlk into a structured continue of
-// contLoop.
-//
-// When contingBlk sits in a loop other than contLoop, a fresh i32 virtual
-// register is created and registered as a cont-init register of contLoop.
-// Every loop from contingLoop outward whose parent is not contLoop gets it as
-// a break-on register; the loop whose parent is contLoop gets it as a cont-on
-// register. The register (or INVALIDREGNUM when both loops are the same) is
-// then handed to settleLoopcontBlock().
-template<class PassT>
-void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
-                                                 LoopT *contingLoop,
-                                                 BlockT *contBlk,
-                                                 LoopT *contLoop) {
-  if (DEBUGME) {
-    errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
-           << " header = BB" << contBlk->getNumber() << "\n";
-
-    errs() << "Trying to continue loop-depth = "
-           << getLoopDepth(contLoop)
-           << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
-  }
-
-  RegiT contReg = INVALIDREGNUM;
-  const TargetRegisterClass *I32RC =
-    TRI->getCFGStructurizerRegClass(MVT::i32);
-
-  if (contingLoop != contLoop) {
-    contReg = static_cast<int>
-      (funcRep->getRegInfo().createVirtualRegister(I32RC));
-    assert(contReg != INVALIDREGNUM);
-    addLoopContInitReg(contLoop, contReg);
-
-    // Intermediate loops must be *broken* out of (not continued) until the
-    // loop directly nested in contLoop is reached.
-    LoopT *walkLoop = contingLoop;
-    for (; walkLoop && walkLoop->getParentLoop() != contLoop;
-         walkLoop = walkLoop->getParentLoop()) {
-      addLoopBreakOnReg(walkLoop, contReg); //not addLoopContOnReg
-    }
-    assert(walkLoop && walkLoop->getParentLoop() == contLoop);
-    addLoopContOnReg(walkLoop, contReg);
-  }
-
-  settleLoopcontBlock(contingBlk, contBlk, contReg);
-} //handleLoopcontBlock
-
-// Fold srcBlk into dstBlk: srcBlk's instructions (from its first non-debug
-// instruction on) are appended to dstBlk, the dstBlk -> srcBlk edge is
-// dropped, dstBlk inherits srcBlk's successor list, srcBlk loses its own
-// successors and is finally retired.
-template<class PassT>
-void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
-  if (DEBUGME) {
-    errs() << "serialPattern BB" << dstBlk->getNumber()
-           << " <= BB" << srcBlk->getNumber() << "\n";
-  }
-
-  // Step 1: move the instructions.
-  dstBlk->splice(dstBlk->end(),
-                 srcBlk,
-                 FirstNonDebugInstr(srcBlk),
-                 srcBlk->end());
-
-  // Step 2: rewire the CFG so dstBlk takes over srcBlk's place.
-  dstBlk->removeSuccessor(srcBlk);
-  CFGTraits::cloneSuccessorList(dstBlk, srcBlk);
-
-  // Step 3: detach and retire the emptied block.
-  removeSuccessor(srcBlk);
-  retireBlock(dstBlk, srcBlk);
-} //mergeSerialBlock
-
-// Replace the conditional branch branchInstr in curBlk by the structured
-// sequence
-//
-//     if cond
-//       <instructions of trueBlk>
-//     else
-//       <instructions of falseBlk>
-//     endif
-//
-// trueBlk and falseBlk may each be NULL (empty arm). Merged arm blocks are
-// unlinked from curBlk and from landBlk and then retired. curBlk gains
-// landBlk as successor only when landBlk and both arms are non-NULL.
-template<class PassT>
-void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
-                                                  BlockT *curBlk,
-                                                  BlockT *trueBlk,
-                                                  BlockT *falseBlk,
-                                                  BlockT *landBlk) {
-  if (DEBUGME) {
-    errs() << "ifPattern BB" << curBlk->getNumber() << "{ ";
-    if (trueBlk != NULL)
-      errs() << "BB" << trueBlk->getNumber();
-    errs() << " } else " << "{ ";
-    if (falseBlk != NULL)
-      errs() << "BB" << falseBlk->getNumber();
-    errs() << " }\n " << "landBlock: ";
-    if (landBlk != NULL)
-      errs() << "BB" << landBlk->getNumber();
-    else
-      errs() << "NULL";
-    errs() << "\n";
-  }
-
-  // Capture what is needed from the branch before the block is rewritten.
-  int oldOpcode = branchInstr->getOpcode();
-  DebugLoc branchDL = branchInstr->getDebugLoc();
-
-  // All new code goes in front of the old branch, which is erased last.
-  typename BlockT::iterator branchInstrPos =
-    CFGTraits::getInstrPos(curBlk, branchInstr);
-  CFGTraits::insertCondBranchBefore(branchInstrPos,
-                                    CFGTraits::getBranchNzeroOpcode(oldOpcode),
-                                    passRep,
-                                    branchDL);
-
-  // "then" arm
-  if (trueBlk != NULL) {
-    curBlk->splice(branchInstrPos, trueBlk,
-                   FirstNonDebugInstr(trueBlk), trueBlk->end());
-    curBlk->removeSuccessor(trueBlk);
-    if (landBlk != NULL && trueBlk->succ_size() != 0)
-      trueBlk->removeSuccessor(landBlk);
-    retireBlock(curBlk, trueBlk);
-  }
-  CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep);
-
-  // "else" arm
-  if (falseBlk != NULL) {
-    curBlk->splice(branchInstrPos, falseBlk,
-                   FirstNonDebugInstr(falseBlk), falseBlk->end());
-    curBlk->removeSuccessor(falseBlk);
-    if (landBlk != NULL && falseBlk->succ_size() != 0)
-      falseBlk->removeSuccessor(landBlk);
-    retireBlock(curBlk, falseBlk);
-  }
-  CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
-
-  branchInstr->eraseFromParent();
-
-  if (landBlk != NULL && trueBlk != NULL && falseBlk != NULL)
-    curBlk->addSuccessor(landBlk);
-} //mergeIfthenelseBlock
-
-// Wraps dstBlk (the loop header block, per the debug message below) in
-// WHILELOOP/ENDLOOP, emits the register initialisations and the
-// break/continue checks recorded in loopLand, and then appends the loop's
-// landing block to dstBlk.
-//
-// dstBlk   - block that receives the loop markers and the landing block's
-//            instructions and successors.
-// loopLand - per-loop record providing landBlk and the register sets
-//            (contInitRegs, breakInitRegs, endbranchInitRegs, breakOnRegs,
-//            contOnRegs) consulted below.
-//
-// On return landBlk has no successors and has been retired.
-template<class PassT>
-void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
- LoopLandInfo *loopLand) {
- BlockT *landBlk = loopLand->landBlk;
-
- if (DEBUGME) {
- errs() << "loopPattern header = BB" << dstBlk->getNumber()
- << " land = BB" << landBlk->getNumber() << "\n";
- }
-
- // Loop contInitRegs are init at the beginning of the loop.
- // (These assignments are inserted before WHILELOOP is inserted below.)
- for (typename std::set<RegiT>::const_iterator iter =
- loopLand->contInitRegs.begin(),
- iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
- CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
- }
-
- /* We last inserted the DebugLoc in the
- * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk.
- * Search for the DebugLoc in that statement.
- * If not found, we have to insert the empty/default DebugLoc. */
- InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
- DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
-
- CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak);
- // Loop breakInitRegs are init before entering the loop.
- for (typename std::set<RegiT>::const_iterator iter =
- loopLand->breakInitRegs.begin(),
- iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter)
- {
- CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
- }
- // Loop endbranchInitRegs are init before entering the loop.
- for (typename std::set<RegiT>::const_iterator iter =
- loopLand->endbranchInitRegs.begin(),
- iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
- CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
- }
-
- /* We last inserted the DebugLoc in the continue statement in the current dstBlk.
- * Search for the DebugLoc in the continue statement.
- * If not found, we have to insert the empty/default DebugLoc. */
- InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
- DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
-
- CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue);
- // Loop breakOnRegs are checked after the ENDLOOP: break the loop outside this
- // loop.
- for (typename std::set<RegiT>::const_iterator iter =
- loopLand->breakOnRegs.begin(),
- iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
- CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::BREAK_LOGICALNZ_i32, passRep,
- *iter);
- }
-
- // Loop contOnRegs are checked after the ENDLOOP: cont the loop outside this
- // loop.
- for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
- iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
- CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32,
- passRep, *iter);
- }
-
- // Append the landing block's instructions after everything emitted above.
- dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
-
- // dstBlk takes over the landing block's successors.
- for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
- iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
- dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of.
- }
-
- removeSuccessor(landBlk);
- retireBlock(dstBlk, landBlk);
-} //mergeLooplandBlock
-
-// Rewrite the conditional branch that leaves the loop from exitingBlk.
-//
-// Simple case (exitBlk is the landing block and no flag register is needed):
-// the branch becomes a single conditional break.
-//
-// General case: exitingBlk is transformed to
-//     if ( ) {
-//        exitBlk's instructions   (only if exitBlk != exitLandBlk)
-//        setReg = 1               (only if setReg != INVALIDREGNUM)
-//        break
-//     } endif
-//
-// Afterwards the edge exitingBlk -> exitBlk is removed, and an exitBlk that
-// differs from exitLandBlk is unlinked from exitLandBlk and retired.
-template<class PassT>
-void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
-                                                 BlockT *exitBlk,
-                                                 BlockT *exitLandBlk,
-                                                 RegiT setReg) {
-  if (DEBUGME) {
-    errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
-           << " exit = BB" << exitBlk->getNumber()
-           << " land = BB" << exitLandBlk->getNumber() << "\n";
-  }
-
-  InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
-  assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
-
-  DebugLoc DL = branchInstr->getDebugLoc();
-
-  BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
-  int oldOpcode = branchInstr->getOpcode();
-
-  typename BlockT::iterator branchInstrPos =
-    CFGTraits::getInstrPos(exitingBlk, branchInstr);
-
-  const bool exitIsLand = (exitBlk == exitLandBlk);
-  const bool hasSetReg = (setReg != INVALIDREGNUM);
-  // The exit is taken on the "true" outcome of the branch iff its true
-  // target is exitBlk; this selects the non-zero vs. zero opcode flavour.
-  const bool exitOnTrue = (trueBranch == exitBlk);
-
-  if (exitIsLand && !hasSetReg) {
-    // break_logical
-    int newOpcode = exitOnTrue ? CFGTraits::getBreakNzeroOpcode(oldOpcode)
-                               : CFGTraits::getBreakZeroOpcode(oldOpcode);
-    CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
-  } else {
-    // if_logical ... break ... endif
-    int newOpcode = exitOnTrue ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
-                               : CFGTraits::getBranchZeroOpcode(oldOpcode);
-    CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
-
-    if (!exitIsLand) {
-      // splice inserts before branchInstrPos, i.e. inside the new "if".
-      exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
-                         exitBlk->end());
-    }
-    if (hasSetReg) {
-      CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
-    }
-    CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep);
-    CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
-  }
-
-  // The original branch is no longer referenced and can go.
-  branchInstr->eraseFromParent();
-
-  // Update successor lists and retire the merged exit block.
-  exitingBlk->removeSuccessor(exitBlk);
-  if (!exitIsLand) {
-    exitBlk->removeSuccessor(exitLandBlk);
-    retireBlock(exitingBlk, exitBlk);
-  }
-} //mergeLoopbreakBlock
-
-// Emit the structured "continue" for contingBlk, whose back edge targets
-// contBlk.
-//
-// If contingBlk has no loop-end branch instruction, a BREAK (preceded by
-// setReg = 1) or a CONTINUE is simply appended, using the last debug location
-// found in the block.
-//
-// Otherwise the conditional branch is rewritten either to a single
-// conditional continue (possible only when no flag register is involved and
-// the branch is the last instruction of the block), or to
-//     if () {
-//        <instructions that followed the branch>
-//        continue            -- or: setReg = 1; break
-//     } endif
-// and the old branch is erased.
-template<class PassT>
-void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
-                                                 BlockT *contBlk,
-                                                 RegiT setReg) {
-  if (DEBUGME) {
-    errs() << "settleLoopcontBlock conting = BB"
-           << contingBlk->getNumber()
-           << ", cont = BB" << contBlk->getNumber() << "\n";
-  }
-
-  const bool hasSetReg = (setReg != INVALIDREGNUM);
-  InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
-
-  if (branchInstr == NULL) {
-    // The branch instruction is already gone; fall back to the last debug
-    // location present in the block.
-    if (hasSetReg) {
-      CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
-      // insertEnd so that phi-moves, if any, stay ahead of the new instr.
-      CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep,
-                                CFGTraits::getLastDebugLocInBB(contingBlk));
-    } else {
-      // insertEnd so that phi-moves, if any, stay ahead of the new instr.
-      CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep,
-                                CFGTraits::getLastDebugLocInBB(contingBlk));
-    }
-    return;
-  }
-
-  assert(CFGTraits::isCondBranch(branchInstr));
-  typename BlockT::iterator branchInstrPos =
-    CFGTraits::getInstrPos(contingBlk, branchInstr);
-  BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
-  int oldOpcode = branchInstr->getOpcode();
-  DebugLoc DL = branchInstr->getDebugLoc();
-
-  const bool contOnTrue = (trueBranch == contBlk);
-  const bool useContinueLogical =
-    !hasSetReg && (&*contingBlk->rbegin()) == branchInstr;
-
-  if (useContinueLogical) {
-    int branchOpcode =
-      contOnTrue ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
-                 : CFGTraits::getContinueZeroOpcode(oldOpcode);
-    CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep,
-                                      DL);
-  } else {
-    int branchOpcode =
-      contOnTrue ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
-                 : CFGTraits::getBranchZeroOpcode(oldOpcode);
-    CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep,
-                                      DL);
-
-    if (hasSetReg) {
-      CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
-      // insertEnd so that phi-moves, if any, stay ahead of the new instr.
-      CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL);
-    } else {
-      // insertEnd so that phi-moves, if any, stay ahead of the new instr.
-      CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL);
-    }
-
-    CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL);
-  }
-
-  branchInstr->eraseFromParent();
-} //settleLoopcontBlock
-
-// The blocks in exitBlkSet lie on break paths of loopRep. Before their code
-// can be placed inside the body of loopRep, check whether the ends of those
-// paths were earlier settled as continue blocks of parentLoopRep (i.e. they
-// carry a continue instruction).
-//
-// If every path end qualifies, a new block newBlk is created and
-//   (1) newBlk becomes a successor of every path-end block,
-//   (2) the continue instruction of each path-end block is erased,
-//   (3) a CONTINUE instruction is emitted in newBlk.
-// newBlk is returned. If any path has no single end, or an end block without
-// a continue instruction, NULL is returned and nothing is modified.
-//
-// parentLoopRep and loopRep are not referenced in the body.
-template<class PassT>
-typename CFGStructurizer<PassT>::BlockT *
-CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
-                                              LoopT *loopRep,
-                                              std::set<BlockT *> &exitBlkSet,
-                                              BlockT *exitLandBlk) {
-  typedef std::set<BlockT *> BlockSet;
-  typedef typename BlockSet::const_iterator BlockSetIter;
-
-  // Phase 1: validate all paths before touching the CFG.
-  BlockSet pathEnds;
-  for (BlockSetIter it = exitBlkSet.begin(), itEnd = exitBlkSet.end();
-       it != itEnd; ++it) {
-    BlockT *pathEnd = singlePathEnd(*it, exitLandBlk);
-
-    if (pathEnd == NULL || CFGTraits::getContinueInstr(pathEnd) == NULL)
-      return NULL;
-
-    pathEnds.insert(pathEnd);
-  }
-
-  // Phase 2: create the shared continue block.
-  BlockT *newBlk = funcRep->CreateMachineBasicBlock();
-  funcRep->push_back(newBlk); //insert to function
-  CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep);
-  SHOWNEWBLK(newBlk, "New continue block: ");
-
-  // Phase 3: redirect every path end to the new block.
-  for (BlockSetIter it = pathEnds.begin(), itEnd = pathEnds.end();
-       it != itEnd; ++it) {
-    BlockT *pathEnd = *it;
-    InstrT *contInstr = CFGTraits::getContinueInstr(pathEnd);
-    if (contInstr != NULL)
-      contInstr->eraseFromParent();
-
-    pathEnd->addSuccessor(newBlk);
-    if (DEBUGME) {
-      errs() << "Add new continue Block to BB"
-             << pathEnd->getNumber() << " successors\n";
-    }
-  }
-
-  return newBlk;
-} //relocateLoopcontBlock
-
-
-// LoopEndbranchBlock is a BB created by the CFGStructurizer to use as
-// LoopLandBlock. This BB branches on the loop endBranchInit register to the
-// paths corresponding to the loop exiting branches.
-//
-// loopRep     - loop whose exits are being funnelled.
-// exitingBlks - blocks inside the loop with an exit edge; at least two.
-// exitBlks    - exit block for each entry of exitingBlks (parallel vector).
-//
-// A fresh i32 virtual register (endBranchReg) is registered with
-// addLoopEndbranchInitReg(); exiting block i (i >= 1) assigns it the value i.
-// All exiting blocks are redirected to one new landing block, which starts a
-// chain of compare-and-branch blocks that dispatch back to the original exit
-// blocks. Returns that new landing block.
-
-template<class PassT>
-typename CFGStructurizer<PassT>::BlockT *
-CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
- BlockTSmallerVector &exitingBlks,
- BlockTSmallerVector &exitBlks) {
- const AMDILInstrInfo *tii =
- static_cast<const AMDILInstrInfo *>(passRep->getTargetInstrInfo());
- const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
-
- RegiT endBranchReg = static_cast<int>
- (funcRep->getRegInfo().createVirtualRegister(I32RC));
- assert(endBranchReg >= 0);
-
- // reg = 0 before entering the loop
- addLoopEndbranchInitReg(loopRep, endBranchReg);
-
- uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
- assert(numBlks >=2 && numBlks == exitBlks.size());
-
- // The pre* variables track the previous exit path while the loop below
- // walks paths 1..n-1; they start out describing path 0.
- BlockT *preExitingBlk = exitingBlks[0];
- BlockT *preExitBlk = exitBlks[0];
- BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
- funcRep->push_back(preBranchBlk); //insert to function
- SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
-
- // The first branch block doubles as the landing block for all exits.
- BlockT *newLandBlk = preBranchBlk;
-
- // Redirect exit path 0 to the new landing block.
- CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
- newLandBlk);
- preExitingBlk->removeSuccessor(preExitBlk);
- preExitingBlk->addSuccessor(newLandBlk);
-
- //it is redundant to add reg = 0 to exitingBlks[0]
-
- // For 1..n th exiting path (the last iteration handles two paths) create the
- // branch to the previous path and the current path.
- for (uint32_t i = 1; i < numBlks; ++i) {
- BlockT *curExitingBlk = exitingBlks[i];
- BlockT *curExitBlk = exitBlks[i];
- BlockT *curBranchBlk;
-
- // The last path needs no further dispatch block: the "else" side of the
- // previous branch block goes straight to the last exit block.
- if (i == numBlks - 1) {
- curBranchBlk = curExitBlk;
- } else {
- curBranchBlk = funcRep->CreateMachineBasicBlock();
- funcRep->push_back(curBranchBlk); //insert to function
- SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
- }
-
- // Add reg = i to exitingBlks[i].
- CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
- endBranchReg, i);
-
- // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge
- // (exitingBlks[i], newLandBlk).
- CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
- newLandBlk);
- curExitingBlk->removeSuccessor(curExitBlk);
- curExitingBlk->addSuccessor(newLandBlk);
-
- // add to preBranchBlk the branch instruction:
- // if (endBranchReg == preVal)
- // preExitBlk
- // else
- // curBranchBlk
- //
- // preValReg = i - 1
-
- DebugLoc DL;
- RegiT preValReg = static_cast<int>
- (funcRep->getRegInfo().createVirtualRegister(I32RC));
-
- preBranchBlk->insert(preBranchBlk->begin(),
- tii->getMovImmInstr(preBranchBlk->getParent(), preValReg,
- i - 1));
-
- // condResReg = (endBranchReg == preValReg)
- RegiT condResReg = static_cast<int>
- (funcRep->getRegInfo().createVirtualRegister(I32RC));
- BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg)
- .addReg(endBranchReg).addReg(preValReg);
-
- BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32))
- .addMBB(preExitBlk).addReg(condResReg);
-
- preBranchBlk->addSuccessor(preExitBlk);
- preBranchBlk->addSuccessor(curBranchBlk);
-
- // Update preExitingBlk, preExitBlk, preBranchBlk.
- preExitingBlk = curExitingBlk;
- preExitBlk = curExitBlk;
- preBranchBlk = curBranchBlk;
-
- } //end for 1 .. n blocks
-
- return newLandBlk;
-} //addLoopEndbranchBlock
-
-// Classify the chain of single-successor blocks that starts at srcBlk with
-// respect to dstBlk (which must be non-NULL):
-//
-//   SinglePath_InPath    - srcBlk is dstBlk, or the chain reaches dstBlk.
-//   SinglePath_NotInPath - the chain ends in a block without successors and
-//                          never meets dstBlk.
-//   Not_SinglePath       - the chain hits a block with several successors, a
-//                          NULL block, or (when allowSideEntry is false) a
-//                          block with more than one predecessor.
-template<class PassT>
-typename CFGStructurizer<PassT>::PathToKind
-CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
-                                     bool allowSideEntry) {
-  assert(dstBlk);
-
-  BlockT *cursor = srcBlk;
-  if (cursor == dstBlk)
-    return SinglePath_InPath;
-
-  for (;;) {
-    if (cursor == NULL)
-      return Not_SinglePath;
-
-    const unsigned numSucc = cursor->succ_size();
-    if (numSucc == 0)
-      return SinglePath_NotInPath;
-    if (numSucc != 1)
-      return Not_SinglePath;
-
-    // Exactly one way forward: take it.
-    cursor = *cursor->succ_begin();
-    if (cursor == dstBlk)
-      return SinglePath_InPath;
-
-    if (!allowSideEntry && cursor->pred_size() > 1)
-      return Not_SinglePath;
-  }
-} //singlePathTo
-
-// Find the end of the single-successor chain that starts at srcBlk.
-//
-//  * If the chain reaches dstBlk, return the last block before dstBlk
-//    (srcBlk itself when srcBlk == dstBlk).
-//  * If the chain ends in a block without successors and never meets dstBlk,
-//    return that last block.
-//  * Otherwise (a block with several successors is hit, or a side entry is
-//    found while allowSideEntry is false) return NULL.
-//
-// srcBlk and dstBlk must be non-NULL.
-//
-// Fix: the walk previously never compared against dstBlk (it only had a
-// NULL test on the freshly loaded successor), so a chain passing through
-// dstBlk was followed beyond it, contradicting the contract above. The walk
-// now stops as soon as the next block is dstBlk, mirroring singlePathTo().
-template<class PassT>
-typename CFGStructurizer<PassT>::BlockT *
-CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
-                                      bool allowSideEntry) {
-  assert(dstBlk);
-
-  if (srcBlk == dstBlk) {
-    return srcBlk;
-  }
-
-  if (srcBlk->succ_size() == 0) {
-    return srcBlk;
-  }
-
-  while (srcBlk && srcBlk->succ_size() == 1) {
-    BlockT *preBlk = srcBlk;
-
-    srcBlk = *srcBlk->succ_begin();
-    // Reached the destination (or, defensively, a NULL successor): the
-    // block we came from is the last one on the path.
-    if (srcBlk == NULL || srcBlk == dstBlk) {
-      return preBlk;
-    }
-
-    if (!allowSideEntry && srcBlk->pred_size() > 1) {
-      return NULL;
-    }
-  }
-
-  // Chain ended without meeting dstBlk: only a dead end counts as a path end.
-  if (srcBlk && srcBlk->succ_size() == 0) {
-    return srcBlk;
-  }
-
-  return NULL;
-
-} //singlePathEnd
-
-// Walk the single-successor chain from srcBlk up to (not including) dstBlk and
-// clone every block on it that has more than one predecessor, so that the
-// chain entered from preBlk no longer has side entries.  Returns the number of
-// blocks that were cloned.
-template<class PassT>
-int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
-                                               BlockT *dstBlk) {
-  assert(preBlk->isSuccessor(srcBlk));
-
-  int numCloned = 0;
-  for (; srcBlk && srcBlk != dstBlk; srcBlk = *srcBlk->succ_begin()) {
-    assert(srcBlk->succ_size() == 1);
-    if (srcBlk->pred_size() > 1) {
-      // Continue the walk from the private copy made for preBlk.
-      srcBlk = cloneBlockForPredecessor(srcBlk, preBlk);
-      ++numCloned;
-    }
-    preBlk = srcBlk;
-  }
-
-  return numCloned;
-} //cloneOnSideEntryTo
-
-// Give predBlk its own private copy of curBlk.
-//
-// curBlk's instructions are cloned into a new block, predBlk's branch
-// instruction and successor list are redirected from curBlk to the clone, and
-// the clone receives the same successors as curBlk.  numClonedInstr grows by
-// the size of curBlk.  Returns the newly created block.
-template<class PassT>
-typename CFGStructurizer<PassT>::BlockT *
-CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
-                                                 BlockT *predBlk) {
-  assert(predBlk->isSuccessor(curBlk) &&
-         "curBlk is not a successor of predBlk");
-
-  BlockT *cloneBlk = CFGTraits::clone(curBlk); //clone instructions
-  // Arguments are (srcBlk, oldBlk, newBlk).
-  CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
-
-  predBlk->removeSuccessor(curBlk);
-  predBlk->addSuccessor(cloneBlk);
-
-  // add all successor to cloneBlk
-  CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
-
-  numClonedInstr += curBlk->size();
-
-  if (DEBUGME) {
-    errs() << "Cloned block: " << "BB"
-           << curBlk->getNumber() << " size " << curBlk->size() << "\n";
-  }
-
-  SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
-
-  return cloneBlk;
-} //cloneBlockForPredecessor
-
-// Return the successor of exitingBlk that lies outside loopRep.  Asserts that
-// exactly one such successor exists.
-template<class PassT>
-typename CFGStructurizer<PassT>::BlockT *
-CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
-                                               BlockT *exitingBlk) {
-  BlockT *exitBlk = NULL;
-
-  typename BlockT::succ_iterator succIt = exitingBlk->succ_begin();
-  typename BlockT::succ_iterator succEnd = exitingBlk->succ_end();
-  while (succIt != succEnd) {
-    BlockT *candidate = *succIt;
-    ++succIt;
-    if (loopRep->contains(candidate))
-      continue;
-    assert(exitBlk == NULL);
-    exitBlk = candidate;
-  }
-
-  assert(exitBlk != NULL);
-
-  return exitBlk;
-} //exitingBlock2ExitBlock
-
-// Move the instructions of srcBlk into dstBlk in front of insertPos.  When
-// srcBlk ends in a branch (as reported by getNormalBlockBranchInstr) the
-// branch and anything after it stay behind; otherwise everything is moved.
-template<class PassT>
-void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
-                                                BlockT *dstBlk,
-                                                InstrIterator insertPos) {
-  //look for the input branchinstr, not the AMDIL branchinstr
-  InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
-
-  InstrIterator stopAt;
-  if (branchInstr != NULL) {
-    if (DEBUGME) {
-      errs() << "migrateInstruction see branch instr\n" ;
-      branchInstr->dump();
-    }
-    stopAt = CFGTraits::getInstrPos(srcBlk, branchInstr);
-  } else {
-    if (DEBUGME) {
-      errs() << "migrateInstruction don't see branch instr\n" ;
-    }
-    stopAt = srcBlk->end();
-  }
-
-  if (DEBUGME) {
-    errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
-           << "srcSize = " << srcBlk->size() << "\n";
-  }
-
-  // The range [srcBlk->begin(), stopAt) lands right before insertPos.
-  dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), stopAt);
-
-  if (DEBUGME) {
-    errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
-           << "srcSize = " << srcBlk->size() << "\n";
-  }
-} //migrateInstruction
-
-// normalizeInfiniteLoopExit gives a loop whose latch ends in an unconditional
-// branch an artificial exit.  It changes
-// B1:
-// uncond_br LoopHeader
-//
-// to
-// B1:
-// cond_br 1 LoopHeader dummyExit
-// and returns the newly added dummy exit block.  NULL is returned, and nothing
-// is changed, when the loop has no header or latch or when the latch does not
-// end in an unconditional branch.
-//
-template<class PassT>
-typename CFGStructurizer<PassT>::BlockT *
-CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
- BlockT *loopHeader;
- BlockT *loopLatch;
- loopHeader = LoopRep->getHeader();
- loopLatch = LoopRep->getLoopLatch();
- BlockT *dummyExitBlk = NULL;
- const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
- if (loopHeader!=NULL && loopLatch!=NULL) {
- InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
- if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
- dummyExitBlk = funcRep->CreateMachineBasicBlock();
- funcRep->push_back(dummyExitBlk); //insert to function
- SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
-
- if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";
-
- // Materialize the constant 1 in a fresh virtual register and branch on it;
- // both new instructions are placed in front of the old branch.
- typename BlockT::iterator insertPos =
- CFGTraits::getInstrPos(loopLatch, branchInstr);
- unsigned immReg =
- funcRep->getRegInfo().createVirtualRegister(I32RC);
- CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
- InstrT *newInstr =
- CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep);
- MachineInstrBuilder(newInstr).addMBB(loopHeader).addReg(immReg, false);
-
- SHOWNEWINSTR(newInstr);
-
- // The old unconditional branch is dropped only after its replacement has
- // been inserted; the dummy block then becomes a successor of the latch.
- branchInstr->eraseFromParent();
- loopLatch->addSuccessor(dummyExitBlk);
- }
- }
-
- return dummyExitBlk;
-} //normalizeInfiniteLoopExit
-
-// Erase every trailing unconditional branch of srcBlk.
-//
-// A loop is used because two unconditional branches were observed in a single
-// block (test_fc_do_while_or.c); the upstream producer should be fixed so the
-// loop can go away.
-template<class PassT>
-void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
-  for (;;) {
-    InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk);
-    if (!branchInstr || !CFGTraits::isUncondBranch(branchInstr))
-      break;
-
-    if (DEBUGME) {
-      errs() << "Removing unconditional branch instruction" ;
-      branchInstr->dump();
-    }
-    branchInstr->eraseFromParent();
-  }
-} //removeUnconditionalBranch
-
-// When srcBlk lists the same block twice as its two successors, the
-// conditional branch decides nothing: erase it and drop one of the duplicate
-// successor entries.
-template<class PassT>
-void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
-  if (srcBlk->succ_size() != 2)
-    return;
-
-  typename BlockT::succ_iterator succIt = srcBlk->succ_begin();
-  BlockT *blk1 = *succIt;
-  ++succIt;
-  BlockT *blk2 = *succIt;
-  if (blk1 != blk2)
-    return;
-
-  InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
-  assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
-  if (DEBUGME) {
-    errs() << "Removing unneeded conditional branch instruction" ;
-    branchInstr->dump();
-  }
-  branchInstr->eraseFromParent();
-  SHOWNEWBLK(blk1, "Removing redundant successor");
-  srcBlk->removeSuccessor(blk1);
-} //removeRedundantConditionalBranch
-
-// Funnel all return blocks into one new exit block.
-//
-// A new block holding a single AMDGPU::RETURN is appended to the function.
-// For every block in retBlks the trailing return instruction (if any) is
-// erased and the new block is added as a successor.
-template<class PassT>
-void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
- DEFAULT_VEC_SLOTS> &retBlks) {
- BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
- funcRep->push_back(dummyExitBlk); //insert to function
- CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep);
-
- for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter =
- retBlks.begin(),
- iterEnd = retBlks.end(); iter != iterEnd; ++iter) {
- BlockT *curBlk = *iter;
- InstrT *curInstr = CFGTraits::getReturnInstr(curBlk);
- if (curInstr) {
- curInstr->eraseFromParent();
- }
-// Disabled code: it would bypass an empty return block that has a single
-// predecessor by attaching that predecessor to the dummy exit block instead.
-#if 0
- if (curBlk->size()==0 && curBlk->pred_size() == 1) {
- if (DEBUGME) {
- errs() << "Replace empty block BB" << curBlk->getNumber()
- << " with dummyExitBlock\n";
- }
- BlockT *predb = *curBlk->pred_begin();
- predb->removeSuccessor(curBlk);
- curBlk = predb;
- } //handle empty curBlk
-#endif
- curBlk->addSuccessor(dummyExitBlk);
- if (DEBUGME) {
- errs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
- << " successors\n";
- }
- } //for
-
- SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
-} //addDummyExitBlock
-
-// Detach srcBlk from all of its successors, one entry at a time.
-template<class PassT>
-void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) {
-  for (; srcBlk->succ_size() != 0; )
-    srcBlk->removeSuccessor(*srcBlk->succ_begin());
-}
-
-// Store sccNum in the BlockInfo record of srcBlk, allocating the record the
-// first time the block is seen.
-template<class PassT>
-void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) {
-  BlockInfo *&info = blockInfoMap[srcBlk];
-  if (!info)
-    info = new BlockInfo();
-
-  info->sccNum = sccNum;
-}
-
-// Return the SCC number recorded for srcBlk, or INVALIDSCCNUM when the block
-// has no BlockInfo record.
-template<class PassT>
-int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
-  BlockInfo *info = blockInfoMap[srcBlk];
-  if (info == NULL)
-    return INVALIDSCCNUM;
-  return info->sccNum;
-}
-
-// Flag srcBlk as retired in its BlockInfo record (allocated on demand).  The
-// block must already be disconnected from the CFG.  dstBlk is not used.
-template<class PassT>
-void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
-  if (DEBUGME) {
-    errs() << "Retiring BB" << srcBlk->getNumber() << "\n";
-  }
-
-  BlockInfo *&info = blockInfoMap[srcBlk];
-  if (!info)
-    info = new BlockInfo();
-  info->isRetired = true;
-
-  assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
-         && "can't retire block yet");
-}
-
-// True when srcBlk has a BlockInfo record whose isRetired flag is set.
-template<class PassT>
-bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) {
-  BlockInfo *info = blockInfoMap[srcBlk];
-  if (info == NULL)
-    return false;
-  return info->isRetired;
-}
-
-// Decide whether curBlk is the header of a loop that is still active.
-//
-// Starting with the innermost loop around curBlk and moving outwards while
-// curBlk remains the header, a loop counts as active when it has no
-// LoopLandInfo yet or when its landing block has not been retired.
-template<class PassT>
-bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) {
-  for (LoopT *loopRep = loopInfo->getLoopFor(curBlk);
-       loopRep && loopRep->getHeader() == curBlk;
-       loopRep = loopRep->getParentLoop()) {
-    LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
-    if (loopLand == NULL)
-      return true;
-
-    BlockT *landBlk = loopLand->landBlk;
-    assert(landBlk);
-    if (!isRetiredBlock(landBlk))
-      return true;
-  }
-
-  return false;
-} //isActiveLoophead
-
-// Size heuristic on a block with several predecessors: true only when the
-// block holds more than 30 instructions and duplicating it for each extra
-// predecessor would add more than 100 instructions.
-template<class PassT>
-bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) {
-  const unsigned blockSizeThreshold = 30;
-  const unsigned cloneInstrThreshold = 100;
-
-  if (blk == NULL || blk->pred_size() <= 1)
-    return false;
-
-  const unsigned blkSize = blk->size();
-  if (blkSize <= blockSizeThreshold)
-    return false;
-
-  return blkSize * (blk->pred_size() - 1) > cloneInstrThreshold;
-} //needMigrateBlock
-
-// Record the landing block of loopRep and return it.
-//
-// When every predecessor of landBlk is either inside the loop or a member of
-// exitBlkSet, landBlk itself is recorded.  Otherwise a new block is created in
-// front of landBlk, the in-path predecessors are redirected to it, entries of
-// exitBlks equal to landBlk are replaced by it, and the new block is recorded.
-template<class PassT>
-typename CFGStructurizer<PassT>::BlockT *
-CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
- BlockTSmallerVector &exitBlks,
- std::set<BlockT *> &exitBlkSet) {
- SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks; //in exit path blocks
-
- // Collect the predecessors that reach landBlk through the loop or one of its
- // exit blocks.
- for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
- predIterEnd = landBlk->pred_end();
- predIter != predIterEnd; ++predIter) {
- BlockT *curBlk = *predIter;
- if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
- inpathBlks.push_back(curBlk);
- }
- } //for
-
- //if landBlk has predecessors that are not in the given loop,
- //create a new block
- BlockT *newLandBlk = landBlk;
- if (inpathBlks.size() != landBlk->pred_size()) {
- newLandBlk = funcRep->CreateMachineBasicBlock();
- funcRep->push_back(newLandBlk); //insert to function
- newLandBlk->addSuccessor(landBlk);
- // Iterating the collected copy keeps the loop independent of the
- // predecessor list that removeSuccessor() is changing.
- for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter =
- inpathBlks.begin(),
- iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
- BlockT *curBlk = *iter;
- CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
- //srcBlk, oldBlk, newBlk
- curBlk->removeSuccessor(landBlk);
- curBlk->addSuccessor(newLandBlk);
- }
- for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
- if (exitBlks[i] == landBlk) {
- exitBlks[i] = newLandBlk;
- }
- }
- SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
- }
-
- setLoopLandBlock(loopRep, newLandBlk);
-
- return newLandBlk;
-} // recordLoopLandBlock
-
-// Set the landing block of loopRep; it must not have been set before.  A NULL
-// blk requests a new empty block, which is appended to the function and used
-// as the landing block.
-template<class PassT>
-void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
-  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
-  if (!theEntry)
-    theEntry = new LoopLandInfo();
-  assert(theEntry->landBlk == NULL);
-
-  if (!blk) {
-    blk = funcRep->CreateMachineBasicBlock();
-    funcRep->push_back(blk); //insert to function
-    SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: ");
-  }
-  theEntry->landBlk = blk;
-
-  if (DEBUGME) {
-    errs() << "setLoopLandBlock loop-header = BB"
-           << loopRep->getHeader()->getNumber()
-           << " landing-block = BB" << blk->getNumber() << "\n";
-  }
-} // setLoopLandBlock
-
-// Add regNum to the breakOnRegs set of loopRep's LoopLandInfo, allocating the
-// record on first use.
-template<class PassT>
-void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
-  LoopLandInfo *&landInfo = loopLandInfoMap[loopRep];
-  if (!landInfo)
-    landInfo = new LoopLandInfo();
-
-  landInfo->breakOnRegs.insert(regNum);
-
-  if (DEBUGME) {
-    errs() << "addLoopBreakOnReg loop-header = BB"
-           << loopRep->getHeader()->getNumber()
-           << " regNum = " << regNum << "\n";
-  }
-} // addLoopBreakOnReg
-
-// Add regNum to the contOnRegs set of loopRep's LoopLandInfo, allocating the
-// record on first use.
-template<class PassT>
-void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
-  LoopLandInfo *&landInfo = loopLandInfoMap[loopRep];
-  if (!landInfo)
-    landInfo = new LoopLandInfo();
-
-  landInfo->contOnRegs.insert(regNum);
-
-  if (DEBUGME) {
-    errs() << "addLoopContOnReg loop-header = BB"
-           << loopRep->getHeader()->getNumber()
-           << " regNum = " << regNum << "\n";
-  }
-} // addLoopContOnReg
-
-// Add regNum to the breakInitRegs set of loopRep's LoopLandInfo, allocating
-// the record on first use.
-template<class PassT>
-void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
-  LoopLandInfo *&landInfo = loopLandInfoMap[loopRep];
-  if (!landInfo)
-    landInfo = new LoopLandInfo();
-
-  landInfo->breakInitRegs.insert(regNum);
-
-  if (DEBUGME) {
-    errs() << "addLoopBreakInitReg loop-header = BB"
-           << loopRep->getHeader()->getNumber()
-           << " regNum = " << regNum << "\n";
-  }
-} // addLoopBreakInitReg
-
-// Add regNum to the contInitRegs set of loopRep's LoopLandInfo, allocating the
-// record on first use.
-template<class PassT>
-void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
-  LoopLandInfo *&landInfo = loopLandInfoMap[loopRep];
-  if (!landInfo)
-    landInfo = new LoopLandInfo();
-
-  landInfo->contInitRegs.insert(regNum);
-
-  if (DEBUGME) {
-    errs() << "addLoopContInitReg loop-header = BB"
-           << loopRep->getHeader()->getNumber()
-           << " regNum = " << regNum << "\n";
-  }
-} // addLoopContInitReg
-
-// Add regNum to the endbranchInitRegs set of loopRep's LoopLandInfo,
-// allocating the record on first use.
-template<class PassT>
-void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
-                                                     RegiT regNum) {
-  LoopLandInfo *&landInfo = loopLandInfoMap[loopRep];
-  if (!landInfo)
-    landInfo = new LoopLandInfo();
-
-  landInfo->endbranchInitRegs.insert(regNum);
-
-  if (DEBUGME) {
-    errs() << "addLoopEndbranchInitReg loop-header = BB"
-           << loopRep->getHeader()->getNumber()
-           << " regNum = " << regNum << "\n";
-  }
-} // addLoopEndbranchInitReg
-
-// Return the LoopLandInfo stored for loopRep in loopLandInfoMap (looked up
-// with operator[]); NULL when none has been allocated for the loop.
-template<class PassT>
-typename CFGStructurizer<PassT>::LoopLandInfo *
-CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
-  return loopLandInfoMap[loopRep];
-} // getLoopLandInfo
-
-// Return the landing block recorded for loopRep, or NULL when the loop has no
-// LoopLandInfo record.
-template<class PassT>
-typename CFGStructurizer<PassT>::BlockT *
-CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
-  LoopLandInfo *&landInfo = loopLandInfoMap[loopRep];
-  if (!landInfo)
-    return NULL;
-  return landInfo->landBlk;
-} // getLoopLandBlock
-
-
-// True when curBlk belongs to a loop and the header of its innermost loop is
-// one of its successors.
-template<class PassT>
-bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) {
-  LoopT *loopRep = loopInfo->getLoopFor(curBlk);
-  return loopRep != NULL && curBlk->isSuccessor(loopRep->getHeader());
-} //hasBackEdge
-
-// Depth of loopRep as reported by the loop itself; 0 when loopRep is NULL.
-template<class PassT>
-unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) {
-  if (!loopRep)
-    return 0;
-  return loopRep->getLoopDepth();
-} //getLoopDepth
-
-// Count the blocks in [iterStart, iterEnd) that have not been retired.
-template<class PassT>
-int CFGStructurizer<PassT>::countActiveBlock
-(typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart,
- typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) {
-  int numActive = 0;
-  for (; iterStart != iterEnd; ++iterStart) {
-    if (isRetiredBlock(*iterStart))
-      continue;
-    ++numActive;
-  }
-
-  return numActive;
-} //countActiveBlock
-
-// This is a workaround: findNearestCommonDominator is not available for the
-// post-dominator tree.  A proper fix should go into Dominators.h.
-//
-// Returns the nearest block that post-dominates both blk1 and blk2, or NULL
-// when none is found.
-
-template<class PassT>
-typename CFGStructurizer<PassT>::BlockT*
-CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {
-
- // If one block post-dominates the other, it is the answer.
- if (postDomTree->dominates(blk1, blk2)) {
- return blk1;
- }
- if (postDomTree->dominates(blk2, blk1)) {
- return blk2;
- }
-
- DomTreeNodeT *node1 = postDomTree->getNode(blk1);
- DomTreeNodeT *node2 = postDomTree->getNode(blk2);
-
- // Handle newly cloned node.
- // A block without a tree node that has a single successor is replaced by
- // that successor and the query is retried.
- if (node1 == NULL && blk1->succ_size() == 1) {
- return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
- }
- if (node2 == NULL && blk2->succ_size() == 1) {
- return findNearestCommonPostDom(blk1, *blk2->succ_begin());
- }
-
- if (node1 == NULL || node2 == NULL) {
- return NULL;
- }
-
- // Climb from blk1 through its immediate post dominators until one of them
- // also post-dominates blk2.
- node1 = node1->getIDom();
- while (node1) {
- if (postDomTree->dominates(node1, node2)) {
- return node1->getBlock();
- }
- node1 = node1->getIDom();
- }
-
- return NULL;
-}
-
-// Return the nearest common post dominator of all blocks in blks.
-//
-// The result is folded pairwise over the set with the two-block overload and
-// becomes NULL as soon as one pair has no common post dominator.  An empty set
-// yields NULL.
-template<class PassT>
-typename CFGStructurizer<PassT>::BlockT *
-CFGStructurizer<PassT>::findNearestCommonPostDom
-(typename std::set<BlockT *> &blks) {
-  typename std::set<BlockT *>::const_iterator iter = blks.begin();
-  typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
-
-  // Seed with the first element only when there is one; dereferencing
-  // begin() of an empty set is undefined behavior.
-  BlockT *commonDom = NULL;
-  if (iter != iterEnd) {
-    commonDom = *iter;
-  }
-
-  for (; iter != iterEnd && commonDom != NULL; ++iter) {
-    BlockT *curBlk = *iter;
-    if (curBlk != commonDom) {
-      commonDom = findNearestCommonPostDom(curBlk, commonDom);
-    }
-  }
-
-  if (DEBUGME) {
-    errs() << "Common post dominator for exit blocks is ";
-    if (commonDom) {
-      errs() << "BB" << commonDom->getNumber() << "\n";
-    } else {
-      errs() << "NULL\n";
-    }
-  }
-
-  return commonDom;
-} //findNearestCommonPostDom
-
-} //end namespace llvm
-
-//todo: move-end
-
-
-//===----------------------------------------------------------------------===//
-//
-// CFGStructurizer for AMDIL
-//
-//===----------------------------------------------------------------------===//
-
-
-using namespace llvmCFGStruct;
-
-namespace llvm
-{
-// Common base class of the AMDIL control-flow passes.  It caches the target
-// machine together with its instruction and register info, and publishes the
-// machine-level types as member typedefs.
-class AMDILCFGStructurizer : public MachineFunctionPass
-{
-public:
- // Machine-level types, exported under generic names.
- typedef MachineInstr InstructionType;
- typedef MachineFunction FunctionType;
- typedef MachineBasicBlock BlockType;
- typedef MachineLoopInfo LoopinfoType;
- typedef MachineDominatorTree DominatortreeType;
- typedef MachinePostDominatorTree PostDominatortreeType;
- typedef MachineDomTreeNode DomTreeNodeType;
- typedef MachineLoop LoopType;
-
-protected:
- TargetMachine &TM;
- const TargetInstrInfo *TII;
- const AMDILRegisterInfo *TRI;
-
-public:
- // pid is forwarded to MachineFunctionPass as the pass identifier.
- // AMDIL_OPT_LEVEL_DECL is a macro defined elsewhere -- presumably an
- // optional optimization-level parameter; confirm against its definition.
- AMDILCFGStructurizer(char &pid, TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
- // Returns the instruction info cached at construction time.
- const TargetInstrInfo *getTargetInstrInfo() const;
- //bool runOnMachineFunction(MachineFunction &F);
-
-private:
-
-}; //end of class AMDILCFGStructurizer
-
-//char AMDILCFGStructurizer::ID = 0;
-} //end of namespace llvm
-// Cache the target machine and the instruction/register info it provides.
-AMDILCFGStructurizer::AMDILCFGStructurizer(char &pid, TargetMachine &tm
-                                           AMDIL_OPT_LEVEL_DECL)
-  : MachineFunctionPass(pid),
-    TM(tm),
-    TII(tm.getInstrInfo()),
-    TRI(static_cast<const AMDILRegisterInfo *>(tm.getRegisterInfo())) {
-}
-
-// Accessor for the instruction info captured by the constructor.
-const TargetInstrInfo *AMDILCFGStructurizer::getTargetInstrInfo() const
-{
-  return this->TII;
-}
-//===----------------------------------------------------------------------===//
-//
-// CFGPrepare
-//
-//===----------------------------------------------------------------------===//
-
-
-using namespace llvmCFGStruct;
-
-namespace llvm
-{
-// The "AMD IL Control Flow Graph Preparation Pass" (see getPassName).
-class AMDILCFGPrepare : public AMDILCFGStructurizer
-{
-public:
- // The address of ID is handed to the base class as the pass identifier.
- static char ID;
-
-public:
- AMDILCFGPrepare(TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
-
- virtual const char *getPassName() const;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-
- // Defined outside the visible part of this file.
- bool runOnMachineFunction(MachineFunction &F);
-
-private:
-
-}; //end of class AMDILCFGPrepare
-
-char AMDILCFGPrepare::ID = 0;
-} //end of namespace llvm
-
-// Forward the pass ID and the target machine to the shared base class.
-AMDILCFGPrepare::AMDILCFGPrepare(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
-  : AMDILCFGStructurizer(ID, tm AMDIL_OPT_LEVEL_VAR) {
-}
-// Human-readable name of the preparation pass.
-const char *AMDILCFGPrepare::getPassName() const
-{
-  const char *passName = "AMD IL Control Flow Graph Preparation Pass";
-  return passName;
-}
-
-// Declare the analyses of the preparation pass: MachineFunctionAnalysis is
-// both required and preserved; the dominator tree, post-dominator tree and
-// loop info are required.
-void AMDILCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addPreserved<MachineFunctionAnalysis>();
- AU.addRequired<MachineFunctionAnalysis>();
- AU.addRequired<MachineDominatorTree>();
- AU.addRequired<MachinePostDominatorTree>();
- AU.addRequired<MachineLoopInfo>();
-}
-
-//===----------------------------------------------------------------------===//
-//
-// CFGPerform
-//
-//===----------------------------------------------------------------------===//
-
-
-using namespace llvmCFGStruct;
-
-namespace llvm
-{
-// The "AMD IL Control Flow Graph structurizer Pass" (see getPassName).
-class AMDILCFGPerform : public AMDILCFGStructurizer
-{
-public:
- // The address of ID is handed to the base class as the pass identifier.
- static char ID;
-
-public:
- AMDILCFGPerform(TargetMachine &tm AMDIL_OPT_LEVEL_DECL);
- virtual const char *getPassName() const;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- // Defined outside the visible part of this file.
- bool runOnMachineFunction(MachineFunction &F);
-
-private:
-
-}; //end of class AMDILCFGPerform
-
-char AMDILCFGPerform::ID = 0;
-} //end of namespace llvm
-
- // Forward the pass ID and the target machine to the shared base class.
- AMDILCFGPerform::AMDILCFGPerform(TargetMachine &tm AMDIL_OPT_LEVEL_DECL)
-   : AMDILCFGStructurizer(ID, tm AMDIL_OPT_LEVEL_VAR) {
- }
-
-// Human-readable name of the structurizer pass.
-const char *AMDILCFGPerform::getPassName() const
-{
-  const char *passName = "AMD IL Control Flow Graph structurizer Pass";
-  return passName;
-}
-
-// Declare the analyses of the structurizer pass: MachineFunctionAnalysis is
-// both required and preserved; the dominator tree, post-dominator tree and
-// loop info are required.
-void AMDILCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addPreserved<MachineFunctionAnalysis>();
- AU.addRequired<MachineFunctionAnalysis>();
- AU.addRequired<MachineDominatorTree>();
- AU.addRequired<MachinePostDominatorTree>();
- AU.addRequired<MachineLoopInfo>();
-}
-
-//===----------------------------------------------------------------------===//
-//
-// CFGStructTraits<AMDILCFGStructurizer>
-//
-//===----------------------------------------------------------------------===//
-
-namespace llvmCFGStruct
-{
-// this class is tailor to the AMDIL backend
-template<>
-struct CFGStructTraits<AMDILCFGStructurizer>
-{
- typedef int RegiT;
-
- // Map a BRANCH_COND opcode to its BREAK_LOGICALNZ counterpart.  The case
- // labels come from ExpandCaseToAllScalarReturn (macro defined elsewhere).
- // Any other opcode trips the assertion; -1 is returned when asserts are off.
- static int getBreakNzeroOpcode(int oldOpcode) {
-   switch (oldOpcode) {
-     ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::BREAK_LOGICALNZ);
-   default:
-     break;
-   }
-   assert(0 && "internal error");
-   return -1;
- }
-
- // Map a BRANCH_COND opcode to its BREAK_LOGICALZ counterpart.  The case
- // labels come from ExpandCaseToAllScalarReturn (macro defined elsewhere).
- // Any other opcode trips the assertion; -1 is returned when asserts are off.
- static int getBreakZeroOpcode(int oldOpcode) {
-   switch (oldOpcode) {
-     ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::BREAK_LOGICALZ);
-   default:
-     break;
-   }
-   assert(0 && "internal error");
-   return -1;
- }
-
- // Map a BRANCH_COND opcode to its IF_LOGICALNZ counterpart.  The case
- // labels come from ExpandCaseToAllScalarReturn (macro defined elsewhere).
- // Any other opcode trips the assertion; -1 is returned when asserts are off.
- static int getBranchNzeroOpcode(int oldOpcode) {
-   switch (oldOpcode) {
-     ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALNZ);
-   default:
-     break;
-   }
-   assert(0 && "internal error");
-   return -1;
- }
-
- // Map a BRANCH_COND opcode to its IF_LOGICALZ counterpart.  The case
- // labels come from ExpandCaseToAllScalarReturn (macro defined elsewhere).
- // Any other opcode trips the assertion; -1 is returned when asserts are off.
- static int getBranchZeroOpcode(int oldOpcode) {
-   switch (oldOpcode) {
-     ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALZ);
-   default:
-     break;
-   }
-   assert(0 && "internal error");
-   return -1;
- }
-
- // Map a BRANCH_COND opcode to its CONTINUE_LOGICALNZ counterpart.  The case
- // labels come from ExpandCaseToAllScalarReturn (macro defined elsewhere).
- // Any other opcode trips the assertion; -1 is returned when asserts are off.
- static int getContinueNzeroOpcode(int oldOpcode) {
-   switch (oldOpcode) {
-     ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::CONTINUE_LOGICALNZ);
-   default:
-     break;
-   }
-   assert(0 && "internal error");
-   return -1;
- }
-
- // Map a BRANCH_COND opcode to its CONTINUE_LOGICALZ counterpart.  The case
- // labels come from ExpandCaseToAllScalarReturn (macro defined elsewhere).
- // Any other opcode trips the assertion; -1 is returned when asserts are off.
- static int getContinueZeroOpcode(int oldOpcode) {
-   switch (oldOpcode) {
-     ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::CONTINUE_LOGICALZ);
-   default:
-     break;
-   }
-   assert(0 && "internal error");
-   return -1;
- }
-
-// The branch target that is explicitly encoded in a conditional branch is the
-// "true" target, so the explicit-branch accessors are plain aliases of the
-// true-branch accessors defined below.
-#define getExplicitBranch getTrueBranch
-#define setExplicitBranch setTrueBranch
-
- // The taken ("true") target is the block stored in operand 0 of the branch.
- static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
-   MachineOperand &target = instr->getOperand(0);
-   return target.getMBB();
- }
-
- // Redirect the taken ("true") target, i.e. operand 0 of the branch, to blk.
- static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
-   MachineOperand &target = instr->getOperand(0);
-   target.setMBB(blk);
- }
-
- // Of the two successors of blk, return the one that is not the explicit
- // target of the branch instruction instr.
- static MachineBasicBlock *
- getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
-   assert(blk->succ_size() == 2);
-   MachineBasicBlock *taken = getTrueBranch(instr);
-
-   MachineBasicBlock::succ_iterator first = blk->succ_begin();
-   MachineBasicBlock::succ_iterator second = first;
-   ++second;
-
-   if (*first == taken)
-     return *second;
-   return *first;
- }
-
- // True when the opcode of instr is one of the case labels that
- // ExpandCaseToAllScalarTypes (macro defined elsewhere) generates for
- // AMDGPU::BRANCH_COND.
- static bool isCondBranch(MachineInstr *instr) {
-   bool matched = true;
-   switch (instr->getOpcode()) {
-     ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
-     break;
-   default:
-     matched = false;
-     break;
-   }
-   return matched;
- }
-
- // True when instr is the unconditional AMDGPU::BRANCH instruction.
- static bool isUncondBranch(MachineInstr *instr) {
-   return instr->getOpcode() == AMDGPU::BRANCH;
- }
-
- // Return the debug location of the last instruction in blk that carries one.
- // The block is scanned front to back and the result is overwritten on every
- // hit; a default-constructed DebugLoc is returned when nothing is found.
- static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
-   DebugLoc lastKnown;
-   MachineBasicBlock::iterator it = blk->begin();
-   while (it != blk->end()) {
-     MachineInstr *cur = &(*it);
-     if (!cur->getDebugLoc().isUnknown())
-       lastKnown = cur->getDebugLoc();
-     ++it;
-   }
-   return lastKnown;
- }
-
- // Return the last instruction of blk when it is a conditional or an
- // unconditional branch; NULL otherwise.  An empty block has no branch, so
- // NULL is returned without touching the (invalid) rbegin() element.
- static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
-   MachineBasicBlock::reverse_iterator iter = blk->rbegin();
-   if (iter == blk->rend()) {
-     return NULL;
-   }
-   MachineInstr *instr = &*iter;
-   if (isCondBranch(instr) || isUncondBranch(instr)) {
-     return instr;
-   }
-   return NULL;
- }
-
- // A more accurate name would be getPossibleLoopendBlockBranchInstr.
- //
- // A block with a backward edge may hold move instructions after its branch;
- // those moves "belong to" the backward edge.  The block is therefore scanned
- // from the end, stepping over instructions that TII->isMov() accepts.  The
- // first branch found is returned; NULL is returned when any other kind of
- // instruction is met first or the block runs out.
- static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
-   const AMDILInstrInfo * TII = static_cast<const AMDILInstrInfo *>(
-     blk->getParent()->getTarget().getInstrInfo());
-
-   MachineBasicBlock::reverse_iterator iter = blk->rbegin();
-   MachineBasicBlock::reverse_iterator iterEnd = blk->rend();
-   for (; iter != iterEnd; ++iter) {
-     MachineInstr *candidate = &*iter;
-     if (isCondBranch(candidate) || isUncondBranch(candidate))
-       return candidate;
-     if (!TII->isMov(candidate->getOpcode()))
-       return NULL;
-   }
-   return NULL;
- }
-
- // Return the last instruction of blk when it is AMDGPU::RETURN; NULL when
- // the block is empty or ends in anything else.
- static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
-   MachineBasicBlock::reverse_iterator last = blk->rbegin();
-   if (last == blk->rend())
-     return NULL;
-
-   MachineInstr *instr = &(*last);
-   if (instr->getOpcode() != AMDGPU::RETURN)
-     return NULL;
-   return instr;
- }
-
- // Return the last instruction of blk when it is AMDGPU::CONTINUE; NULL when
- // the block is empty or ends in anything else.
- static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
-   MachineBasicBlock::reverse_iterator last = blk->rbegin();
-   if (last == blk->rend())
-     return NULL;
-
-   MachineInstr *instr = &(*last);
-   if (instr->getOpcode() != AMDGPU::CONTINUE)
-     return NULL;
-   return instr;
- }
-
- // Return the first BREAK_LOGICALNZ_i32 or BREAK_LOGICALZ_i32 instruction in
- // blk, or NULL when the block contains neither.
- static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
-   MachineBasicBlock::iterator it = blk->begin();
-   while (it != blk->end()) {
-     MachineInstr *instr = &(*it);
-     switch (instr->getOpcode()) {
-     case AMDGPU::BREAK_LOGICALNZ_i32:
-     case AMDGPU::BREAK_LOGICALZ_i32:
-       return instr;
-     default:
-       break;
-     }
-     ++it;
-   }
-   return NULL;
- }
-
- // A block counts as a return block when it has no successors.  A block that
- // ends in RETURN is asserted to have none; a successor-less block without a
- // RETURN instruction is only reported in debug output.
- static bool isReturnBlock(MachineBasicBlock *blk) {
-   const bool isReturn = (blk->succ_size() == 0);
-
-   if (getReturnInstr(blk) != NULL) {
-     assert(isReturn);
-     return isReturn;
-   }
-
-   if (isReturn && DEBUGME) {
-     errs() << "BB" << blk->getNumber()
-            <<" is return block without RETURN instr\n";
-   }
-   return isReturn;
- }
-
- // Return the iterator of blk that refers to instr.  instr must belong to
- // blk; both the parent check and the final "found" check are assertions.
- static MachineBasicBlock::iterator
- getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
-   assert(instr->getParent() == blk && "instruction doesn't belong to block");
-   MachineBasicBlock::iterator iter = blk->begin();
-   MachineBasicBlock::iterator iterEnd = blk->end();
-   // Test for the end before dereferencing: end() must never be dereferenced.
-   while (iter != iterEnd && &(*iter) != instr) {
-     ++iter;
-   }
-
-   assert(iter != iterEnd);
-   return iter;
- }//getInstrPos
-
- // Convenience overload: same as the four-argument form with a
- // default-constructed DebugLoc.
- static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
-                                        AMDILCFGStructurizer *passRep) {
-   const DebugLoc noLoc;
-   return insertInstrBefore(blk, newOpcode, passRep, noLoc);
- } //insertInstrBefore
-
- // Create an instruction with opcode newOpcode and debug location DL and put
- // it at the very front of blk.  No operands are added.  Returns the new
- // instruction.
- static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
-                                        AMDILCFGStructurizer *passRep, DebugLoc DL) {
-   const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
-   MachineInstr *newInstr =
-     blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
-
-   // For an empty block begin() == end(), so inserting before begin() also
-   // covers the case that used to be handled with push_back().
-   blk->insert(blk->begin(), newInstr);
-
-   SHOWNEWINSTR(newInstr);
-
-   return newInstr;
- } //insertInstrBefore
-
- // Convenience overload: same as the four-argument form with a
- // default-constructed DebugLoc.
- static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
-                            AMDILCFGStructurizer *passRep) {
-   const DebugLoc noLoc;
-   insertInstrEnd(blk, newOpcode, passRep, noLoc);
- } //insertInstrEnd
-
- // Create an instruction with opcode newOpcode and debug location DL and
- // append it to blk.  No operands are added, so the opcode is assumed not to
- // need a register operand.
- static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
-                            AMDILCFGStructurizer *passRep, DebugLoc DL) {
-   MachineFunction *func = blk->getParent();
-   const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
-
-   MachineInstr *newInstr = func->CreateMachineInstr(tii->get(newOpcode), DL);
-   blk->push_back(newInstr);
-
-   SHOWNEWINSTR(newInstr);
- } //insertInstrEnd
-
- // Create an operand-less instruction with opcode newOpcode (and a
- // default-constructed DebugLoc) and insert it right before instrPos, in the
- // block that owns the instruction at instrPos.  Returns the new instruction.
- static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
-                                        int newOpcode,
-                                        AMDILCFGStructurizer *passRep) {
-   MachineBasicBlock *blk = instrPos->getParent();
-   const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
-
-   MachineInstr *newInstr =
-     blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
-   blk->insert(instrPos, newInstr);
-
-   SHOWNEWINSTR(newInstr);
-   return newInstr;
- } //insertInstrBefore
-
- // Insert a new instruction with opcode newOpcode right before the
- // instruction at instrPos and give it, as its only operand, the register
- // held in operand 1 of that instruction.  The old instruction is left in
- // place; the caller erases it later.
- static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
-                                    int newOpcode,
-                                    AMDILCFGStructurizer *passRep,
-                                    DebugLoc DL) {
-   MachineInstr *oldInstr = &(*instrPos);
-   MachineBasicBlock *blk = oldInstr->getParent();
-   const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
-
-   MachineInstr *newInstr =
-     blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
-   blk->insert(instrPos, newInstr);
-
-   const unsigned condReg = oldInstr->getOperand(1).getReg();
-   MachineInstrBuilder(newInstr).addReg(condReg, false);
-
-   SHOWNEWINSTR(newInstr);
- } //insertCondBranchBefore
-
- // Create an instruction with opcode newOpcode and debug location DL, insert
- // it into blk before insertPos, and add regNum as its register operand.
- static void insertCondBranchBefore(MachineBasicBlock *blk,
-                                    MachineBasicBlock::iterator insertPos,
-                                    int newOpcode,
-                                    AMDILCFGStructurizer *passRep,
-                                    RegiT regNum,
-                                    DebugLoc DL) {
-   MachineFunction *func = blk->getParent();
-   const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
-
-   MachineInstr *newInstr = func->CreateMachineInstr(tii->get(newOpcode), DL);
-   blk->insert(insertPos, newInstr);
-   MachineInstrBuilder(newInstr).addReg(regNum, false);
-
-   SHOWNEWINSTR(newInstr);
- } //insertCondBranchBefore
-
- // Create an instruction with opcode newOpcode (default-constructed
- // DebugLoc), append it to blk, and add regNum as its register operand.
- static void insertCondBranchEnd(MachineBasicBlock *blk,
-                                 int newOpcode,
-                                 AMDILCFGStructurizer *passRep,
-                                 RegiT regNum) {
-   MachineFunction *func = blk->getParent();
-   const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
-
-   MachineInstr *newInstr =
-     func->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
-   blk->push_back(newInstr);
-   MachineInstrBuilder(newInstr).addReg(regNum, false);
-
-   SHOWNEWINSTR(newInstr);
- } //insertCondBranchEnd
-
-
- // Insert, right before instrPos, the move-immediate instruction that
- // AMDILInstrInfo::getMovImmInstr builds for register regNum and value regVal.
- static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
-                                     AMDILCFGStructurizer *passRep,
-                                     RegiT regNum, int regVal) {
-   MachineBasicBlock *blk = instrPos->getParent();
-   const AMDILInstrInfo *tii =
-     static_cast<const AMDILInstrInfo *>(passRep->getTargetInstrInfo());
-
-   MachineInstr *movInstr =
-     tii->getMovImmInstr(blk->getParent(), regNum, regVal);
-   blk->insert(instrPos, movInstr);
-
-   SHOWNEWINSTR(movInstr);
- } //insertAssignInstrBefore
-
- // Put the move-immediate instruction that AMDILInstrInfo::getMovImmInstr
- // builds for register regNum and value regVal at the front of blk (or
- // append it when blk is empty).
- static void insertAssignInstrBefore(MachineBasicBlock *blk,
-                                     AMDILCFGStructurizer *passRep,
-                                     RegiT regNum, int regVal) {
-   const AMDILInstrInfo *tii =
-     static_cast<const AMDILInstrInfo *>(passRep->getTargetInstrInfo());
-   MachineInstr *movInstr =
-     tii->getMovImmInstr(blk->getParent(), regNum, regVal);
-
-   const bool blockIsEmpty = (blk->begin() == blk->end());
-   if (blockIsEmpty) {
-     blk->push_back(movInstr);
-   } else {
-     blk->insert(blk->begin(), movInstr);
-   }
-
-   SHOWNEWINSTR(movInstr);
- } //insertAssignInstrBefore
-
- // Build an instruction with the opcode returned by tii->getIEQOpcode(),
- // with dstReg as the defined register and src1Reg/src2Reg as sources, and
- // insert it into blk before instrPos.
- static void insertCompareInstrBefore(MachineBasicBlock *blk,
-                                      MachineBasicBlock::iterator instrPos,
-                                      AMDILCFGStructurizer *passRep,
-                                      RegiT dstReg, RegiT src1Reg,
-                                      RegiT src2Reg) {
-   const AMDILInstrInfo *tii =
-     static_cast<const AMDILInstrInfo *>(passRep->getTargetInstrInfo());
-   MachineFunction *func = blk->getParent();
-   MachineInstr *cmpInstr =
-     func->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc());
-
-   // Operands are attached before the instruction enters the block.
-   MachineInstrBuilder(cmpInstr)
-     .addReg(dstReg, RegState::Define) //set target
-     .addReg(src1Reg)                  //set src value
-     .addReg(src2Reg);                 //set src value
-
-   blk->insert(instrPos, cmpInstr);
-   SHOWNEWINSTR(cmpInstr);
- } //insertCompareInstrBefore
-
- static void cloneSuccessorList(MachineBasicBlock *dstBlk,
- MachineBasicBlock *srcBlk) {
- for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
- iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) {
- dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of
- }
- } //cloneSuccessorList
-
- static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
- MachineFunction *func = srcBlk->getParent();
- MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
- func->push_back(newBlk); //insert to function
- //newBlk->setNumber(srcBlk->getNumber());
- for (MachineBasicBlock::iterator iter = srcBlk->begin(),
- iterEnd = srcBlk->end();
- iter != iterEnd; ++iter) {
- MachineInstr *instr = func->CloneMachineInstr(iter);
- newBlk->push_back(instr);
- }
- return newBlk;
- }
-
- //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because
- //the AMDIL instruction is not recognized as terminator fix this and retire
- //this routine
- static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
- MachineBasicBlock *oldBlk,
- MachineBasicBlock *newBlk) {
- MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
- if (branchInstr && isCondBranch(branchInstr) &&
- getExplicitBranch(branchInstr) == oldBlk) {
- setExplicitBranch(branchInstr, newBlk);
- }
- }
-
- static void wrapup(MachineBasicBlock *entryBlk) {
- assert((!entryBlk->getParent()->getJumpTableInfo()
- || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
- && "found a jump table");
-
- //collect continue right before endloop
- SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr;
- MachineBasicBlock::iterator pre = entryBlk->begin();
- MachineBasicBlock::iterator iterEnd = entryBlk->end();
- MachineBasicBlock::iterator iter = pre;
- while (iter != iterEnd) {
- if (pre->getOpcode() == AMDGPU::CONTINUE
- && iter->getOpcode() == AMDGPU::ENDLOOP) {
- contInstr.push_back(pre);
- }
- pre = iter;
- ++iter;
- } //end while
-
- //delete continue right before endloop
- for (unsigned i = 0; i < contInstr.size(); ++i) {
- contInstr[i]->eraseFromParent();
- }
-
- // TODO to fix up jump table so later phase won't be confused. if
- // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
- // there isn't such an interface yet. alternatively, replace all the other
- // blocks in the jump table with the entryBlk //}
-
- } //wrapup
-
- static MachineDominatorTree *getDominatorTree(AMDILCFGStructurizer &pass) {
- return &pass.getAnalysis<MachineDominatorTree>();
- }
-
- static MachinePostDominatorTree*
- getPostDominatorTree(AMDILCFGStructurizer &pass) {
- return &pass.getAnalysis<MachinePostDominatorTree>();
- }
-
- static MachineLoopInfo *getLoopInfo(AMDILCFGStructurizer &pass) {
- return &pass.getAnalysis<MachineLoopInfo>();
- }
-}; // template class CFGStructTraits
-} //end of namespace llvm
-
-// createAMDILCFGPreparationPass- Returns a pass
-FunctionPass *llvm::createAMDILCFGPreparationPass(TargetMachine &tm
- AMDIL_OPT_LEVEL_DECL) {
- return new AMDILCFGPrepare(tm AMDIL_OPT_LEVEL_VAR);
-}
-
-bool AMDILCFGPrepare::runOnMachineFunction(MachineFunction &func) {
- return llvmCFGStruct::CFGStructurizer<AMDILCFGStructurizer>().prepare(func,
- *this,
- TRI);
-}
-
-// createAMDILCFGStructurizerPass- Returns a pass
-FunctionPass *llvm::createAMDILCFGStructurizerPass(TargetMachine &tm
- AMDIL_OPT_LEVEL_DECL) {
- return new AMDILCFGPerform(tm AMDIL_OPT_LEVEL_VAR);
-}
-
-bool AMDILCFGPerform::runOnMachineFunction(MachineFunction &func) {
- return llvmCFGStruct::CFGStructurizer<AMDILCFGStructurizer>().run(func,
- *this,
- TRI);
-}
-
-//end of file newline goes below
-
+++ /dev/null
-//===- AMDILCallingConv.td - Calling Conventions AMDIL -----*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// This describes the calling conventions for the AMDIL architectures.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Return Value Calling Conventions
-//===----------------------------------------------------------------------===//
-
-// AMDIL 32-bit C return-value convention.
-def RetCC_AMDIL32 : CallingConv<[
- // Since IL has no return values, all values can be emulated on the stack
- // The stack can then be mapped to a number of sequential virtual registers
- // in IL
-
- // Integer and FP scalar values get put on the stack at 16-byte alignment
- // but with a size of 4 bytes
- CCIfType<[i32, f32], CCAssignToReg<
- [
- R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20
-]> >, CCAssignToStack<16, 16>]>;
-
-// AMDIL 32-bit C Calling convention.
-def CC_AMDIL32 : CallingConv<[
- // Since IL has parameter values, all values can be emulated on the stack
- // The stack can then be mapped to a number of sequential virtual registers
- // in IL
- // Integer and FP scalar values get put on the stack at 16-byte alignment
- // but with a size of 4 bytes
- // Integer and FP scalar values get put on the stack at 16-byte alignment
- // but with a size of 4 bytes
- CCIfType<[i32, f32], CCAssignToReg<
-[R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20
-]> >, CCAssignToStack<16, 16>]>;
+++ /dev/null
-//===-- AMDILCodeEmitter.h - AMDIL Code Emitter interface -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// CodeEmitter interface for R600 and SI codegen.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef AMDILCODEEMITTER_H
-#define AMDILCODEEMITTER_H
-
-namespace llvm {
-
- class AMDILCodeEmitter {
- public:
- uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
- virtual uint64_t getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO) const { return 0; }
- virtual unsigned GPR4AlignEncode(const MachineInstr &MI,
- unsigned OpNo) const {
- return 0;
- }
- virtual unsigned GPR2AlignEncode(const MachineInstr &MI,
- unsigned OpNo) const {
- return 0;
- }
- virtual uint64_t VOPPostEncode(const MachineInstr &MI,
- uint64_t Value) const {
- return Value;
- }
- virtual uint64_t i32LiteralEncode(const MachineInstr &MI,
- unsigned OpNo) const {
- return 0;
- }
- virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
- const {
- return 0;
- }
- };
-
-} // End namespace llvm
-
-#endif // AMDILCODEEMITTER_H
+++ /dev/null
-//===-- AMDILDevice.cpp - Base class for AMDIL Devices --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-#include "AMDILDevice.h"
-#include "AMDILSubtarget.h"
-
-using namespace llvm;
-// Default implementation for all of the classes.
-AMDILDevice::AMDILDevice(AMDILSubtarget *ST) : mSTM(ST)
-{
- mHWBits.resize(AMDILDeviceInfo::MaxNumberCapabilities);
- mSWBits.resize(AMDILDeviceInfo::MaxNumberCapabilities);
- setCaps();
- mDeviceFlag = OCL_DEVICE_ALL;
-}
-
-AMDILDevice::~AMDILDevice()
-{
- mHWBits.clear();
- mSWBits.clear();
-}
-
-size_t AMDILDevice::getMaxGDSSize() const
-{
- return 0;
-}
-
-uint32_t
-AMDILDevice::getDeviceFlag() const
-{
- return mDeviceFlag;
-}
-
-size_t AMDILDevice::getMaxNumCBs() const
-{
- if (usesHardware(AMDILDeviceInfo::ConstantMem)) {
- return HW_MAX_NUM_CB;
- }
-
- return 0;
-}
-
-size_t AMDILDevice::getMaxCBSize() const
-{
- if (usesHardware(AMDILDeviceInfo::ConstantMem)) {
- return MAX_CB_SIZE;
- }
-
- return 0;
-}
-
-size_t AMDILDevice::getMaxScratchSize() const
-{
- return 65536;
-}
-
-uint32_t AMDILDevice::getStackAlignment() const
-{
- return 16;
-}
-
-void AMDILDevice::setCaps()
-{
- mSWBits.set(AMDILDeviceInfo::HalfOps);
- mSWBits.set(AMDILDeviceInfo::ByteOps);
- mSWBits.set(AMDILDeviceInfo::ShortOps);
- mSWBits.set(AMDILDeviceInfo::HW64BitDivMod);
- if (mSTM->isOverride(AMDILDeviceInfo::NoInline)) {
- mSWBits.set(AMDILDeviceInfo::NoInline);
- }
- if (mSTM->isOverride(AMDILDeviceInfo::MacroDB)) {
- mSWBits.set(AMDILDeviceInfo::MacroDB);
- }
- if (mSTM->isOverride(AMDILDeviceInfo::Debug)) {
- mSWBits.set(AMDILDeviceInfo::ConstantMem);
- } else {
- mHWBits.set(AMDILDeviceInfo::ConstantMem);
- }
- if (mSTM->isOverride(AMDILDeviceInfo::Debug)) {
- mSWBits.set(AMDILDeviceInfo::PrivateMem);
- } else {
- mHWBits.set(AMDILDeviceInfo::PrivateMem);
- }
- if (mSTM->isOverride(AMDILDeviceInfo::BarrierDetect)) {
- mSWBits.set(AMDILDeviceInfo::BarrierDetect);
- }
- mSWBits.set(AMDILDeviceInfo::ByteLDSOps);
- mSWBits.set(AMDILDeviceInfo::LongOps);
-}
-
-AMDILDeviceInfo::ExecutionMode
-AMDILDevice::getExecutionMode(AMDILDeviceInfo::Caps Caps) const
-{
- if (mHWBits[Caps]) {
- assert(!mSWBits[Caps] && "Cannot set both SW and HW caps");
- return AMDILDeviceInfo::Hardware;
- }
-
- if (mSWBits[Caps]) {
- assert(!mHWBits[Caps] && "Cannot set both SW and HW caps");
- return AMDILDeviceInfo::Software;
- }
-
- return AMDILDeviceInfo::Unsupported;
-
-}
-
-bool AMDILDevice::isSupported(AMDILDeviceInfo::Caps Mode) const
-{
- return getExecutionMode(Mode) != AMDILDeviceInfo::Unsupported;
-}
-
-bool AMDILDevice::usesHardware(AMDILDeviceInfo::Caps Mode) const
-{
- return getExecutionMode(Mode) == AMDILDeviceInfo::Hardware;
-}
-
-bool AMDILDevice::usesSoftware(AMDILDeviceInfo::Caps Mode) const
-{
- return getExecutionMode(Mode) == AMDILDeviceInfo::Software;
-}
-
-std::string
-AMDILDevice::getDataLayout() const
-{
- return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
- "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
- "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
- "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
- "-v512:512:512-v1024:1024:1024-v2048:2048:2048"
- "-n8:16:32:64");
-}
+++ /dev/null
-//===---- AMDILDevice.h - Define Device Data for AMDIL -----*- C++ -*------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// Interface for the subtarget data classes.
-//
-//===----------------------------------------------------------------------===//
-// This file will define the interface that each generation needs to
-// implement in order to correctly answer queries on the capabilities of the
-// specific hardware.
-//===----------------------------------------------------------------------===//
-#ifndef _AMDILDEVICEIMPL_H_
-#define _AMDILDEVICEIMPL_H_
-#include "AMDIL.h"
-#include "llvm/ADT/BitVector.h"
-
-namespace llvm {
- class AMDILSubtarget;
- class MCStreamer;
-//===----------------------------------------------------------------------===//
-// Interface for data that is specific to a single device
-//===----------------------------------------------------------------------===//
-class AMDILDevice {
-public:
- AMDILDevice(AMDILSubtarget *ST);
- virtual ~AMDILDevice();
-
- // Enum values for the various memory types.
- enum {
- RAW_UAV_ID = 0,
- ARENA_UAV_ID = 1,
- LDS_ID = 2,
- GDS_ID = 3,
- SCRATCH_ID = 4,
- CONSTANT_ID = 5,
- GLOBAL_ID = 6,
- MAX_IDS = 7
- } IO_TYPE_IDS;
-
- // Returns the max LDS size that the hardware supports. Size is in
- // bytes.
- virtual size_t getMaxLDSSize() const = 0;
-
- // Returns the max GDS size that the hardware supports if the GDS is
- // supported by the hardware. Size is in bytes.
- virtual size_t getMaxGDSSize() const;
-
- // Returns the max number of hardware constant address spaces that
- // are supported by this device.
- virtual size_t getMaxNumCBs() const;
-
- // Returns the max number of bytes a single hardware constant buffer
- // can support. Size is in bytes.
- virtual size_t getMaxCBSize() const;
-
- // Returns the max number of bytes allowed by the hardware scratch
- // buffer. Size is in bytes.
- virtual size_t getMaxScratchSize() const;
-
- // Get the flag that corresponds to the device.
- virtual uint32_t getDeviceFlag() const;
-
- // Returns the number of work-items that exist in a single hardware
- // wavefront.
- virtual size_t getWavefrontSize() const = 0;
-
- // Get the generational name of this specific device.
- virtual uint32_t getGeneration() const = 0;
-
- // Get the stack alignment of this specific device.
- virtual uint32_t getStackAlignment() const;
-
- // Get the resource ID for this specific device.
- virtual uint32_t getResourceID(uint32_t DeviceID) const = 0;
-
- // Get the max number of UAV's for this device.
- virtual uint32_t getMaxNumUAVs() const = 0;
-
-
- // API utilizing more detailed capabilities of each family of
- // cards. If a capability is supported, then either usesHardware or
- // usesSoftware returned true. If usesHardware returned true, then
- // usesSoftware must return false for the same capability. Hardware
- // execution means that the feature is done natively by the hardware
- // and is not emulated by the softare. Software execution means
- // that the feature could be done in the hardware, but there is
- // software that emulates it with possibly using the hardware for
- // support since the hardware does not fully comply with OpenCL
- // specs.
- bool isSupported(AMDILDeviceInfo::Caps Mode) const;
- bool usesHardware(AMDILDeviceInfo::Caps Mode) const;
- bool usesSoftware(AMDILDeviceInfo::Caps Mode) const;
- virtual std::string getDataLayout() const;
- static const unsigned int MAX_LDS_SIZE_700 = 16384;
- static const unsigned int MAX_LDS_SIZE_800 = 32768;
- static const unsigned int WavefrontSize = 64;
- static const unsigned int HalfWavefrontSize = 32;
- static const unsigned int QuarterWavefrontSize = 16;
-protected:
- virtual void setCaps();
- llvm::BitVector mHWBits;
- llvm::BitVector mSWBits;
- AMDILSubtarget *mSTM;
- uint32_t mDeviceFlag;
-private:
- AMDILDeviceInfo::ExecutionMode
- getExecutionMode(AMDILDeviceInfo::Caps Caps) const;
-}; // AMDILDevice
-
-} // namespace llvm
-#endif // _AMDILDEVICEIMPL_H_
+++ /dev/null
-//===-- AMDILDeviceInfo.cpp - AMDILDeviceInfo class -----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// Function that creates DeviceInfo from a device name and other information.
-//
-//==-----------------------------------------------------------------------===//
-#include "AMDILDevices.h"
-#include "AMDILSubtarget.h"
-
-using namespace llvm;
-namespace llvm {
-namespace AMDILDeviceInfo {
- AMDILDevice*
-getDeviceFromName(const std::string &deviceName, AMDILSubtarget *ptr, bool is64bit, bool is64on32bit)
-{
- if (deviceName.c_str()[2] == '7') {
- switch (deviceName.c_str()[3]) {
- case '1':
- return new AMDIL710Device(ptr);
- case '7':
- return new AMDIL770Device(ptr);
- default:
- return new AMDIL7XXDevice(ptr);
- };
- } else if (deviceName == "cypress") {
-#if DEBUG
- assert(!is64bit && "This device does not support 64bit pointers!");
- assert(!is64on32bit && "This device does not support 64bit"
- " on 32bit pointers!");
-#endif
- return new AMDILCypressDevice(ptr);
- } else if (deviceName == "juniper") {
-#if DEBUG
- assert(!is64bit && "This device does not support 64bit pointers!");
- assert(!is64on32bit && "This device does not support 64bit"
- " on 32bit pointers!");
-#endif
- return new AMDILEvergreenDevice(ptr);
- } else if (deviceName == "redwood") {
-#if DEBUG
- assert(!is64bit && "This device does not support 64bit pointers!");
- assert(!is64on32bit && "This device does not support 64bit"
- " on 32bit pointers!");
-#endif
- return new AMDILRedwoodDevice(ptr);
- } else if (deviceName == "cedar") {
-#if DEBUG
- assert(!is64bit && "This device does not support 64bit pointers!");
- assert(!is64on32bit && "This device does not support 64bit"
- " on 32bit pointers!");
-#endif
- return new AMDILCedarDevice(ptr);
- } else if (deviceName == "barts"
- || deviceName == "turks") {
-#if DEBUG
- assert(!is64bit && "This device does not support 64bit pointers!");
- assert(!is64on32bit && "This device does not support 64bit"
- " on 32bit pointers!");
-#endif
- return new AMDILNIDevice(ptr);
- } else if (deviceName == "cayman") {
-#if DEBUG
- assert(!is64bit && "This device does not support 64bit pointers!");
- assert(!is64on32bit && "This device does not support 64bit"
- " on 32bit pointers!");
-#endif
- return new AMDILCaymanDevice(ptr);
- } else if (deviceName == "caicos") {
-#if DEBUG
- assert(!is64bit && "This device does not support 64bit pointers!");
- assert(!is64on32bit && "This device does not support 64bit"
- " on 32bit pointers!");
-#endif
- return new AMDILNIDevice(ptr);
- } else if (deviceName == "SI") {
- return new AMDILSIDevice(ptr);
- } else {
-#if DEBUG
- assert(!is64bit && "This device does not support 64bit pointers!");
- assert(!is64on32bit && "This device does not support 64bit"
- " on 32bit pointers!");
-#endif
- return new AMDIL7XXDevice(ptr);
- }
-}
-} // End namespace AMDILDeviceInfo
-} // End namespace llvm
+++ /dev/null
-//===-- AMDILDeviceInfo.h - Constants for describing devices --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-#ifndef _AMDILDEVICEINFO_H_
-#define _AMDILDEVICEINFO_H_
-
-
-#include <string>
-
-namespace llvm
-{
- class AMDILDevice;
- class AMDILSubtarget;
- namespace AMDILDeviceInfo
- {
- // Each Capabilities can be executed using a hardware instruction,
- // emulated with a sequence of software instructions, or not
- // supported at all.
- enum ExecutionMode {
- Unsupported = 0, // Unsupported feature on the card(Default value)
- Software, // This is the execution mode that is set if the
- // feature is emulated in software
- Hardware // This execution mode is set if the feature exists
- // natively in hardware
- };
-
- // Any changes to this needs to have a corresponding update to the
- // twiki page GPUMetadataABI
- enum Caps {
- HalfOps = 0x1, // Half float is supported or not.
- DoubleOps = 0x2, // Double is supported or not.
- ByteOps = 0x3, // Byte(char) is support or not.
- ShortOps = 0x4, // Short is supported or not.
- LongOps = 0x5, // Long is supported or not.
- Images = 0x6, // Images are supported or not.
- ByteStores = 0x7, // ByteStores available(!HD4XXX).
- ConstantMem = 0x8, // Constant/CB memory.
- LocalMem = 0x9, // Local/LDS memory.
- PrivateMem = 0xA, // Scratch/Private/Stack memory.
- RegionMem = 0xB, // OCL GDS Memory Extension.
- FMA = 0xC, // Use HW FMA or SW FMA.
- ArenaSegment = 0xD, // Use for Arena UAV per pointer 12-1023.
- MultiUAV = 0xE, // Use for UAV per Pointer 0-7.
- Reserved0 = 0xF, // ReservedFlag
- NoAlias = 0x10, // Cached loads.
- Signed24BitOps = 0x11, // Peephole Optimization.
- // Debug mode implies that no hardware features or optimizations
- // are performned and that all memory access go through a single
- // uav(Arena on HD5XXX/HD6XXX and Raw on HD4XXX).
- Debug = 0x12, // Debug mode is enabled.
- CachedMem = 0x13, // Cached mem is available or not.
- BarrierDetect = 0x14, // Detect duplicate barriers.
- Reserved1 = 0x15, // Reserved flag
- ByteLDSOps = 0x16, // Flag to specify if byte LDS ops are available.
- ArenaVectors = 0x17, // Flag to specify if vector loads from arena work.
- TmrReg = 0x18, // Flag to specify if Tmr register is supported.
- NoInline = 0x19, // Flag to specify that no inlining should occur.
- MacroDB = 0x1A, // Flag to specify that backend handles macrodb.
- HW64BitDivMod = 0x1B, // Flag for backend to generate 64bit div/mod.
- ArenaUAV = 0x1C, // Flag to specify that arena uav is supported.
- PrivateUAV = 0x1D, // Flag to specify that private memory uses uav's.
- // If more capabilities are required, then
- // this number needs to be increased.
- // All capabilities must come before this
- // number.
- MaxNumberCapabilities = 0x20
- };
- // These have to be in order with the older generations
- // having the lower number enumerations.
- enum Generation {
- HD4XXX = 0, // 7XX based devices.
- HD5XXX, // Evergreen based devices.
- HD6XXX, // NI/Evergreen+ based devices.
- HD7XXX,
- HDTEST, // Experimental feature testing device.
- HDNUMGEN
- };
-
-
- AMDILDevice*
- getDeviceFromName(const std::string &name, AMDILSubtarget *ptr, bool is64bit = false, bool is64on32bit = false);
- } // namespace AMDILDeviceInfo
-} // namespace llvm
-#endif // _AMDILDEVICEINFO_H_
+++ /dev/null
-//===-- AMDILDevices.h - Consolidate AMDIL Device headers -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-#ifndef __AMDIL_DEVICES_H_
-#define __AMDIL_DEVICES_H_
-// Include all of the device specific header files
-// This file is for Internal use only!
-#include "AMDIL7XXDevice.h"
-#include "AMDILDevice.h"
-#include "AMDILEvergreenDevice.h"
-#include "AMDILNIDevice.h"
-#include "AMDILSIDevice.h"
-
-#endif // _AMDIL_DEVICES_H_
+++ /dev/null
-//===-- AMDILEnumeratedTypes.td - IL Type definitions --*- tablegen -*-----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-// ILEnumreatedTypes.td - The IL Enumerated Types
-//===--------------------------------------------------------------------===//
-
-// Section 5.1 IL Shader
-class ILShader<bits<8> val> {
- bits<8> Value = val;
-}
-// Table 5-1
-def IL_SHADER_PIXEL : ILShader<0>;
-def IL_SHADER_COMPUTE : ILShader<1>;
-
-// Section 5.2 IL RegType
-class ILRegType<bits<6> val> {
- bits<6> Value = val;
-}
-// Table 5-2
-def IL_REGTYPE_TEMP : ILRegType<0>;
-def IL_REGTYPE_WINCOORD : ILRegType<1>;
-def IL_REGTYPE_CONST_BUF : ILRegType<2>;
-def IL_REGTYPE_LITERAL : ILRegType<3>;
-def IL_REGTYPE_ITEMP : ILRegType<4>;
-def IL_REGTYPE_GLOBAL : ILRegType<5>;
-
-// Section 5.3 IL Component Select
-class ILComponentSelect<bits<3> val, string text> {
- bits<3> Value = val;
- string Text = text;
-}
-// Table 5-3
-def IL_COMPSEL_X : ILComponentSelect<0, "x">;
-def IL_COMPSEL_Y : ILComponentSelect<1, "y">;
-def IL_COMPSEL_Z : ILComponentSelect<2, "z">;
-def IL_COMPSEL_W : ILComponentSelect<3, "w">;
-def IL_COMPSEL_0 : ILComponentSelect<4, "0">;
-def IL_COMPSEL_1 : ILComponentSelect<5, "1">;
-
-// Section 5.4 IL Mod Dst Comp
-class ILModDstComp<bits<2> val, string text> {
- bits<2> Value = val;
- string Text = text;
-}
-// Table 5-4
-def IL_MODCOMP_NOWRITE : ILModDstComp<0, "_">;
-def IL_MODCOMP_WRITE_X : ILModDstComp<1, "x">;
-def IL_MODCOMP_WRITE_y : ILModDstComp<1, "y">;
-def IL_MODCOMP_WRITE_z : ILModDstComp<1, "z">;
-def IL_MODCOMP_WRITE_w : ILModDstComp<1, "w">;
-def IL_MODCOMP_0 : ILModDstComp<2, "0">;
-def IL_MODCOMP_1 : ILModDstComp<3, "1">;
-
-// Section 5.5 IL Import Usage
-class ILImportUsage<bits<1> val, string usage> {
- bits<1> Value = val;
- string Text = usage;
-}
-// Table 5-5
-def IL_IMPORTUSAGE_WINCOORD : ILImportUsage<0, "_usage(wincoord)">;
-
-// Section 5.6 Il Shift Scale
-class ILShiftScale<bits<4> val, string scale> {
- bits<4> Value = val;
- string Text = scale;
-}
-
-// Table 5-6
-def IL_SHIFT_NONE : ILShiftScale<0, "">;
-def IL_SHIFT_X2 : ILShiftScale<1, "_x2">;
-def IL_SHIFT_X4 : ILShiftScale<2, "_x4">;
-def IL_SHIFT_X8 : ILShiftScale<3, "_x8">;
-def IL_SHIFT_D2 : ILShiftScale<4, "_d2">;
-def IL_SHIFT_D4 : ILShiftScale<5, "_d4">;
-def IL_SHIFT_D8 : ILShiftScale<6, "_d8">;
-
-// Section 5.7 IL Divide Component
-class ILDivComp<bits<3> val, string divcomp> {
- bits<3> Value = val;
- string Text = divcomp;
-}
-
-// Table 5-7
-def IL_DIVCOMP_NONE : ILDivComp<0, "_divcomp(none)">;
-def IL_DIVCOMP_Y : ILDivComp<1, "_divcomp(y)">;
-def IL_DIVCOMP_Z : ILDivComp<2, "_divcomp(z)">;
-def IL_DIVCOMP_W : ILDivComp<3, "_divcomp(w)">;
-//def IL_DIVCOMP_UNKNOWN : ILDivComp<4, "_divcomp(unknown)">;
-
-// Section 5.8 IL Relational Op
-class ILRelOp<bits<3> val, string op> {
- bits<3> Value = val;
- string Text = op;
-}
-
-// Table 5-8
-def IL_RELOP_EQ : ILRelOp<0, "_relop(eq)">;
-def IL_RELOP_NE : ILRelOp<1, "_relop(ne)">;
-def IL_RELOP_GT : ILRelOp<2, "_relop(gt)">;
-def IL_RELOP_GE : ILRelOp<3, "_relop(ge)">;
-def IL_RELOP_LT : ILRelOp<4, "_relop(lt)">;
-def IL_RELOP_LE : ILRelOp<5, "_relop(le)">;
-
-// Section 5.9 IL Zero Op
-class ILZeroOp<bits<3> val, string behavior> {
- bits<3> Value = val;
- string Text = behavior;
-}
-
-// Table 5-9
-def IL_ZEROOP_FLTMAX : ILZeroOp<0, "_zeroop(fltmax)">;
-def IL_ZEROOP_0 : ILZeroOp<1, "_zeroop(zero)">;
-def IL_ZEROOP_INFINITY : ILZeroOp<2, "_zeroop(infinity)">;
-def IL_ZEROOP_INF_ELSE_MAX : ILZeroOp<3, "_zeroop(inf_else_max)">;
-
-// Section 5.10 IL Cmp Value
-class ILCmpValue<bits<3> val, string num> {
- bits<3> Value = val;
- string Text = num;
-}
-
-// Table 5-10
-def IL_CMPVAL_0_0 : ILCmpValue<0, "0.0">;
-def IL_CMPVAL_0_5 : ILCmpValue<1, "0.5">;
-def IL_CMPVAL_1_0 : ILCmpValue<2, "1.0">;
-def IL_CMPVAL_NEG_0_5 : ILCmpValue<3, "-0.5">;
-def IL_CMPVAL_NEG_1_0 : ILCmpValue<4, "-1.0">;
-
-// Section 5.11 IL Addressing
-class ILAddressing<bits<3> val> {
- bits<3> Value = val;
-}
-
-// Table 5-11
-def IL_ADDR_ABSOLUTE : ILAddressing<0>;
-def IL_ADDR_RELATIVE : ILAddressing<1>;
-def IL_ADDR_REG_RELATIVE : ILAddressing<2>;
-
-// Section 5.11 IL Element Format
-class ILElementFormat<bits<5> val> {
- bits<5> Value = val;
-}
-
-// Table 5-11
-def IL_ELEMENTFORMAT_UNKNOWN : ILElementFormat<0>;
-def IL_ELEMENTFORMAT_SNORM : ILElementFormat<1>;
-def IL_ELEMENTFORMAT_UNORM : ILElementFormat<2>;
-def IL_ELEMENTFORMAT_SINT : ILElementFormat<3>;
-def IL_ELEMENTFORMAT_UINT : ILElementFormat<4>;
-def IL_ELEMENTFORMAT_FLOAT : ILElementFormat<5>;
-def IL_ELEMENTFORMAT_SRGB : ILElementFormat<6>;
-def IL_ELEMENTFORMAT_MIXED : ILElementFormat<7>;
-def IL_ELEMENTFORMAT_Last : ILElementFormat<8>;
-
-// Section 5.12 IL Op Code
-class ILOpCode<bits<16> val = -1, string cmd> {
- bits<16> Value = val;
- string Text = cmd;
-}
-
-// Table 5-12
-def IL_DCL_CONST_BUFFER : ILOpCode<0, "dcl_cb">;
-def IL_DCL_INDEXED_TEMP_ARRAY : ILOpCode<1, "dcl_index_temp_array">;
-def IL_DCL_INPUT : ILOpCode<2, "dcl_input">;
-def IL_DCL_LITERAL : ILOpCode<3, "dcl_literal">;
-def IL_DCL_OUTPUT : ILOpCode<4, "dcl_output">;
-def IL_DCL_RESOURCE : ILOpCode<5, "dcl_resource">;
-def IL_OP_ABS : ILOpCode<6, "abs">;
-def IL_OP_ADD : ILOpCode<7, "add">;
-def IL_OP_AND : ILOpCode<8, "iand">;
-def IL_OP_BREAK : ILOpCode<9, "break">;
-def IL_OP_BREAK_LOGICALNZ : ILOpCode<10, "break_logicalnz">;
-def IL_OP_BREAK_LOGICALZ : ILOpCode<11, "break_logicalz">;
-def IL_OP_BREAKC : ILOpCode<12, "breakc">;
-def IL_OP_CALL : ILOpCode<13, "call">;
-def IL_OP_CALL_LOGICALNZ : ILOpCode<14, "call_logicalnz">;
-def IL_OP_CALL_LOGICALZ : ILOpCode<15, "call_logicalz">;
-def IL_OP_CASE : ILOpCode<16, "case">;
-def IL_OP_CLG : ILOpCode<17, "clg">;
-def IL_OP_CMOV : ILOpCode<18, "cmov">;
-def IL_OP_CMOV_LOGICAL : ILOpCode<19, "cmov_logical">;
-def IL_OP_CMP : ILOpCode<20, "cmp">;
-def IL_OP_CONTINUE : ILOpCode<21, "continue">;
-def IL_OP_CONTINUE_LOGICALNZ : ILOpCode<22, "continue_logicalnz">;
-def IL_OP_CONTINUE_LOGICALZ : ILOpCode<23, "continue_logicalz">;
-def IL_OP_CONTINUEC : ILOpCode<24, "continuec">;
-def IL_OP_COS : ILOpCode<25, "cos">;
-def IL_OP_COS_VEC : ILOpCode<26, "cos_vec">;
-def IL_OP_D_2_F : ILOpCode<27, "d2f">;
-def IL_OP_D_ADD : ILOpCode<28, "dadd">;
-def IL_OP_D_EQ : ILOpCode<29, "deq">;
-def IL_OP_D_FRC : ILOpCode<30, "dfrac">;
-def IL_OP_D_FREXP : ILOpCode<31, "dfrexp">;
-def IL_OP_D_GE : ILOpCode<32, "dge">;
-def IL_OP_D_LDEXP : ILOpCode<33, "dldexp">;
-def IL_OP_D_LT : ILOpCode<34, "dlt">;
-def IL_OP_D_MAD : ILOpCode<35, "dmad">;
-def IL_OP_D_MUL : ILOpCode<36, "dmul">;
-def IL_OP_D_NE : ILOpCode<37, "dne">;
-def IL_OP_DEFAULT : ILOpCode<38, "default">;
-def IL_OP_DISCARD_LOGICALNZ : ILOpCode<39, "discard_logicalnz">;
-def IL_OP_DISCARD_LOGICALZ : ILOpCode<40, "discard_logicalz">;
-def IL_OP_DIV : ILOpCode<41, "div_zeroop(infinity)">;
-def IL_OP_DP2 : ILOpCode<42, "dp2">;
-def IL_OP_DP3 : ILOpCode<43, "dp3">;
-def IL_OP_DP4 : ILOpCode<44, "dp4">;
-def IL_OP_ELSE : ILOpCode<45, "else">;
-def IL_OP_END : ILOpCode<46, "end">;
-def IL_OP_ENDFUNC : ILOpCode<47, "endfunc">;
-def IL_OP_ENDIF : ILOpCode<48, "endif">;
-def IL_OP_ENDLOOP : ILOpCode<49, "endloop">;
-def IL_OP_ENDMAIN : ILOpCode<50, "endmain">;
-def IL_OP_ENDSWITCH : ILOpCode<51, "endswitch">;
-def IL_OP_EQ : ILOpCode<52, "eq">;
-def IL_OP_EXP : ILOpCode<53, "exp">;
-def IL_OP_EXP_VEC : ILOpCode<54, "exp_vec">;
-def IL_OP_F_2_D : ILOpCode<55, "f2d">;
-def IL_OP_FLR : ILOpCode<56, "flr">;
-def IL_OP_FRC : ILOpCode<57, "frc">;
-def IL_OP_FTOI : ILOpCode<58, "ftoi">;
-def IL_OP_FTOU : ILOpCode<59, "ftou">;
-def IL_OP_FUNC : ILOpCode<60, "func">;
-def IL_OP_GE : ILOpCode<61, "ge">;
-def IL_OP_I_ADD : ILOpCode<62, "iadd">;
-def IL_OP_I_EQ : ILOpCode<63, "ieq">;
-def IL_OP_I_GE : ILOpCode<64, "ige">;
-def IL_OP_I_LT : ILOpCode<65, "ilt">;
-def IL_OP_I_MAD : ILOpCode<66, "imad">;
-def IL_OP_I_MAX : ILOpCode<67, "imax">;
-def IL_OP_I_MIN : ILOpCode<68, "imin">;
-def IL_OP_I_MUL : ILOpCode<69, "imul">;
-def IL_OP_I_MUL_HIGH : ILOpCode<70, "imul_high">;
-def IL_OP_I_NE : ILOpCode<71, "ine">;
-def IL_OP_I_NEGATE : ILOpCode<72, "inegate">;
-def IL_OP_I_NOT : ILOpCode<73, "inot">;
-def IL_OP_I_OR : ILOpCode<74, "ior">;
-def IL_OP_I_SHL : ILOpCode<75, "ishl">;
-def IL_OP_I_SHR : ILOpCode<76, "ishr">;
-def IL_OP_I_XOR : ILOpCode<77, "ixor">;
-def IL_OP_IF_LOGICALNZ : ILOpCode<78, "if_logicalnz">;
-def IL_OP_IF_LOGICALZ : ILOpCode<79, "if_logicalz">;
-def IL_OP_IFC : ILOpCode<80, "ifc">;
-def IL_OP_ITOF : ILOpCode<81, "itof">;
-def IL_OP_LN : ILOpCode<82, "ln">;
-def IL_OP_LOG : ILOpCode<83, "log">;
-def IL_OP_LOG_VEC : ILOpCode<84, "log_vec">;
-def IL_OP_LOOP : ILOpCode<85, "loop">;
-def IL_OP_LT : ILOpCode<86, "lt">;
-def IL_OP_MAD : ILOpCode<87, "mad_ieee">;
-def IL_OP_MAX : ILOpCode<88, "max_ieee">;
-def IL_OP_MIN : ILOpCode<89, "min_ieee">;
-def IL_OP_MOD : ILOpCode<90, "mod_ieee">;
-def IL_OP_MOV : ILOpCode<91, "mov">;
-def IL_OP_MUL_IEEE : ILOpCode<92, "mul_ieee">;
-def IL_OP_NE : ILOpCode<93, "ne">;
-def IL_OP_NRM : ILOpCode<94, "nrm_nrm4_zeroop(zero)">;
-def IL_OP_POW : ILOpCode<95, "pow">;
-def IL_OP_RCP : ILOpCode<96, "rcp">;
-def IL_OP_RET : ILOpCode<97, "ret">;
-def IL_OP_RET_DYN : ILOpCode<98, "ret_dyn">;
-def IL_OP_RET_LOGICALNZ : ILOpCode<99, "ret_logicalnz">;
-def IL_OP_RET_LOGICALZ : ILOpCode<100, "ret_logicalz">;
-def IL_OP_RND : ILOpCode<101, "rnd">;
-def IL_OP_ROUND_NEAR : ILOpCode<102, "round_nearest">;
-def IL_OP_ROUND_NEG_INF : ILOpCode<103, "round_neginf">;
-def IL_OP_ROUND_POS_INF : ILOpCode<104, "round_plusinf">;
-def IL_OP_ROUND_ZERO : ILOpCode<105, "round_z">;
-def IL_OP_RSQ : ILOpCode<106, "rsq">;
-def IL_OP_RSQ_VEC : ILOpCode<107, "rsq_vec">;
-def IL_OP_SAMPLE : ILOpCode<108, "sample">;
-def IL_OP_SAMPLE_L : ILOpCode<109, "sample_l">;
-def IL_OP_SET : ILOpCode<110, "set">;
-def IL_OP_SGN : ILOpCode<111, "sgn">;
-def IL_OP_SIN : ILOpCode<112, "sin">;
-def IL_OP_SIN_VEC : ILOpCode<113, "sin_vec">;
-def IL_OP_SUB : ILOpCode<114, "sub">;
-def IL_OP_SWITCH : ILOpCode<115, "switch">;
-def IL_OP_TRC : ILOpCode<116, "trc">;
-def IL_OP_U_DIV : ILOpCode<117, "udiv">;
-def IL_OP_U_GE : ILOpCode<118, "uge">;
-def IL_OP_U_LT : ILOpCode<119, "ult">;
-def IL_OP_U_MAD : ILOpCode<120, "umad">;
-def IL_OP_U_MAX : ILOpCode<121, "umax">;
-def IL_OP_U_MIN : ILOpCode<122, "umin">;
-def IL_OP_U_MOD : ILOpCode<123, "umod">;
-def IL_OP_U_MUL : ILOpCode<124, "umul">;
-def IL_OP_U_MUL_HIGH : ILOpCode<125, "umul_high">;
-def IL_OP_U_SHR : ILOpCode<126, "ushr">;
-def IL_OP_UTOF : ILOpCode<127, "utof">;
-def IL_OP_WHILE : ILOpCode<128, "whileloop">;
-// SC IL instructions that are not in CAL IL
-def IL_OP_ACOS : ILOpCode<129, "acos">;
-def IL_OP_ASIN : ILOpCode<130, "asin">;
-def IL_OP_EXN : ILOpCode<131, "exn">;
-def IL_OP_UBIT_REVERSE : ILOpCode<132, "ubit_reverse">;
-def IL_OP_UBIT_EXTRACT : ILOpCode<133, "ubit_extract">;
-def IL_OP_IBIT_EXTRACT : ILOpCode<134, "ibit_extract">;
-def IL_OP_SQRT : ILOpCode<135, "sqrt">;
-def IL_OP_SQRT_VEC : ILOpCode<136, "sqrt_vec">;
-def IL_OP_ATAN : ILOpCode<137, "atan">;
-def IL_OP_TAN : ILOpCode<137, "tan">; // NOTE(review): reuses opcode 137 from IL_OP_ATAN on the previous line; confirm the duplicate value is intended.
-def IL_OP_D_DIV : ILOpCode<138, "ddiv">;
-def IL_OP_F_NEG : ILOpCode<139, "mov">; // Prints as "mov", the same text as IL_OP_MOV.
-def IL_OP_GT : ILOpCode<140, "gt">;
-def IL_OP_LE : ILOpCode<141, "lt">; // NOTE(review): prints "lt", the same text as IL_OP_LT; confirm this is deliberate.
-def IL_OP_DIST : ILOpCode<142, "dist">;
-def IL_OP_LEN : ILOpCode<143, "len">;
-def IL_OP_MACRO : ILOpCode<144, "mcall">;
-def IL_OP_INTR : ILOpCode<145, "call">;
-def IL_OP_I_FFB_HI : ILOpCode<146, "ffb_hi">;
-def IL_OP_I_FFB_LO : ILOpCode<147, "ffb_lo">;
-def IL_OP_BARRIER : ILOpCode<148, "fence_threads_memory_lds">;
-def IL_OP_BARRIER_LOCAL : ILOpCode<149, "fence_threads_lds">;
-def IL_OP_BARRIER_GLOBAL : ILOpCode<150, "fence_threads_memory">;
-def IL_OP_FENCE : ILOpCode<151, "fence_lds_memory">;
-def IL_OP_FENCE_READ_ONLY : ILOpCode<152, "fence_lds_mem_read_only">;
-def IL_OP_FENCE_WRITE_ONLY : ILOpCode<153, "fence_lds_mem_write_only">;
-def IL_PSEUDO_INST : ILOpCode<154, ";Pseudo Op">;
-def IL_OP_UNPACK_0 : ILOpCode<155, "unpack0">;
-def IL_OP_UNPACK_1 : ILOpCode<156, "unpack1">;
-def IL_OP_UNPACK_2 : ILOpCode<157, "unpack2">;
-def IL_OP_UNPACK_3 : ILOpCode<158, "unpack3">;
-def IL_OP_PI_REDUCE : ILOpCode<159, "pireduce">;
-def IL_OP_IBIT_COUNT : ILOpCode<160, "icbits">;
-def IL_OP_I_FFB_SGN : ILOpCode<161, "ffb_shi">;
-def IL_OP_F2U4 : ILOpCode<162, "f_2_u4">;
-def IL_OP_BIT_ALIGN : ILOpCode<163, "bitalign">;
-def IL_OP_BYTE_ALIGN : ILOpCode<164, "bytealign">;
-def IL_OP_U4_LERP : ILOpCode<165, "u4lerp">;
-def IL_OP_SAD : ILOpCode<166, "sad">;
-def IL_OP_SAD_HI : ILOpCode<167, "sadhi">;
-def IL_OP_SAD4 : ILOpCode<168, "sad4">;
-def IL_OP_UBIT_INSERT : ILOpCode<169, "ubit_insert">;
-def IL_OP_I_CARRY : ILOpCode<170, "icarry">;
-def IL_OP_I_BORROW : ILOpCode<171, "iborrow">;
-def IL_OP_U_MAD24 : ILOpCode<172, "umad24">;
-def IL_OP_U_MUL24 : ILOpCode<173, "umul24">;
-def IL_OP_I_MAD24 : ILOpCode<174, "imad24">;
-def IL_OP_I_MUL24 : ILOpCode<175, "imul24">;
-def IL_OP_CLAMP : ILOpCode<176, "clamp">;
-def IL_OP_LERP : ILOpCode<177, "lrp">;
-def IL_OP_FMA : ILOpCode<178, "fma">;
-def IL_OP_D_MIN : ILOpCode<179, "dmin">;
-def IL_OP_D_MAX : ILOpCode<180, "dmax">;
-def IL_OP_D_SQRT : ILOpCode<181, "dsqrt">;
-def IL_OP_DP2_ADD : ILOpCode<182, "dp2add">;
-def IL_OP_F16_TO_F32 : ILOpCode<183, "f162f">;
-def IL_OP_F32_TO_F16 : ILOpCode<184, "f2f16">;
-def IL_REG_LOCAL_ID_FLAT : ILOpCode<185, "vTidInGrpFlat">;
-def IL_REG_LOCAL_ID : ILOpCode<186, "vTidInGrp">;
-def IL_REG_GLOBAL_ID_FLAT : ILOpCode<187, "vAbsTidFlag">; // NOTE(review): the other *_FLAT registers print a "...Flat" suffix; "Flag" may be a typo for "Flat" - confirm before changing the emitted text.
-def IL_REG_GLOBAL_ID : ILOpCode<188, "vAbsTid">;
-def IL_REG_GROUP_ID_FLAT : ILOpCode<189, "vThreadGrpIDFlat">;
-def IL_REG_GROUP_ID : ILOpCode<190, "vThreadGrpID">;
-def IL_OP_D_RCP : ILOpCode<191, "drcp_zeroop(infinity)">;
-def IL_OP_D_RSQ : ILOpCode<192, "drsq_zeroop(infinity)">;
-def IL_OP_D_MOV : ILOpCode<193, "dmov">;
-def IL_OP_D_MOVC : ILOpCode<194, "dmovc">;
-def IL_OP_NOP : ILOpCode<195, "nop">;
-def IL_OP_UAV_ADD : ILOpCode<196, "uav_add">;
-def IL_OP_UAV_AND : ILOpCode<197, "uav_and">;
-def IL_OP_UAV_MAX : ILOpCode<198, "uav_max">;
-def IL_OP_UAV_MIN : ILOpCode<199, "uav_min">;
-def IL_OP_UAV_OR : ILOpCode<200, "uav_or">;
-def IL_OP_UAV_RSUB : ILOpCode<201, "uav_rsub">;
-def IL_OP_UAV_SUB : ILOpCode<202, "uav_sub">;
-def IL_OP_UAV_UMAX : ILOpCode<203, "uav_umax">;
-def IL_OP_UAV_UMIN : ILOpCode<204, "uav_umin">;
-def IL_OP_UAV_XOR : ILOpCode<205, "uav_xor">;
-def IL_OP_UAV_INC : ILOpCode<206, "uav_uinc">;
-def IL_OP_UAV_DEC : ILOpCode<207, "uav_udec">;
-def IL_OP_UAV_CMP : ILOpCode<208, "uav_cmp">;
-def IL_OP_UAV_READ_ADD : ILOpCode<209, "uav_read_add">;
-def IL_OP_UAV_READ_AND : ILOpCode<210, "uav_read_and">;
-def IL_OP_UAV_READ_MAX : ILOpCode<211, "uav_read_max">;
-def IL_OP_UAV_READ_MIN : ILOpCode<212, "uav_read_min">;
-def IL_OP_UAV_READ_OR : ILOpCode<213, "uav_read_or">;
-def IL_OP_UAV_READ_RSUB : ILOpCode<214, "uav_read_rsub">;
-def IL_OP_UAV_READ_SUB : ILOpCode<215, "uav_read_sub">;
-def IL_OP_UAV_READ_UMAX : ILOpCode<216, "uav_read_umax">;
-def IL_OP_UAV_READ_UMIN : ILOpCode<217, "uav_read_umin">;
-def IL_OP_UAV_READ_XOR : ILOpCode<218, "uav_read_xor">;
-def IL_OP_UAV_READ_INC : ILOpCode<219, "uav_read_uinc">;
-def IL_OP_UAV_READ_DEC : ILOpCode<220, "uav_read_udec">;
-def IL_OP_UAV_READ_XCHG : ILOpCode<221, "uav_read_xchg">;
-def IL_OP_UAV_READ_CMPXCHG : ILOpCode<222, "uav_read_cmp_xchg">;
-def IL_OP_LDS_ADD : ILOpCode<223, "lds_add">;
-def IL_OP_LDS_AND : ILOpCode<224, "lds_and">;
-def IL_OP_LDS_MAX : ILOpCode<225, "lds_max">;
-def IL_OP_LDS_MIN : ILOpCode<226, "lds_min">;
-def IL_OP_LDS_OR : ILOpCode<227, "lds_or">;
-def IL_OP_LDS_RSUB : ILOpCode<228, "lds_rsub">;
-def IL_OP_LDS_SUB : ILOpCode<229, "lds_sub">;
-def IL_OP_LDS_UMAX : ILOpCode<230, "lds_umax">;
-def IL_OP_LDS_UMIN : ILOpCode<231, "lds_umin">;
-def IL_OP_LDS_XOR : ILOpCode<232, "lds_xor">;
-def IL_OP_LDS_INC : ILOpCode<233, "lds_inc">;
-def IL_OP_LDS_DEC : ILOpCode<234, "lds_dec">;
-def IL_OP_LDS_CMP : ILOpCode<235, "lds_cmp">;
-def IL_OP_LDS_READ_ADD : ILOpCode<236, "lds_read_add">;
-def IL_OP_LDS_READ_AND : ILOpCode<237, "lds_read_and">;
-def IL_OP_LDS_READ_MAX : ILOpCode<238, "lds_read_max">;
-def IL_OP_LDS_READ_MIN : ILOpCode<239, "lds_read_min">;
-def IL_OP_LDS_READ_OR : ILOpCode<240, "lds_read_or">;
-def IL_OP_LDS_READ_RSUB : ILOpCode<241, "lds_read_rsub">;
-def IL_OP_LDS_READ_SUB : ILOpCode<242, "lds_read_sub">;
-def IL_OP_LDS_READ_UMAX : ILOpCode<243, "lds_read_umax">;
-def IL_OP_LDS_READ_UMIN : ILOpCode<244, "lds_read_umin">;
-def IL_OP_LDS_READ_XOR : ILOpCode<245, "lds_read_xor">;
-def IL_OP_LDS_READ_INC : ILOpCode<246, "lds_read_inc">;
-def IL_OP_LDS_READ_DEC : ILOpCode<247, "lds_read_dec">;
-def IL_OP_LDS_READ_XCHG : ILOpCode<248, "lds_read_xchg">;
-def IL_OP_LDS_READ_CMPXCHG : ILOpCode<249, "lds_read_cmp_xchg">;
-def IL_OP_GDS_ADD : ILOpCode<250, "gds_add">;
-def IL_OP_GDS_AND : ILOpCode<251, "gds_and">;
-def IL_OP_GDS_MAX : ILOpCode<252, "gds_max">;
-def IL_OP_GDS_MIN : ILOpCode<253, "gds_min">;
-def IL_OP_GDS_OR : ILOpCode<254, "gds_or">;
-def IL_OP_GDS_RSUB : ILOpCode<255, "gds_rsub">;
-def IL_OP_GDS_SUB : ILOpCode<256, "gds_sub">;
-def IL_OP_GDS_UMAX : ILOpCode<257, "gds_umax">;
-def IL_OP_GDS_UMIN : ILOpCode<258, "gds_umin">;
-def IL_OP_GDS_MSKOR : ILOpCode<259, "gds_mskor">;
-def IL_OP_GDS_XOR : ILOpCode<260, "gds_xor">;
-def IL_OP_GDS_INC : ILOpCode<261, "gds_inc">;
-def IL_OP_GDS_DEC : ILOpCode<262, "gds_dec">;
-def IL_OP_GDS_CMP : ILOpCode<263, "gds_cmp">;
-def IL_OP_GDS_READ_ADD : ILOpCode<264, "gds_read_add">;
-def IL_OP_GDS_READ_AND : ILOpCode<265, "gds_read_and">;
-def IL_OP_GDS_READ_MAX : ILOpCode<266, "gds_read_max">;
-def IL_OP_GDS_READ_MIN : ILOpCode<267, "gds_read_min">;
-def IL_OP_GDS_READ_OR : ILOpCode<268, "gds_read_or">;
-def IL_OP_GDS_READ_RSUB : ILOpCode<269, "gds_read_rsub">;
-def IL_OP_GDS_READ_SUB : ILOpCode<270, "gds_read_sub">;
-def IL_OP_GDS_READ_UMAX : ILOpCode<271, "gds_read_umax">;
-def IL_OP_GDS_READ_UMIN : ILOpCode<272, "gds_read_umin">;
-def IL_OP_GDS_READ_MSKOR : ILOpCode<273, "gds_read_mskor">;
-def IL_OP_GDS_READ_XOR : ILOpCode<274, "gds_read_xor">;
-def IL_OP_GDS_READ_INC : ILOpCode<275, "gds_read_inc">;
-def IL_OP_GDS_READ_DEC : ILOpCode<276, "gds_read_dec">;
-def IL_OP_GDS_READ_XCHG : ILOpCode<277, "gds_read_xchg">;
-def IL_OP_GDS_READ_CMPXCHG : ILOpCode<278, "gds_read_cmp_xchg">;
-def IL_OP_APPEND_BUF_ALLOC : ILOpCode<279, "append_buf_alloc">;
-def IL_OP_APPEND_BUF_CONSUME : ILOpCode<280, "append_buf_consume">;
-def IL_OP_I64_ADD : ILOpCode<281, "i64add">;
-def IL_OP_I64_MAX : ILOpCode<282, "i64max">;
-def IL_OP_U64_MAX : ILOpCode<283, "u64max">;
-def IL_OP_I64_MIN : ILOpCode<284, "i64min">;
-def IL_OP_U64_MIN : ILOpCode<285, "u64min">;
-def IL_OP_I64_NEGATE : ILOpCode<286, "i64negate">;
-def IL_OP_I64_SHL : ILOpCode<287, "i64shl">;
-def IL_OP_I64_SHR : ILOpCode<288, "i64shr">;
-def IL_OP_U64_SHR : ILOpCode<289, "u64shr">;
-def IL_OP_I64_EQ : ILOpCode<290, "i64eq">;
-def IL_OP_I64_GE : ILOpCode<291, "i64ge">;
-def IL_OP_U64_GE : ILOpCode<292, "u64ge">;
-def IL_OP_I64_LT : ILOpCode<293, "i64lt">;
-def IL_OP_U64_LT : ILOpCode<294, "u64lt">;
-def IL_OP_I64_NE : ILOpCode<295, "i64ne">;
-def IL_OP_U_MULHI24 : ILOpCode<296, "umul24_high">;
-def IL_OP_I_MULHI24 : ILOpCode<297, "imul24_high">;
-def IL_OP_GDS_LOAD : ILOpCode<298, "gds_load">;
-def IL_OP_GDS_STORE : ILOpCode<299, "gds_store">;
-def IL_OP_LDS_LOAD : ILOpCode<300, "lds_load">;
-def IL_OP_LDS_LOAD_VEC : ILOpCode<301, "lds_load_vec">;
-def IL_OP_LDS_LOAD_BYTE : ILOpCode<302, "lds_load_byte">;
-def IL_OP_LDS_LOAD_UBYTE : ILOpCode<303, "lds_load_ubyte">;
-def IL_OP_LDS_LOAD_SHORT : ILOpCode<304, "lds_load_short">;
-def IL_OP_LDS_LOAD_USHORT : ILOpCode<305, "lds_load_ushort">;
-def IL_OP_LDS_STORE : ILOpCode<306, "lds_store">;
-def IL_OP_LDS_STORE_VEC : ILOpCode<307, "lds_store_vec">;
-def IL_OP_LDS_STORE_BYTE : ILOpCode<308, "lds_store_byte">;
-def IL_OP_LDS_STORE_SHORT : ILOpCode<309, "lds_store_short">;
-def IL_OP_RAW_UAV_LOAD : ILOpCode<310, "uav_raw_load">;
-def IL_OP_RAW_UAV_STORE : ILOpCode<311, "uav_raw_store">;
-def IL_OP_ARENA_UAV_LOAD : ILOpCode<312, "uav_arena_load">;
-def IL_OP_ARENA_UAV_STORE : ILOpCode<313, "uav_arena_store">;
-def IL_OP_LDS_MSKOR : ILOpCode<314, "lds_mskor">;
-def IL_OP_LDS_READ_MSKOR : ILOpCode<315, "lds_read_mskor">;
-def IL_OP_UAV_BYTE_LOAD : ILOpCode<316, "uav_byte_load">;
-def IL_OP_UAV_UBYTE_LOAD : ILOpCode<317, "uav_ubyte_load">;
-def IL_OP_UAV_SHORT_LOAD : ILOpCode<318, "uav_short_load">;
-def IL_OP_UAV_USHORT_LOAD : ILOpCode<319, "uav_ushort_load">;
-def IL_OP_UAV_BYTE_STORE : ILOpCode<320, "uav_byte_store">;
-def IL_OP_UAV_SHORT_STORE : ILOpCode<320, "uav_short_store">; // NOTE(review): reuses opcode 320 from IL_OP_UAV_BYTE_STORE on the previous line; confirm the duplicate value is intended.
-def IL_OP_UAV_STORE : ILOpCode<321, "uav_store">;
-def IL_OP_UAV_LOAD : ILOpCode<322, "uav_load">;
-def IL_OP_MUL : ILOpCode<323, "mul">;
-def IL_OP_DIV_INF : ILOpCode<324, "div_zeroop(infinity)">;
-def IL_OP_DIV_FLTMAX : ILOpCode<325, "div_zeroop(fltmax)">;
-def IL_OP_DIV_ZERO : ILOpCode<326, "div_zeroop(zero)">;
-def IL_OP_DIV_INFELSEMAX : ILOpCode<327, "div_zeroop(inf_else_max)">;
-def IL_OP_FTOI_FLR : ILOpCode<328, "ftoi_flr">;
-def IL_OP_FTOI_RPI : ILOpCode<329, "ftoi_rpi">;
-def IL_OP_F32_TO_F16_NEAR : ILOpCode<330, "f2f16_near">;
-def IL_OP_F32_TO_F16_NEG_INF : ILOpCode<331, "f2f16_neg_inf">;
-def IL_OP_F32_TO_F16_PLUS_INF : ILOpCode<332, "f2f16_plus_inf">;
-def IL_OP_I64_MUL : ILOpCode<333, "i64mul">;
-def IL_OP_U64_MUL : ILOpCode<334, "u64mul">;
-def IL_OP_CU_ID : ILOpCode<355, "cu_id">;
-def IL_OP_WAVE_ID : ILOpCode<356, "wave_id">;
-def IL_OP_I64_SUB : ILOpCode<357, "i64sub">;
-def IL_OP_I64_DIV : ILOpCode<358, "i64div">;
-def IL_OP_U64_DIV : ILOpCode<359, "u64div">;
-def IL_OP_I64_MOD : ILOpCode<360, "i64mod">;
-def IL_OP_U64_MOD : ILOpCode<361, "u64mod">;
-def IL_DCL_GWS_THREAD_COUNT : ILOpCode<362, "dcl_gws_thread_count">;
-def IL_DCL_SEMAPHORE : ILOpCode<363, "dcl_semaphore">;
-def IL_OP_SEMAPHORE_INIT : ILOpCode<364, "init_semaphore">;
-def IL_OP_SEMAPHORE_WAIT : ILOpCode<365, "semaphore_wait">;
-def IL_OP_SEMAPHORE_SIGNAL : ILOpCode<366, "semaphore_signal">;
-def IL_OP_BARRIER_REGION : ILOpCode<377, "fence_threads_gds">;
-def IL_OP_BFI : ILOpCode<394, "bfi">;
-def IL_OP_BFM : ILOpCode<395, "bfm">;
-def IL_DBG_STRING : ILOpCode<396, "dbg_string">;
-def IL_DBG_LINE : ILOpCode<397, "dbg_line">;
-def IL_DBG_TEMPLOC : ILOpCode<398, "dbg_temploc">;
+++ /dev/null
-//===-- AMDILEvergreenDevice.cpp - Device Info for Evergreen --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-#include "AMDILEvergreenDevice.h"
-
-using namespace llvm;
-
-// Builds the Evergreen device description for subtarget ST. The capability
-// bits are applied first (the setCaps() call made here resolves to this
-// class's version because the object is still under construction), then the
-// device flag is picked from the subtarget's device name. Any name other
-// than "cedar", "redwood" or "cypress" is treated as Juniper.
-AMDILEvergreenDevice::AMDILEvergreenDevice(AMDILSubtarget *ST)
-  : AMDILDevice(ST) {
-  setCaps();
-  std::string devName = ST->getDeviceName();
-  if (devName == "cedar")
-    mDeviceFlag = OCL_DEVICE_CEDAR;
-  else if (devName == "redwood")
-    mDeviceFlag = OCL_DEVICE_REDWOOD;
-  else if (devName == "cypress")
-    mDeviceFlag = OCL_DEVICE_CYPRESS;
-  else
-    mDeviceFlag = OCL_DEVICE_JUNIPER;
-}
-
-// Nothing to release; the destructor body is intentionally empty.
-AMDILEvergreenDevice::~AMDILEvergreenDevice() {}
-
-// Returns MAX_LDS_SIZE_800 when LocalMem is a hardware capability of this
-// device, and 0 otherwise.
-size_t AMDILEvergreenDevice::getMaxLDSSize() const {
-  return usesHardware(AMDILDeviceInfo::LocalMem) ? MAX_LDS_SIZE_800 : 0;
-}
-// Returns MAX_LDS_SIZE_800 when RegionMem is a hardware capability of this
-// device, and 0 otherwise.
-// NOTE(review): the GDS query reports the LDS size constant; confirm whether
-// a dedicated GDS limit was intended here.
-size_t AMDILEvergreenDevice::getMaxGDSSize() const {
-  return usesHardware(AMDILDeviceInfo::RegionMem) ? MAX_LDS_SIZE_800 : 0;
-}
-// Evergreen reports a fixed UAV limit of 12.
-uint32_t AMDILEvergreenDevice::getMaxNumUAVs() const { return 12; }
-
-// Maps a resource-kind identifier to the resource ID used on Evergreen.
-//  - CONSTANT_ID / RAW_UAV_ID: the global-return raw UAV when the CAL version
-//    is at least CAL_VERSION_GLOBAL_RETURN_BUFFER, else the default raw UAV.
-//  - GLOBAL_ID / ARENA_UAV_ID: always the default arena UAV.
-//  - LDS_ID / GDS_ID / SCRATCH_ID: the dedicated default ID when the matching
-//    capability (LocalMem / RegionMem / PrivateMem) is a hardware capability,
-//    otherwise the default arena UAV.
-// Any other id trips the assert; when asserts are compiled out, 0 is returned.
-uint32_t AMDILEvergreenDevice::getResourceID(uint32_t id) const {
-  switch (id) {
-  case CONSTANT_ID:
-  case RAW_UAV_ID:
-    return mSTM->calVersion() >= CAL_VERSION_GLOBAL_RETURN_BUFFER
-               ? GLOBAL_RETURN_RAW_UAV_ID
-               : DEFAULT_RAW_UAV_ID;
-  case GLOBAL_ID:
-  case ARENA_UAV_ID:
-    return DEFAULT_ARENA_UAV_ID;
-  case LDS_ID:
-    return usesHardware(AMDILDeviceInfo::LocalMem) ? DEFAULT_LDS_ID
-                                                   : DEFAULT_ARENA_UAV_ID;
-  case GDS_ID:
-    return usesHardware(AMDILDeviceInfo::RegionMem) ? DEFAULT_GDS_ID
-                                                    : DEFAULT_ARENA_UAV_ID;
-  case SCRATCH_ID:
-    return usesHardware(AMDILDeviceInfo::PrivateMem) ? DEFAULT_SCRATCH_ID
-                                                     : DEFAULT_ARENA_UAV_ID;
-  default:
-    assert(0 && "ID type passed in is unknown!");
-    break;
-  }
-  return 0;
-}
-
-// The base Evergreen device reports AMDILDevice::WavefrontSize.
-size_t AMDILEvergreenDevice::getWavefrontSize() const { return AMDILDevice::WavefrontSize; }
-
-// Every Evergreen device identifies itself as the HD5XXX generation.
-uint32_t AMDILEvergreenDevice::getGeneration() const { return AMDILDeviceInfo::HD5XXX; }
-
-// Populates the software (mSWBits) and hardware (mHWBits) capability sets for
-// the base Evergreen device. Some bits depend on the CAL version reported by
-// the subtarget, others on subtarget overrides. Statement order is kept as in
-// the original so that every set()/reset() pair lands in the same sequence.
-void AMDILEvergreenDevice::setCaps() {
-  mSWBits.set(AMDILDeviceInfo::ArenaSegment);
-  mHWBits.set(AMDILDeviceInfo::ArenaUAV);
-
-  // 64-bit div/mod moves from software to hardware from SC 140 onwards.
-  if (mSTM->calVersion() >= CAL_VERSION_SC_140) {
-    mHWBits.set(AMDILDeviceInfo::HW64BitDivMod);
-    mSWBits.reset(AMDILDeviceInfo::HW64BitDivMod);
-  }
-
-  mSWBits.set(AMDILDeviceInfo::Signed24BitOps);
-
-  if (mSTM->isOverride(AMDILDeviceInfo::ByteStores))
-    mHWBits.set(AMDILDeviceInfo::ByteStores);
-
-  // With the Debug override, local and region memory are software
-  // capabilities; otherwise they are hardware capabilities.
-  if (mSTM->isOverride(AMDILDeviceInfo::Debug)) {
-    mSWBits.set(AMDILDeviceInfo::LocalMem);
-    mSWBits.set(AMDILDeviceInfo::RegionMem);
-  } else {
-    mHWBits.set(AMDILDeviceInfo::LocalMem);
-    mHWBits.set(AMDILDeviceInfo::RegionMem);
-  }
-
-  mHWBits.set(AMDILDeviceInfo::Images);
-
-  if (mSTM->isOverride(AMDILDeviceInfo::NoAlias))
-    mHWBits.set(AMDILDeviceInfo::NoAlias);
-
-  // Strictly newer than the global-return-buffer CAL version.
-  if (mSTM->calVersion() > CAL_VERSION_GLOBAL_RETURN_BUFFER)
-    mHWBits.set(AMDILDeviceInfo::CachedMem);
-
-  if (mSTM->isOverride(AMDILDeviceInfo::MultiUAV))
-    mHWBits.set(AMDILDeviceInfo::MultiUAV);
-
-  // After SC 136: byte LDS ops and arena vectors are hardware capabilities;
-  // at or before it, arena vectors are a software capability.
-  if (mSTM->calVersion() > CAL_VERSION_SC_136) {
-    mHWBits.set(AMDILDeviceInfo::ByteLDSOps);
-    mSWBits.reset(AMDILDeviceInfo::ByteLDSOps);
-    mHWBits.set(AMDILDeviceInfo::ArenaVectors);
-  } else {
-    mSWBits.set(AMDILDeviceInfo::ArenaVectors);
-  }
-
-  // After SC 137: long ops move from software to hardware.
-  if (mSTM->calVersion() > CAL_VERSION_SC_137) {
-    mHWBits.set(AMDILDeviceInfo::LongOps);
-    mSWBits.reset(AMDILDeviceInfo::LongOps);
-  }
-
-  mHWBits.set(AMDILDeviceInfo::TmrReg);
-}
-
-// Constructs a Cypress device: the Evergreen base is built first, then the
-// Cypress-specific capability bits are layered on top.
-AMDILCypressDevice::AMDILCypressDevice(AMDILSubtarget *ST)
-  : AMDILEvergreenDevice(ST)
-{
-  setCaps();
-}
-
-// Nothing to release.
-AMDILCypressDevice::~AMDILCypressDevice() {}
-
-// Marks DoubleOps and FMA as hardware capabilities, but only when the
-// subtarget carries the DoubleOps override.
-void AMDILCypressDevice::setCaps() {
-  if (!mSTM->isOverride(AMDILDeviceInfo::DoubleOps))
-    return;
-  mHWBits.set(AMDILDeviceInfo::DoubleOps);
-  mHWBits.set(AMDILDeviceInfo::FMA);
-}
-
-
-// Constructs a Cedar device: the Evergreen base is built first, then the
-// Cedar-specific capability bits are layered on top.
-AMDILCedarDevice::AMDILCedarDevice(AMDILSubtarget *ST)
-  : AMDILEvergreenDevice(ST)
-{
-  setCaps();
-}
-
-// Nothing to release.
-AMDILCedarDevice::~AMDILCedarDevice() {}
-
-// FMA is a software capability on Cedar.
-void AMDILCedarDevice::setCaps() { mSWBits.set(AMDILDeviceInfo::FMA); }
-
-// Cedar reports AMDILDevice::QuarterWavefrontSize.
-size_t AMDILCedarDevice::getWavefrontSize() const { return AMDILDevice::QuarterWavefrontSize; }
-
-// Constructs a Redwood device: the Evergreen base is built first, then the
-// Redwood-specific capability bits are layered on top.
-AMDILRedwoodDevice::AMDILRedwoodDevice(AMDILSubtarget *ST)
-  : AMDILEvergreenDevice(ST)
-{
-  setCaps();
-}
-
-// Nothing to release.
-AMDILRedwoodDevice::~AMDILRedwoodDevice() {}
-
-// FMA is a software capability on Redwood.
-void AMDILRedwoodDevice::setCaps() { mSWBits.set(AMDILDeviceInfo::FMA); }
-
-// Redwood reports AMDILDevice::HalfWavefrontSize.
-size_t AMDILRedwoodDevice::getWavefrontSize() const { return AMDILDevice::HalfWavefrontSize; }
+++ /dev/null
-//==- AMDILEvergreenDevice.h - Define Evergreen Device for AMDIL -*- C++ -*--=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// Interface for the subtarget data classes.
-//
-//===----------------------------------------------------------------------===//
-// This file will define the interface that each generation needs to
-// implement in order to correctly answer queries on the capabilities of the
-// specific hardware.
-//===----------------------------------------------------------------------===//
-#ifndef _AMDILEVERGREENDEVICE_H_
-#define _AMDILEVERGREENDEVICE_H_
-#include "AMDILDevice.h"
-#include "AMDILSubtarget.h"
-
-namespace llvm {
- class AMDILSubtarget;
-//===----------------------------------------------------------------------===//
-// Evergreen generation of devices and their respective sub classes
-//===----------------------------------------------------------------------===//
-
-
-// The AMDILEvergreenDevice is the base device class for all of the Evergreen
-// series of cards. This class contains the information required to
-// differentiate an Evergreen device from the generic AMDILDevice. Used
-// directly, it represents the capabilities of the 'Juniper' cards, also known
-// as the HD57XX.
-class AMDILEvergreenDevice : public AMDILDevice {
-public:
-  // explicit: a subtarget pointer must not convert silently into a device
-  // object.
-  explicit AMDILEvergreenDevice(AMDILSubtarget *ST);
-  virtual ~AMDILEvergreenDevice();
-  // Capacity queries; the LDS and GDS sizes are 0 when the matching memory
-  // is not a hardware capability.
-  virtual size_t getMaxLDSSize() const;
-  virtual size_t getMaxGDSSize() const;
-  virtual size_t getWavefrontSize() const;
-  virtual uint32_t getGeneration() const;
-  virtual uint32_t getMaxNumUAVs() const;
-  // Maps a resource-kind identifier to the resource ID used on this device.
-  virtual uint32_t getResourceID(uint32_t) const;
-protected:
-  // Fills in the software/hardware capability bits for this device.
-  virtual void setCaps();
-}; // AMDILEvergreenDevice
-
-// The AMDILCypressDevice is similar to the AMDILEvergreenDevice, except it has
-// support for double precision operations. This device is used to represent
-// both the Cypress and Hemlock cards, which are commercially known as HD58XX
-// and HD59XX cards.
-class AMDILCypressDevice : public AMDILEvergreenDevice {
-public:
-  // explicit: a subtarget pointer must not convert silently into a device
-  // object.
-  explicit AMDILCypressDevice(AMDILSubtarget *ST);
-  virtual ~AMDILCypressDevice();
-private:
-  // Adds the Cypress-specific capability bits on top of the Evergreen ones.
-  virtual void setCaps();
-}; // AMDILCypressDevice
-
-
-// The AMDILCedarDevice is the class that represents all of the 'Cedar' based
-// devices. This class differs from the base AMDILEvergreenDevice in that the
-// device is about a quarter of a 'Juniper'. These are commercially known as
-// the HD54XX and HD53XX series of cards.
-class AMDILCedarDevice : public AMDILEvergreenDevice {
-public:
-  // explicit: a subtarget pointer must not convert silently into a device
-  // object.
-  explicit AMDILCedarDevice(AMDILSubtarget *ST);
-  virtual ~AMDILCedarDevice();
-  virtual size_t getWavefrontSize() const;
-private:
-  // Adds the Cedar-specific capability bits on top of the Evergreen ones.
-  virtual void setCaps();
-}; // AMDILCedarDevice
-
-// The AMDILRedwoodDevice is the class that represents all of the 'Redwood'
-// based devices. This class differs from the base class in that these devices
-// are considered about half of a 'Juniper' device. These are commercially
-// known as the HD55XX and HD56XX series of cards.
-class AMDILRedwoodDevice : public AMDILEvergreenDevice {
-public:
-  // explicit: a subtarget pointer must not convert silently into a device
-  // object.
-  explicit AMDILRedwoodDevice(AMDILSubtarget *ST);
-  virtual ~AMDILRedwoodDevice();
-  virtual size_t getWavefrontSize() const;
-private:
-  // Adds the Redwood-specific capability bits on top of the Evergreen ones.
-  virtual void setCaps();
-}; // AMDILRedwoodDevice
-
-} // namespace llvm
-#endif // _AMDILEVERGREENDEVICE_H_
+++ /dev/null
-//==- AMDILFormats.td - AMDIL Instruction Formats ----*- tablegen -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-//===--------------------------------------------------------------------===//
-include "AMDILTokenDesc.td"
-
-//===--------------------------------------------------------------------===//
-// The parent IL instruction class, derived from the Instruction class. It
-// sets the namespace, the output and input operand dag lists, the pattern
-// to match, and the string that is printed by the assembly printer.
-//===--------------------------------------------------------------------===//
-class ILFormat<ILOpCode op, dag outs, dag ins, string asmstr, list<dag> pattern>
-: Instruction {
-
- let Namespace = "AMDGPU";
- dag OutOperandList = outs;
- dag InOperandList = ins;
- ILOpCode operation = op; // The ILOpCode record this instruction was built from.
- let Pattern = pattern;
- let AsmString = !strconcat(asmstr, "\n"); // A newline is appended to the caller's asm string.
- let isPseudo = 1; // Every ILFormat instruction is flagged as a pseudo instruction.
- let Itinerary = NullALU;
- bit hasIEEEFlag = 0; // Off by default.
- bit hasZeroOpFlag = 0; // Off by default.
-}
-
-//===--------------------------------------------------------------------===//
-// Class that has one input parameter and one output parameter.
-// The basic pattern for this class is "Opcode Dst, Src0" and
-// handles the unary math operators.
-// It declares the binary tokens ILSrc, ILSrcMod and ILRelAddr, plus a second
-// ILSrc/ILSrcMod pair for register-relative addressing, for dst and src0.
-//===--------------------------------------------------------------------===//
-class OneInOneOut<ILOpCode op, dag outs, dag ins,
- string asmstr, list<dag> pattern>
- : ILFormat<op, outs, ins, asmstr, pattern>
-{
- ILDst dst_reg; // Destination operand tokens.
- ILDstMod dst_mod;
- ILRelAddr dst_rel;
- ILSrc dst_reg_rel;
- ILSrcMod dst_reg_rel_mod;
- ILSrc src0_reg; // First source operand tokens.
- ILSrcMod src0_mod;
- ILRelAddr src0_rel;
- ILSrc src0_reg_rel;
- ILSrcMod src0_reg_rel_mod;
-}
-
-//===--------------------------------------------------------------------===//
-// This class is similar to the UnaryOp class; however, there is no
-// result value to assign, so only the src0 tokens are declared.
-//===--------------------------------------------------------------------===//
-class UnaryOpNoRet<ILOpCode op, dag outs, dag ins,
- string asmstr, list<dag> pattern>
- : ILFormat<op, outs, ins, asmstr, pattern>
-{
- ILSrc src0_reg; // First (and only) source operand tokens.
- ILSrcMod src0_mod;
- ILRelAddr src0_rel;
- ILSrc src0_reg_rel;
- ILSrcMod src0_reg_rel_mod;
-}
-
-//===--------------------------------------------------------------------===//
-// Set of classes that have two input parameters and one output parameter.
-// The basic pattern for this class is "Opcode Dst, Src0, Src1" and
-// handles the binary math operators and comparison operations.
-// On top of OneInOneOut it declares the ILSrc, ILSrcMod and ILRelAddr tokens,
-// plus the register-relative ILSrc/ILSrcMod pair, for input register 1.
-//===--------------------------------------------------------------------===//
-class TwoInOneOut<ILOpCode op, dag outs, dag ins,
- string asmstr, list<dag> pattern>
- : OneInOneOut<op, outs, ins, asmstr, pattern>
-{
- ILSrc src1_reg; // Second source operand tokens; dst and src0 come from OneInOneOut.
- ILSrcMod src1_mod;
- ILRelAddr src1_rel;
- ILSrc src1_reg_rel;
- ILSrcMod src1_reg_rel_mod;
-}
-
-//===--------------------------------------------------------------------===//
-// Similar to the UnaryOpNoRet class, but takes two input operands and
-// no result. Used mainly for barrier instructions on PC platform.
-//===--------------------------------------------------------------------===//
-class BinaryOpNoRet<ILOpCode op, dag outs, dag ins,
- string asmstr, list<dag> pattern>
- : UnaryOpNoRet<op, outs, ins, asmstr, pattern>
-{
- ILSrc src1_reg; // Second source operand tokens; src0 comes from UnaryOpNoRet.
- ILSrcMod src1_mod;
- ILRelAddr src1_rel;
- ILSrc src1_reg_rel;
- ILSrcMod src1_reg_rel_mod;
-}
-
-//===--------------------------------------------------------------------===//
-// Set of classes that have three input parameters and one output parameter.
-// The basic pattern for this class is "Opcode Dst, Src0, Src1, Src2" and
-// handles the mad and conditional mov instructions.
-// On top of TwoInOneOut it declares the ILSrc, ILSrcMod and ILRelAddr tokens,
-// plus the register-relative ILSrc/ILSrcMod pair, for input register 2.
-// This class is the parent class of TernaryOp.
-//===--------------------------------------------------------------------===//
-class ThreeInOneOut<ILOpCode op, dag outs, dag ins,
- string asmstr, list<dag> pattern>
- : TwoInOneOut<op, outs, ins, asmstr, pattern> {
- ILSrc src2_reg; // Third source operand tokens; dst, src0 and src1 come from TwoInOneOut.
- ILSrcMod src2_mod;
- ILRelAddr src2_rel;
- ILSrc src2_reg_rel;
- ILSrcMod src2_reg_rel_mod;
- }
-
-//===--------------------------------------------------------------------===//
-// Intrinsic classes
-// Generic versions of the above classes but for Target specific intrinsics
-// instead of SDNode patterns.
-//===--------------------------------------------------------------------===//
-// Every class below is a target intrinsic shape with the "AMDIL" prefix.
-let TargetPrefix = "AMDIL", isTarget = 1 in {
-  // No arguments; these declare no memory properties.
-  class VoidIntLong :
-      Intrinsic<[llvm_i64_ty], [], []>;
-  class VoidIntInt :
-      Intrinsic<[llvm_i32_ty], [], []>;
-  // Same signature as VoidIntInt: the result is an i32.
-  class VoidIntBool :
-      Intrinsic<[llvm_i32_ty], [], []>;
-
-  // One argument whose type matches the result; no memory access.
-  class UnaryIntInt :
-      Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
-  class UnaryIntFloat :
-      Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
-
-  // Float <-> integer conversions; no memory access.
-  class ConvertIntFTOI :
-      Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
-  class ConvertIntITOF :
-      Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>;
-
-  // One argument, no result; these declare no memory properties.
-  class UnaryIntNoRetInt :
-      Intrinsic<[], [llvm_anyint_ty], []>;
-  class UnaryIntNoRetFloat :
-      Intrinsic<[], [llvm_anyfloat_ty], []>;
-
-  // Two arguments whose types match the result; no memory access.
-  class BinaryIntInt :
-      Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
-                [IntrNoMem]>;
-  class BinaryIntFloat :
-      Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
-                [IntrNoMem]>;
-
-  // Two arguments of the same type, no result; no memory properties declared.
-  class BinaryIntNoRetInt :
-      Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>;
-  class BinaryIntNoRetFloat :
-      Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], []>;
-
-  // Three and four arguments whose types match the result; no memory access.
-  class TernaryIntInt :
-      Intrinsic<[llvm_anyint_ty],
-                [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
-                [IntrNoMem]>;
-  class TernaryIntFloat :
-      Intrinsic<[llvm_anyfloat_ty],
-                [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
-                [IntrNoMem]>;
-  class QuaternaryIntInt :
-      Intrinsic<[llvm_anyint_ty],
-                [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>,
-                 LLVMMatchType<0>],
-                [IntrNoMem]>;
-
-  // Atomic shapes: a pointer plus one to three i32 operands, with or without
-  // an i32 result. All of them are marked as touching only the memory reached
-  // through their pointer argument.
-  class UnaryAtomicInt :
-      Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
-                [IntrReadWriteArgMem]>;
-  class BinaryAtomicInt :
-      Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
-                [IntrReadWriteArgMem]>;
-  // The property list was previously omitted on this class only; it now
-  // carries IntrReadWriteArgMem like every other atomic shape in this group.
-  class TernaryAtomicInt :
-      Intrinsic<[llvm_i32_ty],
-                [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-                [IntrReadWriteArgMem]>;
-  class UnaryAtomicIntNoRet :
-      Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
-  class BinaryAtomicIntNoRet :
-      Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
-                [IntrReadWriteArgMem]>;
-  class TernaryAtomicIntNoRet :
-      Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-                [IntrReadWriteArgMem]>;
-}
+++ /dev/null
-//===----------------------- AMDILFrameLowering.cpp -----------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// Interface to describe the layout of a stack frame on an AMDIL target machine
-//
-//===----------------------------------------------------------------------===//
-#include "AMDILFrameLowering.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-
-using namespace llvm;
-// Forwards the stack direction, stack alignment, local-area offset and
-// transient alignment unchanged to the TargetFrameLowering base class.
-AMDILFrameLowering::AMDILFrameLowering(StackDirection D, unsigned StackAl,
-                                       int LAO, unsigned TransAl)
-  : TargetFrameLowering(D, StackAl, LAO, TransAl) {}
-
-// Nothing to release.
-AMDILFrameLowering::~AMDILFrameLowering() {}
-
-/// getFrameIndexOffset - Returns the object offset that the function's
-/// MachineFrameInfo records for frame index FI.
-int AMDILFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
-                                            int FI) const {
-  return MF.getFrameInfo()->getObjectOffset(FI);
-}
-
-// Reports an empty spill-slot table: NumEntries is set to 0 and a null
-// pointer is returned.
-const TargetFrameLowering::SpillSlot *
-AMDILFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
-  NumEntries = 0;
-  return 0;
-}
-// Intentionally empty: no prologue code is emitted.
-void AMDILFrameLowering::emitPrologue(MachineFunction &MF) const {}
-// Intentionally empty: no epilogue code is emitted.
-void AMDILFrameLowering::emitEpilogue(MachineFunction &MF,
-                                      MachineBasicBlock &MBB) const {}
-// Always answers false for every function.
-bool AMDILFrameLowering::hasFP(const MachineFunction &MF) const { return false; }
+++ /dev/null
-//===--------------------- AMDILFrameLowering.h -----------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Interface to describe the layout of a stack frame on an AMDIL target machine
-//
-//===----------------------------------------------------------------------===//
-#ifndef _AMDILFRAME_LOWERING_H_
-#define _AMDILFRAME_LOWERING_H_
-
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Target/TargetFrameLowering.h"
-
-/// Information about the stack frame layout on the AMDIL targets. It holds
-/// the direction of the stack growth, the known stack alignment on entry to
-/// each function, and the offset to the locals area.
-/// See TargetFrameLowering for more comments.
-
-namespace llvm {
- // Frame lowering for the AMDIL targets. Every hook declared here overrides
- // the corresponding TargetFrameLowering virtual.
- class AMDILFrameLowering : public TargetFrameLowering {
- public:
-   // TransAl defaults to 1 when the caller does not supply it.
-   AMDILFrameLowering(StackDirection D,
-                      unsigned StackAl,
-                      int LAO,
-                      unsigned TransAl = 1);
-   virtual ~AMDILFrameLowering();
-
-   // Displacement for frame index FI in function MF.
-   virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
-
-   // Callee-saved spill-slot table; NumEntries receives its length.
-   virtual const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const;
-
-   // Prologue / epilogue emission hooks.
-   virtual void emitPrologue(MachineFunction &MF) const;
-   virtual void emitEpilogue(MachineFunction &MF,
-                             MachineBasicBlock &MBB) const;
-
-   // Whether function MF uses a dedicated frame pointer.
-   virtual bool hasFP(const MachineFunction &MF) const;
- }; // class AMDILFrameLowering
-} // namespace llvm
-#endif // _AMDILFRAME_LOWERING_H_
+++ /dev/null
-//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// This file defines an instruction selector for the AMDIL target.
-//
-//===----------------------------------------------------------------------===//
-#include "AMDGPUISelLowering.h" // For AMDGPUISD
-#include "AMDILDevices.h"
-#include "AMDILUtilityFunctions.h"
-#include "llvm/ADT/ValueMap.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Support/Compiler.h"
-#include <list>
-#include <queue>
-
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Instruction Selector Implementation
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// AMDILDAGToDAGISel - AMDIL specific code to select AMDIL machine instructions
-// for SelectionDAG operations.
-//
-namespace {
-/// Instruction selector for the AMDIL target, built on SelectionDAGISel.
-/// Hand-written address-matching helpers are declared here; the pattern
-/// matcher itself is pulled in from the generated AMDGPUGenDAGISel.inc.
-class AMDILDAGToDAGISel : public SelectionDAGISel {
-  // Reference to the AMDIL subtarget of the target machine, kept so that
-  // selection decisions can depend on the device being targeted.
-  const AMDILSubtarget &Subtarget;
-
-public:
-  AMDILDAGToDAGISel(TargetMachine &TM AMDIL_OPT_LEVEL_DECL);
-  virtual ~AMDILDAGToDAGISel();
-
-  /// Select a machine instruction for \p N.
-  SDNode *Select(SDNode *N);
-  virtual const char *getPassName() const;
-
-private:
-  /// Wrap \p Imm in an i32 target constant.
-  inline SDValue getSmallIPtrImm(unsigned Imm);
-
-  // ---- Complex pattern selectors (split an address into two operands) ----
-  bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
-  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
-  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
-
-  // ---- IR-level helpers used by the address-space predicates ----
-  static bool checkType(const Value *ptr, unsigned int addrspace);
-  static const Value *getBasePointerValue(const Value *V);
-
-  // ---- Store address-space predicates ----
-  static bool isGlobalStore(const StoreSDNode *N);
-  static bool isPrivateStore(const StoreSDNode *N);
-  static bool isLocalStore(const StoreSDNode *N);
-  static bool isRegionStore(const StoreSDNode *N);
-
-  // ---- Load address-space predicates ----
-  static bool isCPLoad(const LoadSDNode *N);
-  static bool isConstantLoad(const LoadSDNode *N, int cbID);
-  static bool isGlobalLoad(const LoadSDNode *N);
-  static bool isPrivateLoad(const LoadSDNode *N);
-  static bool isLocalLoad(const LoadSDNode *N);
-  static bool isRegionLoad(const LoadSDNode *N);
-
-  // ---- AMDGPU addressing-mode selectors ----
-  bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset);
-  bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
-  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
-
-  // Include the pieces autogenerated from the target description.
-#include "AMDGPUGenDAGISel.inc"
-};
-} // end anonymous namespace
-
-/// createAMDILISelDag - Factory for the pass that converts a legalized DAG
-/// into an AMDIL-specific DAG, ready for instruction scheduling. The caller
-/// receives a freshly allocated pass object.
-FunctionPass *llvm::createAMDILISelDag(TargetMachine &TM AMDIL_OPT_LEVEL_DECL) {
-  FunctionPass *ISelPass = new AMDILDAGToDAGISel(TM AMDIL_OPT_LEVEL_VAR);
-  return ISelPass;
-}
-
-/// Forward the target machine (and optimization level, when the build is
-/// configured to pass one) to SelectionDAGISel, and cache the AMDIL
-/// subtarget of that machine.
-AMDILDAGToDAGISel::AMDILDAGToDAGISel(TargetMachine &TM AMDIL_OPT_LEVEL_DECL)
-  : SelectionDAGISel(TM AMDIL_OPT_LEVEL_VAR),
-    Subtarget(TM.getSubtarget<AMDILSubtarget>()) {}
-
-/// Nothing to release: the selector only holds a reference to the subtarget.
-AMDILDAGToDAGISel::~AMDILDAGToDAGISel() {}
-
-/// Materialize \p Imm as an i32 target constant in the current DAG.
-SDValue AMDILDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
-  SDValue ImmNode = CurDAG->getTargetConstant(Imm, MVT::i32);
-  return ImmNode;
-}
-
-/// Split \p Addr into two operands; this selector always succeeds.
-///   (add a, b)   -> R1 = a,                  R2 = b
-///   frame index  -> R1 = target frame index, R2 = i32 0
-///   anything else-> R1 = Addr,               R2 = i32 0
-bool AMDILDAGToDAGISel::SelectADDRParam(
-    SDValue Addr, SDValue& R1, SDValue& R2) {
-  const unsigned AddrOpc = Addr.getOpcode();
-
-  // An ADD already provides both operands; no constant needs to be created.
-  if (AddrOpc == ISD::ADD) {
-    R1 = Addr.getOperand(0);
-    R2 = Addr.getOperand(1);
-    return true;
-  }
-
-  // Every remaining form uses a zero second operand. The first operand is
-  // the address itself unless it is a frame index we can retarget.
-  R1 = Addr;
-  if (AddrOpc == ISD::FrameIndex) {
-    if (FrameIndexSDNode *FrameNode = dyn_cast<FrameIndexSDNode>(Addr))
-      R1 = CurDAG->getTargetFrameIndex(FrameNode->getIndex(), MVT::i32);
-  }
-  R2 = CurDAG->getTargetConstant(0, MVT::i32);
-  return true;
-}
-
-/// Same as SelectADDRParam, except that target external symbols and target
-/// global addresses are rejected.
-bool AMDILDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
-  switch (Addr.getOpcode()) {
-  case ISD::TargetExternalSymbol:
-  case ISD::TargetGlobalAddress:
-    return false;
-  default:
-    return SelectADDRParam(Addr, R1, R2);
-  }
-}
-
-
-/// 64-bit flavour of SelectADDR: target external symbols and target global
-/// addresses are rejected, every other address is split as
-///   (add a, b)   -> R1 = a,                        R2 = b
-///   frame index  -> R1 = i64 target frame index,   R2 = i64 0
-///   anything else-> R1 = Addr,                     R2 = i64 0
-bool AMDILDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
-  const unsigned AddrOpc = Addr.getOpcode();
-  if (AddrOpc == ISD::TargetExternalSymbol ||
-      AddrOpc == ISD::TargetGlobalAddress)
-    return false;
-
-  if (AddrOpc == ISD::ADD) {
-    R1 = Addr.getOperand(0);
-    R2 = Addr.getOperand(1);
-    return true;
-  }
-
-  R1 = Addr;
-  if (AddrOpc == ISD::FrameIndex) {
-    if (FrameIndexSDNode *FrameNode = dyn_cast<FrameIndexSDNode>(Addr))
-      R1 = CurDAG->getTargetFrameIndex(FrameNode->getIndex(), MVT::i64);
-  }
-  R2 = CurDAG->getTargetConstant(0, MVT::i64);
-  return true;
-}
-
-/// Select a machine instruction for \p N. Nodes that already carry a machine
-/// opcode are skipped (NULL is returned), frame indices are turned into a
-/// COPY of the matching target frame index, and everything else is handed to
-/// the generated matcher.
-SDNode *AMDILDAGToDAGISel::Select(SDNode *N) {
-  if (N->isMachineOpcode())
-    return NULL; // Already selected.
-
-  if (N->getOpcode() == ISD::FrameIndex) {
-    if (FrameIndexSDNode *FrameNode = dyn_cast<FrameIndexSDNode>(N)) {
-      unsigned int Index = FrameNode->getIndex();
-      EVT ResultVT = N->getValueType(0);
-      unsigned int CopyOpc = AMDGPU::COPY;
-      SDValue TargetFI = CurDAG->getTargetFrameIndex(Index, MVT::i32);
-      return CurDAG->SelectNodeTo(N, CopyOpc, ResultVT, TargetFI);
-    }
-  }
-
-  return SelectCode(N);
-}
-
-/// Return true when \p ptr is a non-null value of pointer type whose address
-/// space equals \p addrspace. A null value, or a value that is not of pointer
-/// type, yields false.
-bool AMDILDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
-  if (!ptr)
-    return false;
-  // dyn_cast returns null for non-pointer types, so its result must be
-  // checked before the address space is queried.
-  const PointerType *PtrTy = dyn_cast<PointerType>(ptr->getType());
-  return PtrTy && PtrTy->getAddressSpace() == addrspace;
-}
-
-/// Breadth-first walk from \p V through instruction operands (and the first
-/// operand of constant expressions) looking for the value the pointer is
-/// derived from. The first pointer-typed Argument, GlobalVariable or
-/// AllocaInst reached is returned; NULL is returned when \p V is NULL or the
-/// walk runs out of values without finding one.
-const Value * AMDILDAGToDAGISel::getBasePointerValue(const Value *V)
-{
-  if (!V)
-    return NULL;
-
-  ValueMap<const Value *, bool> Visited;
-  std::queue<const Value *, std::list<const Value *> > Worklist;
-  Worklist.push(V);
-
-  while (!Worklist.empty()) {
-    const Value *Cur = Worklist.front();
-    Worklist.pop();
-
-    // Each value is expanded at most once.
-    if (Visited.find(Cur) != Visited.end())
-      continue;
-    Visited[Cur] = true;
-
-    if (isa<Argument>(Cur) && isa<PointerType>(Cur->getType()))
-      return Cur;
-    if (isa<GlobalVariable>(Cur))
-      return Cur;
-    if (isa<Constant>(Cur)) {
-      // Only constant expressions are looked through, and only via their
-      // first operand; other constants end this branch of the walk.
-      if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(Cur))
-        Worklist.push(CE->getOperand(0));
-      continue;
-    }
-    if (isa<AllocaInst>(Cur))
-      return Cur;
-    if (const Instruction *Inst = dyn_cast<Instruction>(Cur)) {
-      const uint32_t NumOps = Inst->getNumOperands();
-      for (uint32_t OpIdx = 0; OpIdx < NumOps; ++OpIdx)
-        Worklist.push(Inst->getOperand(OpIdx));
-    }
-    // Any other kind of value is silently ignored.
-  }
-  return NULL;
-}
-
-/// True when the store's source value is a pointer into the global address
-/// space.
-bool AMDILDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
-  const Value *Src = N->getSrcValue();
-  return checkType(Src, AMDILAS::GLOBAL_ADDRESS);
-}
-
-/// A store is treated as private when its source value is in none of the
-/// local, global or region address spaces.
-bool AMDILDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
-  if (checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS))
-    return false;
-  if (checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS))
-    return false;
-  return !checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS);
-}
-
-/// True when the store's source value is a pointer into the local address
-/// space.
-bool AMDILDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
-  const Value *Src = N->getSrcValue();
-  return checkType(Src, AMDILAS::LOCAL_ADDRESS);
-}
-
-/// True when the store's source value is a pointer into the region address
-/// space.
-bool AMDILDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
-  const Value *Src = N->getSrcValue();
-  return checkType(Src, AMDILAS::REGION_ADDRESS);
-}
-
-/// Decide whether load \p N reads constant data. That is the case when
-///  - its source value is in the constant address space, or
-///  - its memory operand refers to a GlobalValue, directly or through the
-///    base pointer found by getBasePointerValue(), and the source value is in
-///    the private address space.
-/// \p cbID is accepted for interface compatibility and is not consulted.
-bool AMDILDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) {
-  (void)cbID;
-  if (checkType(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS))
-    return true;
-
-  // Validate the memory operand before reading through it.
-  const MachineMemOperand *MMO = N->getMemOperand();
-  if (!MMO)
-    return false;
-  const Value *V = MMO->getValue();
-  if (!V)
-    return false;
-
-  // Walk to the base pointer only when the value itself is not a global, and
-  // do so once.
-  if (!isa<GlobalValue>(V)) {
-    const Value *BV = getBasePointerValue(V);
-    if (!BV || !isa<GlobalValue>(BV))
-      return false;
-  }
-  return checkType(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS);
-}
-
-/// True when the load's source value is a pointer into the global address
-/// space.
-bool AMDILDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) {
-  const Value *Src = N->getSrcValue();
-  return checkType(Src, AMDILAS::GLOBAL_ADDRESS);
-}
-
-/// True when the load's source value is a pointer into the local address
-/// space.
-bool AMDILDAGToDAGISel::isLocalLoad(const LoadSDNode *N) {
-  const Value *Src = N->getSrcValue();
-  return checkType(Src, AMDILAS::LOCAL_ADDRESS);
-}
-
-/// True when the load's source value is a pointer into the region address
-/// space.
-bool AMDILDAGToDAGISel::isRegionLoad(const LoadSDNode *N) {
-  const Value *Src = N->getSrcValue();
-  return checkType(Src, AMDILAS::REGION_ADDRESS);
-}
-
-/// True when \p N is a private-address-space load whose memory operand is
-/// the constant-pool pseudo source value.
-bool AMDILDAGToDAGISel::isCPLoad(const LoadSDNode *N) {
-  MachineMemOperand *MemOp = N->getMemOperand();
-  if (!checkType(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS))
-    return false;
-  if (!MemOp)
-    return false;
-
-  const Value *MemValue = MemOp->getValue();
-  const PseudoSourceValue *Pseudo = dyn_cast<PseudoSourceValue>(MemValue);
-  return Pseudo && Pseudo == PseudoSourceValue::getConstantPool();
-}
-
-/// A load is private when its source value is in none of the local, global,
-/// region, constant or parameter address spaces, and it is not a
-/// constant-pool or constant load that merely carries the private address
-/// space.
-bool AMDILDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) {
-  // Constant pool loads and constant loads can be marked as private loads;
-  // those must not be classified as private.
-  const bool MarkedPrivate =
-      checkType(N->getSrcValue(), AMDILAS::PRIVATE_ADDRESS);
-  if (MarkedPrivate && (isCPLoad(N) || isConstantLoad(N, -1)))
-    return false;
-
-  return !(checkType(N->getSrcValue(), AMDILAS::LOCAL_ADDRESS)
-           || checkType(N->getSrcValue(), AMDILAS::GLOBAL_ADDRESS)
-           || checkType(N->getSrcValue(), AMDILAS::REGION_ADDRESS)
-           || checkType(N->getSrcValue(), AMDILAS::CONSTANT_ADDRESS)
-           || checkType(N->getSrcValue(), AMDILAS::PARAM_D_ADDRESS)
-           || checkType(N->getSrcValue(), AMDILAS::PARAM_I_ADDRESS));
-}
-
-/// Human-readable name of this pass.
-const char *AMDILDAGToDAGISel::getPassName() const {
-  static const char PassName[] = "AMDIL DAG->DAG Pattern Instruction Selection";
-  return PassName;
-}
-
-#ifdef DEBUGTMP
-#undef INT64_C
-#endif
-#undef DEBUGTMP
-
-///==== AMDGPU Functions ====///
-
-/// Match an address as Base plus a dword offset that fits in 8 bits.
-///  - Target external symbols / target global addresses: no match.
-///  - (add x, C): the non-constant operand becomes Base; the match succeeds
-///    only if the constant, shifted right by two, passes isInt<8>.
-///  - Anything else: Base = Addr, Offset = 0, match.
-/// Base may already have been overwritten when false is returned.
-bool AMDILDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base,
-                                             SDValue& Offset) {
-  const unsigned AddrOpc = Addr.getOpcode();
-  if (AddrOpc == ISD::TargetExternalSymbol ||
-      AddrOpc == ISD::TargetGlobalAddress)
-    return false;
-
-  // Default case, no offset
-  if (AddrOpc != ISD::ADD) {
-    Base = Addr;
-    Offset = CurDAG->getTargetConstant(0, MVT::i32);
-    return true;
-  }
-
-  // Find the base ptr and the offset among the ADD operands.
-  bool FoundOffset = false;
-  const unsigned NumOps = Addr.getNumOperands();
-  for (unsigned OpIdx = 0; OpIdx < NumOps; OpIdx++) {
-    SDValue Operand = Addr.getOperand(OpIdx);
-    ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Operand);
-
-    // This operand isn't a constant so it must be the base PTR.
-    if (!Imm) {
-      Base = Operand;
-      continue;
-    }
-
-    // The constant is in bytes; the encoded offset is in dwords.
-    // NOTE(review): the low two bits are dropped and the zero-extended value
-    // is range-checked with the signed isInt<8> - confirm both are intended.
-    const uint64_t DwordOffset = Imm->getZExtValue() >> 2;
-    if (isInt<8>(DwordOffset)) {
-      FoundOffset = true;
-      Offset = CurDAG->getTargetConstant(DwordOffset, MVT::i32);
-    }
-  }
-  return FoundOffset;
-}
-
-/// Match an address for a VTX_READ as Base plus an immediate Offset; always
-/// succeeds.
-///  - (add x, C) with C passing isInt<16>: Base = x, Offset = C.
-///  - constant C passing isInt<16>: Base = copy from AMDGPU::ZERO, Offset = C.
-///  - otherwise: Base = Addr, Offset = 0.
-bool AMDILDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
-                                           SDValue &Offset)
-{
-  if (Addr.getOpcode() == ISD::ADD) {
-    ConstantSDNode *AddImm = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
-    if (AddImm && isInt<16>(AddImm->getZExtValue())) {
-      Base = Addr.getOperand(0);
-      Offset = CurDAG->getTargetConstant(AddImm->getZExtValue(), MVT::i32);
-      return true;
-    }
-  }
-
-  // If the pointer address is constant, we can move it to the offset field.
-  ConstantSDNode *AbsImm = dyn_cast<ConstantSDNode>(Addr);
-  if (AbsImm && isInt<16>(AbsImm->getZExtValue())) {
-    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
-                                  CurDAG->getEntryNode().getDebugLoc(),
-                                  AMDGPU::ZERO, MVT::i32);
-    Offset = CurDAG->getTargetConstant(AbsImm->getZExtValue(), MVT::i32);
-    return true;
-  }
-
-  // Default case, no offset
-  Base = Addr;
-  Offset = CurDAG->getTargetConstant(0, MVT::i32);
-  return true;
-}
-
-/// Match a register + register address: only (add a, b) is accepted, giving
-/// Base = a and Offset = b. Every other address form is rejected without
-/// touching the outputs.
-bool AMDILDAGToDAGISel::SelectADDRReg(SDValue Addr, SDValue& Base,
-                                      SDValue& Offset) {
-  // Anything that is not an ADD (which includes target external symbols and
-  // target global addresses) cannot be split into two registers.
-  if (Addr.getOpcode() != ISD::ADD) {
-    return false;
-  }
-
-  Base = Addr.getOperand(0);
-  Offset = Addr.getOperand(1);
-
-  // Report the match: the outputs were filled in above, so returning false
-  // here would discard them and make this selector unable to match.
-  return true;
-}
+++ /dev/null
-//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// This file implements the interfaces that AMDIL uses to lower LLVM code into a
-// selection DAG.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDILISelLowering.h"
-#include "AMDILDevices.h"
-#include "AMDILIntrinsicInfo.h"
-#include "AMDILRegisterInfo.h"
-#include "AMDILSubtarget.h"
-#include "AMDILUtilityFunctions.h"
-#include "llvm/CallingConv.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetOptions.h"
-
-using namespace llvm;
-#define ISDBITCAST ISD::BITCAST
-#define MVTGLUE MVT::Glue
-//===----------------------------------------------------------------------===//
-// Calling Convention Implementation
-//===----------------------------------------------------------------------===//
-#include "AMDGPUGenCallingConv.inc"
-
-//===----------------------------------------------------------------------===//
-// TargetLowering Implementation Help Functions Begin
-//===----------------------------------------------------------------------===//
-/// getConversionNode - Convert \p Src to the scalar type of \p Dst and return
-/// the converted value (\p Src is updated in place as well). Only the type of
-/// \p Dst is used.
-///  - fp  -> fp : FP_EXTEND when widening, FP_ROUND when narrowing.
-///  - int -> int: sign-extend or truncate when the widths differ.
-///  - int -> fp : bitcast when \p asType is set, SINT_TO_FP otherwise; the
-///                integer is first resized to the width of the fp type.
-///  - fp  -> int: bitcast when \p asType is set, FP_TO_SINT otherwise, to the
-///                integer of the same width, then sign-extend or truncate.
-/// Only f32 and f64 are supported as floating point types.
-static SDValue
-getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType)
-{
-  DebugLoc DL = Src.getDebugLoc();
-  EVT svt = Src.getValueType().getScalarType();
-  EVT dvt = Dst.getValueType().getScalarType();
-  if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
-    if (dvt.bitsGT(svt)) {
-      Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
-    } else if (dvt.bitsLT(svt)) {
-      // Narrowing: the destination must be compared against the source.
-      // Comparing a type with itself is never true and left this path dead.
-      Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
-                        DAG.getConstant(1, MVT::i32));
-    }
-  } else if (svt.isInteger() && dvt.isInteger()) {
-    if (!svt.bitsEq(dvt)) {
-      Src = DAG.getSExtOrTrunc(Src, DL, dvt);
-    }
-  } else if (svt.isInteger()) {
-    unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP;
-    if (!svt.bitsEq(dvt)) {
-      // Resize the integer so that it is as wide as the fp destination.
-      if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
-        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
-      } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
-        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
-      } else {
-        assert(0 && "We only support 32 and 64bit fp types");
-      }
-    }
-    Src = DAG.getNode(opcode, DL, dvt, Src);
-  } else if (dvt.isInteger()) {
-    unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT;
-    // Convert to the integer of the same width first, then resize.
-    if (svt.getSimpleVT().SimpleTy == MVT::f32) {
-      Src = DAG.getNode(opcode, DL, MVT::i32, Src);
-    } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
-      Src = DAG.getNode(opcode, DL, MVT::i64, Src);
-    } else {
-      assert(0 && "We only support 32 and 64bit fp types");
-    }
-    Src = DAG.getSExtOrTrunc(Src, DL, dvt);
-  }
-  return Src;
-}
-/// selectCCForType - Pick the AMDIL condition code that matches operand type
-/// \p type: i1/i8/i16/i32 use \p IntCC, i64 uses \p LongCC, f32 uses
-/// \p FloatCC and f64 uses \p DoubleCC. Callers pass AMDILCC::COND_ERROR for
-/// a type class that has no encoding; picking such an entry, or any other
-/// type, asserts and returns AMDILCC::COND_ERROR.
-static AMDILCC::CondCodes
-selectCCForType(MVT::SimpleValueType type,
-                AMDILCC::CondCodes IntCC, AMDILCC::CondCodes LongCC,
-                AMDILCC::CondCodes FloatCC, AMDILCC::CondCodes DoubleCC)
-{
-  AMDILCC::CondCodes Result = AMDILCC::COND_ERROR;
-  switch (type) {
-  case MVT::i1:
-  case MVT::i8:
-  case MVT::i16:
-  case MVT::i32:
-    Result = IntCC;
-    break;
-  case MVT::i64:
-    Result = LongCC;
-    break;
-  case MVT::f32:
-    Result = FloatCC;
-    break;
-  case MVT::f64:
-    Result = DoubleCC;
-    break;
-  default:
-    break;
-  }
-  assert(Result != AMDILCC::COND_ERROR
-         && "Opcode combination not generated correctly!");
-  return Result;
-}
-
-/// CondCCodeToCC - Convert a DAG condition code to an AMDIL CC condition for
-/// operands of type \p type. Ordered-only comparisons (SETO, SETUO and the
-/// SETO* family) exist for f32/f64 only. An unknown condition code is
-/// reported on errs(), asserts, and yields AMDILCC::COND_ERROR.
-static AMDILCC::CondCodes
-CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
-{
-  // Shorthand for "no encoding for this type class".
-  const AMDILCC::CondCodes None = AMDILCC::COND_ERROR;
-
-  switch (CC) {
-  case ISD::SETO:
-    return selectCCForType(type, None, None,
-                           AMDILCC::IL_CC_F_O, AMDILCC::IL_CC_D_O);
-  case ISD::SETUO:
-    return selectCCForType(type, None, None,
-                           AMDILCC::IL_CC_F_UO, AMDILCC::IL_CC_D_UO);
-
-  // Signed / plain comparisons.
-  case ISD::SETGT:
-    return selectCCForType(type, AMDILCC::IL_CC_I_GT, AMDILCC::IL_CC_L_GT,
-                           AMDILCC::IL_CC_F_GT, AMDILCC::IL_CC_D_GT);
-  case ISD::SETGE:
-    return selectCCForType(type, AMDILCC::IL_CC_I_GE, AMDILCC::IL_CC_L_GE,
-                           AMDILCC::IL_CC_F_GE, AMDILCC::IL_CC_D_GE);
-  case ISD::SETLT:
-    return selectCCForType(type, AMDILCC::IL_CC_I_LT, AMDILCC::IL_CC_L_LT,
-                           AMDILCC::IL_CC_F_LT, AMDILCC::IL_CC_D_LT);
-  case ISD::SETLE:
-    return selectCCForType(type, AMDILCC::IL_CC_I_LE, AMDILCC::IL_CC_L_LE,
-                           AMDILCC::IL_CC_F_LE, AMDILCC::IL_CC_D_LE);
-  case ISD::SETNE:
-    return selectCCForType(type, AMDILCC::IL_CC_I_NE, AMDILCC::IL_CC_L_NE,
-                           AMDILCC::IL_CC_F_NE, AMDILCC::IL_CC_D_NE);
-  case ISD::SETEQ:
-    return selectCCForType(type, AMDILCC::IL_CC_I_EQ, AMDILCC::IL_CC_L_EQ,
-                           AMDILCC::IL_CC_F_EQ, AMDILCC::IL_CC_D_EQ);
-
-  // Unsigned (integer) / unordered (floating point) comparisons.
-  case ISD::SETUGT:
-    return selectCCForType(type, AMDILCC::IL_CC_U_GT, AMDILCC::IL_CC_UL_GT,
-                           AMDILCC::IL_CC_F_UGT, AMDILCC::IL_CC_D_UGT);
-  case ISD::SETUGE:
-    return selectCCForType(type, AMDILCC::IL_CC_U_GE, AMDILCC::IL_CC_UL_GE,
-                           AMDILCC::IL_CC_F_UGE, AMDILCC::IL_CC_D_UGE);
-  case ISD::SETULT:
-    return selectCCForType(type, AMDILCC::IL_CC_U_LT, AMDILCC::IL_CC_UL_LT,
-                           AMDILCC::IL_CC_F_ULT, AMDILCC::IL_CC_D_ULT);
-  case ISD::SETULE:
-    return selectCCForType(type, AMDILCC::IL_CC_U_LE, AMDILCC::IL_CC_UL_LE,
-                           AMDILCC::IL_CC_F_ULE, AMDILCC::IL_CC_D_ULE);
-  case ISD::SETUNE:
-    return selectCCForType(type, AMDILCC::IL_CC_U_NE, AMDILCC::IL_CC_UL_NE,
-                           AMDILCC::IL_CC_F_UNE, AMDILCC::IL_CC_D_UNE);
-  case ISD::SETUEQ:
-    return selectCCForType(type, AMDILCC::IL_CC_U_EQ, AMDILCC::IL_CC_UL_EQ,
-                           AMDILCC::IL_CC_F_UEQ, AMDILCC::IL_CC_D_UEQ);
-
-  // Ordered floating point comparisons; no integer encodings.
-  case ISD::SETOGT:
-    return selectCCForType(type, None, None,
-                           AMDILCC::IL_CC_F_OGT, AMDILCC::IL_CC_D_OGT);
-  case ISD::SETOGE:
-    return selectCCForType(type, None, None,
-                           AMDILCC::IL_CC_F_OGE, AMDILCC::IL_CC_D_OGE);
-  case ISD::SETOLT:
-    return selectCCForType(type, None, None,
-                           AMDILCC::IL_CC_F_OLT, AMDILCC::IL_CC_D_OLT);
-  case ISD::SETOLE:
-    return selectCCForType(type, None, None,
-                           AMDILCC::IL_CC_F_OLE, AMDILCC::IL_CC_D_OLE);
-  case ISD::SETONE:
-    return selectCCForType(type, None, None,
-                           AMDILCC::IL_CC_F_ONE, AMDILCC::IL_CC_D_ONE);
-  case ISD::SETOEQ:
-    return selectCCForType(type, None, None,
-                           AMDILCC::IL_CC_F_OEQ, AMDILCC::IL_CC_D_OEQ);
-
-  default:
-    break;
-  }
-
-  // Unknown condition code. Return explicitly so that builds without
-  // assertions do not fall into the handling of some other condition code.
-  errs()<<"Condition Code: "<< (unsigned int)CC<<"\n";
-  assert(0 && "Unknown condition code!");
-  return AMDILCC::COND_ERROR;
-}
-
-/// Lower incoming argument \p i, which was assigned to memory (\p VA), by
-/// creating a fixed stack object for it. Byval arguments yield the frame
-/// index itself; all others yield a load from that slot chained on \p Chain.
-SDValue
-AMDILTargetLowering::LowerMemArgument(
-    SDValue Chain,
-    CallingConv::ID CallConv,
-    const SmallVectorImpl<ISD::InputArg> &Ins,
-    DebugLoc dl, SelectionDAG &DAG,
-    const CCValAssign &VA,
-    MachineFrameInfo *MFI,
-    unsigned i) const
-{
-  ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
-
-  // In case of tail call optimization mark all arguments mutable, since they
-  // could be overwritten by lowering of arguments in case of a tail call.
-  const bool ForceMutable = (CallConv==CallingConv::Fast) &&
-    getTargetMachine().Options.GuaranteedTailCallOpt;
-
-  // FIXME: For now, all byval parameter objects are marked mutable. This can
-  // be changed with more analysis.
-  const bool SlotIsImmutable = !ForceMutable && !ArgFlags.isByVal();
-
-  // The slot is sized from the value type and placed at the offset the
-  // calling convention assigned.
-  int SlotFI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
-                                      VA.getLocMemOffset(), SlotIsImmutable);
-  SDValue SlotAddr = DAG.getFrameIndex(SlotFI, getPointerTy());
-
-  if (ArgFlags.isByVal()) {
-    return SlotAddr;
-  }
-
-  // Create the node corresponding to a load from this parameter slot.
-  return DAG.getLoad(VA.getValVT(), dl, Chain, SlotAddr,
-                     MachinePointerInfo::getFixedStack(SlotFI),
-                     false, false, false, 0);
-}
-//===----------------------------------------------------------------------===//
-// TargetLowering Implementation Help Functions End
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// TargetLowering Class Implementation Begins
-//===----------------------------------------------------------------------===//
-/// Configure operation legality for the AMDIL target: per-type actions for
-/// all supported scalar and vector types, device-dependent actions for 64-bit
-/// integer and double support, and a handful of global lowering parameters.
-/// Later setOperationAction() calls for the same (operation, type) pair
-/// override earlier ones, so the order of the sections below matters.
-AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
-  : TargetLowering(TM, new TargetLoweringObjectFileELF())
-{
-  // Every type handled by the backend.
-  static const MVT::SimpleValueType types[] =
-  {
-    MVT::i8,
-    MVT::i16,
-    MVT::i32,
-    MVT::f32,
-    MVT::f64,
-    MVT::i64,
-    MVT::v2i8,
-    MVT::v4i8,
-    MVT::v2i16,
-    MVT::v4i16,
-    MVT::v4f32,
-    MVT::v4i32,
-    MVT::v2f32,
-    MVT::v2i32,
-    MVT::v2f64,
-    MVT::v2i64
-  };
-
-  static const MVT::SimpleValueType IntTypes[] =
-  {
-    MVT::i8,
-    MVT::i16,
-    MVT::i32,
-    MVT::i64
-  };
-
-  static const MVT::SimpleValueType FloatTypes[] =
-  {
-    MVT::f32,
-    MVT::f64
-  };
-
-  static const MVT::SimpleValueType VectorTypes[] =
-  {
-    MVT::v2i8,
-    MVT::v4i8,
-    MVT::v2i16,
-    MVT::v4i16,
-    MVT::v4f32,
-    MVT::v4i32,
-    MVT::v2f32,
-    MVT::v2i32,
-    MVT::v2f64,
-    MVT::v2i64
-  };
-  const size_t numTypes = sizeof(types) / sizeof(*types);
-  const size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
-  const size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
-  const size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
-
-  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
-
-  // Actions shared by all supported types.
-  for (unsigned int x = 0; x < numTypes; ++x) {
-    const MVT::SimpleValueType VT = types[x];
-
-    //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
-    // We cannot sextinreg, expand to shifts
-    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
-    setOperationAction(ISD::SUBE, VT, Expand);
-    setOperationAction(ISD::SUBC, VT, Expand);
-    setOperationAction(ISD::ADDE, VT, Expand);
-    setOperationAction(ISD::ADDC, VT, Expand);
-    setOperationAction(ISD::SETCC, VT, Custom);
-    setOperationAction(ISD::BRCOND, VT, Custom);
-    setOperationAction(ISD::BR_CC, VT, Custom);
-    setOperationAction(ISD::BR_JT, VT, Expand);
-    setOperationAction(ISD::BRIND, VT, Expand);
-    // TODO: Implement custom UREM/SREM routines
-    setOperationAction(ISD::SREM, VT, Expand);
-    setOperationAction(ISD::GlobalAddress, VT, Custom);
-    setOperationAction(ISD::JumpTable, VT, Custom);
-    setOperationAction(ISD::ConstantPool, VT, Custom);
-    setOperationAction(ISD::SELECT, VT, Custom);
-    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
-    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
-    if (VT != MVT::i64 && VT != MVT::v2i64) {
-      setOperationAction(ISD::SDIV, VT, Custom);
-    }
-  }
-
-  // Scalar floating point types.
-  for (unsigned int x = 0; x < numFloatTypes; ++x) {
-    const MVT::SimpleValueType VT = FloatTypes[x];
-
-    // IL does not have these operations for floating point types
-    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
-    // NOTE(review): the ISD::SET* values below are condition codes, yet they
-    // are passed where every other call passes a node opcode; presumably
-    // setCondCodeAction() was intended - confirm before changing.
-    setOperationAction(ISD::SETOLT, VT, Expand);
-    setOperationAction(ISD::SETOGE, VT, Expand);
-    setOperationAction(ISD::SETOGT, VT, Expand);
-    setOperationAction(ISD::SETOLE, VT, Expand);
-    setOperationAction(ISD::SETULT, VT, Expand);
-    setOperationAction(ISD::SETUGE, VT, Expand);
-    setOperationAction(ISD::SETUGT, VT, Expand);
-    setOperationAction(ISD::SETULE, VT, Expand);
-  }
-
-  // Scalar integer types.
-  for (unsigned int x = 0; x < numIntTypes; ++x) {
-    const MVT::SimpleValueType VT = IntTypes[x];
-
-    // GPU also does not have divrem function for signed or unsigned
-    setOperationAction(ISD::SDIVREM, VT, Expand);
-
-    // GPU does not have [S|U]MUL_LOHI functions as a single instruction
-    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
-    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
-
-    // GPU doesn't have a rotl, rotr, or byteswap instruction
-    setOperationAction(ISD::ROTR, VT, Expand);
-    setOperationAction(ISD::BSWAP, VT, Expand);
-
-    // GPU doesn't have any counting operators
-    setOperationAction(ISD::CTPOP, VT, Expand);
-    setOperationAction(ISD::CTTZ, VT, Expand);
-    setOperationAction(ISD::CTLZ, VT, Expand);
-  }
-
-  // Vector types. SETCC and SELECT override the Custom action installed by
-  // the all-types loop above.
-  for (unsigned int ii = 0; ii < numVectorTypes; ++ii) {
-    const MVT::SimpleValueType VT = VectorTypes[ii];
-
-    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
-    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
-    setOperationAction(ISD::SDIVREM, VT, Expand);
-    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
-    // setOperationAction(ISD::VSETCC, VT, Expand);
-    setOperationAction(ISD::SETCC, VT, Expand);
-    setOperationAction(ISD::SELECT_CC, VT, Expand);
-    setOperationAction(ISD::SELECT, VT, Expand);
-  }
-
-  // Devices that support 64-bit integer operations.
-  if (STM.device()->isSupported(AMDILDeviceInfo::LongOps)) {
-    setOperationAction(ISD::MULHU, MVT::i64, Expand);
-    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
-    setOperationAction(ISD::MULHS, MVT::i64, Expand);
-    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
-    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
-    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
-    setOperationAction(ISD::Constant          , MVT::i64  , Legal);
-    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
-    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
-    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
-    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
-    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
-  }
-
-  // Devices that support double precision.
-  if (STM.device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
-    // we support loading/storing v2f64 but not operations on the type
-    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
-    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
-    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
-    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
-    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
-    setOperationAction(ISD::ConstantFP        , MVT::f64  , Legal);
-    // We want to expand vector conversions into their scalar
-    // counterparts.
-    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
-    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
-    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
-    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
-    setOperationAction(ISD::FABS, MVT::f64, Expand);
-    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
-  }
-
-  // TODO: Fix the UDIV24 algorithm so it works for these
-  // types correctly. This needs vector comparisons
-  // for this to work correctly.
-  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
-  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
-  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
-  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
-  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
-  setOperationAction(ISD::SUBC, MVT::Other, Expand);
-  setOperationAction(ISD::ADDE, MVT::Other, Expand);
-  setOperationAction(ISD::ADDC, MVT::Other, Expand);
-  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
-  setOperationAction(ISD::BR_CC, MVT::Other, Custom);
-  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
-  setOperationAction(ISD::BRIND, MVT::Other, Expand);
-  setOperationAction(ISD::SETCC, MVT::Other, Custom);
-  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
-
-  setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
-  // Use the default implementation.
-  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
-  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
-  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
-  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
-  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
-  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
-  setOperationAction(ISD::ConstantFP        , MVT::f32    , Legal);
-  setOperationAction(ISD::Constant          , MVT::i32    , Legal);
-  setOperationAction(ISD::TRAP              , MVT::Other  , Legal);
-
-  // Global lowering parameters.
-  setStackPointerRegisterToSaveRestore(AMDGPU::SP);
-  setSchedulingPreference(Sched::RegPressure);
-  setPow2DivIsCheap(false);
-  setPrefLoopAlignment(16);
-  setSelectIsExpensive(true);
-  setJumpIsExpensive(true);
-
-  maxStoresPerMemcpy  = 4096;
-  maxStoresPerMemmove = 4096;
-  maxStoresPerMemset  = 4096;
-}
-
-const char *
-AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
-{
- switch (Opcode) {
- default: return 0;
- case AMDILISD::CMOVLOG: return "AMDILISD::CMOVLOG";
- case AMDILISD::MAD: return "AMDILISD::MAD";
- case AMDILISD::CALL: return "AMDILISD::CALL";
- case AMDILISD::SELECT_CC: return "AMDILISD::SELECT_CC";
- case AMDILISD::UMUL: return "AMDILISD::UMUL";
- case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF";
- case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
- case AMDILISD::CMP: return "AMDILISD::CMP";
- case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT";
- case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE";
- case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT";
- case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE";
- case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ";
- case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE";
- case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
- case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
-
- };
-}
-/// getTgtMemIntrinsic - Hook for describing target intrinsics that touch
-/// memory. None are described here: Info is left untouched and the result
-/// is always false.
-bool AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
-                                             const CallInst &I,
-                                             unsigned Intrinsic) const {
-  return false;
-}
-
-// isFPImmLegal - Floating point immediates are legal exactly when the
-// scalar element type of VT is f32 or f64. The value of Imm itself is not
-// inspected.
-bool AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
-  switch (VT.getScalarType().getSimpleVT().SimpleTy) {
-  case MVT::f32:
-  case MVT::f64:
-    return true;
-  default:
-    return false;
-  }
-}
-
-// ShouldShrinkFPConstant - Answer false when the scalar element type of VT
-// is f32 or f64, and true for every other type.
-bool AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
-  switch (VT.getScalarType().getSimpleVT().SimpleTy) {
-  case MVT::f32:
-  case MVT::f64:
-    return false;
-  default:
-    return true;
-  }
-}
-
-
-// computeMaskedBitsForTargetNode - Determine which bits of the target
-// specific node 'Op' are known to be zero or known to be one.
-//
-//   Op        - the target specific node being queried.
-//   KnownZero - out: bits known to be zero. Reset to "nothing known" first.
-//   KnownOne  - out: bits known to be one. Reset to "nothing known" first.
-//   DAG       - the DAG used to analyse the operands of Op.
-//   Depth     - current recursion depth of the known-bits query.
-//
-// Only AMDILISD::SELECT_CC is analysed; every other opcode reports that
-// nothing is known.
-
-void
-AMDILTargetLowering::computeMaskedBitsForTargetNode(
-    const SDValue Op,
-    APInt &KnownZero,
-    APInt &KnownOne,
-    const SelectionDAG &DAG,
-    unsigned Depth) const
-{
-  APInt KnownZero2;
-  APInt KnownOne2;
-  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
-  switch (Op.getOpcode()) {
-  default:
-    break;
-  case AMDILISD::SELECT_CC:
-    // Both operand queries must carry Depth + 1. Omitting it restarts the
-    // depth count at its default for that operand, which defeats the
-    // recursion limit of the known-bits analysis.
-    DAG.ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth + 1);
-    DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth + 1);
-    assert((KnownZero & KnownOne) == 0
-        && "Bits known to be one AND zero?");
-    assert((KnownZero2 & KnownOne2) == 0
-        && "Bits known to be one AND zero?");
-    // A bit is only known if it is known in both analysed operands.
-    KnownOne &= KnownOne2;
-    KnownZero &= KnownZero2;
-    break;
-  }
-}
-
-// CCAssignFnForNode - Select the calling convention assignment function.
-// There is a single convention, so Op is ignored and CC_AMDIL32 is always
-// returned.
-CCAssignFn *AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const {
-  return CC_AMDIL32;
-}
-
-// LowerCallResult - Copy the values returned by a call out of the physical
-// registers chosen by RetCC_AMDIL32.
-//
-// Chain and InFlag are the incoming chain and glue. One value is appended
-// to InVals for every result that was assigned to a register; results
-// assigned elsewhere are skipped. The updated chain is returned.
-SDValue AMDILTargetLowering::LowerCallResult(
-    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
-    const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG,
-    SmallVectorImpl<SDValue> &InVals) const {
-  SmallVector<CCValAssign, 16> ResultLocs;
-  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
-                 getTargetMachine(), ResultLocs, *DAG.getContext());
-  CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);
-
-  for (unsigned Idx = 0; Idx != ResultLocs.size(); ++Idx) {
-    if (!ResultLocs[Idx].isRegLoc())
-      continue;
-    EVT ResultVT = ResultLocs[Idx].getValVT();
-    // The copy node yields three results: value (0), chain (1), glue (2).
-    SDValue Copy = DAG.getCopyFromReg(Chain, dl, ResultLocs[Idx].getLocReg(),
-                                      ResultVT, InFlag);
-    Chain = Copy.getValue(1);
-    InFlag = Copy.getValue(2);
-    InVals.push_back(Copy.getValue(0));
-  }
-  return Chain;
-}
-
-//===----------------------------------------------------------------------===//
-// Other Lowering Hooks
-//===----------------------------------------------------------------------===//
-
-// Ordered - Give 'New' the ordering 'order' when it has none yet, then do
-// the same for each of its operands, recursively. Nothing happens when
-// 'order' is zero or when 'New' already has a non-zero ordering.
-// Keeps instructions in source order under -O0. Always returns 'New'.
-static const SDValue
-Ordered(SelectionDAG &DAG, unsigned order, const SDValue New)
-{
-  SDNode *Node = New.getNode();
-  if (order == 0 || DAG.GetOrdering(Node) != 0)
-    return New;
-
-  DAG.AssignOrdering(Node, order);
-  const unsigned NumOps = New.getNumOperands();
-  for (unsigned Idx = 0; Idx < NumOps; ++Idx)
-    Ordered(DAG, order, New.getOperand(Idx));
-  return New;
-}
-
-// LOWER(A) expands to a case label for ISD::A that calls LowerA(Op, DAG)
-// and propagates the ordering of the original node onto the replacement.
-#define LOWER(A) \
-  case ISD:: A: \
-return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )
-
-// LowerOperation - Dispatch a node marked for custom lowering to the
-// matching Lower<Opcode> routine.
-//
-// An opcode without a routine dumps the node and asserts. When asserts are
-// compiled out the node is returned unchanged.
-SDValue
-AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
-{
-  switch (Op.getOpcode()) {
-    default:
-      Op.getNode()->dump();
-      // The two literals are concatenated by the compiler, so the first
-      // one needs the trailing space to keep the words apart.
-      assert(0 && "Custom lowering code for this "
-          "instruction is not implemented yet!");
-      break;
-      LOWER(GlobalAddress);
-      LOWER(JumpTable);
-      LOWER(ConstantPool);
-      LOWER(ExternalSymbol);
-      LOWER(SDIV);
-      LOWER(SREM);
-      LOWER(BUILD_VECTOR);
-      LOWER(SELECT);
-      LOWER(SETCC);
-      LOWER(SIGN_EXTEND_INREG);
-      LOWER(DYNAMIC_STACKALLOC);
-      LOWER(BRCOND);
-      LOWER(BR_CC);
-  }
-  return Op;
-}
-
-#undef LOWER
-
-// LowerGlobalAddress - Lower an ISD::GlobalAddress node.
-//
-// A global variable whose initializer is a ConstantInt, ConstantFP or
-// ConstantAggregateZero is replaced by that constant in the type of Op.
-// Everything else (non-variable globals, variables without an initializer,
-// and unsupported initializers) becomes an i32 target global address.
-SDValue
-AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
-{
-  const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
-  const GlobalValue *G = GADN->getGlobal();
-  DebugLoc DL = Op.getDebugLoc();
-  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
-
-  // Use G rather than GV here: GV is null whenever the global is not a
-  // variable, and a null pointer must not reach getTargetGlobalAddress.
-  if (!GV || !GV->hasInitializer()) {
-    return DAG.getTargetGlobalAddress(G, DL, MVT::i32);
-  }
-
-  const Constant *C = GV->getInitializer();
-  EVT VT = Op.getValueType();
-  if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
-    return DAG.getConstant(CI->getValue(), VT);
-  }
-  if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
-    return DAG.getConstantFP(CF->getValueAPF(), VT);
-  }
-  if (isa<ConstantAggregateZero>(C)) {
-    if (VT.isInteger()) {
-      return DAG.getConstant(0, VT);
-    }
-    return DAG.getConstantFP(0, VT);
-  }
-
-  // Dump before asserting so the offending initializer is printed before
-  // an assert-enabled build aborts.
-  C->dump();
-  assert(0 && "lowering this type of Global Address "
-      "not implemented yet!");
-  return DAG.getTargetGlobalAddress(G, DL, MVT::i32);
-}
-
-// LowerJumpTable - Replace a jump table node by an i32 target jump table
-// node with the same index.
-SDValue
-AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
-{
-  const int TableIndex = cast<JumpTableSDNode>(Op)->getIndex();
-  return DAG.getTargetJumpTable(TableIndex, MVT::i32);
-}
-// LowerConstantPool - Replace a constant pool node by the equivalent target
-// constant pool node, keeping its type, alignment, offset and target flags.
-// Machine constant pool entries and ordinary constants use different
-// accessors for the pooled value.
-SDValue
-AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
-{
-  ConstantPoolSDNode *Pool = cast<ConstantPoolSDNode>(Op);
-  EVT PtrVT = Op.getValueType();
-  if (Pool->isMachineConstantPoolEntry()) {
-    return DAG.getTargetConstantPool(Pool->getMachineCPVal(), PtrVT,
-                                     Pool->getAlignment(), Pool->getOffset(),
-                                     Pool->getTargetFlags());
-  }
-  return DAG.getTargetConstantPool(Pool->getConstVal(), PtrVT,
-                                   Pool->getAlignment(), Pool->getOffset(),
-                                   Pool->getTargetFlags());
-}
-
-// LowerExternalSymbol - Replace an external symbol node by an i32 target
-// external symbol node for the same symbol name.
-SDValue
-AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
-{
-  ExternalSymbolSDNode *Symbol = cast<ExternalSymbolSDNode>(Op);
-  return DAG.getTargetExternalSymbol(Symbol->getSymbol(), MVT::i32);
-}
-
-/// LowerFormalArguments - Produce one value per incoming argument.
-/// Register arguments are copied out of a live-in virtual register and
-/// narrowed back to their declared type when they were promoted. Memory
-/// arguments are delegated to LowerMemArgument. The incoming chain is
-/// returned unchanged.
-/// TODO: isVarArg, hasStructRet, isMemReg
-SDValue AMDILTargetLowering::LowerFormalArguments(
-    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
-    const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG,
-    SmallVectorImpl<SDValue> &InVals) const {
-  MachineFunction &MF = DAG.getMachineFunction();
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-
-  // Locations are assigned with the calling convention recorded on the
-  // function, not with the CallConv parameter.
-  SmallVector<CCValAssign, 16> ArgLocs;
-  CallingConv::ID CC = MF.getFunction()->getCallingConv();
-  CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
-                 getTargetMachine(), ArgLocs, *DAG.getContext());
-
-  // When more calling conventions are added, they need to be chosen here.
-  CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
-
-  const unsigned int NumLocs = ArgLocs.size();
-  for (unsigned int i = 0; i != NumLocs; ++i) {
-    CCValAssign &VA = ArgLocs[i];
-    if (VA.isRegLoc()) {
-      EVT RegVT = VA.getLocVT();
-      const TargetRegisterClass *RC =
-          getRegClassFor(RegVT.getSimpleVT().SimpleTy);
-      unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
-      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
-
-      // A promoted argument records how it was extended, then is truncated
-      // back to the type the function declared.
-      switch (VA.getLocInfo()) {
-      case CCValAssign::SExt:
-        ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
-                               DAG.getValueType(VA.getValVT()));
-        break;
-      case CCValAssign::ZExt:
-        ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
-                               DAG.getValueType(VA.getValVT()));
-        break;
-      default:
-        break;
-      }
-      if (VA.getLocInfo() != CCValAssign::Full) {
-        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
-      }
-
-      InVals.push_back(ArgValue);
-      if (isVarArg) {
-        assert(0 && "Variable arguments are not yet supported");
-        // See MipsISelLowering.cpp for ideas on how to implement
-      }
-    } else if (VA.isMemLoc()) {
-      InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
-                                        dl, DAG, VA, MFI, i));
-    } else {
-      assert(0 && "found a Value Assign that is "
-          "neither a register or a memory location");
-    }
-  }
-
-  if (isVarArg) {
-    assert(0 && "Variable arguments are not yet supported");
-    // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
-  }
-  return Chain;
-}
-/// CreateCopyOfByValArgument - Copy the aggregate at "Src" to "Dst" with an
-/// always-inline, non-volatile memcpy. Size and alignment are taken from
-/// the byval information in "Flags". The function asserts on entry because
-/// the memcpy it relies on is not available yet.
-static SDValue
-CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
-                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
-  assert(0 && "MemCopy does not exist yet");
-
-  SDValue ByteCount = DAG.getConstant(Flags.getByValSize(), MVT::i32);
-  return DAG.getMemcpy(Chain, Src.getDebugLoc(), Dst, Src, ByteCount,
-                       Flags.getByValAlign(),
-                       /*IsVol=*/false, /*AlwaysInline=*/true,
-                       MachinePointerInfo(), MachinePointerInfo());
-}
-
-// LowerMemOpCallTo - Place an outgoing argument at StackPtr plus the
-// memory offset recorded in VA. Byval arguments are copied with
-// CreateCopyOfByValArgument; all others are stored directly. The node that
-// performs the copy or store is returned.
-SDValue
-AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
-                                      SDValue StackPtr, SDValue Arg,
-                                      DebugLoc dl, SelectionDAG &DAG,
-                                      const CCValAssign &VA,
-                                      ISD::ArgFlagsTy Flags) const
-{
-  unsigned int LocMemOffset = VA.getLocMemOffset();
-  SDValue Offset = DAG.getIntPtrConstant(LocMemOffset);
-  SDValue Slot = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Offset);
-
-  if (Flags.isByVal())
-    return CreateCopyOfByValArgument(Arg, Slot, Chain, Flags, DAG);
-
-  return DAG.getStore(Chain, dl, Arg, Slot,
-                      MachinePointerInfo::getStack(LocMemOffset),
-                      false, false, 0);
-}
-/// LowerCall - Lower an outgoing call described by CLI.
-///
-/// Each argument is promoted as its assigned location requires, then is
-/// either queued for a register copy or stored to a fixed stack object.
-/// The call is bracketed by CALLSEQ_START / CALLSEQ_END and emitted as an
-/// AMDILISD::CALL node. Results are copied out by LowerCallResult, which
-/// appends them to InVals; the final chain is returned.
-///
-/// Tail calls are not implemented: CLI.IsTailCall is cleared on entry and
-/// an ordinary call is always emitted.
-/// TODO: isVarArg, isTailCall, hasStructRet
-SDValue
-AMDILTargetLowering::LowerCall(CallLoweringInfo &CLI,
-                               SmallVectorImpl<SDValue> &InVals) const
-{
-  CLI.IsTailCall = false;
-  SelectionDAG &DAG = CLI.DAG;
-  MachineFunction &MF = DAG.getMachineFunction();
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-
-  // Analyze operands of the call, assigning locations to each operand.
-  // This needs to change if more than one calling convention is added.
-  SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(CLI.CallConv, CLI.IsVarArg, MF,
-                 getTargetMachine(), ArgLocs, *DAG.getContext());
-  CCInfo.AnalyzeCallOperands(CLI.Outs, CCAssignFnForNode(CLI.CallConv));
-
-  unsigned int NumBytes = CCInfo.getNextStackOffset();
-
-  CLI.Chain = DAG.getCALLSEQ_START(CLI.Chain,
-                                   DAG.getIntPtrConstant(NumBytes, true));
-
-  SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
-  SmallVector<SDValue, 8> MemOpChains;
-
-  // Walk the register/memloc assignments, insert copies/stores.
-  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
-    CCValAssign &VA = ArgLocs[i];
-    SDValue Arg = CLI.OutVals[i];
-
-    // Promote the value if needed.
-    switch (VA.getLocInfo()) {
-    default: assert(0 && "Unknown loc info!");
-    case CCValAssign::Full:
-      break;
-    case CCValAssign::SExt:
-      Arg = DAG.getNode(ISD::SIGN_EXTEND, CLI.DL, VA.getLocVT(), Arg);
-      break;
-    case CCValAssign::ZExt:
-      Arg = DAG.getNode(ISD::ZERO_EXTEND, CLI.DL, VA.getLocVT(), Arg);
-      break;
-    case CCValAssign::AExt:
-      Arg = DAG.getNode(ISD::ANY_EXTEND, CLI.DL, VA.getLocVT(), Arg);
-      break;
-    }
-
-    if (VA.isRegLoc()) {
-      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
-    } else if (VA.isMemLoc()) {
-      // Create the fixed frame object that receives this argument and
-      // store the value into it.
-      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
-                                      VA.getLocMemOffset(), true);
-      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
-      MemOpChains.push_back(DAG.getStore(CLI.Chain, CLI.DL, Arg, PtrOff,
-                                         MachinePointerInfo::getFixedStack(FI),
-                                         false, false, 0));
-    } else {
-      assert(0 && "Not a Reg/Mem Loc, major error!");
-    }
-  }
-
-  // Join all argument stores so the call depends on every one of them.
-  if (!MemOpChains.empty()) {
-    CLI.Chain = DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other,
-                            &MemOpChains[0], MemOpChains.size());
-  }
-
-  // Copy register arguments, gluing the copies together.
-  SDValue InFlag;
-  for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
-    CLI.Chain = DAG.getCopyToReg(CLI.Chain, CLI.DL,
-                                 RegsToPass[i].first,
-                                 RegsToPass[i].second,
-                                 InFlag);
-    InFlag = CLI.Chain.getValue(1);
-  }
-
-  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
-  // every direct call is) turn it into a TargetGlobalAddress/
-  // TargetExternalSymbol node so that legalize doesn't hack it.
-  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(CLI.Callee)) {
-    CLI.Callee = DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL,
-                                            getPointerTy());
-  } else if (ExternalSymbolSDNode *S =
-                 dyn_cast<ExternalSymbolSDNode>(CLI.Callee)) {
-    CLI.Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
-  }
-
-  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE);
-  SmallVector<SDValue, 8> Ops;
-
-  // If this is a direct call, pass the chain and the callee.
-  if (CLI.Callee.getNode()) {
-    Ops.push_back(CLI.Chain);
-    Ops.push_back(CLI.Callee);
-  }
-
-  // Add argument registers to the end of the list so that they are known
-  // live into the call.
-  for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
-    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
-                                  RegsToPass[i].second.getValueType()));
-  }
-  if (InFlag.getNode()) {
-    Ops.push_back(InFlag);
-  }
-
-  // NOTE(review): &Ops[0] assumes Ops is non-empty, i.e. that the callee
-  // node is always present - confirm against callers.
-  CLI.Chain = DAG.getNode(AMDILISD::CALL, CLI.DL,
-                          NodeTys, &Ops[0], Ops.size());
-  InFlag = CLI.Chain.getValue(1);
-
-  // Create the CALLSEQ_END node.
-  CLI.Chain = DAG.getCALLSEQ_END(CLI.Chain,
-                                 DAG.getIntPtrConstant(NumBytes, true),
-                                 DAG.getIntPtrConstant(0, true),
-                                 InFlag);
-  InFlag = CLI.Chain.getValue(1);
-
-  // Handle result values, copying them out of physregs into vregs that
-  // we return.
-  return LowerCallResult(CLI.Chain, InFlag, CLI.CallConv, CLI.IsVarArg,
-                         CLI.Ins, CLI.DL, DAG, InVals);
-}
-
-// LowerSDIV - Pick the signed division lowering that matches the scalar
-// element type: i64, i32, or the shared i16/i8 routine. Any other type is
-// returned as result 0 of the original node.
-SDValue
-AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
-{
-  EVT ScalarVT = Op.getValueType().getScalarType();
-  if (ScalarVT == MVT::i64)
-    return LowerSDIV64(Op, DAG);
-  if (ScalarVT == MVT::i32)
-    return LowerSDIV32(Op, DAG);
-  if (ScalarVT == MVT::i16 || ScalarVT == MVT::i8)
-    return LowerSDIV24(Op, DAG);
-  return SDValue(Op.getNode(), 0);
-}
-
-// LowerSREM - Pick the signed remainder lowering that matches the scalar
-// element type: i64, i32, i16 or i8. Any other type is returned as result
-// 0 of the original node.
-SDValue
-AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
-{
-  EVT ScalarVT = Op.getValueType().getScalarType();
-  if (ScalarVT == MVT::i64)
-    return LowerSREM64(Op, DAG);
-  if (ScalarVT == MVT::i32)
-    return LowerSREM32(Op, DAG);
-  if (ScalarVT == MVT::i16)
-    return LowerSREM16(Op, DAG);
-  if (ScalarVT == MVT::i8)
-    return LowerSREM8(Op, DAG);
-  return SDValue(Op.getNode(), 0);
-}
-
-// LowerBUILD_VECTOR - Build the vector from operand 0 with a VBUILD node,
-// then insert the remaining defined operands one by one.
-//
-// Only nodes with two, three or four operands get inserts; undef operands
-// are skipped. Operands are visited from the last one down to operand 1,
-// and operand N is inserted with index constant N + 4 (5, 6, 7).
-// NOTE(review): the meaning of the 5/6/7 index encoding is not visible
-// from here - confirm against the INSERT_VECTOR_ELT patterns.
-SDValue
-AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
-{
-  EVT VT = Op.getValueType();
-  DebugLoc DL = Op.getDebugLoc();
-  SDValue Vec = DAG.getNode(AMDILISD::VBUILD, DL, VT, Op.getOperand(0));
-
-  const unsigned NumOps = Op.getNumOperands();
-  if (NumOps < 2 || NumOps > 4)
-    return Vec;
-
-  for (unsigned Lane = NumOps - 1; Lane != 0; --Lane) {
-    SDValue Elt = Op.getOperand(Lane);
-    if (Elt.getOpcode() == ISD::UNDEF)
-      continue;
-    Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
-                      Vec, Elt, DAG.getConstant(Lane + 4, MVT::i32));
-  }
-  return Vec;
-}
-
-// LowerSELECT - Lower a select to a CMOVLOG node. The condition is first
-// passed through getConversionNode, then used to choose between operand 1
-// and operand 2.
-SDValue
-AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
-{
-  SDValue Pred = Op.getOperand(0);
-  SDValue TrueVal = Op.getOperand(1);
-  SDValue FalseVal = Op.getOperand(2);
-  Pred = getConversionNode(DAG, Pred, Op, true);
-  return DAG.getNode(AMDILISD::CMOVLOG, Op.getDebugLoc(),
-                     Op.getValueType(), Pred, TrueVal, FalseVal);
-}
-// LowerSETCC - Lower a setcc to a SELECT_CC that yields -1 or 0, pass the
-// result through getConversionNode, and mask it with 1.
-// The condition code is checked against CondCCodeToCC in assert builds.
-SDValue
-AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
-{
-  SDValue LHS = Op.getOperand(0);
-  SDValue RHS = Op.getOperand(1);
-  SDValue CC = Op.getOperand(2);
-  DebugLoc DL = Op.getDebugLoc();
-
-  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
-  unsigned int AMDILCC = CondCCodeToCC(
-      SetCCOpcode, LHS.getValueType().getSimpleVT().SimpleTy);
-  assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
-
-  SDValue Mask = DAG.getNode(ISD::SELECT_CC, Op.getDebugLoc(),
-                             LHS.getValueType(), LHS, RHS,
-                             DAG.getConstant(-1, MVT::i32),
-                             DAG.getConstant(0, MVT::i32),
-                             CC);
-  Mask = getConversionNode(DAG, Mask, Op, true);
-  return DAG.getNode(ISD::AND, DL, Mask.getValueType(),
-                     DAG.getConstant(1, Mask.getValueType()), Mask);
-}
-
-// LowerSIGN_EXTEND_INREG - Sign extend the low bits of operand 0 in place
-// with a shift-left / arithmetic-shift-right pair.
-//
-// Operand 1 names the type whose width is to be extended. Data narrower
-// than 32 bits is zero extended to a 32 bit integer type first and
-// converted back to its original type after the shifts.
-SDValue
-AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
-{
-  SDValue Data = Op.getOperand(0);
-  DebugLoc DL = Op.getDebugLoc();
-  EVT WorkVT = Data.getValueType();
-  EVT FieldVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
-
-  unsigned FieldBits = FieldVT.getScalarType().getSizeInBits();
-  unsigned DataBits =
-      WorkVT.isSimple() ? WorkVT.getScalarType().getSizeInBits() : 1;
-  const bool Widen = DataBits < 32;
-
-  unsigned ShiftAmt = DataBits - FieldBits;
-  if (Widen) {
-    // Work in 32 bits so the upper bits stay valid during the shifts.
-    WorkVT = genIntType(32,
-                        WorkVT.isVector() ? WorkVT.getVectorNumElements() : 1);
-    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, WorkVT, Data);
-    ShiftAmt = 32 - FieldBits;
-  }
-
-  SDValue Shift = DAG.getConstant(ShiftAmt, WorkVT);
-  Data = DAG.getNode(ISD::SHL, DL, WorkVT, Data, Shift);
-  Data = DAG.getNode(ISD::SRA, DL, WorkVT, Data, Shift);
-
-  if (Widen) {
-    // Convert back to the type the operand originally had.
-    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
-  }
-  return Data;
-}
-// genIntType - Build an integer type that covers size * numEle bits.
-// The element type is i64 when size is 64 and i32 otherwise. The element
-// count is the total bit count divided by the element width, with a
-// minimum of one; a count of one gives the scalar type.
-EVT
-AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
-{
-  const bool Is64 = (size == 64);
-  int totalBits = (size * numEle);
-  int lanes = (totalBits >> (Is64 ? 6 : 5));
-  if (lanes == 0)
-    lanes = 1;
-
-  if (Is64)
-    return (lanes == 1) ? EVT(MVT::i64)
-                        : EVT(MVT::getVectorVT(MVT::i64, lanes));
-  return (lanes == 1) ? EVT(MVT::i32)
-                      : EVT(MVT::getVectorVT(MVT::i32, lanes));
-}
-
-// LowerDYNAMIC_STACKALLOC - Read AMDGPU::SP, add the requested size
-// (operand 1) to it, and write the sum back to AMDGPU::SP. The merged
-// result carries the new stack pointer value and the output chain.
-SDValue
-AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
-                                             SelectionDAG &DAG) const
-{
-  DebugLoc DL = Op.getDebugLoc();
-  unsigned int SPReg = AMDGPU::SP;
-
-  SDValue OldSP = DAG.getCopyFromReg(Op.getOperand(0), DL, SPReg, MVT::i32);
-  SDValue NewSP = DAG.getNode(ISD::ADD, DL, MVT::i32, OldSP,
-                              Op.getOperand(1));
-  SDValue OutChain = DAG.getCopyToReg(OldSP.getValue(1), DL, SPReg, NewSP);
-
-  SDValue Results[2] = {NewSP, OutChain};
-  return DAG.getMergeValues(Results, 2, DL);
-}
-// LowerBRCOND - Lower a conditional branch to BRANCH_COND. The operands
-// are reordered to chain, jump target, condition.
-SDValue
-AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
-{
-  SDValue Chain = Op.getOperand(0);
-  SDValue Cond = Op.getOperand(1);
-  SDValue Target = Op.getOperand(2);
-  return DAG.getNode(AMDILISD::BRANCH_COND, Op.getDebugLoc(),
-                     Op.getValueType(), Chain, Target, Cond);
-}
-
-// LowerBR_CC - Lower a compare-and-branch. The comparison becomes a
-// SELECT_CC yielding -1 or 0, which then drives a BRANCH_COND to the jump
-// target in operand 4.
-SDValue
-AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
-{
-  SDValue Chain = Op.getOperand(0);
-  SDValue CC = Op.getOperand(1);
-  SDValue LHS = Op.getOperand(2);
-  SDValue RHS = Op.getOperand(3);
-  SDValue Target = Op.getOperand(4);
-
-  SDValue Flag = DAG.getNode(ISD::SELECT_CC, Op.getDebugLoc(),
-                             LHS.getValueType(), LHS, RHS,
-                             DAG.getConstant(-1, MVT::i32),
-                             DAG.getConstant(0, MVT::i32),
-                             CC);
-  return DAG.getNode(AMDILISD::BRANCH_COND, Flag.getDebugLoc(),
-                     MVT::Other, Chain, Target, Flag);
-}
-
-// LowerReturn - Lower a function return.
-//
-// Return values are assigned to registers with RetCC_AMDIL32, those
-// registers are added to the function's live-out set, and each value is
-// copied into its register with the copies glued together. The result is
-// a RET_FLAG node whose operands are the chain, a zero constant, and the
-// glue when any copy was emitted.
-SDValue
-AMDILTargetLowering::LowerReturn(SDValue Chain,
-                                 CallingConv::ID CallConv, bool isVarArg,
-                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
-                                 const SmallVectorImpl<SDValue> &OutVals,
-                                 DebugLoc dl, SelectionDAG &DAG)
-const
-{
-  SmallVector<CCValAssign, 16> RetLocs;
-  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
-                 getTargetMachine(), RetLocs, *DAG.getContext());
-  CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32);
-
-  const unsigned int NumLocs = RetLocs.size();
-
-  // Register every return register as live-out, once.
-  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
-  for (unsigned int i = 0; i != NumLocs; ++i) {
-    if (!RetLocs[i].isRegLoc())
-      continue;
-    if (MRI.isLiveOut(RetLocs[i].getLocReg()))
-      continue;
-    MRI.addLiveOut(RetLocs[i].getLocReg());
-  }
-  // FIXME: implement tail call handling here when tail calls are
-  // implemented; both x86 and ppc do this in ISelLowering.
-
-  SmallVector<SDValue, 6> RetOps;
-  RetOps.push_back(Chain); // replaced below once the copies are chained
-  RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
-
-  SDValue Glue;
-  for (unsigned int i = 0; i != NumLocs; ++i) {
-    CCValAssign &VA = RetLocs[i];
-    SDValue ValToCopy = OutVals[i];
-    assert(VA.isRegLoc() && "Can only return in registers!");
-    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Glue);
-    // Glue keeps all emitted copies stuck together.
-    Glue = Chain.getValue(1);
-  }
-
-  RetOps[0] = Chain;
-  if (Glue.getNode())
-    RetOps.push_back(Glue);
-
-  return DAG.getNode(AMDILISD::RET_FLAG, dl, MVT::Other,
-                     &RetOps[0], RetOps.size());
-}
-
-// getFunctionAlignment - Always reports 0; the function argument is not
-// inspected.
-unsigned int AMDILTargetLowering::getFunctionAlignment(const Function *) const {
-  return 0;
-}
-
-// LowerSDIV24 - Signed division for i8/i16 (scalar, 2- or 4-element
-// vector) done in floating point.
-//
-// The quotient is estimated as trunc(fa / fb). A correction term jq, which
-// is derived from the sign of LHS ^ RHS, is added when the absolute
-// remainder is not smaller than the absolute divisor.
-SDValue
-AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
-{
-  DebugLoc DL = Op.getDebugLoc();
-  EVT OVT = Op.getValueType();
-  SDValue LHS = Op.getOperand(0);
-  SDValue RHS = Op.getOperand(1);
-
-  // Working integer and float types with the same element count as OVT.
-  // Other element counts leave both default constructed.
-  MVT INTTY;
-  MVT FLTTY;
-  if (!OVT.isVector()) {
-    INTTY = MVT::i32;
-    FLTTY = MVT::f32;
-  } else {
-    switch (OVT.getVectorNumElements()) {
-    case 2:
-      INTTY = MVT::v2i32;
-      FLTTY = MVT::v2f32;
-      break;
-    case 4:
-      INTTY = MVT::v4i32;
-      FLTTY = MVT::v4f32;
-      break;
-    default:
-      break;
-    }
-  }
-  unsigned bitsize = OVT.getScalarType().getSizeInBits();
-
-  // jq = ((ia ^ ib) >> (bitsize - 2)) | 1, widened to the integer type.
-  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
-  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
-  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
-  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
-
-  // Widen both operands and convert them to float.
-  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
-  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
-  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
-  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
-
-  // fq = trunc(native_divide(fa, fb))
-  SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
-  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
-
-  // fr = mad(-fq, fb, fa): the remainder of the estimate.
-  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
-  SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa);
-
-  // iq = (int)fq
-  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
-
-  // cv = fabs(fr) >= fabs(fb)
-  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
-  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
-  SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
-
-  // dst = iq + (cv ? jq : 0)
-  jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq,
-                   DAG.getConstant(0, OVT));
-  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
-  return DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
-}
-
-// LowerSDIV32 - Signed 32 bit division built on an unsigned divide.
-//
-// A sign mask (-1 when negative, 0 otherwise) is computed for each
-// operand. (x + mask) ^ mask gives the magnitude, the magnitudes are
-// divided unsigned, and the same add/xor with the combined mask restores
-// the sign of the quotient. Equivalent IL:
-//   ilt r10, LHS, 0     ilt r11, RHS, 0
-//   iadd/ixor r0 with r10, iadd/ixor r1 with r11
-//   udiv r0, r0, r1
-//   ixor r10, r10, r11
-//   iadd r0, r0, r10    ixor DST, r0, r10
-SDValue
-AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
-{
-  DebugLoc DL = Op.getDebugLoc();
-  EVT OVT = Op.getValueType();
-  SDValue Num = Op.getOperand(0);
-  SDValue Den = Op.getOperand(1);
-
-  // Sign masks of numerator and denominator.
-  SDValue NumSign = DAG.getSelectCC(DL,
-      Num, DAG.getConstant(0, OVT),
-      DAG.getConstant(-1, MVT::i32),
-      DAG.getConstant(0, MVT::i32),
-      ISD::SETLT);
-  SDValue DenSign = DAG.getSelectCC(DL,
-      Den, DAG.getConstant(0, OVT),
-      DAG.getConstant(-1, MVT::i32),
-      DAG.getConstant(0, MVT::i32),
-      ISD::SETLT);
-
-  // Magnitudes: (x + mask) ^ mask.
-  Num = DAG.getNode(ISD::ADD, DL, OVT, Num, NumSign);
-  Den = DAG.getNode(ISD::ADD, DL, OVT, Den, DenSign);
-  Num = DAG.getNode(ISD::XOR, DL, OVT, Num, NumSign);
-  Den = DAG.getNode(ISD::XOR, DL, OVT, Den, DenSign);
-
-  // Unsigned quotient of the magnitudes.
-  SDValue Quot = DAG.getNode(ISD::UDIV, DL, OVT, Num, Den);
-
-  // Apply the sign of the result: negative when the masks differ.
-  SDValue QuotSign = DAG.getNode(ISD::XOR, DL, OVT, NumSign, DenSign);
-  Quot = DAG.getNode(ISD::ADD, DL, OVT, Quot, QuotSign);
-  return DAG.getNode(ISD::XOR, DL, OVT, Quot, QuotSign);
-}
-
-// LowerSDIV64 - No 64 bit lowering is performed; result 0 of the original
-// node is handed back.
-SDValue AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
-  SDNode *Node = Op.getNode();
-  return SDValue(Node, 0);
-}
-
-// LowerSREM8 - Signed remainder for i8 types: widen both operands to 32
-// bit elements (v2i32 / v4i32 for v2i8 / v4i8, i32 otherwise), take the
-// remainder there, and convert the result back to the original type.
-SDValue
-AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
-{
-  DebugLoc DL = Op.getDebugLoc();
-  EVT OVT = Op.getValueType();
-  MVT WideVT = (OVT == MVT::v2i8) ? MVT::v2i32
-             : (OVT == MVT::v4i8) ? MVT::v4i32
-                                  : MVT::i32;
-
-  SDValue Num = DAG.getSExtOrTrunc(Op.getOperand(0), DL, WideVT);
-  SDValue Den = DAG.getSExtOrTrunc(Op.getOperand(1), DL, WideVT);
-  SDValue Rem = DAG.getNode(ISD::SREM, DL, WideVT, Num, Den);
-  return DAG.getSExtOrTrunc(Rem, DL, OVT);
-}
-
-// LowerSREM16 - Signed remainder for i16 types: widen both operands to 32
-// bit elements (v2i32 / v4i32 for v2i16 / v4i16, i32 otherwise), take the
-// remainder there, and convert the result back to the original type.
-SDValue
-AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
-{
-  DebugLoc DL = Op.getDebugLoc();
-  EVT OVT = Op.getValueType();
-  MVT WideVT = (OVT == MVT::v2i16) ? MVT::v2i32
-             : (OVT == MVT::v4i16) ? MVT::v4i32
-                                   : MVT::i32;
-
-  SDValue Num = DAG.getSExtOrTrunc(Op.getOperand(0), DL, WideVT);
-  SDValue Den = DAG.getSExtOrTrunc(Op.getOperand(1), DL, WideVT);
-  SDValue Rem = DAG.getNode(ISD::SREM, DL, WideVT, Num, Den);
-  return DAG.getSExtOrTrunc(Rem, DL, OVT);
-}
-
-// LowerSREM32 - Signed 32 bit remainder built on an unsigned divide.
-//
-// A sign mask is computed for each operand with an AMDILISD::CMP node,
-// and (x + mask) ^ mask gives the magnitudes. The remainder of the
-// magnitudes is computed as r0 - (r0 / r1) * r1, and the add/xor with the
-// mask of the left operand gives the result the sign of the dividend.
-SDValue
-AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
-{
-  DebugLoc DL = Op.getDebugLoc();
-  EVT OVT = Op.getValueType();
-  SDValue LHS = Op.getOperand(0);
-  SDValue RHS = Op.getOperand(1);
-  // The LowerSREM32 function generates equivalent to the following IL.
-  // mov r0, LHS
-  // mov r1, RHS
-  // ilt r10, r0, 0
-  // ilt r11, r1, 0
-  // iadd r0, r0, r10
-  // iadd r1, r1, r11
-  // ixor r0, r0, r10
-  // ixor r1, r1, r11
-  // udiv r20, r0, r1
-  // umul r20, r20, r1
-  // sub r0, r0, r20
-  // iadd r0, r0, r10
-  // ixor DST, r0, r10
-
-  // mov r0, LHS
-  SDValue r0 = LHS;
-
-  // mov r1, RHS
-  SDValue r1 = RHS;
-
-  // ilt r10, r0, 0
-  SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
-      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
-      r0, DAG.getConstant(0, OVT));
-
-  // ilt r11, r1, 0
-  SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
-      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
-      r1, DAG.getConstant(0, OVT));
-
-  // iadd r0, r0, r10
-  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
-
-  // iadd r1, r1, r11
-  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
-
-  // ixor r0, r0, r10
-  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
-
-  // ixor r1, r1, r11
-  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
-
-  // udiv r20, r0, r1
-  // This must be the quotient: multiplying it back by r1 and subtracting
-  // from r0 is what produces the remainder. Starting from the remainder
-  // instead would yield r0 - (r0 % r1) * r1, which is not r0 % r1.
-  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
-
-  // umul r20, r20, r1
-  r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1);
-
-  // sub r0, r0, r20
-  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
-
-  // iadd r0, r0, r10
-  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
-
-  // ixor DST, r0, r10
-  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
-  return DST;
-}
-
-SDValue
-AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
-{
-  // No expansion is performed for the 64-bit case: hand back result 0 of
-  // the node that Op refers to, leaving the node itself untouched.
-  SDNode *Node = Op.getNode();
-  return SDValue(Node, 0);
-}
+++ /dev/null
-//===-- AMDILISelLowering.h - AMDIL DAG Lowering Interface ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// This file defines the interfaces that AMDIL uses to lower LLVM code into a
-// selection DAG.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef AMDIL_ISELLOWERING_H_
-#define AMDIL_ISELLOWERING_H_
-#include "AMDIL.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetLowering.h"
-
-namespace llvm
-{
- // Target-specific SelectionDAG opcodes for AMDIL. Numbering starts at
- // ISD::BUILTIN_OP_END (FIRST_NUMBER); every later enumerator takes the
- // next value, so the order of the entries fixes their numeric values.
- namespace AMDILISD
- {
- enum
- {
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
- CMOVLOG, // 32bit FP Conditional move logical instruction
- MAD, // 32bit Fused Multiply Add instruction
- VBUILD, // scalar to vector mov instruction
- CALL, // Function call based on a single integer
- SELECT_CC, // Select the correct conditional instruction
- UMUL, // 32bit unsigned multiplication
- DIV_INF, // Divide with infinity returned on zero divisor
- // Comparison node. AMDILTargetLowering::LowerSREM32 builds it with a
- // condition-code constant followed by the two values to compare.
- CMP,
- // NOTE(review): the IL_CC_I_* entries look like integer condition
- // codes (greater-than, less-than, greater-or-equal, less-or-equal,
- // equal, not-equal) judging by their names only -- confirm against
- // their users.
- IL_CC_I_GT,
- IL_CC_I_LT,
- IL_CC_I_GE,
- IL_CC_I_LE,
- IL_CC_I_EQ,
- IL_CC_I_NE,
- // NOTE(review): presumably the function-return and conditional-branch
- // nodes, going by the names -- confirm against the lowering code.
- RET_FLAG,
- BRANCH_COND,
- // Last enumerator; its value is one greater than BRANCH_COND.
- LAST_ISD_NUMBER
- };
- } // AMDILISD
-
- class MachineBasicBlock;
- class MachineInstr;
- class DebugLoc;
- class TargetInstrInfo;
-
- class AMDILTargetLowering : public TargetLowering
- {
- public:
- AMDILTargetLowering(TargetMachine &TM);
-
- virtual SDValue
- LowerOperation(SDValue Op, SelectionDAG &DAG) const;
-
- /// computeMaskedBitsForTargetNode - Determine which of the bits specified
- /// in Mask are known to be either zero or one and return them in the
- /// KnownZero/KnownOne bitsets.
- virtual void
- computeMaskedBitsForTargetNode(
- const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth = 0
- ) const;
-
- virtual bool
- getTgtMemIntrinsic(IntrinsicInfo &Info,
- const CallInst &I, unsigned Intrinsic) const;
- virtual const char*
- getTargetNodeName(
- unsigned Opcode
- ) const;
- // We want to mark f32/f64 floating point values as
- // legal
- bool
- isFPImmLegal(const APFloat &Imm, EVT VT) const;
- // We don't want to shrink f64/f32 constants because
- // they both take up the same amount of space and
- // we don't want to use a f2d instruction.
-