//===----------------------------------------------------------------------===//
#include "SIISelLowering.h"
-#include "AMDIL.h"
#include "AMDGPU.h"
+#include "AMDIL.h"
#include "AMDILIntrinsicInfo.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
-#include "llvm/IR/Function.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/Function.h"
using namespace llvm;
CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc DL, SelectionDAG &DAG,
+ SDLoc DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
for (unsigned i = 0, e = Ins.size(), PSInputNum = 0; i != e; ++i) {
const ISD::InputArg &Arg = Ins[i];
-
- // First check if it's a PS input addr
+
+ // First check if it's a PS input addr
if (Info->ShaderType == ShaderType::PIXEL && !Arg.Flags.isInReg()) {
assert((PSInputNum <= 15) && "Too many PS inputs!");
for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
+ const ISD::InputArg &Arg = Ins[i];
if (Skipped & (1 << i)) {
- InVals.push_back(SDValue());
+ InVals.push_back(DAG.getUNDEF(Arg.VT));
continue;
}
Reg = MF.addLiveIn(Reg, RC);
SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
- const ISD::InputArg &Arg = Ins[i];
if (Arg.VT.isVector()) {
// Build a vector from the registers
NumElements = Arg.VT.getVectorNumElements() - NumElements;
for (unsigned j = 0; j != NumElements; ++j)
Regs.push_back(DAG.getUNDEF(VT));
-
+
InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, DL, Arg.VT,
Regs.data(), Regs.size()));
continue;
return BB;
}
-EVT SITargetLowering::getSetCCResultType(EVT VT) const {
+EVT SITargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
return MVT::i1;
}
SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
SelectionDAG &DAG) const {
- DebugLoc DL = BRCOND.getDebugLoc();
+ SDLoc DL(BRCOND);
SDNode *Intr = BRCOND.getOperand(1).getNode();
SDValue Target = BRCOND.getOperand(2);
return Chain;
}
-#define RSRC_DATA_FORMAT 0xf00000000000
+const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
SDValue Chain = Op.getOperand(0);
SDValue Value = Op.getOperand(1);
SDValue VirtualAddress = Op.getOperand(2);
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
if (StoreNode->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS) {
return SDValue();
}
- SDValue SrcSrc = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
- DAG.getConstant(0, MVT::i64),
- DAG.getConstant(RSRC_DATA_FORMAT, MVT::i64));
+ SDValue Zero = DAG.getConstant(0, MVT::i64);
+ SDValue Format = DAG.getConstant(RSRC_DATA_FORMAT, MVT::i64);
+ SDValue SrcSrc = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Zero, Format);
SDValue Ops[2];
Ops[0] = DAG.getNode(AMDGPUISD::BUFFER_STORE, DL, MVT::Other, Chain,
SDValue False = Op.getOperand(3);
SDValue CC = Op.getOperand(4);
EVT VT = Op.getValueType();
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
// Possible Min/Max pattern
SDValue MinMax = LowerMinMax(Op, DAG);
SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
- DebugLoc DL = N->getDebugLoc();
+ SDLoc DL(N);
EVT VT = N->getValueType(0);
switch (N->getOpcode()) {
return SDValue();
}
-/// \brief Test if RegClass is one of the VSrc classes
+/// \brief Test if RegClass is one of the VSrc classes
static bool isVSrc(unsigned RegClass) {
return AMDGPU::VSrc_32RegClassID == RegClass ||
AMDGPU::VSrc_64RegClassID == RegClass;
}
-/// \brief Test if RegClass is one of the SSrc classes
+/// \brief Test if RegClass is one of the SSrc classes
static bool isSSrc(unsigned RegClass) {
return AMDGPU::SSrc_32RegClassID == RegClass ||
AMDGPU::SSrc_64RegClassID == RegClass;
}
/// \brief Does "Op" fit into register class "RegClass" ?
-bool SITargetLowering::fitsRegClass(SelectionDAG &DAG, SDValue &Op,
+bool SITargetLowering::fitsRegClass(SelectionDAG &DAG, const SDValue &Op,
unsigned RegClass) const {
- MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+ MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
SDNode *Node = Op.getNode();
const TargetRegisterClass *OpClass;
// This is a conservative aproach, it is possible that we can't determine
// the correct register class and copy too often, but better save than sorry.
SDValue RC = DAG.getTargetConstant(RegClass, MVT::i32);
- SDNode *Node = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DebugLoc(),
+ SDNode *Node = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, SDLoc(),
Operand.getValueType(), Operand, RC);
Operand = SDValue(Node, 0);
}
-SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
- SelectionDAG &DAG) const {
+/// \brief Try to fold the Nodes operands into the Node
+SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
+ SelectionDAG &DAG) const {
// Original encoding (either e32 or e64)
int Opcode = Node->getMachineOpcode();
Ops.push_back(Node->getOperand(i));
// Create a complete new instruction
- return DAG.getMachineNode(Desc->Opcode, Node->getDebugLoc(),
- Node->getVTList(), Ops.data(), Ops.size());
+ return DAG.getMachineNode(Desc->Opcode, SDLoc(Node), Node->getVTList(), Ops);
+}
+
+/// \brief Helper function for adjustWritemask
+static unsigned SubIdx2Lane(unsigned Idx) {
+ switch (Idx) {
+ default: return 0;
+ case AMDGPU::sub0: return 0;
+ case AMDGPU::sub1: return 1;
+ case AMDGPU::sub2: return 2;
+ case AMDGPU::sub3: return 3;
+ }
+}
+
+/// \brief Adjust the writemask of MIMG instructions
+void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
+ SelectionDAG &DAG) const {
+ SDNode *Users[4] = { };
+ unsigned Writemask = 0, Lane = 0;
+
+ // Try to figure out the used register components
+ for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end();
+ I != E; ++I) {
+
+ // Abort if we can't understand the usage
+ if (!I->isMachineOpcode() ||
+ I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
+ return;
+
+ Lane = SubIdx2Lane(I->getConstantOperandVal(1));
+
+ // Abort if we have more than one user per component
+ if (Users[Lane])
+ return;
+
+ Users[Lane] = *I;
+ Writemask |= 1 << Lane;
+ }
+
+ // Abort if all components are used
+ if (Writemask == 0xf)
+ return;
+
+ // Adjust the writemask in the node
+ std::vector<SDValue> Ops;
+ Ops.push_back(DAG.getTargetConstant(Writemask, MVT::i32));
+ for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i)
+ Ops.push_back(Node->getOperand(i));
+ Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size());
+
+ // If we only got one lane, replace it with a copy
+ if (Writemask == (1U << Lane)) {
+ SDValue RC = DAG.getTargetConstant(AMDGPU::VReg_32RegClassID, MVT::i32);
+ SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ SDLoc(), Users[Lane]->getValueType(0),
+ SDValue(Node, 0), RC);
+ DAG.ReplaceAllUsesWith(Users[Lane], Copy);
+ return;
+ }
+
+ // Update the users of the node with the new indices
+ for (unsigned i = 0, Idx = AMDGPU::sub0; i < 4; ++i) {
+
+ SDNode *User = Users[i];
+ if (!User)
+ continue;
+
+ SDValue Op = DAG.getTargetConstant(Idx, MVT::i32);
+ DAG.UpdateNodeOperands(User, User->getOperand(0), Op);
+
+ switch (Idx) {
+ default: break;
+ case AMDGPU::sub0: Idx = AMDGPU::sub1; break;
+ case AMDGPU::sub1: Idx = AMDGPU::sub2; break;
+ case AMDGPU::sub2: Idx = AMDGPU::sub3; break;
+ }
+ }
+}
+
+/// \brief Fold the instructions after slecting them
+SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
+ SelectionDAG &DAG) const {
+
+ if (AMDGPU::isMIMG(Node->getMachineOpcode()) != -1)
+ adjustWritemask(Node, DAG);
+
+ return foldOperands(Node, DAG);
+}
+
+/// \brief Assign the register class depending on the number of
+/// bits set in the writemask
+void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
+ SDNode *Node) const {
+ if (AMDGPU::isMIMG(MI->getOpcode()) == -1)
+ return;
+
+ unsigned VReg = MI->getOperand(0).getReg();
+ unsigned Writemask = MI->getOperand(1).getImm();
+ unsigned BitsSet = 0;
+ for (unsigned i = 0; i < 4; ++i)
+ BitsSet += Writemask & (1 << i) ? 1 : 0;
+
+ const TargetRegisterClass *RC;
+ switch (BitsSet) {
+ default: return;
+ case 1: RC = &AMDGPU::VReg_32RegClass; break;
+ case 2: RC = &AMDGPU::VReg_64RegClass; break;
+ case 3: RC = &AMDGPU::VReg_96RegClass; break;
+ }
+
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ MRI.setRegClass(VReg, RC);
}