#include "NVPTX.h"
#include "NVPTXMachineFunctionInfo.h"
+#include "NVPTXSubtarget.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
public:
NVPTXReplaceImageHandles();
- bool runOnMachineFunction(MachineFunction &MF) override;
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "NVPTX Replace Image Handles";
+ }
private:
bool processInstr(MachineInstr &MI);
void replaceImageHandle(MachineOperand &Op, MachineFunction &MF);
+ bool findIndexForHandle(MachineOperand &Op, MachineFunction &MF,
+ unsigned &Idx);
};
}
E = InstrsToRemove.end(); I != E; ++I) {
(*I)->eraseFromParent();
}
-
return Changed;
}
bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) {
MachineFunction &MF = *MI.getParent()->getParent();
- // Check if we have a surface/texture instruction
- switch (MI.getOpcode()) {
- default: return false;
- case NVPTX::TEX_1D_F32_I32:
- case NVPTX::TEX_1D_F32_F32:
- case NVPTX::TEX_1D_F32_F32_LEVEL:
- case NVPTX::TEX_1D_F32_F32_GRAD:
- case NVPTX::TEX_1D_I32_I32:
- case NVPTX::TEX_1D_I32_F32:
- case NVPTX::TEX_1D_I32_F32_LEVEL:
- case NVPTX::TEX_1D_I32_F32_GRAD:
- case NVPTX::TEX_1D_ARRAY_F32_I32:
- case NVPTX::TEX_1D_ARRAY_F32_F32:
- case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL:
- case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD:
- case NVPTX::TEX_1D_ARRAY_I32_I32:
- case NVPTX::TEX_1D_ARRAY_I32_F32:
- case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL:
- case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD:
- case NVPTX::TEX_2D_F32_I32:
- case NVPTX::TEX_2D_F32_F32:
- case NVPTX::TEX_2D_F32_F32_LEVEL:
- case NVPTX::TEX_2D_F32_F32_GRAD:
- case NVPTX::TEX_2D_I32_I32:
- case NVPTX::TEX_2D_I32_F32:
- case NVPTX::TEX_2D_I32_F32_LEVEL:
- case NVPTX::TEX_2D_I32_F32_GRAD:
- case NVPTX::TEX_2D_ARRAY_F32_I32:
- case NVPTX::TEX_2D_ARRAY_F32_F32:
- case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL:
- case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD:
- case NVPTX::TEX_2D_ARRAY_I32_I32:
- case NVPTX::TEX_2D_ARRAY_I32_F32:
- case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL:
- case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD:
- case NVPTX::TEX_3D_F32_I32:
- case NVPTX::TEX_3D_F32_F32:
- case NVPTX::TEX_3D_F32_F32_LEVEL:
- case NVPTX::TEX_3D_F32_F32_GRAD:
- case NVPTX::TEX_3D_I32_I32:
- case NVPTX::TEX_3D_I32_F32:
- case NVPTX::TEX_3D_I32_F32_LEVEL:
- case NVPTX::TEX_3D_I32_F32_GRAD: {
+ const MCInstrDesc &MCID = MI.getDesc();
+
+ if (MCID.TSFlags & NVPTXII::IsTexFlag) {
// This is a texture fetch, so operand 4 is a texref and operand 5 is
// a samplerref
MachineOperand &TexHandle = MI.getOperand(4);
- MachineOperand &SampHandle = MI.getOperand(5);
-
replaceImageHandle(TexHandle, MF);
- replaceImageHandle(SampHandle, MF);
-
- return true;
- }
- case NVPTX::SULD_1D_I8_TRAP:
- case NVPTX::SULD_1D_I16_TRAP:
- case NVPTX::SULD_1D_I32_TRAP:
- case NVPTX::SULD_1D_ARRAY_I8_TRAP:
- case NVPTX::SULD_1D_ARRAY_I16_TRAP:
- case NVPTX::SULD_1D_ARRAY_I32_TRAP:
- case NVPTX::SULD_2D_I8_TRAP:
- case NVPTX::SULD_2D_I16_TRAP:
- case NVPTX::SULD_2D_I32_TRAP:
- case NVPTX::SULD_2D_ARRAY_I8_TRAP:
- case NVPTX::SULD_2D_ARRAY_I16_TRAP:
- case NVPTX::SULD_2D_ARRAY_I32_TRAP:
- case NVPTX::SULD_3D_I8_TRAP:
- case NVPTX::SULD_3D_I16_TRAP:
- case NVPTX::SULD_3D_I32_TRAP: {
- // This is a V1 surface load, so operand 1 is a surfref
- MachineOperand &SurfHandle = MI.getOperand(1);
- replaceImageHandle(SurfHandle, MF);
+ if (!(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
+ MachineOperand &SampHandle = MI.getOperand(5);
+ replaceImageHandle(SampHandle, MF);
+ }
return true;
- }
- case NVPTX::SULD_1D_V2I8_TRAP:
- case NVPTX::SULD_1D_V2I16_TRAP:
- case NVPTX::SULD_1D_V2I32_TRAP:
- case NVPTX::SULD_1D_ARRAY_V2I8_TRAP:
- case NVPTX::SULD_1D_ARRAY_V2I16_TRAP:
- case NVPTX::SULD_1D_ARRAY_V2I32_TRAP:
- case NVPTX::SULD_2D_V2I8_TRAP:
- case NVPTX::SULD_2D_V2I16_TRAP:
- case NVPTX::SULD_2D_V2I32_TRAP:
- case NVPTX::SULD_2D_ARRAY_V2I8_TRAP:
- case NVPTX::SULD_2D_ARRAY_V2I16_TRAP:
- case NVPTX::SULD_2D_ARRAY_V2I32_TRAP:
- case NVPTX::SULD_3D_V2I8_TRAP:
- case NVPTX::SULD_3D_V2I16_TRAP:
- case NVPTX::SULD_3D_V2I32_TRAP: {
- // This is a V2 surface load, so operand 2 is a surfref
- MachineOperand &SurfHandle = MI.getOperand(2);
-
- replaceImageHandle(SurfHandle, MF);
+ } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
+ unsigned VecSize =
+ 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
- return true;
- }
- case NVPTX::SULD_1D_V4I8_TRAP:
- case NVPTX::SULD_1D_V4I16_TRAP:
- case NVPTX::SULD_1D_V4I32_TRAP:
- case NVPTX::SULD_1D_ARRAY_V4I8_TRAP:
- case NVPTX::SULD_1D_ARRAY_V4I16_TRAP:
- case NVPTX::SULD_1D_ARRAY_V4I32_TRAP:
- case NVPTX::SULD_2D_V4I8_TRAP:
- case NVPTX::SULD_2D_V4I16_TRAP:
- case NVPTX::SULD_2D_V4I32_TRAP:
- case NVPTX::SULD_2D_ARRAY_V4I8_TRAP:
- case NVPTX::SULD_2D_ARRAY_V4I16_TRAP:
- case NVPTX::SULD_2D_ARRAY_V4I32_TRAP:
- case NVPTX::SULD_3D_V4I8_TRAP:
- case NVPTX::SULD_3D_V4I16_TRAP:
- case NVPTX::SULD_3D_V4I32_TRAP: {
- // This is a V4 surface load, so operand 4 is a surfref
- MachineOperand &SurfHandle = MI.getOperand(4);
+ // For a surface load of vector size N, the Nth operand will be the surfref
+ MachineOperand &SurfHandle = MI.getOperand(VecSize);
replaceImageHandle(SurfHandle, MF);
return true;
- }
- case NVPTX::SUST_B_1D_B8_TRAP:
- case NVPTX::SUST_B_1D_B16_TRAP:
- case NVPTX::SUST_B_1D_B32_TRAP:
- case NVPTX::SUST_B_1D_V2B8_TRAP:
- case NVPTX::SUST_B_1D_V2B16_TRAP:
- case NVPTX::SUST_B_1D_V2B32_TRAP:
- case NVPTX::SUST_B_1D_V4B8_TRAP:
- case NVPTX::SUST_B_1D_V4B16_TRAP:
- case NVPTX::SUST_B_1D_V4B32_TRAP:
- case NVPTX::SUST_B_1D_ARRAY_B8_TRAP:
- case NVPTX::SUST_B_1D_ARRAY_B16_TRAP:
- case NVPTX::SUST_B_1D_ARRAY_B32_TRAP:
- case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP:
- case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP:
- case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP:
- case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP:
- case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP:
- case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP:
- case NVPTX::SUST_B_2D_B8_TRAP:
- case NVPTX::SUST_B_2D_B16_TRAP:
- case NVPTX::SUST_B_2D_B32_TRAP:
- case NVPTX::SUST_B_2D_V2B8_TRAP:
- case NVPTX::SUST_B_2D_V2B16_TRAP:
- case NVPTX::SUST_B_2D_V2B32_TRAP:
- case NVPTX::SUST_B_2D_V4B8_TRAP:
- case NVPTX::SUST_B_2D_V4B16_TRAP:
- case NVPTX::SUST_B_2D_V4B32_TRAP:
- case NVPTX::SUST_B_2D_ARRAY_B8_TRAP:
- case NVPTX::SUST_B_2D_ARRAY_B16_TRAP:
- case NVPTX::SUST_B_2D_ARRAY_B32_TRAP:
- case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP:
- case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP:
- case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP:
- case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP:
- case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP:
- case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP:
- case NVPTX::SUST_B_3D_B8_TRAP:
- case NVPTX::SUST_B_3D_B16_TRAP:
- case NVPTX::SUST_B_3D_B32_TRAP:
- case NVPTX::SUST_B_3D_V2B8_TRAP:
- case NVPTX::SUST_B_3D_V2B16_TRAP:
- case NVPTX::SUST_B_3D_V2B32_TRAP:
- case NVPTX::SUST_B_3D_V4B8_TRAP:
- case NVPTX::SUST_B_3D_V4B16_TRAP:
- case NVPTX::SUST_B_3D_V4B32_TRAP:
- case NVPTX::SUST_P_1D_B8_TRAP:
- case NVPTX::SUST_P_1D_B16_TRAP:
- case NVPTX::SUST_P_1D_B32_TRAP:
- case NVPTX::SUST_P_1D_V2B8_TRAP:
- case NVPTX::SUST_P_1D_V2B16_TRAP:
- case NVPTX::SUST_P_1D_V2B32_TRAP:
- case NVPTX::SUST_P_1D_V4B8_TRAP:
- case NVPTX::SUST_P_1D_V4B16_TRAP:
- case NVPTX::SUST_P_1D_V4B32_TRAP:
- case NVPTX::SUST_P_1D_ARRAY_B8_TRAP:
- case NVPTX::SUST_P_1D_ARRAY_B16_TRAP:
- case NVPTX::SUST_P_1D_ARRAY_B32_TRAP:
- case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP:
- case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP:
- case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP:
- case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP:
- case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP:
- case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP:
- case NVPTX::SUST_P_2D_B8_TRAP:
- case NVPTX::SUST_P_2D_B16_TRAP:
- case NVPTX::SUST_P_2D_B32_TRAP:
- case NVPTX::SUST_P_2D_V2B8_TRAP:
- case NVPTX::SUST_P_2D_V2B16_TRAP:
- case NVPTX::SUST_P_2D_V2B32_TRAP:
- case NVPTX::SUST_P_2D_V4B8_TRAP:
- case NVPTX::SUST_P_2D_V4B16_TRAP:
- case NVPTX::SUST_P_2D_V4B32_TRAP:
- case NVPTX::SUST_P_2D_ARRAY_B8_TRAP:
- case NVPTX::SUST_P_2D_ARRAY_B16_TRAP:
- case NVPTX::SUST_P_2D_ARRAY_B32_TRAP:
- case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP:
- case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP:
- case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP:
- case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP:
- case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP:
- case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP:
- case NVPTX::SUST_P_3D_B8_TRAP:
- case NVPTX::SUST_P_3D_B16_TRAP:
- case NVPTX::SUST_P_3D_B32_TRAP:
- case NVPTX::SUST_P_3D_V2B8_TRAP:
- case NVPTX::SUST_P_3D_V2B16_TRAP:
- case NVPTX::SUST_P_3D_V2B32_TRAP:
- case NVPTX::SUST_P_3D_V4B8_TRAP:
- case NVPTX::SUST_P_3D_V4B16_TRAP:
- case NVPTX::SUST_P_3D_V4B32_TRAP: {
+ } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
// This is a surface store, so operand 0 is a surfref
MachineOperand &SurfHandle = MI.getOperand(0);
replaceImageHandle(SurfHandle, MF);
return true;
- }
- case NVPTX::TXQ_CHANNEL_ORDER:
- case NVPTX::TXQ_CHANNEL_DATA_TYPE:
- case NVPTX::TXQ_WIDTH:
- case NVPTX::TXQ_HEIGHT:
- case NVPTX::TXQ_DEPTH:
- case NVPTX::TXQ_ARRAY_SIZE:
- case NVPTX::TXQ_NUM_SAMPLES:
- case NVPTX::TXQ_NUM_MIPMAP_LEVELS:
- case NVPTX::SUQ_CHANNEL_ORDER:
- case NVPTX::SUQ_CHANNEL_DATA_TYPE:
- case NVPTX::SUQ_WIDTH:
- case NVPTX::SUQ_HEIGHT:
- case NVPTX::SUQ_DEPTH:
- case NVPTX::SUQ_ARRAY_SIZE: {
+ } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
// This is a query, so operand 1 is a surfref/texref
MachineOperand &Handle = MI.getOperand(1);
return true;
}
- }
+
+ return false;
}
void NVPTXReplaceImageHandles::
replaceImageHandle(MachineOperand &Op, MachineFunction &MF) {
+ unsigned Idx;
+ if (findIndexForHandle(Op, MF, Idx)) {
+ Op.ChangeToImmediate(Idx);
+ }
+}
+
+bool NVPTXReplaceImageHandles::
+findIndexForHandle(MachineOperand &Op, MachineFunction &MF, unsigned &Idx) {
const MachineRegisterInfo &MRI = MF.getRegInfo();
NVPTXMachineFunctionInfo *MFI = MF.getInfo<NVPTXMachineFunctionInfo>();
+
+ assert(Op.isReg() && "Handle is not in a reg?");
+
// Which instruction defines the handle?
- MachineInstr *MI = MRI.getVRegDef(Op.getReg());
- assert(MI && "No def for image handle vreg?");
- MachineInstr &TexHandleDef = *MI;
+ MachineInstr &TexHandleDef = *MRI.getVRegDef(Op.getReg());
switch (TexHandleDef.getOpcode()) {
case NVPTX::LD_i64_avar: {
// The handle is a parameter value being loaded, replace with the
// parameter symbol
+ const NVPTXSubtarget &ST = MF.getTarget().getSubtarget<NVPTXSubtarget>();
+ if (ST.getDrvInterface() == NVPTX::CUDA) {
+ // For CUDA, we preserve the param loads coming from function arguments
+ return false;
+ }
+
assert(TexHandleDef.getOperand(6).isSymbol() && "Load is not a symbol!");
StringRef Sym = TexHandleDef.getOperand(6).getSymbolName();
std::string ParamBaseName = MF.getName();
std::string NewSym;
raw_string_ostream NewSymStr(NewSym);
NewSymStr << MF.getFunction()->getName() << "_param_" << Param;
- Op.ChangeToImmediate(
- MFI->getImageHandleSymbolIndex(NewSymStr.str().c_str()));
+
InstrsToRemove.insert(&TexHandleDef);
- break;
+ Idx = MFI->getImageHandleSymbolIndex(NewSymStr.str().c_str());
+ return true;
}
case NVPTX::texsurf_handles: {
// The handle is a global variable, replace with the global variable name
assert(TexHandleDef.getOperand(1).isGlobal() && "Load is not a global!");
const GlobalValue *GV = TexHandleDef.getOperand(1).getGlobal();
assert(GV->hasName() && "Global sampler must be named!");
- Op.ChangeToImmediate(MFI->getImageHandleSymbolIndex(GV->getName().data()));
InstrsToRemove.insert(&TexHandleDef);
- break;
+ Idx = MFI->getImageHandleSymbolIndex(GV->getName().data());
+ return true;
+ }
+ case NVPTX::nvvm_move_i64:
+ case TargetOpcode::COPY: {
+ bool Res = findIndexForHandle(TexHandleDef.getOperand(1), MF, Idx);
+ if (Res) {
+ InstrsToRemove.insert(&TexHandleDef);
+ }
+ return Res;
}
default:
llvm_unreachable("Unknown instruction operating on handle");