setOperationAction(ISD::LOAD, MVT::i32, Custom);
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
+
+ // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
+ // spaces, so it is custom lowered to handle those where it isn't.
setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
+
setOperationAction(ISD::STORE, MVT::i8, Custom);
setOperationAction(ISD::STORE, MVT::i32, Custom);
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
switch (MI->getOpcode()) {
default:
- if (TII->isLDSInstr(MI->getOpcode()) &&
- TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst) != -1) {
+ // Replace LDS_*_RET instruction that don't have any uses with the
+ // equivalent LDS_*_NORET instruction.
+ if (TII->isLDSRetInstr(MI->getOpcode())) {
int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
assert(DstIdx != -1);
MachineInstrBuilder NewMI;
- if (!MRI.use_empty(MI->getOperand(DstIdx).getReg())) {
- NewMI = BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()),
- AMDGPU::OQAP);
- TII->buildDefaultInstruction(*BB, I, AMDGPU::MOV,
- MI->getOperand(0).getReg(),
- AMDGPU::OQAP);
- } else {
- NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
- TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
- }
+ if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()))
+ return BB;
+
+ NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
+ TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
NewMI.addOperand(MI->getOperand(i));
}
case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
- unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
+ unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
.addOperand(MI->getOperand(0))
// Instruction is left unmodified if its not the last one of its type
bool isLastInstructionOfItsType = true;
unsigned InstExportType = MI->getOperand(1).getImm();
- for (MachineBasicBlock::iterator NextExportInst = llvm::next(I),
+ for (MachineBasicBlock::iterator NextExportInst = std::next(I),
EndBlock = BB->end(); NextExportInst != EndBlock;
- NextExportInst = llvm::next(NextExportInst)) {
+ NextExportInst = std::next(NextExportInst)) {
if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
unsigned CurrentInstExportType = NextExportInst->getOperand(1)
}
}
}
- bool EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN)? 1 : 0;
+ bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
if (!EOP && !isLastInstructionOfItsType)
return BB;
unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40;
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::LOAD: return LowerLOAD(Op, DAG);
- case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
case ISD::INTRINSIC_VOID: {
SDValue Chain = Op.getOperand(0);
false, false, false, 0);
}
-SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
-
- MachineFunction &MF = DAG.getMachineFunction();
- const AMDGPUFrameLowering *TFL =
- static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
-
- FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
- assert(FIN);
-
- unsigned FrameIndex = FIN->getIndex();
- unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
- return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
-}
-
bool R600TargetLowering::isZero(SDValue Op) const {
if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
return Cst->isNullValue();
HWFalse = DAG.getConstant(0, CompareVT);
}
else {
- assert(!"Unhandled value type in LowerSELECT_CC");
+ llvm_unreachable("Unhandled value type in LowerSELECT_CC");
}
// Lower this unsupported SELECT_CC into a combination of two supported
DAG.getCondCode(ISD::SETNE));
}
-/// LLVM generates byte-addresed pointers. For indirect addressing, we need to
+/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
Ptr, DAG.getConstant(2, MVT::i32)));
if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
- assert(!"Truncated and indexed stores not supported yet");
+ llvm_unreachable("Truncated and indexed stores not supported yet");
} else {
Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
}
return SDValue();
}
+ SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
+ if (Ret.getNode()) {
+ return Ret;
+ }
// Lowering for indirect addressing
const MachineFunction &MF = DAG.getMachineFunction();
SDValue Ptr = Op.getOperand(1);
SDValue LoweredLoad;
+ SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
+ if (Ret.getNode()) {
+ SDValue Ops[2];
+ Ops[0] = Ret;
+ Ops[1] = Chain;
+ return DAG.getMergeValues(Ops, 2, DL);
+ }
+
+
if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
SDValue MergedValues[2] = {
SplitVectorLoad(Op, DAG),
}
int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
- if (ConstantBlock > -1 && LoadNode->getExtensionType() != ISD::SEXTLOAD) {
+ if (ConstantBlock > -1 &&
+ ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
+ (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
SDValue Result;
if (isa<ConstantExpr>(LoadNode->getSrcValue()) ||
isa<Constant>(LoadNode->getSrcValue()) ||
}
Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements);
} else {
- // non constant ptr cant be folded, keeps it as a v4f32 load
+ // non-constant ptr can't be folded, keeps it as a v4f32 load
Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, MVT::i32)),
DAG.getConstant(LoadNode->getAddressSpace() -
DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
MachinePointerInfo(UndefValue::get(PtrTy)),
MemVT, false, false, 4);
- // 4 is the prefered alignment for
- // the CONSTANT memory space.
+ // 4 is the preferred alignment for
+ // the CONSTANT memory space.
InVals.push_back(Arg);
}
return Chain;
VectorEntry.getOperand(3)
};
bool isUnmovable[4] = { false, false, false, false };
- for (unsigned i = 0; i < 4; i++)
+ for (unsigned i = 0; i < 4; i++) {
RemapSwizzle[i] = i;
+ if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
+ ->getZExtValue();
+ if (i == Idx)
+ isUnmovable[Idx] = true;
+ }
+ }
for (unsigned i = 0; i < 4; i++) {
if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
->getZExtValue();
- if (i == Idx) {
- isUnmovable[Idx] = true;
- continue;
- }
if (isUnmovable[Idx])
continue;
// Swap i and Idx