addRegisterClass(MVT::v2i32, &AMDGPU::VSrc_64RegClass);
addRegisterClass(MVT::v2f32, &AMDGPU::VSrc_64RegClass);
- addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass);
- addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
- addRegisterClass(MVT::i128, &AMDGPU::SReg_128RegClass);
+ addRegisterClass(MVT::v4i32, &AMDGPU::VSrc_128RegClass);
+ addRegisterClass(MVT::v4f32, &AMDGPU::VSrc_128RegClass);
addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass);
addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
setOperationAction(ISD::ADDC, MVT::i32, Legal);
setOperationAction(ISD::ADDE, MVT::i32, Legal);
- setOperationAction(ISD::BITCAST, MVT::i128, Legal);
-
// We need to custom lower vector stores from local memory
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
setOperationAction(ISD::STORE, MVT::i1, Custom);
setOperationAction(ISD::STORE, MVT::i32, Custom);
setOperationAction(ISD::STORE, MVT::i64, Custom);
- setOperationAction(ISD::STORE, MVT::i128, Custom);
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
setTruncStoreAction(MVT::i32, MVT::i16, Custom);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
- setTruncStoreAction(MVT::i128, MVT::i64, Expand);
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
AMDGPU::VGPR2, VT);
case AMDGPUIntrinsic::SI_load_const: {
SDValue Ops [] = {
- ResourceDescriptorToi128(Op.getOperand(1), DAG),
+ Op.getOperand(1),
Op.getOperand(2)
};
return LowerSampleIntrinsic(AMDGPUISD::SAMPLEL, Op, DAG);
case AMDGPUIntrinsic::SI_vs_load_input:
return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT,
- ResourceDescriptorToi128(Op.getOperand(1), DAG),
+ Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
}
SDLoc DL(Op);
SDValue Ops [] = {
Chain,
- ResourceDescriptorToi128(Op.getOperand(2), DAG),
+ Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(4),
Op.getOperand(5),
}
-SDValue SITargetLowering::ResourceDescriptorToi128(SDValue Op,
- SelectionDAG &DAG) const {
-
- if (Op.getValueType() == MVT::i128) {
- return Op;
- }
-
- assert(Op.getOpcode() == ISD::UNDEF);
-
- return DAG.getNode(ISD::BUILD_PAIR, SDLoc(Op), MVT::i128,
- DAG.getConstant(0, MVT::i64),
- DAG.getConstant(0, MVT::i64));
-}
-
SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode,
const SDValue &Op,
SelectionDAG &DAG) const {
return DAG.getNode(Opcode, SDLoc(Op), Op.getValueType(), Op.getOperand(1),
Op.getOperand(2),
- ResourceDescriptorToi128(Op.getOperand(3), DAG),
+ Op.getOperand(3),
Op.getOperand(4));
}
SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
- SDValue ResourceDescriptorToi128(SDValue Op, SelectionDAG &DAG) const;
bool foldImm(SDValue &Operand, int32_t &Immediate,
bool &ScalarSlotUsed) const;
const TargetRegisterClass *getRegClassForNode(SelectionDAG &DAG,
>;
def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT",
- SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisVT<1, i128>, SDTCisVT<2, i32>]>,
+ SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i32>]>,
[SDNPMayLoad, SDNPMemOperand]
>;
def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT",
SDTypeProfile<0, 13,
- [SDTCisVT<0, i128>, // rsrc(SGPR)
+ [SDTCisVT<0, v4i32>, // rsrc(SGPR)
SDTCisVT<1, iAny>, // vdata(VGPR)
SDTCisVT<2, i32>, // num_channels(imm)
SDTCisVT<3, i32>, // vaddr(VGPR)
>;
def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT",
- SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, i128>, SDTCisVT<2, i16>,
+ SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i16>,
SDTCisVT<3, i32>]>
>;
class SDSample<string opcode> : SDNode <opcode,
SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v32i8>,
- SDTCisVT<3, i128>, SDTCisVT<4, i32>]>
+ SDTCisVT<3, v4i32>, SDTCisVT<4, i32>]>
>;
def SIsample : SDSample<"AMDGPUISD::SAMPLE">;
/* int_SI_vs_load_input */
def : Pat<
- (SIload_input i128:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr),
+ (SIload_input v4i32:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr),
(BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0)
>;
/* SIsample for simple 1D texture lookup */
def : Pat <
- (SIsample i32:$addr, v32i8:$rsrc, i128:$sampler, imm),
+ (SIsample i32:$addr, v32i8:$rsrc, v4i32:$sampler, imm),
(IMAGE_SAMPLE_V4_V1 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
class SamplePattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, i128:$sampler, imm),
+ (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, imm),
(opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
class SampleRectPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_RECT),
+ (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_RECT),
(opcode 0xf, 1, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
class SampleArrayPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_ARRAY),
+ (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_ARRAY),
(opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
class SampleShadowPattern<SDNode name, MIMG opcode,
ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_SHADOW),
+ (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_SHADOW),
(opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
class SampleShadowArrayPattern<SDNode name, MIMG opcode,
ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_SHADOW_ARRAY),
+ (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_SHADOW_ARRAY),
(opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
def : BitConvert <v4f32, v4i32, VReg_128>;
def : BitConvert <v4i32, v4f32, VReg_128>;
-def : BitConvert <v4i32, i128, VReg_128>;
-def : BitConvert <i128, v4i32, VReg_128>;
def : BitConvert <v8f32, v8i32, SReg_256>;
def : BitConvert <v8i32, v8f32, SReg_256>;
// 1. Offset as 8bit DWORD immediate
def : Pat <
- (SIload_constant i128:$sbase, IMM8bitDWORD:$offset),
+ (SIload_constant v4i32:$sbase, IMM8bitDWORD:$offset),
(S_BUFFER_LOAD_DWORD_IMM $sbase, (as_dword_i32imm $offset))
>;
// 2. Offset loaded in an 32bit SGPR
def : Pat <
- (SIload_constant i128:$sbase, imm:$offset),
+ (SIload_constant v4i32:$sbase, imm:$offset),
(S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset))
>;
// 3. Offset in an 32Bit VGPR
def : Pat <
- (SIload_constant i128:$sbase, i32:$voff),
+ (SIload_constant v4i32:$sbase, i32:$voff),
(BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0)
>;
defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, i64>;
defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, v2i32>;
-defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, i128>;
defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>;
defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v8i32>;
MUBUF bothen> {
def : Pat <
- (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
+ (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
imm:$offset, 0, 0, imm:$glc, imm:$slc,
imm:$tfe)),
(offset $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
>;
def : Pat <
- (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
+ (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
imm, 1, 0, imm:$glc, imm:$slc,
imm:$tfe)),
(offen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc),
>;
def : Pat <
- (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
+ (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
imm:$offset, 0, 1, imm:$glc, imm:$slc,
imm:$tfe)),
(idxen $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
>;
def : Pat <
- (vt (int_SI_buffer_load_dword i128:$rsrc, v2i32:$vaddr, i32:$soffset,
+ (vt (int_SI_buffer_load_dword v4i32:$rsrc, v2i32:$vaddr, i32:$soffset,
imm, 1, 1, imm:$glc, imm:$slc,
imm:$tfe)),
(bothen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc),
// TBUFFER_STORE_FORMAT_*, addr64=0
class MTBUF_StoreResource <ValueType vt, int num_channels, MTBUF opcode> : Pat<
- (SItbuffer_store i128:$rsrc, vt:$vdata, num_channels, i32:$vaddr,
+ (SItbuffer_store v4i32:$rsrc, vt:$vdata, num_channels, i32:$vaddr,
i32:$soffset, imm:$inst_offset, imm:$dfmt,
imm:$nfmt, imm:$offen, imm:$idxen,
imm:$glc, imm:$slc, imm:$tfe),
// Miscellaneous Patterns
//===----------------------------------------------------------------------===//
-def : Pat <
- (i64 (trunc i128:$x)),
- (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (i32 (EXTRACT_SUBREG $x, sub0)), sub0),
- (i32 (EXTRACT_SUBREG $x, sub1)), sub1)
->;
-
def : Pat <
(i32 (trunc i64:$a)),
(EXTRACT_SUBREG $a, sub0)
(add SGPR_64Regs, VCCReg, EXECReg)
>;
-def SReg_128 : RegisterClass<"AMDGPU", [i128, v4i32], 128, (add SGPR_128)>;
+def SReg_128 : RegisterClass<"AMDGPU", [v4i32], 128, (add SGPR_128)>;
def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add SGPR_256)>;
let Size = 96;
}
-def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, i128], 128, (add VGPR_128)>;
+def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>;
def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add VGPR_256)>;
def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>;
//===----------------------------------------------------------------------===//
-// [SV]Src_* register classes, can have either an immediate or an register
+// [SV]Src_(32|64) register classes, can have either an immediate or an register
//===----------------------------------------------------------------------===//
def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>;
def VSrc_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;
+//===----------------------------------------------------------------------===//
+// SGPR and VGPR register classes
+//===----------------------------------------------------------------------===//
+
+def VSrc_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128,
+ (add VReg_128, SReg_128)>;
static char ID;
Module *Mod;
Type *v16i8;
- Type *i128;
+ Type *v4i32;
public:
SITypeRewriter() : FunctionPass(ID) { }
bool SITypeRewriter::doInitialization(Module &M) {
Mod = &M;
v16i8 = VectorType::get(Type::getInt8Ty(M.getContext()), 16);
- i128 = Type::getIntNTy(M.getContext(), 128);
+ v4i32 = VectorType::get(Type::getInt32Ty(M.getContext()), 4);
return false;
}
Type *ElemTy = PtrTy->getPointerElementType();
IRBuilder<> Builder(&I);
if (ElemTy == v16i8) {
- Value *BitCast = Builder.CreateBitCast(Ptr, Type::getIntNPtrTy(I.getContext(), 128, 2));
+ Value *BitCast = Builder.CreateBitCast(Ptr,
+ PointerType::get(v4i32,PtrTy->getPointerAddressSpace()));
LoadInst *Load = Builder.CreateLoad(BitCast);
SmallVector <std::pair<unsigned, MDNode*>, 8> MD;
I.getAllMetadataOtherThanDebugLoc(MD);
void SITypeRewriter::visitCallInst(CallInst &I) {
IRBuilder<> Builder(&I);
+
SmallVector <Value*, 8> Args;
SmallVector <Type*, 8> Types;
bool NeedToReplace = false;
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
Value *Arg = I.getArgOperand(i);
if (Arg->getType() == v16i8) {
- Args.push_back(Builder.CreateBitCast(Arg, i128));
- Types.push_back(i128);
+ Args.push_back(Builder.CreateBitCast(Arg, v4i32));
+ Types.push_back(v4i32);
NeedToReplace = true;
- Name = Name + ".i128";
+ Name = Name + ".v4i32";
} else if (Arg->getType()->isVectorTy() &&
Arg->getType()->getVectorNumElements() == 1 &&
Arg->getType()->getVectorElementType() ==
void SITypeRewriter::visitBitCast(BitCastInst &I) {
IRBuilder<> Builder(&I);
- if (I.getDestTy() != i128) {
+ if (I.getDestTy() != v4i32) {
return;
}
if (BitCastInst *Op = dyn_cast<BitCastInst>(I.getOperand(0))) {
- if (Op->getSrcTy() == i128) {
+ if (Op->getSrcTy() == v4i32) {
I.replaceAllUsesWith(Op->getOperand(0));
I.eraseFromParent();
}
}
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+; When i128 was a legal type this program generated cannot select errors:
+
+; FUNC-LABEL: @i128-const-store
+; FIXME: We should be able to to this with one store instruction
+; EG-CHECK: STORE_RAW
+; EG-CHECK: STORE_RAW
+; EG-CHECK: STORE_RAW
+; EG-CHECK: STORE_RAW
+; CM-CHECK: STORE_DWORD
+; CM-CHECK: STORE_DWORD
+; CM-CHECK: STORE_DWORD
+; CM-CHECK: STORE_DWORD
+; SI: BUFFER_STORE_DWORDX2
+; SI: BUFFER_STORE_DWORDX2
+define void @i128-const-store(i32 addrspace(1)* %out) {
+entry:
+ store i32 1, i32 addrspace(1)* %out, align 4
+ %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %out, i64 1
+ store i32 1, i32 addrspace(1)* %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds i32 addrspace(1)* %out, i64 2
+ store i32 2, i32 addrspace(1)* %arrayidx4, align 4
+ %arrayidx6 = getelementptr inbounds i32 addrspace(1)* %out, i64 3
+ store i32 2, i32 addrspace(1)* %arrayidx6, align 4
+ ret void
+}