LAST_LOADEXT_TYPE
};
- NodeType getExtForLoadExtType(LoadExtType);
+ NodeType getExtForLoadExtType(bool IsFP, LoadExtType);
//===--------------------------------------------------------------------===//
/// ISD::CondCode enum - These are ordered carefully to make the bitfields
break;
}
case TargetLowering::Expand:
- if (!TLI.isLoadExtLegal(ISD::EXTLOAD, Node->getValueType(0),
- SrcVT) && TLI.isTypeLegal(SrcVT)) {
- SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr, LD->getMemOperand());
- unsigned ExtendOp;
- switch (ExtType) {
- case ISD::EXTLOAD:
- ExtendOp = (SrcVT.isFloatingPoint() ?
- ISD::FP_EXTEND : ISD::ANY_EXTEND);
+ if (!TLI.isLoadExtLegal(ISD::EXTLOAD, Node->getValueType(0), SrcVT)) {
+ // If the source type is not legal, see if there is a legal extload to
+ // an intermediate type that we can then extend further.
+ EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT());
+ if (TLI.isTypeLegal(SrcVT) || // Same as SrcVT == LoadVT?
+ TLI.isLoadExtLegal(ExtType, LoadVT, SrcVT)) {
+ // If we are loading a legal type, this is a non-extload followed by a
+ // full extend.
+ ISD::LoadExtType MidExtType =
+ (LoadVT == SrcVT) ? ISD::NON_EXTLOAD : ExtType;
+
+ SDValue Load = DAG.getExtLoad(MidExtType, dl, LoadVT, Chain, Ptr,
+ SrcVT, LD->getMemOperand());
+ unsigned ExtendOp =
+ ISD::getExtForLoadExtType(SrcVT.isFloatingPoint(), ExtType);
+ Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
+ Chain = Load.getValue(1);
break;
- case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
- case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
- default: llvm_unreachable("Unexpected extend load type!");
}
- Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
- Chain = Load.getValue(1);
- break;
}
assert(!SrcVT.isVector() &&
return true;
}
-ISD::NodeType ISD::getExtForLoadExtType(ISD::LoadExtType ExtType) {
+ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) {
switch (ExtType) {
case ISD::EXTLOAD:
- return ISD::ANY_EXTEND;
+ return IsFP ? ISD::FP_EXTEND : ISD::ANY_EXTEND;
case ISD::SEXTLOAD:
return ISD::SIGN_EXTEND;
case ISD::ZEXTLOAD:
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom);
+ // There are no 64-bit extloads. These should be done as a 32-bit extload and
+ // an extension to 64-bit.
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setLoadExtAction(ISD::EXTLOAD, MVT::i64, VT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i64, VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i64, VT, Expand);
+ }
+
for (MVT VT : MVT::integer_vector_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Expand);
EVT VT = Op.getValueType();
EVT MemVT = Load->getMemoryVT();
- if (ExtType != ISD::NON_EXTLOAD && !VT.isVector() && VT.getSizeInBits() > 32) {
- // We can do the extload to 32-bits, and then need to separately extend to
- // 64-bits.
-
- SDValue ExtLoad32 = DAG.getExtLoad(ExtType, DL, MVT::i32,
- Load->getChain(),
- Load->getBasePtr(),
- MemVT,
- Load->getMemOperand());
-
- SDValue Ops[] = {
- DAG.getNode(ISD::getExtForLoadExtType(ExtType), DL, VT, ExtLoad32),
- ExtLoad32.getValue(1)
- };
-
- return DAG.getMergeValues(Ops, DL);
- }
-
if (ExtType == ISD::NON_EXTLOAD && VT.getSizeInBits() < 32) {
assert(VT == MVT::i1 && "Only i1 non-extloads expected");
// FIXME: Copied from PPC
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
for (MVT VT : MVT::integer_valuetypes()) {
+ if (VT == MVT::i64)
+ continue;
+
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Legal);
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
- setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Legal);
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
- setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Legal);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Legal);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand);
}
; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind {
- %load = load <2 x i8> addrspace(1)* %in, align 1
+ %load = load <2 x i8> addrspace(1)* %in, align 2
%cvt = uitofp <2 x i8> %load to <2 x float>
store <2 x float> %cvt, <2 x float> addrspace(1)* %out, align 16
ret void
}
; SI-LABEL: {{^}}load_v4i8_to_v4f32:
-; We can't use buffer_load_dword here, because the load is byte aligned, and
-; buffer_load_dword requires dword alignment.
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: v_or_b32_e32 [[LOADREG:v[0-9]+]]
+; SI: buffer_load_dword [[LOADREG:v[0-9]+]]
; SI-NOT: bfe
; SI-NOT: lshr
; SI-DAG: v_cvt_f32_ubyte3_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
+ %load = load <4 x i8> addrspace(1)* %in, align 4
+ %cvt = uitofp <4 x i8> %load to <4 x float>
+ store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
+ ret void
+}
+
+; This should not be adding instructions to shift into the correct
+; position in the word for the component.
+
+; SI-LABEL: {{^}}load_v4i8_to_v4f32_unaligned:
+; SI: buffer_load_ubyte [[LOADREG0:v[0-9]+]]
+; SI: buffer_load_ubyte [[LOADREG1:v[0-9]+]]
+; SI: buffer_load_ubyte [[LOADREG2:v[0-9]+]]
+; SI: buffer_load_ubyte [[LOADREG3:v[0-9]+]]
+
+; SI: v_lshlrev_b32
+; SI: v_or_b32
+; SI: v_lshlrev_b32
+; SI: v_or_b32
+; SI: v_lshlrev_b32
+; SI: v_or_b32
+
+; XSI-DAG: v_cvt_f32_ubyte0_e32 v[[HIRESULT:[0-9]+]], [[LOADREG0]]
+; XSI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, [[LOADREG1]]
+; XSI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, [[LOADREG2]]
+; XSI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG3]]
+
+; SI-DAG: v_cvt_f32_ubyte0_e32
+; SI-DAG: v_cvt_f32_ubyte1_e32
+; SI-DAG: v_cvt_f32_ubyte2_e32
+; SI-DAG: v_cvt_f32_ubyte3_e32
+
+; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
+define void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
%load = load <4 x i8> addrspace(1)* %in, align 1
%cvt = uitofp <4 x i8> %load to <4 x float>
store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
--- /dev/null
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; FIXME: Evergreen broken
+
+; FUNC-LABEL: {{^}}zextload_global_i1_to_i32:
+; SI: buffer_load_ubyte
+; SI: buffer_store_dword
+; SI: s_endpgm
+define void @zextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+ %a = load i1 addrspace(1)* %in
+ %ext = zext i1 %a to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_i1_to_i32:
+; SI: buffer_load_ubyte
+; SI: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1{{$}}
+; SI: buffer_store_dword
+; SI: s_endpgm
+define void @sextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+ %a = load i1 addrspace(1)* %in
+ %ext = sext i1 %a to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v1i1_to_v1i32:
+; SI: s_endpgm
+define void @zextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <1 x i1> addrspace(1)* %in
+ %ext = zext <1 x i1> %load to <1 x i32>
+ store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v1i1_to_v1i32:
+; SI: s_endpgm
+define void @sextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <1 x i1> addrspace(1)* %in
+ %ext = sext <1 x i1> %load to <1 x i32>
+ store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v2i1_to_v2i32:
+; SI: s_endpgm
+define void @zextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <2 x i1> addrspace(1)* %in
+ %ext = zext <2 x i1> %load to <2 x i32>
+ store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v2i1_to_v2i32:
+; SI: s_endpgm
+define void @sextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <2 x i1> addrspace(1)* %in
+ %ext = sext <2 x i1> %load to <2 x i32>
+ store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v4i1_to_v4i32:
+; SI: s_endpgm
+define void @zextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <4 x i1> addrspace(1)* %in
+ %ext = zext <4 x i1> %load to <4 x i32>
+ store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v4i1_to_v4i32:
+; SI: s_endpgm
+define void @sextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <4 x i1> addrspace(1)* %in
+ %ext = sext <4 x i1> %load to <4 x i32>
+ store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v8i1_to_v8i32:
+; SI: s_endpgm
+define void @zextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <8 x i1> addrspace(1)* %in
+ %ext = zext <8 x i1> %load to <8 x i32>
+ store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v8i1_to_v8i32:
+; SI: s_endpgm
+define void @sextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <8 x i1> addrspace(1)* %in
+ %ext = sext <8 x i1> %load to <8 x i32>
+ store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v16i1_to_v16i32:
+; SI: s_endpgm
+define void @zextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <16 x i1> addrspace(1)* %in
+ %ext = zext <16 x i1> %load to <16 x i32>
+ store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v16i1_to_v16i32:
+; SI: s_endpgm
+define void @sextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <16 x i1> addrspace(1)* %in
+ %ext = sext <16 x i1> %load to <16 x i32>
+ store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+ ret void
+}
+
+; XFUNC-LABEL: {{^}}zextload_global_v32i1_to_v32i32:
+; XSI: s_endpgm
+; define void @zextload_global_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
+; %load = load <32 x i1> addrspace(1)* %in
+; %ext = zext <32 x i1> %load to <32 x i32>
+; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+; ret void
+; }
+
+; XFUNC-LABEL: {{^}}sextload_global_v32i1_to_v32i32:
+; XSI: s_endpgm
+; define void @sextload_global_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
+; %load = load <32 x i1> addrspace(1)* %in
+; %ext = sext <32 x i1> %load to <32 x i32>
+; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+; ret void
+; }
+
+; XFUNC-LABEL: {{^}}zextload_global_v64i1_to_v64i32:
+; XSI: s_endpgm
+; define void @zextload_global_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
+; %load = load <64 x i1> addrspace(1)* %in
+; %ext = zext <64 x i1> %load to <64 x i32>
+; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+; ret void
+; }
+
+; XFUNC-LABEL: {{^}}sextload_global_v64i1_to_v64i32:
+; XSI: s_endpgm
+; define void @sextload_global_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
+; %load = load <64 x i1> addrspace(1)* %in
+; %ext = sext <64 x i1> %load to <64 x i32>
+; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+; ret void
+; }
+
+; FUNC-LABEL: {{^}}zextload_global_i1_to_i64:
+; SI: buffer_load_ubyte [[LOAD:v[0-9]+]],
+; SI: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
+; SI: buffer_store_dwordx2
+define void @zextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+ %a = load i1 addrspace(1)* %in
+ %ext = zext i1 %a to i64
+ store i64 %ext, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_i1_to_i64:
+; SI: buffer_load_ubyte [[LOAD:v[0-9]+]],
+; SI: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}}
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
+; SI: buffer_store_dwordx2
+define void @sextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+ %a = load i1 addrspace(1)* %in
+ %ext = sext i1 %a to i64
+ store i64 %ext, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v1i1_to_v1i64:
+; SI: s_endpgm
+define void @zextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <1 x i1> addrspace(1)* %in
+ %ext = zext <1 x i1> %load to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v1i1_to_v1i64:
+; SI: s_endpgm
+define void @sextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <1 x i1> addrspace(1)* %in
+ %ext = sext <1 x i1> %load to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v2i1_to_v2i64:
+; SI: s_endpgm
+define void @zextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <2 x i1> addrspace(1)* %in
+ %ext = zext <2 x i1> %load to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v2i1_to_v2i64:
+; SI: s_endpgm
+define void @sextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <2 x i1> addrspace(1)* %in
+ %ext = sext <2 x i1> %load to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v4i1_to_v4i64:
+; SI: s_endpgm
+define void @zextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <4 x i1> addrspace(1)* %in
+ %ext = zext <4 x i1> %load to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v4i1_to_v4i64:
+; SI: s_endpgm
+define void @sextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <4 x i1> addrspace(1)* %in
+ %ext = sext <4 x i1> %load to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v8i1_to_v8i64:
+; SI: s_endpgm
+define void @zextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <8 x i1> addrspace(1)* %in
+ %ext = zext <8 x i1> %load to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v8i1_to_v8i64:
+; SI: s_endpgm
+define void @sextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <8 x i1> addrspace(1)* %in
+ %ext = sext <8 x i1> %load to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v16i1_to_v16i64:
+; SI: s_endpgm
+define void @zextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <16 x i1> addrspace(1)* %in
+ %ext = zext <16 x i1> %load to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v16i1_to_v16i64:
+; SI: s_endpgm
+define void @sextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
+ %load = load <16 x i1> addrspace(1)* %in
+ %ext = sext <16 x i1> %load to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+ ret void
+}
+
+; XFUNC-LABEL: {{^}}zextload_global_v32i1_to_v32i64:
+; XSI: s_endpgm
+; define void @zextload_global_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
+; %load = load <32 x i1> addrspace(1)* %in
+; %ext = zext <32 x i1> %load to <32 x i64>
+; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+; ret void
+; }
+
+; XFUNC-LABEL: {{^}}sextload_global_v32i1_to_v32i64:
+; XSI: s_endpgm
+; define void @sextload_global_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
+; %load = load <32 x i1> addrspace(1)* %in
+; %ext = sext <32 x i1> %load to <32 x i64>
+; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+; ret void
+; }
+
+; XFUNC-LABEL: {{^}}zextload_global_v64i1_to_v64i64:
+; XSI: s_endpgm
+; define void @zextload_global_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
+; %load = load <64 x i1> addrspace(1)* %in
+; %ext = zext <64 x i1> %load to <64 x i64>
+; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+; ret void
+; }
+
+; XFUNC-LABEL: {{^}}sextload_global_v64i1_to_v64i64:
+; XSI: s_endpgm
+; define void @sextload_global_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
+; %load = load <64 x i1> addrspace(1)* %in
+; %ext = sext <64 x i1> %load to <64 x i64>
+; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+; ret void
+; }
--- /dev/null
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; FIXME: cypress is broken because the bigger testcases spill and it's not implemented
+
+; FUNC-LABEL: {{^}}zextload_global_i16_to_i32:
+; SI: buffer_load_ushort
+; SI: buffer_store_dword
+; SI: s_endpgm
+define void @zextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
+ %a = load i16 addrspace(1)* %in
+ %ext = zext i16 %a to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_i16_to_i32:
+; SI: buffer_load_sshort
+; SI: buffer_store_dword
+; SI: s_endpgm
+define void @sextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
+ %a = load i16 addrspace(1)* %in
+ %ext = sext i16 %a to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v1i16_to_v1i32:
+; SI: buffer_load_ushort
+; SI: s_endpgm
+define void @zextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <1 x i16> addrspace(1)* %in
+ %ext = zext <1 x i16> %load to <1 x i32>
+ store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v1i16_to_v1i32:
+; SI: buffer_load_sshort
+; SI: s_endpgm
+define void @sextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <1 x i16> addrspace(1)* %in
+ %ext = sext <1 x i16> %load to <1 x i32>
+ store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i32:
+; SI: s_endpgm
+define void @zextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <2 x i16> addrspace(1)* %in
+ %ext = zext <2 x i16> %load to <2 x i32>
+ store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i32:
+; SI: s_endpgm
+define void @sextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <2 x i16> addrspace(1)* %in
+ %ext = sext <2 x i16> %load to <2 x i32>
+ store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i32:
+; SI: s_endpgm
+define void @zextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <4 x i16> addrspace(1)* %in
+ %ext = zext <4 x i16> %load to <4 x i32>
+ store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i32:
+; SI: s_endpgm
+define void @sextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <4 x i16> addrspace(1)* %in
+ %ext = sext <4 x i16> %load to <4 x i32>
+ store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i32:
+; SI: s_endpgm
+define void @zextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <8 x i16> addrspace(1)* %in
+ %ext = zext <8 x i16> %load to <8 x i32>
+ store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i32:
+; SI: s_endpgm
+define void @sextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <8 x i16> addrspace(1)* %in
+ %ext = sext <8 x i16> %load to <8 x i32>
+ store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i32:
+; SI: s_endpgm
+define void @zextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <16 x i16> addrspace(1)* %in
+ %ext = zext <16 x i16> %load to <16 x i32>
+ store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i32:
+; SI: s_endpgm
+define void @sextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <16 x i16> addrspace(1)* %in
+ %ext = sext <16 x i16> %load to <16 x i32>
+ store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i32:
+; SI: s_endpgm
+define void @zextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <32 x i16> addrspace(1)* %in
+ %ext = zext <32 x i16> %load to <32 x i32>
+ store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i32:
+; SI: s_endpgm
+define void @sextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <32 x i16> addrspace(1)* %in
+ %ext = sext <32 x i16> %load to <32 x i32>
+ store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i32:
+; SI: s_endpgm
+define void @zextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <64 x i16> addrspace(1)* %in
+ %ext = zext <64 x i16> %load to <64 x i32>
+ store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i32:
+; SI: s_endpgm
+define void @sextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <64 x i16> addrspace(1)* %in
+ %ext = sext <64 x i16> %load to <64 x i32>
+ store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_i16_to_i64:
+; SI: buffer_load_ushort [[LOAD:v[0-9]+]],
+; SI: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
+; SI: buffer_store_dwordx2
+define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
+ %a = load i16 addrspace(1)* %in
+ %ext = zext i16 %a to i64
+ store i64 %ext, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_i16_to_i64:
+; SI: buffer_load_sshort [[LOAD:v[0-9]+]],
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
+; SI: buffer_store_dwordx2
+define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
+ %a = load i16 addrspace(1)* %in
+ %ext = sext i16 %a to i64
+ store i64 %ext, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v1i16_to_v1i64:
+; SI: s_endpgm
+define void @zextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <1 x i16> addrspace(1)* %in
+ %ext = zext <1 x i16> %load to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v1i16_to_v1i64:
+; SI: s_endpgm
+define void @sextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <1 x i16> addrspace(1)* %in
+ %ext = sext <1 x i16> %load to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i64:
+; SI: s_endpgm
+define void @zextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <2 x i16> addrspace(1)* %in
+ %ext = zext <2 x i16> %load to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i64:
+; SI: s_endpgm
+define void @sextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <2 x i16> addrspace(1)* %in
+ %ext = sext <2 x i16> %load to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i64:
+; SI: s_endpgm
+define void @zextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <4 x i16> addrspace(1)* %in
+ %ext = zext <4 x i16> %load to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i64:
+; SI: s_endpgm
+define void @sextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <4 x i16> addrspace(1)* %in
+ %ext = sext <4 x i16> %load to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i64:
+; SI: s_endpgm
+define void @zextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <8 x i16> addrspace(1)* %in
+ %ext = zext <8 x i16> %load to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i64:
+; SI: s_endpgm
+define void @sextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <8 x i16> addrspace(1)* %in
+ %ext = sext <8 x i16> %load to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i64:
+; SI: s_endpgm
+define void @zextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <16 x i16> addrspace(1)* %in
+ %ext = zext <16 x i16> %load to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i64:
+; SI: s_endpgm
+define void @sextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <16 x i16> addrspace(1)* %in
+ %ext = sext <16 x i16> %load to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i64:
+; SI: s_endpgm
+define void @zextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <32 x i16> addrspace(1)* %in
+ %ext = zext <32 x i16> %load to <32 x i64>
+ store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i64:
+; SI: s_endpgm
+define void @sextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <32 x i16> addrspace(1)* %in
+ %ext = sext <32 x i16> %load to <32 x i64>
+ store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i64:
+; SI: s_endpgm
+define void @zextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <64 x i16> addrspace(1)* %in
+ %ext = zext <64 x i16> %load to <64 x i64>
+ store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i64:
+; SI: s_endpgm
+define void @sextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
+ %load = load <64 x i16> addrspace(1)* %in
+ %ext = sext <64 x i16> %load to <64 x i64>
+ store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+ ret void
+}
--- /dev/null
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}zextload_global_i32_to_i64:
+; SI: buffer_load_dword [[LOAD:v[0-9]+]],
+; SI: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %a = load i32 addrspace(1)* %in
+ %ext = zext i32 %a to i64
+ store i64 %ext, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_i32_to_i64:
+; SI: buffer_load_dword [[LOAD:v[0-9]+]],
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
+; SI: buffer_store_dwordx2
+define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+ %a = load i32 addrspace(1)* %in
+ %ext = sext i32 %a to i64
+ store i64 %ext, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v1i32_to_v1i64:
+; SI: buffer_load_dword
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @zextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
+ %load = load <1 x i32> addrspace(1)* %in
+ %ext = zext <1 x i32> %load to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v1i32_to_v1i64:
+; SI: buffer_load_dword
+; SI: v_ashrrev_i32
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @sextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
+ %load = load <1 x i32> addrspace(1)* %in
+ %ext = sext <1 x i32> %load to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v2i32_to_v2i64:
+; SI: buffer_load_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @zextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
+ %load = load <2 x i32> addrspace(1)* %in
+ %ext = zext <2 x i32> %load to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v2i32_to_v2i64:
+; SI: buffer_load_dwordx2
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI: s_endpgm
+define void @sextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
+ %load = load <2 x i32> addrspace(1)* %in
+ %ext = sext <2 x i32> %load to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v4i32_to_v4i64:
+; SI: buffer_load_dwordx4
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @zextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
+ %load = load <4 x i32> addrspace(1)* %in
+ %ext = zext <4 x i32> %load to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v4i32_to_v4i64:
+; SI: buffer_load_dwordx4
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI: s_endpgm
+define void @sextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
+ %load = load <4 x i32> addrspace(1)* %in
+ %ext = sext <4 x i32> %load to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v8i32_to_v8i64:
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI: s_endpgm
+define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
+ %load = load <8 x i32> addrspace(1)* %in
+ %ext = zext <8 x i32> %load to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v8i32_to_v8i64:
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI: s_endpgm
+define void @sextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
+ %load = load <8 x i32> addrspace(1)* %in
+ %ext = sext <8 x i32> %load to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v16i32_to_v16i64:
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI: s_endpgm
+define void @sextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
+ %load = load <16 x i32> addrspace(1)* %in
+ %ext = sext <16 x i32> %load to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v16i32_to_v16i64
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+
+; SI: s_endpgm
+define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
+ %load = load <16 x i32> addrspace(1)* %in
+ %ext = zext <16 x i32> %load to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v32i32_to_v32i64:
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI: s_endpgm
+define void @sextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
+ %load = load <32 x i32> addrspace(1)* %in
+ %ext = sext <32 x i32> %load to <32 x i64>
+ store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v32i32_to_v32i64:
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI: s_endpgm
+define void @zextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
+ %load = load <32 x i32> addrspace(1)* %in
+ %ext = zext <32 x i32> %load to <32 x i64>
+ store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+ ret void
+}
--- /dev/null
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}zextload_global_i8_to_i32:
+; SI: buffer_load_ubyte
+; SI: buffer_store_dword
+; SI: s_endpgm
+define void @zextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
+ %a = load i8 addrspace(1)* %in
+ %ext = zext i8 %a to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_i8_to_i32:
+; SI: buffer_load_sbyte
+; SI: buffer_store_dword
+; SI: s_endpgm
+define void @sextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
+ %a = load i8 addrspace(1)* %in
+ %ext = sext i8 %a to i32
+ store i32 %ext, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i32:
+; SI: s_endpgm
+define void @zextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <1 x i8> addrspace(1)* %in
+ %ext = zext <1 x i8> %load to <1 x i32>
+ store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i32:
+; SI: s_endpgm
+define void @sextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <1 x i8> addrspace(1)* %in
+ %ext = sext <1 x i8> %load to <1 x i32>
+ store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i32:
+; SI: s_endpgm
+define void @zextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <2 x i8> addrspace(1)* %in
+ %ext = zext <2 x i8> %load to <2 x i32>
+ store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i32:
+; SI: s_endpgm
+define void @sextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <2 x i8> addrspace(1)* %in
+ %ext = sext <2 x i8> %load to <2 x i32>
+ store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i32:
+; SI: s_endpgm
+define void @zextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <4 x i8> addrspace(1)* %in
+ %ext = zext <4 x i8> %load to <4 x i32>
+ store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i32:
+; SI: s_endpgm
+define void @sextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <4 x i8> addrspace(1)* %in
+ %ext = sext <4 x i8> %load to <4 x i32>
+ store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i32:
+; SI: s_endpgm
+define void @zextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <8 x i8> addrspace(1)* %in
+ %ext = zext <8 x i8> %load to <8 x i32>
+ store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i32:
+; SI: s_endpgm
+define void @sextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <8 x i8> addrspace(1)* %in
+ %ext = sext <8 x i8> %load to <8 x i32>
+ store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i32:
+; SI: s_endpgm
+define void @zextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <16 x i8> addrspace(1)* %in
+ %ext = zext <16 x i8> %load to <16 x i32>
+ store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i32:
+; SI: s_endpgm
+define void @sextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <16 x i8> addrspace(1)* %in
+ %ext = sext <16 x i8> %load to <16 x i32>
+ store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+ ret void
+}
+
+; XFUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i32:
+; XSI: s_endpgm
+; define void @zextload_global_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
+; %load = load <32 x i8> addrspace(1)* %in
+; %ext = zext <32 x i8> %load to <32 x i32>
+; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+; ret void
+; }
+
+; XFUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i32:
+; XSI: s_endpgm
+; define void @sextload_global_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
+; %load = load <32 x i8> addrspace(1)* %in
+; %ext = sext <32 x i8> %load to <32 x i32>
+; store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+; ret void
+; }
+
+; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i32:
+; XSI: s_endpgm
+; define void @zextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
+; %load = load <64 x i8> addrspace(1)* %in
+; %ext = zext <64 x i8> %load to <64 x i32>
+; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+; ret void
+; }
+
+; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i32:
+; XSI: s_endpgm
+; define void @sextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
+; %load = load <64 x i8> addrspace(1)* %in
+; %ext = sext <64 x i8> %load to <64 x i32>
+; store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+; ret void
+; }
+
+; FUNC-LABEL: {{^}}zextload_global_i8_to_i64:
+; SI: buffer_load_ubyte [[LOAD:v[0-9]+]],
+; SI: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
+; SI: buffer_store_dwordx2
+define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
+ %a = load i8 addrspace(1)* %in
+ %ext = zext i8 %a to i64
+ store i64 %ext, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_i8_to_i64:
+; SI: buffer_load_sbyte [[LOAD:v[0-9]+]],
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
+; SI: buffer_store_dwordx2
+define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
+ %a = load i8 addrspace(1)* %in
+ %ext = sext i8 %a to i64
+ store i64 %ext, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i64:
+; SI: s_endpgm
+define void @zextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <1 x i8> addrspace(1)* %in
+ %ext = zext <1 x i8> %load to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i64:
+; SI: s_endpgm
+define void @sextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <1 x i8> addrspace(1)* %in
+ %ext = sext <1 x i8> %load to <1 x i64>
+ store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i64:
+; SI: s_endpgm
+define void @zextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <2 x i8> addrspace(1)* %in
+ %ext = zext <2 x i8> %load to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i64:
+; SI: s_endpgm
+define void @sextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <2 x i8> addrspace(1)* %in
+ %ext = sext <2 x i8> %load to <2 x i64>
+ store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i64:
+; SI: s_endpgm
+define void @zextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <4 x i8> addrspace(1)* %in
+ %ext = zext <4 x i8> %load to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i64:
+; SI: s_endpgm
+define void @sextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <4 x i8> addrspace(1)* %in
+ %ext = sext <4 x i8> %load to <4 x i64>
+ store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i64:
+; SI: s_endpgm
+define void @zextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <8 x i8> addrspace(1)* %in
+ %ext = zext <8 x i8> %load to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i64:
+; SI: s_endpgm
+define void @sextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <8 x i8> addrspace(1)* %in
+ %ext = sext <8 x i8> %load to <8 x i64>
+ store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i64:
+; SI: s_endpgm
+define void @zextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <16 x i8> addrspace(1)* %in
+ %ext = zext <16 x i8> %load to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i64:
+; SI: s_endpgm
+define void @sextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
+ %load = load <16 x i8> addrspace(1)* %in
+ %ext = sext <16 x i8> %load to <16 x i64>
+ store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+ ret void
+}
+
+; XFUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i64:
+; XSI: s_endpgm
+; define void @zextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
+; %load = load <32 x i8> addrspace(1)* %in
+; %ext = zext <32 x i8> %load to <32 x i64>
+; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+; ret void
+; }
+
+; XFUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i64:
+; XSI: s_endpgm
+; define void @sextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
+; %load = load <32 x i8> addrspace(1)* %in
+; %ext = sext <32 x i8> %load to <32 x i64>
+; store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+; ret void
+; }
+
+; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i64:
+; XSI: s_endpgm
+; define void @zextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
+; %load = load <64 x i8> addrspace(1)* %in
+; %ext = zext <64 x i8> %load to <64 x i64>
+; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+; ret void
+; }
+
+; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i64:
+; XSI: s_endpgm
+; define void @sextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
+; %load = load <64 x i8> addrspace(1)* %in
+; %ext = sext <64 x i8> %load to <64 x i64>
+; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+; ret void
+; }
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
-; FIXME: This is probably wrong. This probably needs to expand to 8-bit reads and writes.
; SI-LABEL: {{^}}unaligned_load_store_i32:
-; SI: ds_read_u16
-; SI: ds_read_u16
+; SI: ds_read_u8
+; SI: ds_read_u8
; SI: ds_write_b32
; SI: s_endpgm
define void @unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind {
}
; SI-LABEL: {{^}}unaligned_load_store_v4i32:
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
-; SI: ds_read_u16
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+
; SI: ds_write_b32
; SI: ds_write_b32
; SI: ds_write_b32