+; R600-CHECK: @load_v2i16_local
+; R600-CHECK: LDS_USHORT_READ_RET
+; R600-CHECK: LDS_USHORT_READ_RET
+; SI-CHECK: @load_v2i16_local
+; SI-CHECK: DS_READ_U16
+; SI-CHECK: DS_READ_U16
+define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
+entry:
+ %0 = load <2 x i16> addrspace(3)* %in
+ %1 = zext <2 x i16> %0 to <2 x i32>
+ store <2 x i32> %1, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+; R600-CHECK: @load_v2i16_sext_local
+; R600-CHECK-DAG: LDS_USHORT_READ_RET
+; R600-CHECK-DAG: LDS_USHORT_READ_RET
+; R600-CHECK-DAG: ASHR
+; R600-CHECK-DAG: ASHR
+; SI-CHECK: @load_v2i16_sext_local
+; SI-CHECK: DS_READ_I16
+; SI-CHECK: DS_READ_I16
+define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
+entry:
+ %0 = load <2 x i16> addrspace(3)* %in
+ %1 = sext <2 x i16> %0 to <2 x i32>
+ store <2 x i32> %1, <2 x i32> addrspace(1)* %out
+ ret void
+}
+
+; R600-CHECK: @load_v4i16_local
+; R600-CHECK: LDS_USHORT_READ_RET
+; R600-CHECK: LDS_USHORT_READ_RET
+; R600-CHECK: LDS_USHORT_READ_RET
+; R600-CHECK: LDS_USHORT_READ_RET
+; SI-CHECK: @load_v4i16_local
+; SI-CHECK: DS_READ_U16
+; SI-CHECK: DS_READ_U16
+; SI-CHECK: DS_READ_U16
+; SI-CHECK: DS_READ_U16
+define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
+entry:
+ %0 = load <4 x i16> addrspace(3)* %in
+ %1 = zext <4 x i16> %0 to <4 x i32>
+ store <4 x i32> %1, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; R600-CHECK: @load_v4i16_sext_local
+; R600-CHECK-DAG: LDS_USHORT_READ_RET
+; R600-CHECK-DAG: LDS_USHORT_READ_RET
+; R600-CHECK-DAG: LDS_USHORT_READ_RET
+; R600-CHECK-DAG: LDS_USHORT_READ_RET
+; R600-CHECK-DAG: ASHR
+; R600-CHECK-DAG: ASHR
+; R600-CHECK-DAG: ASHR
+; R600-CHECK-DAG: ASHR
+; SI-CHECK: @load_v4i16_sext_local
+; SI-CHECK: DS_READ_I16
+; SI-CHECK: DS_READ_I16
+; SI-CHECK: DS_READ_I16
+; SI-CHECK: DS_READ_I16
+define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
+entry:
+ %0 = load <4 x i16> addrspace(3)* %in
+ %1 = sext <4 x i16> %0 to <4 x i32>
+ store <4 x i32> %1, <4 x i32> addrspace(1)* %out
+ ret void
+}
+