; This test checks that the lds input queue will is empty at the end of
; the ALU clause.
-; CHECK-LABEL: @lds_input_queue
+; CHECK-LABEL: {{^}}lds_input_queue:
; CHECK: LDS_READ_RET * OQAP
; CHECK-NOT: ALU clause
; CHECK: MOV * T{{[0-9]\.[XYZW]}}, OQAP
-@local_mem = internal unnamed_addr addrspace(3) global [2 x i32] [i32 1, i32 2], align 4
+@local_mem = internal unnamed_addr addrspace(3) global [2 x i32] undef, align 4
define void @lds_input_queue(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %index) {
entry:
- %0 = getelementptr inbounds [2 x i32] addrspace(3)* @local_mem, i32 0, i32 %index
- %1 = load i32 addrspace(3)* %0
+ %0 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(3)* @local_mem, i32 0, i32 %index
+ %1 = load i32, i32 addrspace(3)* %0
call void @llvm.AMDGPU.barrier.local()
; This will start a new clause for the vertex fetch
- %2 = load i32 addrspace(1)* %in
+ %2 = load i32, i32 addrspace(1)* %in
%3 = add i32 %1, %2
store i32 %3, i32 addrspace(1)* %out
ret void
; load from global memory which immediately follows a load from a global value that
; has been declared in the local memory space:
;
-; %0 = getelementptr inbounds [2 x i32] addrspace(3)* @local_mem, i32 0, i32 %index
-; %1 = load i32 addrspace(3)* %0
-; %2 = load i32 addrspace(1)* %in
+; %0 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(3)* @local_mem, i32 0, i32 %index
+; %1 = load i32, i32 addrspace(3)* %0
+; %2 = load i32, i32 addrspace(1)* %in
;
; The instruction selection phase will generate ISA that looks like this:
; %OQAP = LDS_READ_RET
; analysis, we should be able to keep these instructions sparate before
; scheduling.
;
-; CHECK-LABEL: @local_global_alias
+; CHECK-LABEL: {{^}}local_global_alias:
; CHECK: LDS_READ_RET
; CHECK-NOT: ALU clause
; CHECK: MOV * T{{[0-9]\.[XYZW]}}, OQAP
define void @local_global_alias(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
- %0 = getelementptr inbounds [2 x i32] addrspace(3)* @local_mem, i32 0, i32 0
- %1 = load i32 addrspace(3)* %0
- %2 = load i32 addrspace(1)* %in
+ %0 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(3)* @local_mem, i32 0, i32 0
+ %1 = load i32, i32 addrspace(3)* %0
+ %2 = load i32, i32 addrspace(1)* %in
%3 = add i32 %2, %1
store i32 %3, i32 addrspace(1)* %out
ret void