test/CodeGen/AMDGPU/addrspacecast.ll

   1 ; RUN: not llc -O0 -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s 2>&1 | FileCheck -check-prefix=ERROR %s
   2
   3 ; ERROR: unsupported addrspacecast not implemented
   4
   5 ; XUN: llc -O0 -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-NO-PROMOTE %s
   6 ; XUN: llc -O0 -march=amdgcn -mcpu=bonaire -mattr=+promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PROMOTE %s
   7 ; XUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-NO-PROMOTE %s
   8 ; XUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=+promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PROMOTE %s
   9
  10 ; Disable optimizations in case there are optimizations added that
  11 ; specialize away generic pointer accesses.
  12
  13 ; CHECK-LABEL: {{^}}branch_use_flat_i32:
  14 ; CHECK: flat_store_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}
  15 ; CHECK: s_endpgm
  16 define void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 {
     ; Selects one of two incoming pointers based on %c, casts the chosen one
     ; to addrspace(4), and stores %x through the merged pointer.
     ; %out is referenced only by the disabled load/store pair near the end.
  17 entry:
     ; %c != 0 branches to %local; %c == 0 branches to %global.
  18   %cmp = icmp ne i32 %c, 0
  19   br i1 %cmp, label %local, label %global
  20
  21 local:
     ; Cast the addrspace(3) argument %lptr to addrspace(4).
  22   %flat_local = addrspacecast i32 addrspace(3)* %lptr to i32 addrspace(4)*
  23   br label %end
  24
  25 global:
     ; Cast the addrspace(1) argument %gptr to addrspace(4).
  26   %flat_global = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
  27   br label %end
  28
  29 end:
     ; Merge the two casted pointers; the source address space of %fptr is
     ; therefore only known at run time.
  30   %fptr = phi i32 addrspace(4)* [ %flat_local, %local ], [ %flat_global, %global ]
  31   store i32 %x, i32 addrspace(4)* %fptr, align 4
     ; The next two lines are a commented-out reload through %fptr into %out.
  32 ;  %val = load i32, i32 addrspace(4)* %fptr, align 4
  33 ;  store i32 %val, i32 addrspace(1)* %out, align 4
  34   ret void
  35 }
  36
  37 ; TODO: This should not be zero when registers are used for small
  38 ; scratch allocations again.
  39
  40 ; Check for prologue initializing special SGPRs pointing to scratch.
  41 ; CHECK-LABEL: {{^}}store_flat_scratch:
  42 ; CHECK: s_movk_i32 flat_scratch_lo, 0
  43 ; CHECK-NO-PROMOTE: s_movk_i32 flat_scratch_hi, 0x28{{$}}
  44 ; CHECK-PROMOTE: s_movk_i32 flat_scratch_hi, 0x0{{$}}
  45 ; CHECK: flat_store_dword
  46 ; CHECK: s_barrier
  47 ; CHECK: flat_load_dword
  48 define void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 {
     ; Stores to and reloads from a stack allocation through an addrspace(4)
     ; pointer, then writes the reloaded value to %out.
     ; The second parameter is an unnamed i32 that the body never reads.
     ; Stack array of 9 x i32.
  49   %alloca = alloca i32, i32 9, align 4
     ; %x is both the element index and the value that gets stored.
     ; NOTE(review): nothing here bounds %x to the 9 allocated elements.
  50   %x = call i32 @llvm.r600.read.tidig.x() #3
  51   %pptr = getelementptr i32, i32* %alloca, i32 %x
     ; Cast the default-address-space element pointer to addrspace(4).
  52   %fptr = addrspacecast i32* %pptr to i32 addrspace(4)*
  53   store i32 %x, i32 addrspace(4)* %fptr
  54   ; Dummy call
     ; Sits between the store and the reload; presumably keeps them from
     ; being folded together -- confirm against the test's intent.
  55   call void @llvm.AMDGPU.barrier.local() #1
     ; Reload through the same addrspace(4) pointer and publish to %out.
  56   %reload = load i32, i32 addrspace(4)* %fptr, align 4
  57   store i32 %reload, i32 addrspace(1)* %out, align 4
  58   ret void
  59 }
  60
     ; Intrinsics called by @store_flat_scratch.
  61 declare void @llvm.AMDGPU.barrier.local() #1
  62 declare i32 @llvm.r600.read.tidig.x() #3
  63
     ; Attribute groups: #0 is used on both function definitions, #1 on the
     ; barrier intrinsic and its call site, #3 on the tidig.x intrinsic and its
     ; call site. No #2 group appears in this file.
  64 attributes #0 = { nounwind }
  65 attributes #1 = { nounwind convergent }
  66 attributes #3 = { nounwind readnone }