test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll

   1 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
   2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
     ; The two llc invocations above differ only in -mcpu (SI vs. tonga). Checks
     ; written with the shared GCN prefix apply to both runs; the SI and VI
     ; prefixes are each enabled by exactly one run.
   3
     ; Intrinsics referenced by this test. All are declared with attribute
     ; group #1.
   4 declare float @llvm.fma.f32(float, float, float) #1
   5 declare float @llvm.fmuladd.f32(float, float, float) #1 ; declared, but no function visible in this file calls it
   6 declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) #1
   7
   8
   9 ; GCN-LABEL: {{^}}test_sgpr_use_twice_binop:
     ; The argument %a feeds both operands of a single fadd. The checks below
     ; expect one s_load_dword into an SGPR, a v_add_f32_e64 that names that
     ; same SGPR as both of its sources, and a store of the add's result.
  10 ; GCN: s_load_dword [[SGPR:s[0-9]+]],
  11 ; GCN: v_add_f32_e64 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]]
  12 ; GCN: buffer_store_dword [[RESULT]]
  13 define void @test_sgpr_use_twice_binop(float addrspace(1)* %out, float %a) #0 {
  14   %dbl = fadd float %a, %a
  15   store float %dbl, float addrspace(1)* %out, align 4
  16   ret void
  17 }
  18
  19 ; GCN-LABEL: {{^}}test_sgpr_use_three_ternary_op:
     ; The argument %a is passed as all three operands of llvm.fma.f32. The
     ; checks below expect a single loaded SGPR to appear in all three source
     ; positions of one v_fma_f32, whose result is then stored.
  20 ; GCN: s_load_dword [[SGPR:s[0-9]+]],
  21 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]]
  22 ; GCN: buffer_store_dword [[RESULT]]
  23 define void @test_sgpr_use_three_ternary_op(float addrspace(1)* %out, float %a) #0 {
  24   %fma = call float @llvm.fma.f32(float %a, float %a, float %a) #1
  25   store float %fma, float addrspace(1)* %out, align 4
  26   ret void
  27 }
  28
  29 ; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_b:
     ; Computes fma(%a, %a, %b) from two distinct arguments. The checks below
     ; expect two scalar loads (the expected load offsets differ between the SI
     ; and VI runs, hence the per-target prefixes), a v_mov_b32 that copies the
     ; second loaded value into a VGPR, and a v_fma_f32 that uses the first
     ; SGPR twice plus that VGPR copy as the addend.
     ; NOTE(review): the copy is presumably required because a single VALU
     ; instruction cannot read two different SGPRs on these targets; confirm
     ; against the ISA documentation.
  30 ; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
  31 ; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
  32 ; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
  33 ; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
  34 ; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
  35 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[SGPR0]], [[VGPR1]]
  36 ; GCN: buffer_store_dword [[RESULT]]
  37 define void @test_sgpr_use_twice_ternary_op_a_a_b(float addrspace(1)* %out, float %a, float %b) #0 {
  38   %fma = call float @llvm.fma.f32(float %a, float %a, float %b) #1
  39   store float %fma, float addrspace(1)* %out, align 4
  40   ret void
  41 }
  42
  43 ; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_b_a:
     ; Computes fma(%a, %b, %a): the repeated argument is the first multiplicand
     ; and the addend. The checks below expect the second loaded value to be
     ; copied into a VGPR and the first SGPR to be used twice by the v_fma_f32.
     ; Note that the expected instruction lists the VGPR copy as its first
     ; source, so the two multiplicands appear swapped relative to the IR call.
     ; NOTE(review): the VGPR copy is presumably forced by a one-SGPR-per-VALU
     ; operand limit on these targets; confirm against the ISA documentation.
  44 ; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
  45 ; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
  46 ; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
  47 ; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
  48 ; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
  49 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
  50 ; GCN: buffer_store_dword [[RESULT]]
  51 define void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(1)* %out, float %a, float %b) #0 {
  52   %fma = call float @llvm.fma.f32(float %a, float %b, float %a) #1
  53   store float %fma, float addrspace(1)* %out, align 4
  54   ret void
  55 }
  56
  57 ; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_b_a_a:
     ; Computes fma(%b, %a, %a): the repeated argument is the second
     ; multiplicand and the addend. The checks below expect the second loaded
     ; value to be copied into a VGPR and the first SGPR to be used twice by the
     ; v_fma_f32. The expected instruction lists the SGPR first and the VGPR
     ; copy second, so the two multiplicands appear swapped relative to the IR
     ; call.
     ; NOTE(review): the VGPR copy is presumably forced by a one-SGPR-per-VALU
     ; operand limit on these targets; confirm against the ISA documentation.
  58 ; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
  59 ; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
  60 ; VI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
  61 ; VI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
  62 ; GCN: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
  63 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
  64 ; GCN: buffer_store_dword [[RESULT]]
  65 define void @test_sgpr_use_twice_ternary_op_b_a_a(float addrspace(1)* %out, float %a, float %b) #0 {
  66   %fma = call float @llvm.fma.f32(float %b, float %a, float %a) #1
  67   store float %fma, float addrspace(1)* %out, align 4
  68   ret void
  69 }
  70
  71 ; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_imm:
     ; Computes fma(%a, %a, 2.0). The checks below expect the loaded SGPR to be
     ; used for both multiplicands and the constant 2.0 to appear directly as
     ; the third source operand of the v_fma_f32.
  72 ; GCN: s_load_dword [[SGPR:s[0-9]+]]
  73 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0
  74 ; GCN: buffer_store_dword [[RESULT]]
  75 define void @test_sgpr_use_twice_ternary_op_a_a_imm(float addrspace(1)* %out, float %a) #0 {
  76   %fma = call float @llvm.fma.f32(float %a, float %a, float 2.0) #1
  77   store float %fma, float addrspace(1)* %out, align 4
  78   ret void
  79 }
  80
  81 ; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_imm_a:
     ; Computes fma(%a, 2.0, %a). The checks below expect the constant 2.0 as
     ; the first source operand of the v_fma_f32 and the loaded SGPR in the
     ; other two positions, i.e. the multiplicands appear swapped relative to
     ; the IR call.
  82 ; GCN: s_load_dword [[SGPR:s[0-9]+]]
  83 ; GCN: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[SGPR]], [[SGPR]]
  84 ; GCN: buffer_store_dword [[RESULT]]
  85 define void @test_sgpr_use_twice_ternary_op_a_imm_a(float addrspace(1)* %out, float %a) #0 {
  86   %fma = call float @llvm.fma.f32(float %a, float 2.0, float %a) #1
  87   store float %fma, float addrspace(1)* %out, align 4
  88   ret void
  89 }
  90
  91 ; This case uses the integer llvm.AMDGPU.imad24 intrinsic instead of fma, because fma c, x, y is canonicalized to fma x, c, y.
  92 ; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_imm_a_a:
     ; Computes imad24(2, %a, %a). The checks below expect a v_mad_i32_i24 with
     ; the constant 2 as its first source and the loaded SGPR as the other two.
  93 ; GCN: s_load_dword [[SGPR:s[0-9]+]]
  94 ; GCN: v_mad_i32_i24 [[RESULT:v[0-9]+]], 2, [[SGPR]], [[SGPR]]
  95 ; GCN: buffer_store_dword [[RESULT]]
  96 define void @test_sgpr_use_twice_ternary_op_imm_a_a(i32 addrspace(1)* %out, i32 %a) #0 {
  97   %fma = call i32 @llvm.AMDGPU.imad24(i32 2, i32 %a, i32 %a) #1 ; named %fma like the sibling tests, but this is an integer mad24 result
  98   store i32 %fma, i32 addrspace(1)* %out, align 4
  99   ret void
 100 }
 101
 102 attributes #0 = { nounwind } ; attached to every test function defined in this file
 103 attributes #1 = { nounwind readnone } ; attached to the intrinsic declarations and their call sites