test/CodeGen/R600/cvt_f32_ubyte.ll

   1 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
   2
     ; Scalar case: one i8 is loaded from %in, converted with uitofp and stored
     ; to %out. The checks expect the byte load to feed v_cvt_f32_ubyte0_e32
     ; directly, with no bfe or lshr matched between the load and the
     ; conversion, and the converted register to be the one that is stored.
   3 ; SI-LABEL: {{^}}load_i8_to_f32:
   4 ; SI: buffer_load_ubyte [[LOADREG:v[0-9]+]],
   5 ; SI-NOT: bfe
   6 ; SI-NOT: lshr
   7 ; SI: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[LOADREG]]
   8 ; SI: buffer_store_dword [[CONV]],
   9 define void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
  10   %load = load i8 addrspace(1)* %in, align 1
  11   %cvt = uitofp i8 %load to float
  12   store float %cvt, float addrspace(1)* %out, align 4
  13   ret void
  14 }
  15
     ; <2 x i8> case: the checks expect both bytes to arrive through a single
     ; buffer_load_ushort and each element to be converted straight from that
     ; register (ubyte0 for the low result, ubyte1 for the high result), with no
     ; bfe, lshr or and in between. The two results must be the low and high
     ; ends of the register range written by the dwordx2 store.
  16 ; SI-LABEL: {{^}}load_v2i8_to_v2f32:
  17 ; SI: buffer_load_ushort [[LOADREG:v[0-9]+]],
  18 ; SI-NOT: bfe
  19 ; SI-NOT: lshr
  20 ; SI-NOT: and
  21 ; SI-DAG: v_cvt_f32_ubyte1_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
  22 ; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
  23 ; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
  24 define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind {
  25   %load = load <2 x i8> addrspace(1)* %in, align 2
  26   %cvt = uitofp <2 x i8> %load to <2 x float>
  27   store <2 x float> %cvt, <2 x float> addrspace(1)* %out, align 16
  28   ret void
  29 }
  30
     ; <3 x i8> case: three elements are converted, so the checks expect the
     ; ubyte0, ubyte1 and ubyte2 conversions (in any order) and forbid bfe and
     ; v_cvt_f32_ubyte3_e32 ahead of them.
  31 ; SI-LABEL: {{^}}load_v3i8_to_v3f32:
  32 ; SI-NOT: bfe
  33 ; SI-NOT: v_cvt_f32_ubyte3_e32
  34 ; SI-DAG: v_cvt_f32_ubyte2_e32
  35 ; SI-DAG: v_cvt_f32_ubyte1_e32
  36 ; SI-DAG: v_cvt_f32_ubyte0_e32
     ; The stored register range is matched with a plain regex. This block does
     ; not define LORESULT/HIRESULT, and FileCheck variables survive label
     ; boundaries by default, so referencing them here would tie this check to
     ; whatever registers an earlier function happened to use.
  37 ; SI: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}},
  38 define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind {
  39   %load = load <3 x i8> addrspace(1)* %in, align 4
  40   %cvt = uitofp <3 x i8> %load to <3 x float>
  41   store <3 x float> %cvt, <3 x float> addrspace(1)* %out, align 16
  42   ret void
  43 }
  44
     ; <4 x i8> case with a 4-byte aligned load: the checks expect one
     ; buffer_load_dword and four conversions (ubyte0 through ubyte3) that all
     ; read that same register, with no bfe or lshr before them. The ubyte0
     ; result and the ubyte3 result must be the low and high ends of the
     ; register range written by the dwordx4 store.
  45 ; SI-LABEL: {{^}}load_v4i8_to_v4f32:
  46 ; SI: buffer_load_dword [[LOADREG:v[0-9]+]]
  47 ; SI-NOT: bfe
  48 ; SI-NOT: lshr
  49 ; SI-DAG: v_cvt_f32_ubyte3_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
  50 ; SI-DAG: v_cvt_f32_ubyte2_e32 v{{[0-9]+}}, [[LOADREG]]
  51 ; SI-DAG: v_cvt_f32_ubyte1_e32 v{{[0-9]+}}, [[LOADREG]]
  52 ; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
  53 ; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
  54 define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
  55   %load = load <4 x i8> addrspace(1)* %in, align 4
  56   %cvt = uitofp <4 x i8> %load to <4 x float>
  57   store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
  58   ret void
  59 }
  60
  61 ; This should not be adding instructions to shift into the correct
  62 ; position in the word for the component.
     ;
     ; Same conversion as load_v4i8_to_v4f32, but the load is only align 1. The
     ; checks expect four separate byte loads, each converted with ubyte0 from
     ; its own register, and no v_lshlrev_b32 / v_or_b32 after the loads. The
     ; results for the first and fourth loaded bytes must be the low and high
     ; ends of the register range written by the dwordx4 store.
  63
  64 ; SI-LABEL: {{^}}load_v4i8_to_v4f32_unaligned:
  65 ; SI: buffer_load_ubyte [[LOADREG0:v[0-9]+]]
  66 ; SI: buffer_load_ubyte [[LOADREG1:v[0-9]+]]
  67 ; SI: buffer_load_ubyte [[LOADREG2:v[0-9]+]]
  68 ; SI: buffer_load_ubyte [[LOADREG3:v[0-9]+]]
  69 ; SI-NOT: v_lshlrev_b32
  70 ; SI-NOT: v_or_b32
  71
  72 ; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG0]]
  73 ; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, [[LOADREG1]]
  74 ; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, [[LOADREG2]]
  75 ; SI-DAG: v_cvt_f32_ubyte0_e32 v[[HIRESULT:[0-9]+]], [[LOADREG3]]
  76
  77 ; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
  78 define void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
  79   %load = load <4 x i8> addrspace(1)* %in, align 1
  80   %cvt = uitofp <4 x i8> %load to <4 x float>
  81   store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
  82   ret void
  83 }
  84
  85 ; XXX - This should really still be able to use the v_cvt_f32_ubyte0
  86 ; for each component, but computeKnownBits doesn't handle vectors very
  87 ; well.
     ;
     ; The loaded <4 x i8> has two users here: the uitofp stored to %out and an
     ; integer add stored to %out2. The active checks expect four byte loads
     ; followed by four ubyte0 conversions, and then the end of the program.
     ; NOTE(review): the XXX remark above reads as if ubyte0 were not used yet,
     ; while the active checks do expect v_cvt_f32_ubyte0_e32 - confirm which
     ; of the two is current.
  88
  89 ; SI-LABEL: {{^}}load_v4i8_to_v4f32_2_uses:
  90 ; SI: buffer_load_ubyte
  91 ; SI: buffer_load_ubyte
  92 ; SI: buffer_load_ubyte
  93 ; SI: buffer_load_ubyte
  94 ; SI: v_cvt_f32_ubyte0_e32
  95 ; SI: v_cvt_f32_ubyte0_e32
  96 ; SI: v_cvt_f32_ubyte0_e32
  97 ; SI: v_cvt_f32_ubyte0_e32
  98
     ; The XSI lines below are inactive: the only check prefix enabled on the
     ; llc/FileCheck command line at the top of the file is the SI prefix.
  99 ; XXX - replace with this when v4i8 loads aren't scalarized anymore.
 100 ; XSI: buffer_load_dword
 101 ; XSI: v_cvt_f32_u32_e32
 102 ; XSI: v_cvt_f32_u32_e32
 103 ; XSI: v_cvt_f32_u32_e32
 104 ; XSI: v_cvt_f32_u32_e32
 105 ; SI: s_endpgm
 106 define void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %out2, <4 x i8> addrspace(1)* noalias %in) nounwind {
 107   %load = load <4 x i8> addrspace(1)* %in, align 4
 108   %cvt = uitofp <4 x i8> %load to <4 x float>
 109   store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
 110   %add = add <4 x i8> %load, <i8 9, i8 9, i8 9, i8 9> ; Second use of %load
 111   store <4 x i8> %add, <4 x i8> addrspace(1)* %out2, align 4
 112   ret void
 113 }
 114
 115 ; Make sure this doesn't crash.
     ; <7 x i8> is converted to <7 x float>; no particular instruction sequence
     ; is required, only that the function label and the final s_endpgm appear.
 116 ; SI-LABEL: {{^}}load_v7i8_to_v7f32:
 117 ; SI: s_endpgm
 118 define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> addrspace(1)* noalias %in) nounwind {
 119   %load = load <7 x i8> addrspace(1)* %in, align 1
 120   %cvt = uitofp <7 x i8> %load to <7 x float>
 121   store <7 x float> %cvt, <7 x float> addrspace(1)* %out, align 16
 122   ret void
 123 }
 124
     ; <8 x i8> case: the checks expect one buffer_load_dwordx2 into a register
     ; pair, the ubyte0 through ubyte3 conversions applied to each half of that
     ; pair (in any order), no bfe or lshr around them, and then eight matches
     ; of buffer_store_dword.
     ; NOTE(review): the load below is marked align 1 while the checks expect a
     ; single dwordx2 load - confirm that this alignment is intended.
 125 ; SI-LABEL: {{^}}load_v8i8_to_v8f32:
 126 ; SI: buffer_load_dwordx2 v{{\[}}[[LOLOAD:[0-9]+]]:[[HILOAD:[0-9]+]]{{\]}},
 127 ; SI-NOT: bfe
 128 ; SI-NOT: lshr
 129 ; SI-DAG: v_cvt_f32_ubyte3_e32 v{{[0-9]+}}, v[[LOLOAD]]
 130 ; SI-DAG: v_cvt_f32_ubyte2_e32 v{{[0-9]+}}, v[[LOLOAD]]
 131 ; SI-DAG: v_cvt_f32_ubyte1_e32 v{{[0-9]+}}, v[[LOLOAD]]
 132 ; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, v[[LOLOAD]]
 133 ; SI-DAG: v_cvt_f32_ubyte3_e32 v{{[0-9]+}}, v[[HILOAD]]
 134 ; SI-DAG: v_cvt_f32_ubyte2_e32 v{{[0-9]+}}, v[[HILOAD]]
 135 ; SI-DAG: v_cvt_f32_ubyte1_e32 v{{[0-9]+}}, v[[HILOAD]]
 136 ; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, v[[HILOAD]]
 137 ; SI-NOT: bfe
 138 ; SI-NOT: lshr
 139 ; SI: buffer_store_dword
 140 ; SI: buffer_store_dword
 141 ; SI: buffer_store_dword
 142 ; SI: buffer_store_dword
 143 ; SI: buffer_store_dword
 144 ; SI: buffer_store_dword
 145 ; SI: buffer_store_dword
 146 ; SI: buffer_store_dword
 147 define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind {
 148   %load = load <8 x i8> addrspace(1)* %in, align 1
 149   %cvt = uitofp <8 x i8> %load to <8 x float>
 150   store <8 x float> %cvt, <8 x float> addrspace(1)* %out, align 16
 151   ret void
 152 }
 153
     ; The low byte of an i32 is isolated with "and 255" after an add, then
     ; converted. The checks expect the conversion to read the add result
     ; directly: v_cvt_f32_ubyte0_e32 must be on the line immediately after
     ; v_add_i32_e32, so no separate masking instruction may sit between them.
 154 ; SI-LABEL: {{^}}i8_zext_inreg_i32_to_f32:
 155 ; SI: buffer_load_dword [[LOADREG:v[0-9]+]],
 156 ; SI: v_add_i32_e32 [[ADD:v[0-9]+]], 2, [[LOADREG]]
 157 ; SI-NEXT: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[ADD]]
 158 ; SI: buffer_store_dword [[CONV]],
 159 define void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
 160   %load = load i32 addrspace(1)* %in, align 4
 161   %add = add i32 %load, 2
 162   %inreg = and i32 %add, 255 ; keep only bits 7:0
 163   %cvt = uitofp i32 %inreg to float
 164   store float %cvt, float addrspace(1)* %out, align 4
 165   ret void
 166 }
 167
     ; Byte 1 of a loaded i32 is isolated with an and/lshr pair and then
     ; converted. No instruction selection is pinned down for this pattern; the
     ; check under the label only confirms that the kernel is emitted through
     ; to its final s_endpgm.
 168 ; SI-LABEL: {{^}}i8_zext_inreg_hi1_to_f32:
     ; SI: s_endpgm
 169 define void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
 170   %load = load i32 addrspace(1)* %in, align 4
 171   %inreg = and i32 %load, 65280 ; 65280 = 0xff00, i.e. bits 15:8
 172   %shr = lshr i32 %inreg, 8 ; move those bits down to 7:0
 173   %cvt = uitofp i32 %shr to float
 174   store float %cvt, float addrspace(1)* %out, align 4
 175   ret void
 176 }
 177
 178
 179 ; We don't get these ones because of the zext, but instcombine removes
 180 ; them so it shouldn't really matter.
     ;
     ; Scalar form: an i8 is loaded, explicitly zero-extended to i32 and then
     ; converted. No instruction sequence is required; the label keeps this
     ; function anchored like the others in the file, and the check under it
     ; only confirms that the kernel is emitted through to s_endpgm.
     ; SI-LABEL: {{^}}i8_zext_i32_to_f32:
     ; SI: s_endpgm
 181 define void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
 182   %load = load i8 addrspace(1)* %in, align 1
 183   %ext = zext i8 %load to i32
 184   %cvt = uitofp i32 %ext to float
 185   store float %cvt, float addrspace(1)* %out, align 4
 186   ret void
 187 }
 188
     ; Vector form of the explicit-zext case: a <4 x i8> is loaded with align 1,
     ; zero-extended to <4 x i32> and then converted. No instruction sequence
     ; is required; the label keeps this function anchored like the others in
     ; the file, and the check under it only confirms that the kernel is
     ; emitted through to s_endpgm.
     ; SI-LABEL: {{^}}v4i8_zext_v4i32_to_v4f32:
     ; SI: s_endpgm
 189 define void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
 190   %load = load <4 x i8> addrspace(1)* %in, align 1
 191   %ext = zext <4 x i8> %load to <4 x i32>
 192   %cvt = uitofp <4 x i32> %ext to <4 x float>
 193   store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
 194   ret void
 195 }