; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
declare i32 @llvm.AMDGPU.imax(i32, i32) nounwind readnone

; sext_in_reg of an i1: shl 31 / ashr 31 should select to a single S_BFE_I32.
; FUNC-LABEL: {{^}}sext_in_reg_i1_i32:
; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
; SI: S_BFE_I32 [[SEXTRACT:s[0-9]+]], [[ARG]], 0x10000
; SI: V_MOV_B32_e32 [[EXTRACT:v[0-9]+]], [[SEXTRACT]]
; SI: BUFFER_STORE_DWORD [[EXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]], {{.*}}, 0.0, 1
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) {
  %shl = shl i32 %in, 31
  %sext = ashr i32 %shl, 31
  store i32 %sext, i32 addrspace(1)* %out
  ret void
}
; sext_in_reg of an i8: shl 24 / ashr 24 should become S_SEXT_I32_I8.
; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32:
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: BUFFER_STORE_DWORD [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %shl = shl i32 %c, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}
; sext_in_reg of an i16: shl 16 / ashr 16 should become S_SEXT_I32_I16.
; FUNC-LABEL: {{^}}sext_in_reg_i16_to_i32:
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: BUFFER_STORE_DWORD [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %shl = shl i32 %c, 16
  %ashr = ashr i32 %shl, 16
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}
; Same as the scalar i8 case, but through a <1 x i32> vector type.
; FUNC-LABEL: {{^}}sext_in_reg_i8_to_v1i32:
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
; SI: BUFFER_STORE_DWORD [[VEXTRACT]],

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG-NEXT: BFE_INT [[RES]], {{.*}}, 0.0, literal
; EG-NEXT: LSHR * [[ADDR]]
define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
  %c = add <1 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <1 x i32> %c, <i32 24>
  %ashr = ashr <1 x i32> %shl, <i32 24>
  store <1 x i32> %ashr, <1 x i32> addrspace(1)* %out, align 4
  ret void
}
; i1 sext_in_reg within an i64: low half is a BFE, high half is all sign bits (-1 or 0).
; FUNC-LABEL: {{^}}sext_in_reg_i1_to_i64:
; SI: S_MOV_B32 {{s[0-9]+}}, -1
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_BFE_I32 s{{[0-9]+}}, s{{[0-9]+}}, 0x10000
; SI: BUFFER_STORE_DWORDX2
define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = add i64 %a, %b ; add to prevent folding into extload
  %shl = shl i64 %c, 63
  %ashr = ashr i64 %shl, 63
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}
; i8 sext_in_reg within an i64: low half via S_SEXT_I32_I8, high half from the sign.
; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i64:
; SI: S_MOV_B32 {{s[0-9]+}}, -1
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
; EG-NEXT: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
; EG: ASHR [[RES_HI]]

;; TODO Check address computation, using | with variables in {{}} does not work,
;; also the _LO/_HI order might be different
define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = add i64 %a, %b ; add to prevent folding into extload
  %shl = shl i64 %c, 56
  %ashr = ashr i64 %shl, 56
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}
; i16 sext_in_reg within an i64: low half via S_SEXT_I32_I16, high half from the sign.
; FUNC-LABEL: {{^}}sext_in_reg_i16_to_i64:
; SI: S_MOV_B32 {{s[0-9]+}}, -1
; SI: S_ADD_I32 [[VAL:s[0-9]+]],
; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
; EG-NEXT: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
; EG: ASHR [[RES_HI]]

;; TODO Check address computation, using | with variables in {{}} does not work,
;; also the _LO/_HI order might be different
define void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = add i64 %a, %b ; add to prevent folding into extload
  %shl = shl i64 %c, 48
  %ashr = ashr i64 %shl, 48
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}
; i32 sext_in_reg within an i64: low half is the add result, high half is an ASHR by 31.
; FUNC-LABEL: {{^}}sext_in_reg_i32_to_i64:
; SI: S_ADD_I32 [[ADD:s[0-9]+]],
; SI: S_ASHR_I32 s{{[0-9]+}}, [[ADD]], 31
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
; EG: ADD_INT {{\*?}} [[RES_LO]]
; EG: ASHR [[RES_HI]]

;; TODO Check address computation, using | with variables in {{}} does not work,
;; also the _LO/_HI order might be different
define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
  %c = add i64 %a, %b ; add to prevent folding into extload
  %shl = shl i64 %c, 32
  %ashr = ashr i64 %shl, 32
  store i64 %ashr, i64 addrspace(1)* %out, align 8
  ret void
}
; This is broken on Evergreen for some reason related to the <1 x i64> kernel arguments.
; XFUNC-LABEL: {{^}}sext_in_reg_i8_to_v1i64:
; XSI: S_BFE_I32 [[EXTRACT:s[0-9]+]], {{s[0-9]+}}, 524288
; XSI: S_ASHR_I32 {{v[0-9]+}}, [[EXTRACT]], 31
; XSI: BUFFER_STORE_DWORD
; define void @sext_in_reg_i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a, <1 x i64> %b) nounwind {
;   %c = add <1 x i64> %a, %b
;   %shl = shl <1 x i64> %c, <i64 56>
;   %ashr = ashr <1 x i64> %shl, <i64 56>
;   store <1 x i64> %ashr, <1 x i64> addrspace(1)* %out, align 8
;   ret void
; }
; Mismatched shift amounts (shl 6 / ashr 7) are not a sext_in_reg, so no BFE is formed.
; FUNC-LABEL: {{^}}sext_in_reg_i1_in_i32_other_amount:
; SI: S_LSHL_B32 [[REG:s[0-9]+]], {{s[0-9]+}}, 6
; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG]], 7

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %x = shl i32 %c, 6
  %y = ashr i32 %x, 7
  store i32 %y, i32 addrspace(1)* %out
  ret void
}
; Vector version of the mismatched-shift-amount case: per-element shl/ashr pairs, no BFE.
; FUNC-LABEL: {{^}}sext_in_reg_v2i1_in_v2i32_other_amount:
; SI-DAG: S_LSHL_B32 [[REG0:s[0-9]+]], {{s[0-9]}}, 6
; SI-DAG: S_ASHR_I32 {{s[0-9]+}}, [[REG0]], 7
; SI-DAG: S_LSHL_B32 [[REG1:s[0-9]+]], {{s[0-9]}}, 6
; SI-DAG: S_ASHR_I32 {{s[0-9]+}}, [[REG1]], 7

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b
  %x = shl <2 x i32> %c, <i32 6, i32 6>
  %y = ashr <2 x i32> %x, <i32 7, i32 7>
  store <2 x i32> %y, <2 x i32> addrspace(1)* %out, align 2
  ret void
}
; v2i1 sext_in_reg: one S_BFE_I32 (width 1, offset 0) per element.
; FUNC-LABEL: {{^}}sext_in_reg_v2i1_to_v2i32:
; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <2 x i32> %c, <i32 31, i32 31>
  %ashr = ashr <2 x i32> %shl, <i32 31, i32 31>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}
; v4i1 sext_in_reg: one S_BFE_I32 (width 1, offset 0) per element.
; FUNC-LABEL: {{^}}sext_in_reg_v4i1_to_v4i32:
; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
; SI: BUFFER_STORE_DWORDX4

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
  %c = add <4 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
  %ashr = ashr <4 x i32> %shl, <i32 31, i32 31, i32 31, i32 31>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}
; v2i8 sext_in_reg: one S_SEXT_I32_I8 per element.
; FUNC-LABEL: {{^}}sext_in_reg_v2i8_to_v2i32:
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <2 x i32> %c, <i32 24, i32 24>
  %ashr = ashr <2 x i32> %shl, <i32 24, i32 24>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}
; v4i8 sext_in_reg: one S_SEXT_I32_I8 per element.
; FUNC-LABEL: {{^}}sext_in_reg_v4i8_to_v4i32:
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
; SI: BUFFER_STORE_DWORDX4

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
  %c = add <4 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
  %ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}
; v2i16 sext_in_reg: one S_SEXT_I32_I16 per element.
; FUNC-LABEL: {{^}}sext_in_reg_v2i16_to_v2i32:
; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}}
; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}}
; SI: BUFFER_STORE_DWORDX2

; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
; EG: BFE_INT [[RES]]
; EG: BFE_INT [[RES]]
; EG: LSHR {{\*?}} [[ADDR]]
define void @sext_in_reg_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
  %c = add <2 x i32> %a, %b ; add to prevent folding into extload
  %shl = shl <2 x i32> %c, <i32 16, i32 16>
  %ashr = ashr <2 x i32> %shl, <i32 16, i32 16>
  store <2 x i32> %ashr, <2 x i32> addrspace(1)* %out, align 8
  ret void
}
; Regression test: i8 select/xor pattern that historically exposed sext_in_reg bugs.
; Only checks that compilation succeeds (FUNC-LABEL match).
; FUNC-LABEL: {{^}}testcase:
define void @testcase(i8 addrspace(1)* %out, i8 %a) nounwind {
  %and_a_1 = and i8 %a, 1
  %cmp_eq = icmp eq i8 %and_a_1, 0
  %cmp_slt = icmp slt i8 %a, 0
  %sel0 = select i1 %cmp_slt, i8 0, i8 %a
  %sel1 = select i1 %cmp_eq, i8 0, i8 %a
  %xor = xor i8 %sel0, %sel1
  store i8 %xor, i8 addrspace(1)* %out
  ret void
}
; Same body as @testcase; kept as a second compilation-only regression test.
; FUNC-LABEL: {{^}}testcase_3:
define void @testcase_3(i8 addrspace(1)* %out, i8 %a) nounwind {
  %and_a_1 = and i8 %a, 1
  %cmp_eq = icmp eq i8 %and_a_1, 0
  %cmp_slt = icmp slt i8 %a, 0
  %sel0 = select i1 %cmp_slt, i8 0, i8 %a
  %sel1 = select i1 %cmp_eq, i8 0, i8 %a
  %xor = xor i8 %sel0, %sel1
  store i8 %xor, i8 addrspace(1)* %out
  ret void
}
; With loaded (VGPR) operands the i8 sext_in_reg selects to V_BFE_I32 per element.
; FUNC-LABEL: {{^}}vgpr_sext_in_reg_v4i8_to_v4i32:
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
define void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
  %loada = load <4 x i32> addrspace(1)* %a, align 16
  %loadb = load <4 x i32> addrspace(1)* %b, align 16
  %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
  %ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}
; With loaded (VGPR) operands the i16 sext_in_reg selects to V_BFE_I32 (width 16).
; FUNC-LABEL: {{^}}vgpr_sext_in_reg_v4i16_to_v4i32:
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
  %loada = load <4 x i32> addrspace(1)* %a, align 16
  %loadb = load <4 x i32> addrspace(1)* %b, align 16
  %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
  %shl = shl <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
  %ashr = ashr <4 x i32> %shl, <i32 16, i32 16, i32 16, i32 16>
  store <4 x i32> %ashr, <4 x i32> addrspace(1)* %out, align 8
  ret void
}
; FIXME: The BFE should really be eliminated. I think it should happen
; when computeKnownBitsForTargetNode is implemented for imax.

; FUNC-LABEL: {{^}}sext_in_reg_to_illegal_type:
; SI: BUFFER_LOAD_SBYTE
; SI: BUFFER_STORE_SHORT
define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
  %tmp5 = load i8 addrspace(1)* %src, align 1
  %tmp2 = sext i8 %tmp5 to i32
  %tmp3 = tail call i32 @llvm.AMDGPU.imax(i32 %tmp2, i32 0) nounwind readnone
  %tmp4 = trunc i32 %tmp3 to i8
  %tmp6 = sext i8 %tmp4 to i16
  store i16 %tmp6, i16 addrspace(1)* %out, align 2
  ret void
}
declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone

; A zero-width BFE extracts nothing and should fold away.
; FUNC-LABEL: {{^}}bfe_0_width:
define void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32 addrspace(1)* %ptr, align 4
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 8, i32 0) nounwind readnone
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
; Two identical 8-bit BFEs should fold into one.
; FUNC-LABEL: {{^}}bfe_8_bfe_8:
define void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}
; An 8-bit BFE followed by a wider 16-bit BFE reduces to the narrow 8-bit one.
; FUNC-LABEL: {{^}}bfe_8_bfe_16:
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 16) nounwind readnone
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}
; This really should be folded into 1
; FUNC-LABEL: {{^}}bfe_16_bfe_8:
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
  %load = load i32 addrspace(1)* %ptr, align 4
  %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 16) nounwind readnone
  %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
  store i32 %bfe1, i32 addrspace(1)* %out, align 4
  ret void
}
; Make sure there isn't a redundant BFE
; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32_bfe:
; SI: S_SEXT_I32_I8 s{{[0-9]+}}, s{{[0-9]+}}
define void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 0, i32 8) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}
; BFE at a nonzero offset does not line up with the sext_in_reg, so no folding applies.
; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32_bfe_wrong:
define void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
  %c = add i32 %a, %b ; add to prevent folding into extload
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 8, i32 0) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}
; The BFE is redundant after a signed byte load; only the sext load should remain.
; FUNC-LABEL: {{^}}sextload_i8_to_i32_bfe:
; SI: BUFFER_LOAD_SBYTE
define void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
  %load = load i8 addrspace(1)* %ptr, align 1
  %sext = sext i8 %load to i32
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 0, i32 8) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}
; Zero-width BFE after a signed byte load; compilation-only check.
; FUNC-LABEL: {{^}}sextload_i8_to_i32_bfe_0:
define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
  %load = load i8 addrspace(1)* %ptr, align 1
  %sext = sext i8 %load to i32
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 8, i32 0) nounwind readnone
  %shl = shl i32 %bfe, 24
  %ashr = ashr i32 %shl, 24
  store i32 %ashr, i32 addrspace(1)* %out, align 4
  ret void
}
; i1 sext_in_reg followed by a matching 1-bit BFE collapses to a single V_BFE_I32.
; FUNC-LABEL: {{^}}sext_in_reg_i1_bfe_offset_0:
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
define void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 31
  %shr = ashr i32 %shl, 31
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 0, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
; i2 sext_in_reg then a 1-bit BFE at offset 1 collapses to a single V_BFE_I32.
; FUNC-LABEL: {{^}}sext_in_reg_i1_bfe_offset_1:
; SI: BUFFER_LOAD_DWORD
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
define void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 30
  %shr = ashr i32 %shl, 30
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 1)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}
; i2 sext_in_reg with an overlapping 2-bit BFE at offset 1: the shifts stay, plus a BFE.
; FUNC-LABEL: {{^}}sext_in_reg_i2_bfe_offset_1:
; SI: BUFFER_LOAD_DWORD
; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
define void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
  %x = load i32 addrspace(1)* %in, align 4
  %shl = shl i32 %x, 30
  %shr = ashr i32 %shl, 30
  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 2)
  store i32 %bfe, i32 addrspace(1)* %out, align 4
  ret void
}