1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
; External callee that takes one <16 x i32> (512-bit) vector by value. It is
; only declared in this file, so each use in bar__512 lowers to a real call.
3 declare void @Print__512(<16 x i32>) #0
; bar__512: codegen test for keeping a 512-bit vector value live across calls.
;
; The IR loads a <16 x i32> from %var (align 1), overwrites %var with a splat
; of 4, passes the previously loaded value to Print__512 twice, and finally
; overwrites %var with a splat of 7.
;
; The FileCheck assertions below pin the expected x86-64 output: the loaded
; vector is spilled to a stack slot before the first call and reloaded from
; that slot before the second call. The reload must come from the stack and
; not from %var, because %var has already been overwritten by the first store.
; %var itself is kept in %rbx so the pointer is still available for the final
; store after both calls.
;
; NOTE(review): in this view no entry-block label, `ret void`, or closing brace
; is visible, although the assertions reference a block named %allocas --
; confirm they are present in the full file.
5 define void @bar__512(<16 x i32>* %var) #0 {
6 ; CHECK-LABEL: bar__512:
7 ; CHECK: ## BB#0: ## %allocas
8 ; CHECK-NEXT: pushq %rbx
9 ; CHECK-NEXT: subq $112, %rsp
10 ; CHECK-NEXT: movq %rdi, %rbx
11 ; CHECK-NEXT: vmovdqu32 (%rbx), %zmm0
12 ; CHECK-NEXT: vmovups %zmm0, (%rsp) ## 64-byte Spill
13 ; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm1
14 ; CHECK-NEXT: vmovdqa32 %zmm1, (%rbx)
15 ; CHECK-NEXT: callq _Print__512
16 ; CHECK-NEXT: vmovups (%rsp), %zmm0 ## 64-byte Reload
17 ; CHECK-NEXT: callq _Print__512
18 ; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0
19 ; CHECK-NEXT: vmovdqa32 %zmm0, (%rbx)
20 ; CHECK-NEXT: addq $112, %rsp
21 ; CHECK-NEXT: popq %rbx
; Unaligned (align 1) load of the original contents of %var; this is the value
; that has to survive both calls.
24 %var_load_load = load <16 x i32>, <16 x i32>* %var, align 1
; Clobber the memory the value was loaded from, so re-reading %var later would
; yield the splat of 4 rather than the original contents.
25 store <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>, <16 x i32>* %var, align 64
26 call void @Print__512(<16 x i32> %var_load_load)
27 ; %var_load_load value should be reloaded
; The second call passes the same SSA value again; the assertions expect it to
; be restored from the spill slot into %zmm0 first.
28 call void @Print__512(<16 x i32> %var_load_load)
; Final store through %var after both calls (uses the pointer saved in %rbx).
29 store <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>, <16 x i32>* %var, align 64
; Attribute group #0, referenced by both the Print__512 declaration and the
; bar__512 definition: marks them nounwind.
34 attributes #0 = { nounwind }