+; XFAIL: *
+; FIXME: This test should pass; it is expected to fail for now. See PR12324 (misched bringup).
; RUN: llc < %s -march=x86-64 -O3 -asm-verbose=false | FileCheck %s
target datalayout = "e-p:64:64:64"
target triple = "x86_64-unknown-unknown"
; Instruction selection should use the FLAGS value from the dec for
; the branch. Scheduling should push the adds upwards.
-; CHECK: full_me_0:
+; CHECK-LABEL: full_me_0:
; CHECK: movsd (%rsi), %xmm0
; CHECK: mulsd (%rdx), %xmm0
; CHECK: movsd %xmm0, (%rdi)
; would be better on x86-64, since the start value would be 0 instead of
; 2048.
-; CHECK: mostly_full_me_0:
+; CHECK-LABEL: mostly_full_me_0:
; CHECK: movsd -2048(%rsi), %xmm0
; CHECK: mulsd -2048(%rdx), %xmm0
; CHECK: movsd %xmm0, -2048(%rdi)
; A minor variation on mostly_full_me_0.
; Prefer to start the indvar at 0.
-; CHECK: mostly_full_me_1:
+; CHECK-LABEL: mostly_full_me_1:
; CHECK: movsd (%rsi), %xmm0
; CHECK: mulsd (%rdx), %xmm0
; CHECK: movsd %xmm0, (%rdi)
; A slightly less minor variation on mostly_full_me_0.
-; CHECK: mostly_full_me_2:
+; CHECK-LABEL: mostly_full_me_2:
; CHECK: movsd (%rsi), %xmm0
; CHECK: mulsd (%rdx), %xmm0
; CHECK: movsd %xmm0, (%rdi)
; cases away, but it's useful here to verify that LSR's register pressure
; heuristics are working as expected.
-; CHECK: count_me_0:
+; CHECK-LABEL: count_me_0:
; CHECK: movsd (%rsi,%rax,8), %xmm0
; CHECK: mulsd (%rdx,%rax,8), %xmm0
; CHECK: movsd %xmm0, (%rdi,%rax,8)
; would not reduce register pressure.
; (though it would reduce register pressure inside the loop...)
-; CHECK: count_me_1:
+; CHECK-LABEL: count_me_1:
; CHECK: movsd (%rsi,%rax,8), %xmm0
; CHECK: mulsd (%rdx,%rax,8), %xmm0
; CHECK: movsd %xmm0, (%rdi,%rax,8)
; Full strength reduction doesn't save any registers here because the
; loop tripcount is a constant.
-; CHECK: count_me_2:
+; CHECK-LABEL: count_me_2:
; CHECK: movl $10, %eax
; CHECK: align
; CHECK: BB6_1:
; This should be fully strength-reduced to reduce register pressure.
-; CHECK: full_me_1:
+; CHECK-LABEL: full_me_1:
; CHECK: align
; CHECK: BB7_1:
; CHECK: movsd (%rdi), %xmm0
; This is a variation on full_me_0 in which the 0,+,1 induction variable
; has a non-address use, pinning that value in a register.
-; CHECK: count_me_3:
+; CHECK-LABEL: count_me_3:
; CHECK: call
; CHECK: movsd (%r{{[^,]*}},%r{{[^,]*}},8), %xmm0
; CHECK: mulsd (%r{{[^,]*}},%r{{[^,]*}},8), %xmm0
; LSR should use only one indvar for the inner loop.
; rdar://7657764
-; CHECK: asd:
-; CHECK: BB9_5:
+; CHECK-LABEL: asd:
+; CHECK: BB9_4:
; CHECK-NEXT: addl (%r{{[^,]*}},%rdi,4), %e
; CHECK-NEXT: incq %rdi
; CHECK-NEXT: cmpq %rdi, %r{{[^,]*}}
; we don't want to leave extra induction variables around, or use an
; lea to compute an exit condition inside the loop:
-; CHECK: test:
+; CHECK-LABEL: test:
; CHECK: BB10_4:
; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
; And the one at %bb68, where we want to be sure to use superhero mode:
-; CHECK: BB10_9:
+; CHECK: BB10_7:
; CHECK-NEXT: movaps 48(%r{{[^,]*}}), %xmm{{.*}}
; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}}
; CHECK-NEXT: movaps 32(%r{{[^,]*}}), %xmm{{.*}}
; CHECK-NEXT: addq $64, %r{{.*}}
; CHECK-NEXT: addq $64, %r{{.*}}
; CHECK-NEXT: addq $-16, %r{{.*}}
-; CHECK-NEXT: BB10_10:
; CHECK-NEXT: cmpq $15, %r{{.*}}
; CHECK-NEXT: jg