; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched \
; RUN:          -misched-topdown -verify-machineinstrs \
; RUN:     | FileCheck %s -check-prefix=TOPDOWN
; Verify that the MI scheduler minimizes register pressure for a
; uniform set of bottom-up subtrees (unrolled matrix multiply).
;
; For current top-down heuristics, ensure that some folded imulls have
; been reordered with the stores. This tests the scheduler's cheap
; alias analysis ability (that doesn't require any AliasAnalysis pass).
; TOPDOWN: movl %{{.*}}, (
; TOPDOWN: imull {{[0-9]*}}(
; TOPDOWN: movl %{{.*}}, 4(
; TOPDOWN: imull {{[0-9]*}}(
; TOPDOWN: movl %{{.*}}, 8(
; TOPDOWN: movl %{{.*}}, 12(
; @mmult - 4x4 i32 matrix multiply kernel used as scheduler input.
;
; For each row index i = %indvars.iv in 0..3 the loop body computes
;   m3[i][j] = m1[i][0]*m2[0][j] + m1[i][1]*m2[1][j]
;            + m1[i][2]*m2[2][j] + m1[i][3]*m2[3][j]      for j = 0..3
; with the j and k loops fully unrolled, so one iteration contains
; 32 loads, 16 multiplies, 12 adds and 4 stores.
;
; All three pointer arguments are noalias. The body is laid out as "all
; loads, then all multiplies, then all adds, then all stores".
;
; NOTE: the RUN line uses -pre-RA-sched=source, so the textual order of the
; instructions below is part of the test input. Do not reorder them.
define void @mmult([4 x i32]* noalias nocapture %m1, [4 x i32]* noalias nocapture %m2,
                   [4 x i32]* noalias nocapture %m3) nounwind uwtable ssp {
entry:
  ; Required predecessor for the %indvars.iv phi below.
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  ; Row index i: 0 on entry, incremented at the bottom of the loop.
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]

  ; --- Loads for result column 0: m1[i][k] and m2[k][0], k = 0..3 ---------
  %arrayidx8 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 0
  %tmp = load i32* %arrayidx8, align 4, !tbaa !0
  %arrayidx12 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 0
  %tmp1 = load i32* %arrayidx12, align 4, !tbaa !0
  %arrayidx8.1 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 1
  %tmp2 = load i32* %arrayidx8.1, align 4, !tbaa !0
  %arrayidx12.1 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 0
  %tmp3 = load i32* %arrayidx12.1, align 4, !tbaa !0
  %arrayidx8.2 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 2
  %tmp4 = load i32* %arrayidx8.2, align 4, !tbaa !0
  %arrayidx12.2 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 0
  %tmp5 = load i32* %arrayidx12.2, align 4, !tbaa !0
  %arrayidx8.3 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 3
  %tmp6 = load i32* %arrayidx8.3, align 4, !tbaa !0
  ; Address of m2[3][0]; its load (%tmp7) is issued after all other loads.
  %arrayidx12.3 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 0

  ; --- Loads for result column 1: m1[i][k] reloaded, m2[k][1] --------------
  %tmp8 = load i32* %arrayidx8, align 4, !tbaa !0
  %arrayidx12.137 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 1
  %tmp9 = load i32* %arrayidx12.137, align 4, !tbaa !0
  %tmp10 = load i32* %arrayidx8.1, align 4, !tbaa !0
  %arrayidx12.1.1 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 1
  %tmp11 = load i32* %arrayidx12.1.1, align 4, !tbaa !0
  %tmp12 = load i32* %arrayidx8.2, align 4, !tbaa !0
  %arrayidx12.2.1 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 1
  %tmp13 = load i32* %arrayidx12.2.1, align 4, !tbaa !0
  %tmp14 = load i32* %arrayidx8.3, align 4, !tbaa !0
  %arrayidx12.3.1 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 1
  %tmp15 = load i32* %arrayidx12.3.1, align 4, !tbaa !0

  ; --- Loads for result column 2: m1[i][k] reloaded, m2[k][2] --------------
  %tmp16 = load i32* %arrayidx8, align 4, !tbaa !0
  %arrayidx12.239 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 2
  %tmp17 = load i32* %arrayidx12.239, align 4, !tbaa !0
  %tmp18 = load i32* %arrayidx8.1, align 4, !tbaa !0
  %arrayidx12.1.2 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 2
  %tmp19 = load i32* %arrayidx12.1.2, align 4, !tbaa !0
  %tmp20 = load i32* %arrayidx8.2, align 4, !tbaa !0
  %arrayidx12.2.2 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 2
  %tmp21 = load i32* %arrayidx12.2.2, align 4, !tbaa !0
  %tmp22 = load i32* %arrayidx8.3, align 4, !tbaa !0
  %arrayidx12.3.2 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 2
  %tmp23 = load i32* %arrayidx12.3.2, align 4, !tbaa !0

  ; --- Loads for result column 3: m1[i][k] reloaded, m2[k][3] --------------
  %tmp24 = load i32* %arrayidx8, align 4, !tbaa !0
  %arrayidx12.341 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 3
  %tmp25 = load i32* %arrayidx12.341, align 4, !tbaa !0
  %tmp26 = load i32* %arrayidx8.1, align 4, !tbaa !0
  %arrayidx12.1.3 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 3
  %tmp27 = load i32* %arrayidx12.1.3, align 4, !tbaa !0
  %tmp28 = load i32* %arrayidx8.2, align 4, !tbaa !0
  %arrayidx12.2.3 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 3
  %tmp29 = load i32* %arrayidx12.2.3, align 4, !tbaa !0
  %tmp30 = load i32* %arrayidx8.3, align 4, !tbaa !0
  %arrayidx12.3.3 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 3
  %tmp31 = load i32* %arrayidx12.3.3, align 4, !tbaa !0

  ; Deferred load of m2[3][0] (address computed with the column-0 group).
  %tmp7 = load i32* %arrayidx12.3, align 4, !tbaa !0

  ; --- Products m2[k][j] * m1[i][k]; four per result column ----------------
  %mul = mul nsw i32 %tmp1, %tmp
  %mul.1 = mul nsw i32 %tmp3, %tmp2
  %mul.2 = mul nsw i32 %tmp5, %tmp4
  %mul.3 = mul nsw i32 %tmp7, %tmp6
  %mul.138 = mul nsw i32 %tmp9, %tmp8
  %mul.1.1 = mul nsw i32 %tmp11, %tmp10
  %mul.2.1 = mul nsw i32 %tmp13, %tmp12
  %mul.3.1 = mul nsw i32 %tmp15, %tmp14
  %mul.240 = mul nsw i32 %tmp17, %tmp16
  %mul.1.2 = mul nsw i32 %tmp19, %tmp18
  %mul.2.2 = mul nsw i32 %tmp21, %tmp20
  %mul.3.2 = mul nsw i32 %tmp23, %tmp22
  %mul.342 = mul nsw i32 %tmp25, %tmp24
  %mul.1.3 = mul nsw i32 %tmp27, %tmp26
  %mul.2.3 = mul nsw i32 %tmp29, %tmp28
  %mul.3.3 = mul nsw i32 %tmp31, %tmp30

  ; --- Reduce each group of four products into one dot product -------------
  %add.1 = add nsw i32 %mul.1, %mul
  %add.2 = add nsw i32 %mul.2, %add.1
  %add.3 = add nsw i32 %mul.3, %add.2
  %add.1.1 = add nsw i32 %mul.1.1, %mul.138
  %add.2.1 = add nsw i32 %mul.2.1, %add.1.1
  %add.3.1 = add nsw i32 %mul.3.1, %add.2.1
  %add.1.2 = add nsw i32 %mul.1.2, %mul.240
  %add.2.2 = add nsw i32 %mul.2.2, %add.1.2
  %add.3.2 = add nsw i32 %mul.3.2, %add.2.2
  %add.1.3 = add nsw i32 %mul.1.3, %mul.342
  %add.2.3 = add nsw i32 %mul.2.3, %add.1.3
  %add.3.3 = add nsw i32 %mul.3.3, %add.2.3

  ; --- Store the four results to m3[i][0..3] -------------------------------
  ; These are the stores matched by the TOPDOWN "movl ..., N(" checks.
  %arrayidx16 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 0
  store i32 %add.3, i32* %arrayidx16, align 4, !tbaa !0
  %arrayidx16.1 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 1
  store i32 %add.3.1, i32* %arrayidx16.1, align 4, !tbaa !0
  %arrayidx16.2 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 2
  store i32 %add.3.2, i32* %arrayidx16.2, align 4, !tbaa !0
  %arrayidx16.3 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 3
  store i32 %add.3.3, i32* %arrayidx16.3, align 4, !tbaa !0

  ; --- Loop control: advance i and exit once the truncated index equals 4 --
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 4
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ; Every basic block needs a terminator; the function returns void.
  ret void
}

; TBAA type descriptors referenced by the `!tbaa !0` tags on the loads and
; stores in this file. Each node is written as { name, parent }:
;   !0 "int" -> !1 "omnipotent char" -> !2 "Simple C/C++ TBAA" (root)
!0 = metadata !{metadata !"int", metadata !1}
!1 = metadata !{metadata !"omnipotent char", metadata !2}
!2 = metadata !{metadata !"Simple C/C++ TBAA"}