test/CodeGen/X86/misched-matrix.ll

   1 ; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched \
   2 ; RUN:          -misched-topdown -verify-machineinstrs \
   3 ; RUN:     | FileCheck %s -check-prefix=TOPDOWN
   4 ;
   5 ; Verify that the MI scheduler minimizes register pressure for a
   6 ; uniform set of bottom-up subtrees (unrolled matrix multiply).
   7 ;
   8 ; For current top-down heuristics, ensure that some folded imulls have
   9 ; been reordered with the stores. This tests the scheduler's cheap
  10 ; alias analysis ability (that doesn't require any AliasAnalysis pass).
  11 ;
  12 ; TOPDOWN: %for.body
  13 ; TOPDOWN: movl %{{.*}}, (
  14 ; TOPDOWN: imull {{[0-9]*}}(
  15 ; TOPDOWN: movl %{{.*}}, 4(
  16 ; TOPDOWN: imull {{[0-9]*}}(
  17 ; TOPDOWN: movl %{{.*}}, 8(
  18 ; TOPDOWN: movl %{{.*}}, 12(
  19 ; TOPDOWN: %for.end
  20
  21 define void @mmult([4 x i32]* noalias nocapture %m1, [4 x i32]* noalias nocapture %m2,
  22 [4 x i32]* noalias nocapture %m3) nounwind uwtable ssp {
       ; Unrolled 4x4 i32 matrix-multiply row. Each trip of %for.body computes
       ;   m3[i][j] = sum over k of m1[i][k] * m2[k][j]   for j = 0..3,
       ; where i = %indvars.iv. All three pointer arguments are marked noalias.
  23 entry:
  24   br label %for.body
  25
  26 for.body:                              ; preds = %for.body, %entry
  27   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
       ; Output column 0: loads of m1[i][k] (%arrayidx8*) alternate with loads
       ; of m2[k][0] (%arrayidx12*) for k = 0..3.
  28   %arrayidx8 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 0
  29   %tmp = load i32* %arrayidx8, align 4, !tbaa !0
  30   %arrayidx12 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 0
  31   %tmp1 = load i32* %arrayidx12, align 4, !tbaa !0
  32   %arrayidx8.1 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 1
  33   %tmp2 = load i32* %arrayidx8.1, align 4, !tbaa !0
  34   %arrayidx12.1 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 0
  35   %tmp3 = load i32* %arrayidx12.1, align 4, !tbaa !0
  36   %arrayidx8.2 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 2
  37   %tmp4 = load i32* %arrayidx8.2, align 4, !tbaa !0
  38   %arrayidx12.2 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 0
  39   %tmp5 = load i32* %arrayidx12.2, align 4, !tbaa !0
  40   %arrayidx8.3 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 3
  41   %tmp6 = load i32* %arrayidx8.3, align 4, !tbaa !0
       ; NOTE(review): %arrayidx12.3 (m2[3][0]) is not loaded right after its
       ; GEP; its load (%tmp7) appears after %tmp31 below. Every other %m2 GEP
       ; is followed immediately by its load. The test is run with
       ; -pre-RA-sched=source, so input order may matter - confirm this
       ; placement is intentional.
  42   %arrayidx12.3 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 0
       ; Output column 1: the m1[i][k] loads are repeated from the same
       ; addresses as above; only the m2[k][1] addresses are new.
  43   %tmp8 = load i32* %arrayidx8, align 4, !tbaa !0
  44   %arrayidx12.137 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 1
  45   %tmp9 = load i32* %arrayidx12.137, align 4, !tbaa !0
  46   %tmp10 = load i32* %arrayidx8.1, align 4, !tbaa !0
  47   %arrayidx12.1.1 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 1
  48   %tmp11 = load i32* %arrayidx12.1.1, align 4, !tbaa !0
  49   %tmp12 = load i32* %arrayidx8.2, align 4, !tbaa !0
  50   %arrayidx12.2.1 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 1
  51   %tmp13 = load i32* %arrayidx12.2.1, align 4, !tbaa !0
  52   %tmp14 = load i32* %arrayidx8.3, align 4, !tbaa !0
  53   %arrayidx12.3.1 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 1
  54   %tmp15 = load i32* %arrayidx12.3.1, align 4, !tbaa !0
       ; Output column 2: m1[i][k] reloaded, paired with m2[k][2].
  55   %tmp16 = load i32* %arrayidx8, align 4, !tbaa !0
  56   %arrayidx12.239 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 2
  57   %tmp17 = load i32* %arrayidx12.239, align 4, !tbaa !0
  58   %tmp18 = load i32* %arrayidx8.1, align 4, !tbaa !0
  59   %arrayidx12.1.2 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 2
  60   %tmp19 = load i32* %arrayidx12.1.2, align 4, !tbaa !0
  61   %tmp20 = load i32* %arrayidx8.2, align 4, !tbaa !0
  62   %arrayidx12.2.2 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 2
  63   %tmp21 = load i32* %arrayidx12.2.2, align 4, !tbaa !0
  64   %tmp22 = load i32* %arrayidx8.3, align 4, !tbaa !0
  65   %arrayidx12.3.2 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 2
  66   %tmp23 = load i32* %arrayidx12.3.2, align 4, !tbaa !0
       ; Output column 3: m1[i][k] reloaded, paired with m2[k][3].
  67   %tmp24 = load i32* %arrayidx8, align 4, !tbaa !0
  68   %arrayidx12.341 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 3
  69   %tmp25 = load i32* %arrayidx12.341, align 4, !tbaa !0
  70   %tmp26 = load i32* %arrayidx8.1, align 4, !tbaa !0
  71   %arrayidx12.1.3 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 3
  72   %tmp27 = load i32* %arrayidx12.1.3, align 4, !tbaa !0
  73   %tmp28 = load i32* %arrayidx8.2, align 4, !tbaa !0
  74   %arrayidx12.2.3 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 3
  75   %tmp29 = load i32* %arrayidx12.2.3, align 4, !tbaa !0
  76   %tmp30 = load i32* %arrayidx8.3, align 4, !tbaa !0
  77   %arrayidx12.3.3 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 3
  78   %tmp31 = load i32* %arrayidx12.3.3, align 4, !tbaa !0
       ; Deferred load of m2[3][0]; feeds %mul.3 in the column-0 sum.
  79   %tmp7 = load i32* %arrayidx12.3, align 4, !tbaa !0
       ; Sixteen products m2[k][j] * m1[i][k], listed column by column
       ; (four consecutive multiplies per output column j).
  80   %mul = mul nsw i32 %tmp1, %tmp
  81   %mul.1 = mul nsw i32 %tmp3, %tmp2
  82   %mul.2 = mul nsw i32 %tmp5, %tmp4
  83   %mul.3 = mul nsw i32 %tmp7, %tmp6
  84   %mul.138 = mul nsw i32 %tmp9, %tmp8
  85   %mul.1.1 = mul nsw i32 %tmp11, %tmp10
  86   %mul.2.1 = mul nsw i32 %tmp13, %tmp12
  87   %mul.3.1 = mul nsw i32 %tmp15, %tmp14
  88   %mul.240 = mul nsw i32 %tmp17, %tmp16
  89   %mul.1.2 = mul nsw i32 %tmp19, %tmp18
  90   %mul.2.2 = mul nsw i32 %tmp21, %tmp20
  91   %mul.3.2 = mul nsw i32 %tmp23, %tmp22
  92   %mul.342 = mul nsw i32 %tmp25, %tmp24
  93   %mul.1.3 = mul nsw i32 %tmp27, %tmp26
  94   %mul.2.3 = mul nsw i32 %tmp29, %tmp28
  95   %mul.3.3 = mul nsw i32 %tmp31, %tmp30
       ; Per-column reduction: a chain of three adds sums each group of four
       ; products, giving %add.3, %add.3.1, %add.3.2 and %add.3.3.
  96   %add.1 = add nsw i32 %mul.1, %mul
  97   %add.2 = add nsw i32 %mul.2, %add.1
  98   %add.3 = add nsw i32 %mul.3, %add.2
  99   %add.1.1 = add nsw i32 %mul.1.1, %mul.138
 100   %add.2.1 = add nsw i32 %mul.2.1, %add.1.1
 101   %add.3.1 = add nsw i32 %mul.3.1, %add.2.1
 102   %add.1.2 = add nsw i32 %mul.1.2, %mul.240
 103   %add.2.2 = add nsw i32 %mul.2.2, %add.1.2
 104   %add.3.2 = add nsw i32 %mul.3.2, %add.2.2
 105   %add.1.3 = add nsw i32 %mul.1.3, %mul.342
 106   %add.2.3 = add nsw i32 %mul.2.3, %add.1.3
 107   %add.3.3 = add nsw i32 %mul.3.3, %add.2.3
       ; Store the four column sums to m3[i][0..3]. These are the stores whose
       ; order relative to the multiplies the FileCheck lines above inspect.
 108   %arrayidx16 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 0
 109   store i32 %add.3, i32* %arrayidx16, align 4, !tbaa !0
 110   %arrayidx16.1 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 1
 111   store i32 %add.3.1, i32* %arrayidx16.1, align 4, !tbaa !0
 112   %arrayidx16.2 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 2
 113   store i32 %add.3.2, i32* %arrayidx16.2, align 4, !tbaa !0
 114   %arrayidx16.3 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 3
 115   store i32 %add.3.3, i32* %arrayidx16.3, align 4, !tbaa !0
       ; Loop control: bump the row index and leave once the i32-truncated
       ; counter equals 4, so the body runs for i = 0, 1, 2, 3.
 116   %indvars.iv.next = add i64 %indvars.iv, 1
 117   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
 118   %exitcond = icmp eq i32 %lftr.wideiv, 4
 119   br i1 %exitcond, label %for.end, label %for.body
 120
 121 for.end:                                        ; preds = %for.body
 122   ret void
 123 }
 124
 125 !0 = metadata !{metadata !"int", metadata !1} ; TBAA type node "int" (attached to every load/store above); parent is !1
 126 !1 = metadata !{metadata !"omnipotent char", metadata !2} ; TBAA type node "omnipotent char"; parent is !2
 127 !2 = metadata !{metadata !"Simple C/C++ TBAA"} ; TBAA root node