test/CodeGen/X86/MergeConsecutiveStores.ll

   1 ; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s
   2
   3 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
   4 target triple = "x86_64-apple-macosx10.8.0"
   5
   6 %struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 } ; eight i8 fields; the test functions store to them one field at a time
   7
   8 @a = common global [10000 x %struct.A] zeroinitializer, align 8 ; zero-initialized array of 10000 structs; not referenced by any function visible in this excerpt
   9
  10 ; Move all of the constants using a single vector store.
  11 ; CHECK: merge_const_store
  12 ; CHECK: movq %xmm0
  13 ; CHECK: ret
  14 define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwtable noinline ssp { ; stores the constants 1..8 into fields 0..7 of each struct, walking forward from %p until the counter equals %count
  15   %1 = icmp sgt i32 %count, 0                      ; loop guard: the body is entered only when %count > 0
  16   br i1 %1, label %.lr.ph, label %._crit_edge
  17 .lr.ph:
  18   %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]      ; iteration counter: 0 on entry, %10 on the back edge
  19   %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ] ; current struct: %p on entry, %11 on the back edge
  20   %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0 ; address of field 0 of the current struct
  21   store i8 1, i8* %2, align 1                      ; field 0 = 1; fields 1..7 below get 2..8 the same way
  22   %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
  23   store i8 2, i8* %3, align 1
  24   %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
  25   store i8 3, i8* %4, align 1
  26   %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
  27   store i8 4, i8* %5, align 1
  28   %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
  29   store i8 5, i8* %6, align 1
  30   %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
  31   store i8 6, i8* %7, align 1
  32   %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
  33   store i8 7, i8* %8, align 1
  34   %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
  35   store i8 8, i8* %9, align 1                      ; last of the eight constant byte stores
  36   %10 = add nsw i32 %i.02, 1                       ; next counter value
  37   %11 = getelementptr inbounds %struct.A* %.01, i64 1 ; pointer to the next struct
  38   %exitcond = icmp eq i32 %10, %count              ; leave the loop once the counter equals %count
  39   br i1 %exitcond, label %._crit_edge, label %.lr.ph
  40 ._crit_edge:
  41   ret void
  42 }
  43
  44 ; Move the first 4 constants as a single vector. Move the rest as scalars.
  45 ; CHECK: merge_nonconst_store
  46 ; CHECK: movd %xmm0
  47 ; CHECK: movb
  48 ; CHECK: movb
  49 ; CHECK: movb
  50 ; CHECK: movb
  51 ; CHECK: ret
  52 define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) nounwind uwtable noinline ssp { ; same loop shape as merge_const_store, except field 4 is written from the %zz argument instead of a constant
  53   %1 = icmp sgt i32 %count, 0                      ; loop guard: the body is entered only when %count > 0
  54   br i1 %1, label %.lr.ph, label %._crit_edge
  55 .lr.ph:
  56   %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]      ; iteration counter: 0 on entry, %10 on the back edge
  57   %.01 = phi %struct.A* [ %11, %.lr.ph ], [ %p, %0 ] ; current struct: %p on entry, %11 on the back edge
  58   %2 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
  59   store i8 1, i8* %2, align 1                      ; fields 0..3 receive the constants 1..4
  60   %3 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
  61   store i8 2, i8* %3, align 1
  62   %4 = getelementptr inbounds %struct.A* %.01, i64 0, i32 2
  63   store i8 3, i8* %4, align 1
  64   %5 = getelementptr inbounds %struct.A* %.01, i64 0, i32 3
  65   store i8 4, i8* %5, align 1
  66   %6 = getelementptr inbounds %struct.A* %.01, i64 0, i32 4
  67   store i8 %zz, i8* %6, align 1                     ;  <----------- Not a const: field 4 is written from the %zz argument
  68   %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 5
  69   store i8 6, i8* %7, align 1                      ; fields 5..7 receive the constants 6..8
  70   %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 6
  71   store i8 7, i8* %8, align 1
  72   %9 = getelementptr inbounds %struct.A* %.01, i64 0, i32 7
  73   store i8 8, i8* %9, align 1
  74   %10 = add nsw i32 %i.02, 1                       ; next counter value
  75   %11 = getelementptr inbounds %struct.A* %.01, i64 1 ; pointer to the next struct
  76   %exitcond = icmp eq i32 %10, %count              ; leave the loop once the counter equals %count
  77   br i1 %exitcond, label %._crit_edge, label %.lr.ph
  78 ._crit_edge:
  79   ret void
  80 }
  81
  82
  83 ;CHECK: merge_loads
  84 ; load:
  85 ;CHECK: movw
  86 ; store:
  87 ;CHECK: movw
  88 ;CHECK: ret
  89 define void @merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp { ; each iteration copies fields 0 and 1 of the struct at %q into the current destination struct, starting at %p; both pointers are noalias
  90   %1 = icmp sgt i32 %count, 0                      ; loop guard: the loop is entered only when %count > 0
  91   br i1 %1, label %.lr.ph, label %._crit_edge
  92
  93 .lr.ph:                                           ; preds = %0
  94   %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0 ; source field 0; computed once, outside the loop
  95   %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1 ; source field 1; computed once, outside the loop
  96   br label %4
  97
  98 ; <label>:4                                       ; preds = %4, %.lr.ph
  99   %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]       ; iteration counter: 0 on entry, %9 on the back edge
 100   %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %10, %4 ] ; current destination struct: %p on entry, %10 on the back edge
 101   %5 = load i8* %2, align 1                        ; both loads are issued before either store
 102   %6 = load i8* %3, align 1
 103   %7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
 104   store i8 %5, i8* %7, align 1                     ; destination field 0 = source field 0
 105   %8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
 106   store i8 %6, i8* %8, align 1                     ; destination field 1 = source field 1
 107   %9 = add nsw i32 %i.02, 1                        ; next counter value
 108   %10 = getelementptr inbounds %struct.A* %.01, i64 1 ; pointer to the next destination struct
 109   %exitcond = icmp eq i32 %9, %count               ; leave the loop once the counter equals %count
 110   br i1 %exitcond, label %._crit_edge, label %4
 111
 112 ._crit_edge:                                      ; preds = %4, %0
 113   ret void
 114 }
 115
 116 ; The loads and the stores are interleaved. Can't merge them.
 117 ;CHECK: no_merge_loads
 118 ;CHECK: movb
 119 ;CHECK: movb
 120 ;CHECK: movb
 121 ;CHECK: movb
 122 ;CHECK: ret
 123 define void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp { ; same two-field copy as merge_loads, but each load is immediately followed by its store
 124   %1 = icmp sgt i32 %count, 0                      ; loop guard: the loop is entered only when %count > 0
 125   br i1 %1, label %.lr.ph, label %._crit_edge
 126
 127 .lr.ph:                                           ; preds = %0
 128   %2 = getelementptr inbounds %struct.A* %q, i64 0, i32 0 ; source field 0; computed once, outside the loop
 129   %3 = getelementptr inbounds %struct.A* %q, i64 0, i32 1 ; source field 1; computed once, outside the loop
 130   br label %a4
 131
 132 a4:                                       ; preds = %a4, %.lr.ph
 133   %i.02 = phi i32 [ 0, %.lr.ph ], [ %a9, %a4 ]     ; iteration counter: 0 on entry, %a9 on the back edge
 134   %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %a10, %a4 ] ; current destination struct: %p on entry, %a10 on the back edge
 135   %a5 = load i8* %2, align 1                       ; first load ...
 136   %a7 = getelementptr inbounds %struct.A* %.01, i64 0, i32 0
 137   store i8 %a5, i8* %a7, align 1                   ; ... is stored before the second load is issued
 138   %a8 = getelementptr inbounds %struct.A* %.01, i64 0, i32 1
 139   %a6 = load i8* %3, align 1                       ; second load comes after the first store
 140   store i8 %a6, i8* %a8, align 1
 141   %a9 = add nsw i32 %i.02, 1                       ; next counter value
 142   %a10 = getelementptr inbounds %struct.A* %.01, i64 1 ; pointer to the next destination struct
 143   %exitcond = icmp eq i32 %a9, %count              ; leave the loop once the counter equals %count
 144   br i1 %exitcond, label %._crit_edge, label %a4
 145
 146 ._crit_edge:                                      ; preds = %a4, %0
 147   ret void
 148 }
 149
 150