1 ; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-dynamic-cost-savings-discount=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=50 | FileCheck %s
2 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; Constant table of ten i32 values alternating 0 and 1. Each test loop below
; indexes it with its induction variable, so every load from it has a known
; value once the iteration number is fixed.
4 @known_constant = internal unnamed_addr constant [10 x i32] [i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1], align 16
6 ; We should be able to propagate constant data through different kinds of
7 ; casts. For example, in this test we have a load that becomes constant after
8 ; unrolling and is then truncated to i8. The truncated value is therefore also
9 ; a constant, which can be used in further simplifications.
11 ; We expect this loop to be unrolled because the load then becomes a
12 ; constant, which is 0 in many cases; that, in turn, helps simplify the
13 ; following multiplication and addition. In total, unrolling should help to
14 ; optimize ~60% of all instructions in this case.
16 ; CHECK-LABEL: @const_load_trunc
; Loop that accumulates, in i8, the product of a truncated element of %src and
; a truncated element of @known_constant, both indexed by %iv.
19 define i8 @const_load_trunc(i32* noalias nocapture readonly %src) {
23 loop: ; preds = %loop, %entry
; %iv is the induction variable (starts at 0, stepped by %inc below);
; %r is the running accumulator (starts at 0, updated from %add).
24 %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
25 %r = phi i8 [ 0, %entry ], [ %add, %loop ]
; Load from the function argument: its value is not known at compile time.
26 %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
27 %src_element = load i32, i32* %arrayidx, align 4
; Load from the constant table: once %iv is a known iteration number this load
; has a known value.
28 %array_const_idx = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv
29 %const_array_element = load i32, i32* %array_const_idx, align 4
; The casts under test: both operands are narrowed from i32 to i8.
30 %x = trunc i32 %src_element to i8
31 %y = trunc i32 %const_array_element to i8
32 %mul = mul nsw i8 %x, %y
33 %add = add nsw i8 %mul, %r
; Exit once the incremented counter reaches 10, so %iv takes the values 0..9.
34 %inc = add nuw nsw i64 %iv, 1
35 %exitcond86.i = icmp eq i64 %inc, 10
36 br i1 %exitcond86.i, label %loop.end, label %loop
38 loop.end: ; preds = %loop
; The exit value is %r (the accumulator phi), not %add, so the product computed
; in the last iteration is not part of it.
39 %r.lcssa = phi i8 [ %r, %loop ]
43 ; Same test as @const_load_trunc above, but with zext instead of trunc.
44 ; CHECK-LABEL: @const_load_zext
; Loop that accumulates, in i64, the product of a zero-extended element of %src
; and a zero-extended element of @known_constant, both indexed by %iv.
47 define i64 @const_load_zext(i32* noalias nocapture readonly %src) {
51 loop: ; preds = %loop, %entry
; %iv is the induction variable (starts at 0, stepped by %inc below);
; %r is the running accumulator (starts at 0, updated from %add).
52 %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
53 %r = phi i64 [ 0, %entry ], [ %add, %loop ]
; Load from the function argument: its value is not known at compile time.
54 %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
55 %src_element = load i32, i32* %arrayidx, align 4
; Load from the constant table: once %iv is a known iteration number this load
; has a known value.
56 %array_const_idx = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv
57 %const_array_element = load i32, i32* %array_const_idx, align 4
; The casts under test: both operands are zero-extended from i32 to i64.
58 %x = zext i32 %src_element to i64
59 %y = zext i32 %const_array_element to i64
60 %mul = mul nsw i64 %x, %y
61 %add = add nsw i64 %mul, %r
; Exit once the incremented counter reaches 10, so %iv takes the values 0..9.
62 %inc = add nuw nsw i64 %iv, 1
63 %exitcond86.i = icmp eq i64 %inc, 10
64 br i1 %exitcond86.i, label %loop.end, label %loop
66 loop.end: ; preds = %loop
; The exit value is %r (the accumulator phi), not %add, so the product computed
; in the last iteration is not part of it.
67 %r.lcssa = phi i64 [ %r, %loop ]
71 ; Same test as @const_load_trunc, but with sext instead of trunc.
72 ; CHECK-LABEL: @const_load_sext
; Loop that accumulates, in i64, the product of a sign-extended element of %src
; and a sign-extended element of @known_constant, both indexed by %iv.
75 define i64 @const_load_sext(i32* noalias nocapture readonly %src) {
79 loop: ; preds = %loop, %entry
; %iv is the induction variable (starts at 0, stepped by %inc below);
; %r is the running accumulator (starts at 0, updated from %add).
80 %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
81 %r = phi i64 [ 0, %entry ], [ %add, %loop ]
; Load from the function argument: its value is not known at compile time.
82 %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
83 %src_element = load i32, i32* %arrayidx, align 4
; Load from the constant table: once %iv is a known iteration number this load
; has a known value.
84 %array_const_idx = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv
85 %const_array_element = load i32, i32* %array_const_idx, align 4
; The casts under test: both operands are sign-extended from i32 to i64.
86 %x = sext i32 %src_element to i64
87 %y = sext i32 %const_array_element to i64
88 %mul = mul nsw i64 %x, %y
89 %add = add nsw i64 %mul, %r
; Exit once the incremented counter reaches 10, so %iv takes the values 0..9.
90 %inc = add nuw nsw i64 %iv, 1
91 %exitcond86.i = icmp eq i64 %inc, 10
92 br i1 %exitcond86.i, label %loop.end, label %loop
94 loop.end: ; preds = %loop
; The exit value is %r (the accumulator phi), not %add, so the product computed
; in the last iteration is not part of it.
95 %r.lcssa = phi i64 [ %r, %loop ]