1 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
2 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core2 | FileCheck %s --check-prefix=SSE3
3 ; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck %s --check-prefix=AVX2
5 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
6 target triple = "x86_64-apple-macosx10.8.0"
; @add: expected cost-model results for vector integer add. The directives
; here use the default FileCheck prefix, i.e. they are checked by the
; -mcpu=corei7-avx RUN line (the only one without --check-prefix).
;   <4 x i32>, <2 x i64>  -> cost 1
;   <8 x i32>, <4 x i64>  -> cost 4
;   <8 x i64>             -> cost 8
; The function's ret is expected to be free (cost 0).
; NOTE(review): the higher 256-bit costs presumably reflect the lack of
; 256-bit integer adds on this CPU -- confirm against the X86 cost tables.
8 define i32 @add(i32 %arg) {
9 ;CHECK: cost of 1 {{.*}} add
10 %A = add <4 x i32> undef, undef
11 ;CHECK: cost of 4 {{.*}} add
12 %B = add <8 x i32> undef, undef
13 ;CHECK: cost of 1 {{.*}} add
14 %C = add <2 x i64> undef, undef
15 ;CHECK: cost of 4 {{.*}} add
16 %D = add <4 x i64> undef, undef
17 ;CHECK: cost of 8 {{.*}} add
18 %E = add <8 x i64> undef, undef
19 ;CHECK: cost of 0 {{.*}} ret
; @xor: expected cost-model results for vector xor under the default
; FileCheck prefix (the -mcpu=corei7-avx RUN line). Unlike integer add, every
; tested width -- 128-bit (<4 x i32>, <2 x i64>) and 256-bit (<8 x i32>,
; <4 x i64>) -- is expected to cost 1. The ret is expected to cost 0.
24 define i32 @xor(i32 %arg) {
25 ;CHECK: cost of 1 {{.*}} xor
26 %A = xor <4 x i32> undef, undef
27 ;CHECK: cost of 1 {{.*}} xor
28 %B = xor <8 x i32> undef, undef
29 ;CHECK: cost of 1 {{.*}} xor
30 %C = xor <2 x i64> undef, undef
31 ;CHECK: cost of 1 {{.*}} xor
32 %D = xor <4 x i64> undef, undef
33 ;CHECK: cost of 0 {{.*}} ret
39 ; A <2 x i32> gets expanded to a <2 x i64> vector, so it is costed like one.
40 ; A <2 x i64> vector multiply is implemented using
41 ; 3 PMULUDQ, 2 PADDQ and 4 shifts (3 + 2 + 4 = 9, the cost checked below).
42 ;CHECK: cost of 9 {{.*}} mul
43 %A0 = mul <2 x i32> undef, undef
44 ;CHECK: cost of 9 {{.*}} mul
45 %A1 = mul <2 x i64> undef, undef
; The <4 x i64> multiply is expected to cost 18, twice the <2 x i64> cost
; (presumably because it is split into two 128-bit halves -- confirm).
46 ;CHECK: cost of 18 {{.*}} mul
47 %A2 = mul <4 x i64> undef, undef
; @sse3mull: checked only under the SSE3 prefix, i.e. by the -mcpu=core2 RUN
; line. A <4 x i32> multiply is expected to cost 6 on that CPU.
; NOTE(review): presumably because core2 has no single-instruction 32-bit
; vector multiply -- confirm against the X86 cost tables.
52 define void @sse3mull() {
53 ; SSE3: cost of 6 {{.*}} mul
54 %A0 = mul <4 x i32> undef, undef
; @avx2mull: checked only under the AVX2 prefix, i.e. by the -mcpu=core-avx2
; RUN line. A <4 x i64> multiply is expected to cost 9 there, the same value
; the default-prefix checks in this file expect for a <2 x i64> multiply.
60 define void @avx2mull() {
61 ; AVX2: cost of 9 {{.*}} mul
62 %A0 = mul <4 x i64> undef, undef
; @fmul: expected cost-model results for vector floating-point multiply under
; the default FileCheck prefix (the -mcpu=corei7-avx RUN line). Both the
; 128-bit <4 x float> and the 256-bit <8 x float> multiply are expected to
; cost 2.
68 define i32 @fmul(i32 %arg) {
69 ;CHECK: cost of 2 {{.*}} fmul
70 %A = fmul <4 x float> undef, undef
71 ;CHECK: cost of 2 {{.*}} fmul
72 %B = fmul <8 x float> undef, undef
; @shift: expected costs for 128-bit vector shifts (shl, lshr, ashr) on
; <4 x i32> and <2 x i64>. Each instruction is preceded by one directive per
; prefix.
; Under the AVX2 prefix (-mcpu=core-avx2) every shift here is expected to cost
; 1, except ashr <2 x i64>, which is pinned at 20.
; NOTE(review): the three RUN lines at the top of this file select the default
; prefix, SSE3 and AVX2. None of them selects an "AVX" prefix, so the
; AVX-prefixed directives below do not appear to be exercised -- confirm, then
; either enable that prefix on the corei7-avx RUN line (after validating the
; expected values) or remove the directives.
78 define void @shift() {
79 ; AVX: cost of 2 {{.*}} shl
80 ; AVX2: cost of 1 {{.*}} shl
81 %A0 = shl <4 x i32> undef, undef
82 ; AVX: cost of 2 {{.*}} shl
83 ; AVX2: cost of 1 {{.*}} shl
84 %A1 = shl <2 x i64> undef, undef
86 ; AVX: cost of 2 {{.*}} lshr
87 ; AVX2: cost of 1 {{.*}} lshr
88 %B0 = lshr <4 x i32> undef, undef
89 ; AVX: cost of 2 {{.*}} lshr
90 ; AVX2: cost of 1 {{.*}} lshr
91 %B1 = lshr <2 x i64> undef, undef
93 ; AVX: cost of 2 {{.*}} ashr
94 ; AVX2: cost of 1 {{.*}} ashr
95 %C0 = ashr <4 x i32> undef, undef
96 ; AVX: cost of 6 {{.*}} ashr
97 ; AVX2: cost of 20 {{.*}} ashr
98 %C1 = ashr <2 x i64> undef, undef
; @avx2shift: expected costs for 256-bit vector shifts (shl, lshr, ashr) on
; <8 x i32> and <4 x i64>. Each instruction is preceded by one directive per
; prefix.
; Under the AVX2 prefix (-mcpu=core-avx2) every shift here is expected to cost
; 1, except ashr <4 x i64>, which is pinned at 40.
; NOTE(review): the three RUN lines at the top of this file select the default
; prefix, SSE3 and AVX2. None of them selects an "AVX" prefix, so the
; AVX-prefixed directives below do not appear to be exercised -- confirm, then
; either enable that prefix on the corei7-avx RUN line (after validating the
; expected values) or remove the directives.
105 define void @avx2shift() {
106 ; AVX: cost of 2 {{.*}} shl
107 ; AVX2: cost of 1 {{.*}} shl
108 %A0 = shl <8 x i32> undef, undef
109 ; AVX: cost of 2 {{.*}} shl
110 ; AVX2: cost of 1 {{.*}} shl
111 %A1 = shl <4 x i64> undef, undef
113 ; AVX: cost of 2 {{.*}} lshr
114 ; AVX2: cost of 1 {{.*}} lshr
115 %B0 = lshr <8 x i32> undef, undef
116 ; AVX: cost of 2 {{.*}} lshr
117 ; AVX2: cost of 1 {{.*}} lshr
118 %B1 = lshr <4 x i64> undef, undef
120 ; AVX: cost of 2 {{.*}} ashr
121 ; AVX2: cost of 1 {{.*}} ashr
122 %C0 = ashr <8 x i32> undef, undef
123 ; AVX: cost of 12 {{.*}} ashr
124 ; AVX2: cost of 40 {{.*}} ashr
125 %C1 = ashr <4 x i64> undef, undef