fixed to test features, not CPU models
[oota-llvm.git] / test / CodeGen / X86 / recip-fastmath.ll
1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx,use-recip-est | FileCheck %s --check-prefix=RECIP
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx,use-recip-est -x86-recip-refinement-steps=2 | FileCheck %s --check-prefix=REFINE
4
5 ; If the target's divss/divps instructions are substantially
6 ; slower than rcpss/rcpps with a Newton-Raphson refinement,
7 ; we should generate the estimate sequence.
8
9 ; See PR21385 ( http://llvm.org/bugs/show_bug.cgi?id=21385 )
10 ; for details about the accuracy, speed, and implementation
11 ; differences of x86 reciprocal estimates.
12
13 define float @reciprocal_estimate(float %x) #0 {
14   %div = fdiv fast float 1.0, %x
15   ret float %div
16 ; Scalar float 1.0/x. Default run (-mattr=sse2 only): the checks expect a plain divss right after the movss.
17 ; CHECK-LABEL: reciprocal_estimate:
18 ; CHECK: movss
19 ; CHECK-NEXT: divss
20 ; CHECK-NEXT: movaps
21 ; CHECK-NEXT: retq
22 ; Run with -mattr=avx,use-recip-est: vrcpss followed by one mul/sub/mul/add group, i.e. one refinement step.
23 ; RECIP-LABEL: reciprocal_estimate:
24 ; RECIP: vrcpss
25 ; RECIP: vmulss
26 ; RECIP: vsubss
27 ; RECIP: vmulss
28 ; RECIP: vaddss
29 ; RECIP-NEXT: retq
30 ; Run with -x86-recip-refinement-steps=2: the mul/sub/mul/add group is expected twice after vrcpss.
31 ; REFINE-LABEL: reciprocal_estimate:
32 ; REFINE: vrcpss
33 ; REFINE: vmulss
34 ; REFINE: vsubss
35 ; REFINE: vmulss
36 ; REFINE: vaddss
37 ; REFINE: vmulss
38 ; REFINE: vsubss
39 ; REFINE: vmulss
40 ; REFINE: vaddss
41 ; REFINE-NEXT: retq
42 }
43
44 define <4 x float> @reciprocal_estimate_v4f32(<4 x float> %x) #0 {
45   %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
46   ret <4 x float> %div
47 ; Four-lane float 1.0/x. Default run (-mattr=sse2 only): the checks expect a plain divps right after the first movaps.
48 ; CHECK-LABEL: reciprocal_estimate_v4f32:
49 ; CHECK: movaps
50 ; CHECK-NEXT: divps
51 ; CHECK-NEXT: movaps
52 ; CHECK-NEXT: retq
53 ; Run with -mattr=avx,use-recip-est: vrcpps followed by one mul/sub/mul/add group, i.e. one refinement step.
54 ; RECIP-LABEL: reciprocal_estimate_v4f32:
55 ; RECIP: vrcpps
56 ; RECIP: vmulps
57 ; RECIP: vsubps
58 ; RECIP: vmulps
59 ; RECIP: vaddps
60 ; RECIP-NEXT: retq
61 ; Run with -x86-recip-refinement-steps=2: the mul/sub/mul/add group is expected twice after vrcpps.
62 ; REFINE-LABEL: reciprocal_estimate_v4f32:
63 ; REFINE: vrcpps
64 ; REFINE: vmulps
65 ; REFINE: vsubps
66 ; REFINE: vmulps
67 ; REFINE: vaddps
68 ; REFINE: vmulps
69 ; REFINE: vsubps
70 ; REFINE: vmulps
71 ; REFINE: vaddps
72 ; REFINE-NEXT: retq
73 }
74
75 define <8 x float> @reciprocal_estimate_v8f32(<8 x float> %x) #0 {
76   %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
77   ret <8 x float> %div
78 ; Eight-lane float 1.0/x. Default run (-mattr=sse2 only): two divps are expected (presumably one per 4-float half), with paired movaps around them.
79 ; CHECK-LABEL: reciprocal_estimate_v8f32:
80 ; CHECK: movaps
81 ; CHECK: movaps
82 ; CHECK-NEXT: divps
83 ; CHECK-NEXT: divps
84 ; CHECK-NEXT: movaps
85 ; CHECK-NEXT: movaps
86 ; CHECK-NEXT: retq
87 ; Run with -mattr=avx,use-recip-est: a single vrcpps plus one mul/sub/mul/add group is checked for all eight lanes.
88 ; RECIP-LABEL: reciprocal_estimate_v8f32:
89 ; RECIP: vrcpps
90 ; RECIP: vmulps
91 ; RECIP: vsubps
92 ; RECIP: vmulps
93 ; RECIP: vaddps
94 ; RECIP-NEXT: retq
95 ; Run with -x86-recip-refinement-steps=2: the mul/sub/mul/add group is expected twice after the single vrcpps.
96 ; REFINE-LABEL: reciprocal_estimate_v8f32:
97 ; REFINE: vrcpps
98 ; REFINE: vmulps
99 ; REFINE: vsubps
100 ; REFINE: vmulps
101 ; REFINE: vaddps
102 ; REFINE: vmulps
103 ; REFINE: vsubps
104 ; REFINE: vmulps
105 ; REFINE: vaddps
106 ; REFINE-NEXT: retq
107 }
108 ; Attribute group #0 is referenced by each reciprocal_estimate* definition in this file; it sets "unsafe-fp-math" to "true".
109 attributes #0 = { "unsafe-fp-math"="true" }