38279a0a9f81b19c3c8d62581c886a845740c7ef
[oota-llvm.git] / lib / Target / CellSPU / SPUMathInstr.td
1 //======--- SPUMathInstr.td - Cell SPU math operations -*- tablegen -*--======//
2 //
3 //                     Cell SPU math operations
4 //
5 // This target description file contains instruction sequences for various
6 // math operations, such as vector multiplies, i32 multiply, etc., for the
7 // SPU's i32, i16, i8 and corresponding vector types.
8 //
9 // Any resemblance to libsimdmath or the Cell SDK simdmath library is
10 // purely and completely coincidental.
11 //
12 // Primary author: Scott Michel (scottm@aero.org)
13 //===----------------------------------------------------------------------===//
14
15 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
16 // v16i8 multiply instruction sequence:
17 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
18
// Selection pattern (anonymous): rewrites a `mul` of two v16i8 VECREG operands
// into the multi-instruction DAG below.
//
// Shape of the expansion:
//   ORv4i32( ANDv4i32( SELB#1, ILAv4i32 0x0000ffff ),
//            SHLIv4i32( SELB#2, 16 ) )
//
//   SELB#1 = SELBv4i32( MPYv8i16($rA, $rB),
//                       SHLHIv8i16( MPYv8i16( ROTMAHIv8i16($rA, 8),
//                                             ROTMAHIv8i16($rB, 8) ), 8 ),
//                       FSMBIv8i16 0x2222 )
//   SELB#2 has the same structure, but its inputs are $rA/$rB passed through
//   ROTMAIv4i32_i32 by 16 (first MPY) and by 8 (second MPY).
//
// Both SELBs use the same FSMBIv8i16 0x2222 value as their third operand.
//
// NOTE(review): presumably SELB#1 yields the byte products belonging to the
// low halfword of each 32-bit word (hence the 0x0000ffff mask) and SELB#2 those
// of the high halfword (hence the shift by 16) -- TODO confirm against the SPU
// ISA definitions of MPY, SELB, FSMBI and the ROTMA* forms.
19 def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
20           (ORv4i32
21            (ANDv4i32
22             (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB),
23                        (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8),
24                                              (ROTMAHIv8i16 VECREG:$rB, 8)), 8),
25                        (FSMBIv8i16 0x2222)),
26             (ILAv4i32 0x0000ffff)),
27            (SHLIv4i32
28             (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16),
29                                  (ROTMAIv4i32_i32 VECREG:$rB, 16)),
30                        (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8),
31                                              (ROTMAIv4i32_i32 VECREG:$rB, 8)), 8),
32                        (FSMBIv8i16 0x2222)), 16))>;
33                         
34 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
35 // v8i16 multiply instruction sequence:
36 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
37
// Selection pattern (anonymous): rewrites a `mul` of two v8i16 VECREG operands
// as a single SELBv8i16 over two partial results:
//   operand 1: MPYv8i16($rA, $rB)
//   operand 2: MPYHHv8i16($rA, $rB) passed through SHLIv4i32 by 16
//   operand 3: FSMBIv8i16 0xcccc (the selector value)
//
// NOTE(review): presumably MPY supplies the product for one halfword of each
// 32-bit word and the shifted MPYHH supplies the other, with the 0xcccc mask
// choosing between them -- TODO confirm against the SPU ISA.
38 def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
39           (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB),
40                      (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16),
41                      (FSMBIv8i16 0xcccc))>;
42                  
43 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
44 // v4i32, i32 multiply instruction sequence:
45 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
46
// MPYv4i32: selection pattern for `mul` on two v4i32 VECREG operands.
// The result is the sum (two nested Av4i32) of three partial terms:
//   MPYHv4i32($rA, $rB)
//   MPYHv4i32($rB, $rA)   -- same instruction with the operands swapped
//   MPYUv4i32($rA, $rB)
//
// NOTE(review): presumably the usual decomposition of a 32-bit product into
// 16-bit partial products (two cross terms plus the low*low term) -- TODO
// confirm against the SPU ISA definitions of MPYH and MPYU.
47 def MPYv4i32:
48   Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
49       (Av4i32
50         (Av4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB),
51                 (MPYHv4i32 VECREG:$rB, VECREG:$rA)),
52         (MPYUv4i32 VECREG:$rA, VECREG:$rB))>;
53
// MPYi32: selection pattern for `mul` on two R32C register operands.
// Same structure as the v4i32 pattern, using the r32 instruction forms: the
// result is the sum (two nested Ar32) of three partial terms:
//   MPYHr32($rA, $rB)
//   MPYHr32($rB, $rA)   -- same instruction with the operands swapped
//   MPYUr32($rA, $rB)
//
// NOTE(review): presumably a 32-bit product assembled from 16-bit partial
// products -- TODO confirm against the SPU ISA definitions of MPYH and MPYU.
54 def MPYi32:
55   Pat<(mul R32C:$rA, R32C:$rB),
56       (Ar32
57         (Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
58               (MPYHr32 R32C:$rB, R32C:$rA)),
59         (MPYUr32 R32C:$rA, R32C:$rB))>;
60
61 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
62 // f32, v4f32 divide instruction sequence:
63 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
64
65 // Reciprocal estimate and interpolation
// Fragment: FIf32 applied to $rB and to FRESTf32($rB). Only $rB (the second
// operand of the f32 fdiv pattern in this file) is referenced. Other fragments
// splice this DAG in via Interpf32.Fragment.
66 def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>;
67 // Division estimate
// Fragment: FMf32 of $rA and the Interpf32 fragment, i.e. the first fdiv
// operand combined with the interpolated reciprocal estimate of $rB.
68 def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>;
69 // Newton-Raphson iteration
// Fragment: FMAf32( FNMSf32(DivEstf32, $rB, $rA), Interpf32, DivEstf32 ).
// The DivEstf32 and Interpf32 fragments are each spliced in more than once.
// NOTE(review): presumably computes est + (rA - est*rB) * recip, one refinement
// step of the quotient estimate -- TODO confirm the operand order of FNMS/FMA
// against the SPU ISA.
70 def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),
71                                Interpf32.Fragment,
72                                DivEstf32.Fragment)>;
73 // Epsilon addition
// Fragment: AIf32 of the NRaphf32 fragment and the immediate 1.
// NOTE(review): presumably an integer add on the float's bit pattern, i.e. a
// one-unit bump of the refined quotient -- TODO confirm AIf32 semantics.
74 def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>;
75
// Selection pattern (anonymous): rewrites `fdiv` on two R32FP operands.
// The result is SELBf32_cond over:
//   operand 1: NRaphf32 fragment    (refined quotient)
//   operand 2: Epsilonf32 fragment  (refined quotient with 1 added via AIf32)
//   operand 3: CGTIf32( FNMSf32($rB, Epsilonf32, $rA), -1 )  (the condition)
// The fragments refer to R32FP:$rA and R32FP:$rB, the same operand names this
// pattern binds, so those names must stay in sync with the CodeFrag defs.
// NOTE(review): presumably the condition tests whether the residual
// rA - rB*Epsilon is non-negative (integer compare > -1 on its bits) and picks
// the bumped quotient in that case -- TODO confirm against the SPU ISA.
76 def : Pat<(fdiv R32FP:$rA, R32FP:$rB),
77           (SELBf32_cond NRaphf32.Fragment,
78                         Epsilonf32.Fragment,
79                         (CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>;
80
81 // Reciprocal estimate and interpolation
// Fragment (v4f32 form): FIv4f32 applied to $rB and to FRESTv4f32($rB), with
// $rB typed explicitly as v4f32 VECREG. Other fragments splice this DAG in via
// Interpv4f32.Fragment.
82 def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>;
83 // Division estimate
// Fragment (v4f32 form): FMv4f32 of $rA and the Interpv4f32 fragment.
84 def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>;
85 // Newton-Raphson iteration
// Fragment (v4f32 form):
//   FMAv4f32( FNMSv4f32(DivEstv4f32, $rB, $rA), Interpv4f32, DivEstv4f32 ).
// The DivEstv4f32 and Interpv4f32 fragments are each spliced in more than once.
// NOTE(review): presumably est + (rA - est*rB) * recip per element -- TODO
// confirm the operand order of FNMS/FMA against the SPU ISA.
86 def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment,
87                                               (v4f32 VECREG:$rB),
88                                               (v4f32 VECREG:$rA)),
89                                    Interpv4f32.Fragment,
90                                    DivEstv4f32.Fragment)>;
91 // Epsilon addition
// Fragment (v4f32 form): AIv4f32 of the NRaphv4f32 fragment and the immediate 1.
// NOTE(review): presumably an integer add on each element's bit pattern --
// TODO confirm AIv4f32 semantics.
92 def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>;
93
// Selection pattern (anonymous): rewrites `fdiv` on two v4f32 VECREG operands.
// The result is SELBv4f32_cond over:
//   operand 1: NRaphv4f32 fragment    (refined quotient)
//   operand 2: Epsilonv4f32 fragment  (refined quotient with 1 added via AIv4f32)
//   operand 3: CGTIv4f32( FNMSv4f32($rB, Epsilonv4f32, $rA), -1 )  (the condition)
// The fragments refer to VECREG:$rA and VECREG:$rB, the same operand names this
// pattern binds, so those names must stay in sync with the CodeFrag defs.
// NOTE(review): presumably the condition tests, per element, whether the
// residual rA - rB*Epsilon is non-negative and picks the bumped quotient in
// that case -- TODO confirm against the SPU ISA.
94 def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
95           (SELBv4f32_cond NRaphv4f32.Fragment,
96                         Epsilonv4f32.Fragment,
97                         (CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB),
98                                               Epsilonv4f32.Fragment,
99                                               (v4f32 VECREG:$rA)), -1))>;