lib/Target/CellSPU/SPUMathInstr.td

   1 //======-- SPUMathInstr.td - Cell SPU math operations -*- tablegen -*---======//
   2 //
   3 //                     Cell SPU math operations
   4 //
   5 // This target description file contains instruction sequences for various
   6 // math operations, such as vector multiplies, i32 multiply, etc., for the
   7 // SPU's i32, i16, i8 and corresponding vector types.
   8 //
   9 // Any resemblance to libsimdmath or the Cell SDK simdmath library is
  10 // purely and completely coincidental.
  11 //
  12 // Primary author: Scott Michel (scottm@aero.org)
  13 //===----------------------------------------------------------------------===//
  14
  15 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
  16 // v16i8 multiply instruction sequence:
  17 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
  18
  19 def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),  // match: mul of two v16i8 VECREG operands
  20           (ORv4i32  // result: ORv4i32 combines the ANDv4i32 and SHLIv4i32 sub-DAGs below
  21            (ANDv4i32  // first ORv4i32 operand: a SELB result combined with the ILA constant
  22             (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB),  // SELB operand 1: MPY of the unmodified inputs
  23                        (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8),  // SELB operand 2: MPY of both inputs, each
  24                                              (ROTMAHIv8i16 VECREG:$rB, 8)), 8),  //   passed through ROTMAHI 8, then SHLHI 8
  25                        (FSMBIv8i16 0x2222)),  // SELB operand 3: mask built by FSMBI from immediate 0x2222
  26             (ILAv4i32 0x0000ffff)),  // constant for ANDv4i32; presumably keeps the low 16 bits of each word -- TODO confirm
  27            (SHLIv4i32  // second ORv4i32 operand: a SELB result passed through SHLI (immediate 16, on the last line)
  28             (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16),  // SELB operand 1: MPY of both inputs, each
  29                                  (ROTMAIv4i32_i32 VECREG:$rB, 16)),  //   passed through ROTMAI 16
  30                        (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8),  // SELB operand 2: MPY of both inputs, each
  31                                              (ROTMAIv4i32_i32 VECREG:$rB, 8)), 8),  //   passed through ROTMAI 8, then SHLHI 8
  32                        (FSMBIv8i16 0x2222)), 16))>;  // same 0x2222 FSMBI mask as the first SELB; trailing 16 is the SHLIv4i32 immediate
  33
  34 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
  35 // v8i16 multiply instruction sequence:
  36 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
  37
  38 def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),  // match: mul of two v8i16 VECREG operands
  39           (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB),  // SELB operand 1: MPY of the inputs
  40                      (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16),  // SELB operand 2: MPYHH of the inputs, then SHLI with immediate 16
  41                      (FSMBIv8i16 0xcccc))>;  // SELB operand 3: mask built by FSMBI from 0xcccc; NOTE(review): presumably picks the upper halfword of each word from operand 2 -- confirm against the SPU ISA
  42
  43 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
  44 // v4i32, i32 multiply instruction sequence:
  45 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
  46
  47 def MPYv4i32:  // named pattern record for the v4i32 multiply
  48   Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),  // match: mul of two v4i32 VECREG operands
  49       (Av4i32  // outer Av4i32: combines the inner Av4i32 with the MPYU term (presumably word adds -- confirm)
  50         (Av4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB),  // MPYH with operands in ($rA, $rB) order
  51                 (MPYHv4i32 VECREG:$rB, VECREG:$rA)),  // MPYH with operands swapped to ($rB, $rA)
  52         (MPYUv4i32 VECREG:$rA, VECREG:$rB))>;  // MPYU term; NOTE(review): presumably the unsigned low-halfword partial product -- confirm
  53
  54 def MPYi32:  // named pattern record for the scalar i32 multiply; same shape as MPYv4i32 with r32 instruction variants
  55   Pat<(mul R32C:$rA, R32C:$rB),  // match: mul of two R32C register operands
  56       (Ar32  // outer Ar32: combines the inner Ar32 with the MPYU term (presumably adds -- confirm)
  57         (Ar32 (MPYHr32 R32C:$rA, R32C:$rB),  // MPYH with operands in ($rA, $rB) order
  58               (MPYHr32 R32C:$rB, R32C:$rA)),  // MPYH with operands swapped to ($rB, $rA)
  59         (MPYUr32 R32C:$rA, R32C:$rB))>;  // MPYU term; NOTE(review): presumably the unsigned low-halfword partial product -- confirm
  60
  61 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
  62 // f32, v4f32 divide instruction sequence:
  63 //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
  64
  65 // Reciprocal estimate and interpolation: FRESTf32 on the divisor $rB, then FIf32 applied to $rB and that estimate
  66 def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>;
  67 // Division estimate: FMf32 of the dividend $rA and the Interpf32 fragment (presumably $rA * (1/$rB) -- confirm)
  68 def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>;
  69 // Newton-Raphson iteration: FMAf32(FNMSf32(DivEst, $rB, $rA), Interp, DivEst)
  70 def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),  // FNMS term; presumably the residual $rA - DivEst*$rB -- TODO confirm
  71                                Interpf32.Fragment,  // second FMA operand: the reciprocal fragment
  72                                DivEstf32.Fragment)>;  // third FMA operand: the division estimate
  73 // Epsilon addition: AIf32 applies immediate 1 to the Newton-Raphson result (presumably a one-ulp bump of the bit pattern -- confirm)
  74 def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>;
  75
  76 def : Pat<(fdiv R32FP:$rA, R32FP:$rB),  // match: f32 fdiv with dividend $rA and divisor $rB
  77           (SELBf32_cond NRaphf32.Fragment,  // candidate 1: the Newton-Raphson result
  78                         Epsilonf32.Fragment,  // candidate 2: the same result after AIf32 with immediate 1
  79                         (CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>;  // condition: CGTI against -1 of FNMS($rB, Epsilon, $rA); presumably picks candidate 2 while its residual is non-negative -- confirm
  80
  81 // Reciprocal estimate and interpolation (v4f32): FRESTv4f32 on the divisor $rB, then FIv4f32 applied to $rB and that estimate
  82 def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>;
  83 // Division estimate (v4f32): FMv4f32 of the dividend $rA and the Interpv4f32 fragment (presumably $rA * (1/$rB) -- confirm)
  84 def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>;
  85 // Newton-Raphson iteration (v4f32): FMAv4f32(FNMSv4f32(DivEst, $rB, $rA), Interp, DivEst)
  86 def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment,  // FNMS term; presumably the residual $rA - DivEst*$rB -- TODO confirm
  87                                               (v4f32 VECREG:$rB),  // divisor
  88                                               (v4f32 VECREG:$rA)),  // dividend
  89                                    Interpv4f32.Fragment,  // second FMA operand: the reciprocal fragment
  90                                    DivEstv4f32.Fragment)>;  // third FMA operand: the division estimate
  91 // Epsilon addition (v4f32): AIv4f32 applies immediate 1 to the Newton-Raphson result (presumably a one-ulp bump per element -- confirm)
  92 def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>;
  93
  94 def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),  // match: v4f32 fdiv with dividend $rA and divisor $rB
  95           (SELBv4f32_cond NRaphv4f32.Fragment,  // candidate 1: the Newton-Raphson result
  96                         Epsilonv4f32.Fragment,  // candidate 2: the same result after AIv4f32 with immediate 1
  97                         (CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB),  // condition: CGTI against -1 of FNMS($rB, Epsilon, $rA)
  98                                               Epsilonv4f32.Fragment,  //   second FNMS operand: the epsilon-adjusted quotient
  99                                               (v4f32 VECREG:$rA)), -1))>;  //   presumably picks candidate 2 while its residual is non-negative -- confirm