test/CodeGen/X86/fp-fast.ll

   1 ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx -enable-unsafe-fp-math < %s | FileCheck %s
   2
   3 define float @test1(float %a) {
   4 ; CHECK-LABEL: test1:
   5 ; CHECK:       # BB#0:
   6 ; CHECK-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
   7 ; CHECK-NEXT:    retq
   8   %t1 = fadd float %a, %a
   9   %r = fadd float %t1, %t1
  10   ret float %r
  11 }
  12
  13 define float @test2(float %a) {
  14 ; CHECK-LABEL: test2:
  15 ; CHECK:       # BB#0:
  16 ; CHECK-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
  17 ; CHECK-NEXT:    retq
  18   %t1 = fmul float 4.0, %a
  19   %t2 = fadd float %a, %a
  20   %r = fadd float %t1, %t2
  21   ret float %r
  22 }
  23
  24 define float @test3(float %a) {
  25 ; CHECK-LABEL: test3:
  26 ; CHECK:       # BB#0:
  27 ; CHECK-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
  28 ; CHECK-NEXT:    retq
  29   %t1 = fmul float %a, 4.0
  30   %t2 = fadd float %a, %a
  31   %r = fadd float %t1, %t2
  32   ret float %r
  33 }
  34
  35 define float @test4(float %a) {
  36 ; CHECK-LABEL: test4:
  37 ; CHECK:       # BB#0:
  38 ; CHECK-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
  39 ; CHECK-NEXT:    retq
  40   %t1 = fadd float %a, %a
  41   %t2 = fmul float 4.0, %a
  42   %r = fadd float %t1, %t2
  43   ret float %r
  44 }
  45
  46 define float @test5(float %a) {
  47 ; CHECK-LABEL: test5:
  48 ; CHECK:       # BB#0:
  49 ; CHECK-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm0
  50 ; CHECK-NEXT:    retq
  51   %t1 = fadd float %a, %a
  52   %t2 = fmul float %a, 4.0
  53   %r = fadd float %t1, %t2
  54   ret float %r
  55 }
  56
  57 define float @test6(float %a) {
  58 ; CHECK-LABEL: test6:
  59 ; CHECK:       # BB#0:
  60 ; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
  61 ; CHECK-NEXT:    retq
  62   %t1 = fmul float 2.0, %a
  63   %t2 = fadd float %a, %a
  64   %r = fsub float %t1, %t2
  65   ret float %r
  66 }
  67
  68 define float @test7(float %a) {
  69 ; CHECK-LABEL: test7:
  70 ; CHECK:       # BB#0:
  71 ; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
  72 ; CHECK-NEXT:    retq
  73   %t1 = fmul float %a, 2.0
  74   %t2 = fadd float %a, %a
  75   %r = fsub float %t1, %t2
  76   ret float %r
  77 }
  78
  79 define float @test8(float %a) {
  80 ; CHECK-LABEL: test8:
  81 ; CHECK:       # BB#0:
  82 ; CHECK-NEXT:    retq
  83   %t1 = fmul float %a, 0.0
  84   %t2 = fadd float %a, %t1
  85   ret float %t2
  86 }
  87
  88 define float @test9(float %a) {
  89 ; CHECK-LABEL: test9:
  90 ; CHECK:       # BB#0:
  91 ; CHECK-NEXT:    retq
  92   %t1 = fmul float 0.0, %a
  93   %t2 = fadd float %t1, %a
  94   ret float %t2
  95 }
  96
  97 define float @test10(float %a) {
  98 ; CHECK-LABEL: test10:
  99 ; CHECK:       # BB#0:
 100 ; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 101 ; CHECK-NEXT:    retq
 102   %t1 = fsub float -0.0, %a
 103   %t2 = fadd float %a, %t1
 104   ret float %t2
 105 }
 106
 107 define float @test11(float %a) {
 108 ; CHECK-LABEL: test11:
 109 ; CHECK:       # BB#0:
 110 ; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 111 ; CHECK-NEXT:    retq
 112   %t1 = fsub float -0.0, %a
 113   %t2 = fadd float %a, %t1
 114   ret float %t2
 115 }
 116
 117 ; Verify that the first two adds are independent regardless of how the inputs are
 118 ; commuted. The destination registers are used as source registers for the third add.
 119
 120 define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
 121 ; CHECK-LABEL: reassociate_adds1:
 122 ; CHECK:       # BB#0:
 123 ; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 124 ; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
 125 ; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 126 ; CHECK-NEXT:    retq
 127   %t0 = fadd float %x0, %x1
 128   %t1 = fadd float %t0, %x2
 129   %t2 = fadd float %t1, %x3
 130   ret float %t2
 131 }
 132
 133 define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
 134 ; CHECK-LABEL: reassociate_adds2:
 135 ; CHECK:       # BB#0:
 136 ; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 137 ; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
 138 ; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 139 ; CHECK-NEXT:    retq
 140   %t0 = fadd float %x0, %x1
 141   %t1 = fadd float %x2, %t0
 142   %t2 = fadd float %t1, %x3
 143   ret float %t2
 144 }
 145
 146 define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
 147 ; CHECK-LABEL: reassociate_adds3:
 148 ; CHECK:       # BB#0:
 149 ; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 150 ; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
 151 ; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 152 ; CHECK-NEXT:    retq
 153   %t0 = fadd float %x0, %x1
 154   %t1 = fadd float %t0, %x2
 155   %t2 = fadd float %x3, %t1
 156   ret float %t2
 157 }
 158
 159 define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
 160 ; CHECK-LABEL: reassociate_adds4:
 161 ; CHECK:       # BB#0:
 162 ; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 163 ; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
 164 ; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 165 ; CHECK-NEXT:    retq
 166   %t0 = fadd float %x0, %x1
 167   %t1 = fadd float %x2, %t0
 168   %t2 = fadd float %x3, %t1
 169   ret float %t2
 170 }
 171
 172 ; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
 173 ; produced because that would cost more compile time.
 174
 175 define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
 176 ; CHECK-LABEL: reassociate_adds5:
 177 ; CHECK:       # BB#0:
 178 ; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 179 ; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
 180 ; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 181 ; CHECK-NEXT:    vaddss %xmm5, %xmm4, %xmm1
 182 ; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 183 ; CHECK-NEXT:    vaddss %xmm7, %xmm6, %xmm1
 184 ; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 185 ; CHECK-NEXT:    retq
 186   %t0 = fadd float %x0, %x1
 187   %t1 = fadd float %t0, %x2
 188   %t2 = fadd float %t1, %x3
 189   %t3 = fadd float %t2, %x4
 190   %t4 = fadd float %t3, %x5
 191   %t5 = fadd float %t4, %x6
 192   %t6 = fadd float %t5, %x7
 193   ret float %t6
 194 }