test/CodeGen/X86/fold-load-unops.ll

   1 ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s
   2
   3 ; Verify that we're folding the load into the math instruction.
   4
   5 ; FIXME: The folding should also happen without the avx attribute,
   6 ; i.e., when generating SSE (non-VEX-prefixed) instructions.
   7
   8 define float @rcpss(float* %a) {
   9 ; CHECK-LABEL: rcpss:
  10 ; CHECK:       vrcpss (%rdi), %xmm0, %xmm0
  11 ; The float loaded from %a should be folded into vrcpss as its memory operand.
  12     %scalar = load float* %a
  13     %vec = insertelement <4 x float> undef, float %scalar, i32 0
  14     %rcp = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %vec)
  15     %lane0 = extractelement <4 x float> %rcp, i32 0
  16     ret float %lane0
  17 }
  18
  19 define float @rsqrtss(float* %a) {
  20 ; CHECK-LABEL: rsqrtss:
  21 ; CHECK:       vrsqrtss (%rdi), %xmm0, %xmm0
  22 ; The float loaded from %a should be folded into vrsqrtss as its memory operand.
  23     %scalar = load float* %a
  24     %vec = insertelement <4 x float> undef, float %scalar, i32 0
  25     %rsqrt = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %vec)
  26     %lane0 = extractelement <4 x float> %rsqrt, i32 0
  27     ret float %lane0
  28 }
  29
  30 define float @sqrtss(float* %a) {
  31 ; CHECK-LABEL: sqrtss:
  32 ; CHECK:       vsqrtss (%rdi), %xmm0, %xmm0
  33 ; The float loaded from %a should be folded into vsqrtss as its memory operand.
  34     %scalar = load float* %a
  35     %vec = insertelement <4 x float> undef, float %scalar, i32 0
  36     %root = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %vec)
  37     %lane0 = extractelement <4 x float> %root, i32 0
  38     ret float %lane0
  39 }
  40
  41 define double @sqrtsd(double* %a) {
  42 ; CHECK-LABEL: sqrtsd:
  43 ; CHECK:       vsqrtsd (%rdi), %xmm0, %xmm0
  44 ; The double loaded from %a should be folded into vsqrtsd as its memory operand.
  45     %scalar = load double* %a
  46     %vec = insertelement <2 x double> undef, double %scalar, i32 0
  47     %root = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %vec)
  48     %lane0 = extractelement <2 x double> %root, i32 0
  49     ret double %lane0
  50 }
  51
  52 ; Declarations of the x86 SSE/SSE2 scalar intrinsics called by the test functions above.
  53 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
  54 declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
  55 declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
  56 declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
  57