test/CodeGen/X86/sse_partial_update.ll

   1 ; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse2 -mcpu=nehalem | FileCheck %s
   2
   3 ; rdar: 12558838
   4 ; PR14221
   5 ; There is a mismatch between the intrinsic and the actual instruction.
   6 ; The actual instruction has a partial update of dest, while the intrinsic
   7 ; passes through the upper FP values. Here, we make sure the source and
   8 ; destination of rsqrtss (in @t1) and of rcpss (in @t2) are the same.
   9 define void @t1(<4 x float> %a) nounwind uwtable ssp {
     ; Test for the rsqrt.ss intrinsic: the FileCheck directive in this function
     ; expects the emitted rsqrtss to use %xmm0 as both source and destination.
     ;
     ; The body applies @llvm.x86.sse.rsqrt.ss to the argument %a, then reads
     ; two elements of the result: element 0 and element 1. Per the file header,
     ; the intrinsic passes the upper FP values through, so element 1 is one of
     ; those passed-through values. Both elements are extended to double and
     ; handed to @callee, so both are consumed by the call.
     ; NOTE(review): presumably reading element 1 is what makes a mismatched
     ; source/destination register observable -- confirm against PR14221.
  10 entry:
  11 ; CHECK-LABEL: t1:
  12 ; CHECK: rsqrtss %xmm0, %xmm0
  13   %0 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a) nounwind
  14   %a.addr.0.extract = extractelement <4 x float> %0, i32 0
  15   %conv = fpext float %a.addr.0.extract to double
  16   %a.addr.4.extract = extractelement <4 x float> %0, i32 1
  17   %conv3 = fpext float %a.addr.4.extract to double
  18   tail call void @callee(double %conv, double %conv3) nounwind
  19   ret void
  20 }
  21 declare void @callee(double, double)
  22 declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
  23
  24 define void @t2(<4 x float> %a) nounwind uwtable ssp {
     ; Test for the rcp.ss intrinsic, structured exactly like @t1: the FileCheck
     ; directive in this function expects the emitted rcpss to use %xmm0 as both
     ; source and destination.
     ;
     ; The body applies @llvm.x86.sse.rcp.ss to the argument %a, extracts
     ; elements 0 and 1 of the result, extends each to double, and passes both
     ; to @callee so that both elements are consumed by the call.
     ; NOTE(review): the file header only mentions rsqrtss; this test appears to
     ; cover the same partial-update concern for rcpss -- confirm.
  25 entry:
  26 ; CHECK-LABEL: t2:
  27 ; CHECK: rcpss %xmm0, %xmm0
  28   %0 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a) nounwind
  29   %a.addr.0.extract = extractelement <4 x float> %0, i32 0
  30   %conv = fpext float %a.addr.0.extract to double
  31   %a.addr.4.extract = extractelement <4 x float> %0, i32 1
  32   %conv3 = fpext float %a.addr.4.extract to double
  33   tail call void @callee(double %conv, double %conv3) nounwind
  34   ret void
  35 }
  36 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone