test/CodeGen/ARM/subreg-remat.ll

   1 ; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 -pre-RA-sched=source -no-integrated-as | FileCheck %s
   2 target triple = "thumbv7-apple-ios"
   3 ; <rdar://problem/10032939>
   4 ;
   5 ; The vector %v2 is built like this:
   6 ;
   7 ;   %vreg6:ssub_0<def> = ...
   8 ;   %vreg6:ssub_1<def> = VLDRS <cp#0>, 0, pred:14, pred:%noreg; mem:LD4[ConstantPool] DPR_VFP2:%vreg6
   9 ;
  10 ; When %vreg6 spills, the VLDRS constant pool load cannot be rematerialized
  11 ; since it implicitly reads the ssub_0 sub-register.
  12 ;
  13 ; CHECK: f1
  14 ; CHECK: vmov    d0, r0, r0
  15 ; CHECK: vldr s1, LCPI
  16 ; The vector must be spilled:
  17 ; CHECK: vstr d0,
  18 ; CHECK: asm clobber d0
  19 ; And reloaded after the asm:
  20 ; CHECK: vldr [[D16:d[0-9]+]],
  21 ; CHECK: vstr [[D16]], [r1]
  22 define void @f1(float %x, <2 x float>* %p) { ; builds <%x, constant>, keeps it live across a D-register-clobbering asm, then stores it to %p
  23   %v1 = insertelement <2 x float> undef, float %x, i32 0 ; lane 0 = %x, lane 1 is still undef
  24   %v2 = insertelement <2 x float> %v1, float 0x400921FB60000000, i32 1 ; lane 1 = constant; lane 0 of %v1 is carried over, so this is a partial redefinition
  25   %y = call double asm sideeffect "asm clobber $0", "=w,0,~{d1},~{d2},~{d3},~{d4},~{d5},~{d6},~{d7},~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15},~{d16},~{d17},~{d18},~{d19},~{d20},~{d21},~{d22},~{d23},~{d24},~{d25},~{d26},~{d27},~{d28},~{d29},~{d30},~{d31}"(<2 x float> %v2) nounwind ; "=w,0" ties the output %y to the %v2 input; d1-d31 are all listed as clobbers; %y is never used
  26   store <2 x float> %v2, <2 x float>* %p, align 8 ; %v2 is used again after the asm, so its value must survive the call
  27   ret void
  28 }
  29
  30 ; On the other hand, when the partial redef doesn't read the full register
  31 ; because the bits are undef, we should rematerialize.  The vector is now built
  32 ; like this:
  33 ;
  34 ;   %vreg2:ssub_0<def> = VLDRS <cp#0>, 0, pred:14, pred:%noreg, %vreg2<imp-def>; mem:LD4[ConstantPool]
  35 ;
  36 ; The extra <imp-def> operand indicates that the instruction fully defines the
  37 ; virtual register.  It doesn't read the old value.
  38 ;
  39 ; CHECK: f2
  40 ; CHECK: vldr s0, LCPI
  41 ; The vector must not be spilled:
  42 ; CHECK-NOT: vstr
  43 ; CHECK: asm clobber d0
  44 ; But instead rematerialize after the asm:
  45 ; CHECK: vldr [[S0:s[0-9]+]], LCPI
  46 ; CHECK: vstr [[D0:d[0-9]+]], [r0]
  47 define void @f2(<2 x float>* %p) { ; builds a vector with only lane 0 defined, keeps it live across a D-register-clobbering asm, then stores it to %p
  48   %v2 = insertelement <2 x float> undef, float 0x400921FB60000000, i32 0 ; lane 0 = constant, lane 1 stays undef: no earlier vector value is read
  49   %y = call double asm sideeffect "asm clobber $0", "=w,0,~{d1},~{d2},~{d3},~{d4},~{d5},~{d6},~{d7},~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15},~{d16},~{d17},~{d18},~{d19},~{d20},~{d21},~{d22},~{d23},~{d24},~{d25},~{d26},~{d27},~{d28},~{d29},~{d30},~{d31}"(<2 x float> %v2) nounwind ; "=w,0" ties the output %y to the %v2 input; d1-d31 are all listed as clobbers; %y is never used
  50   store <2 x float> %v2, <2 x float>* %p, align 8 ; second use of %v2, after the asm
  51   ret void
  52 }