test/CodeGen/X86/lower-bitcast.ll

   1 ; RUN: llc < %s -march=x86-64 -mcpu=core2 -mattr=+sse2 | FileCheck %s
   2
   3
   4 define double @test1(double %A) { ; adds a constant <2 x i32> to the bits of %A and returns the sum as a double
   5   %1 = bitcast double %A to <2 x i32> ; reinterpret the 64-bit double as two i32 lanes
   6   %add = add <2 x i32> %1, <i32 3, i32 5> ; lane-wise integer add of the constants 3 and 5
   7   %2 = bitcast <2 x i32> %add to double ; reinterpret the two-lane sum as a double again
   8   ret double %2
   9 }
  10 ; FIXME: Ideally we should be able to fold the entire body of @test1 into a
  11 ; single paddd instruction. At the moment we produce the sequence
  12 ; pshufd+paddd+pshufd.
  13
  14 ; CHECK-LABEL: test1
  15 ; CHECK-NOT: movsd
  16 ; CHECK: pshufd
  17 ; CHECK-NEXT: paddd
  18 ; CHECK-NEXT: pshufd
  19 ; CHECK-NEXT: ret
  20
  21
  22 define double @test2(double %A, double %B) { ; adds two doubles lane-wise as <2 x i32> and returns the sum as a double
  23   %1 = bitcast double %A to <2 x i32> ; reinterpret %A as two i32 lanes
  24   %2 = bitcast double %B to <2 x i32> ; reinterpret %B as two i32 lanes
  25   %add = add <2 x i32> %1, %2 ; lane-wise integer add of both operands
  26   %3 = bitcast <2 x i32> %add to double ; reinterpret the two-lane sum as a double
  27   ret double %3
  28 }
  29 ; CHECK-LABEL: test2
  30 ; CHECK-NOT: movsd
  31 ; CHECK: paddd
  32 ; CHECK-NEXT: ret
  33
  34
  35 define i64 @test3(i64 %A) { ; adds a constant <2 x float> to the bits of %A and returns the sum as an i64
  36   %1 = bitcast i64 %A to <2 x float> ; reinterpret the 64-bit integer as two float lanes
  37   %add = fadd <2 x float> %1, <float 3.0, float 5.0> ; lane-wise floating-point add of 3.0 and 5.0
  38   %2 = bitcast <2 x float> %add to i64 ; reinterpret the two-lane sum as an i64 again
  39   ret i64 %2
  40 }
  41 ; CHECK-LABEL: test3
  42 ; CHECK-NOT: pshufd
  43 ; CHECK: addps
  44 ; CHECK-NOT: pshufd
  45 ; CHECK: ret
  46
  47
  48 define i64 @test4(i64 %A) { ; adds a constant <2 x i32> to the bits of %A and returns the sum as an i64
  49   %1 = bitcast i64 %A to <2 x i32> ; reinterpret the 64-bit integer as two i32 lanes
  50   %add = add <2 x i32> %1, <i32 3, i32 5> ; lane-wise integer add of the constants 3 and 5
  51   %2 = bitcast <2 x i32> %add to i64 ; reinterpret the two-lane sum as an i64 again
  52   ret i64 %2
  53 }
  54 ; FIXME: At the moment we still produce the sequence pshufd+paddq+pshufd.
  55 ; Ideally, we should fold that sequence into a single paddd.
  56
  57 ; CHECK-LABEL: test4
  58 ; CHECK: pshufd
  59 ; CHECK-NEXT: paddq
  60 ; CHECK-NEXT: pshufd
  61 ; CHECK: ret
  62
  63
  64 define double @test5(double %A) { ; adds a constant <2 x float> to the bits of %A and returns the sum as a double
  65   %1 = bitcast double %A to <2 x float> ; reinterpret the 64-bit double as two float lanes
  66   %add = fadd <2 x float> %1, <float 3.0, float 5.0> ; lane-wise floating-point add of 3.0 and 5.0
  67   %2 = bitcast <2 x float> %add to double ; reinterpret the two-lane sum as a double again
  68   ret double %2
  69 }
  70 ; CHECK-LABEL: test5
  71 ; CHECK: addps
  72 ; CHECK-NEXT: ret
  73
  74
  75 define double @test6(double %A) { ; adds a constant <4 x i16> to the bits of %A and returns the sum as a double
  76   %1 = bitcast double %A to <4 x i16> ; reinterpret the 64-bit double as four i16 lanes
  77   %add = add <4 x i16> %1, <i16 3, i16 4, i16 5, i16 6> ; lane-wise integer add of the constants 3, 4, 5, 6
  78   %2 = bitcast <4 x i16> %add to double ; reinterpret the four-lane sum as a double again
  79   ret double %2
  80 }
  81 ; FIXME: Ideally we should be able to fold the entire body of @test6 into a
  82 ; single paddw instruction.
  83
  84 ; CHECK-LABEL: test6
  85 ; CHECK-NOT: movsd
  86 ; CHECK: punpcklwd
  87 ; CHECK-NEXT: paddw
  88 ; CHECK-NEXT: pshufb
  89 ; CHECK-NEXT: ret
  90
  91
  92 define double @test7(double %A, double %B) { ; adds two doubles lane-wise as <4 x i16> and returns the sum as a double
  93   %1 = bitcast double %A to <4 x i16> ; reinterpret %A as four i16 lanes
  94   %2 = bitcast double %B to <4 x i16> ; reinterpret %B as four i16 lanes
  95   %add = add <4 x i16> %1, %2 ; lane-wise integer add of both operands
  96   %3 = bitcast <4 x i16> %add to double ; reinterpret the four-lane sum as a double
  97   ret double %3
  98 }
  99 ; CHECK-LABEL: test7
 100 ; CHECK-NOT: movsd
 101 ; CHECK-NOT: punpcklwd
 102 ; CHECK: paddw
 103 ; CHECK-NEXT: ret
 104
 105
 106 define double @test8(double %A) { ; adds a constant <8 x i8> to the bits of %A and returns the sum as a double
 107   %1 = bitcast double %A to <8 x i8> ; reinterpret the 64-bit double as eight i8 lanes
 108   %add = add <8 x i8> %1, <i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10> ; lane-wise integer add of the constants 3 through 10
 109   %2 = bitcast <8 x i8> %add to double ; reinterpret the eight-lane sum as a double again
 110   ret double %2
 111 }
 112 ; FIXME: Ideally we should be able to fold the entire body of @test8 into a
 113 ; single paddb instruction. At the moment we produce the sequence
 114 ; punpcklbw+paddb+pshufb.
 115
 116 ; CHECK-LABEL: test8
 117 ; CHECK-NOT: movsd
 118 ; CHECK: punpcklbw
 119 ; CHECK-NEXT: paddb
 120 ; CHECK-NEXT: pshufb
 121 ; CHECK-NEXT: ret
 122
 123
 124 define double @test9(double %A, double %B) { ; adds two doubles lane-wise as <8 x i8> and returns the sum as a double
 125   %1 = bitcast double %A to <8 x i8> ; reinterpret %A as eight i8 lanes
 126   %2 = bitcast double %B to <8 x i8> ; reinterpret %B as eight i8 lanes
 127   %add = add <8 x i8> %1, %2 ; lane-wise integer add of both operands
 128   %3 = bitcast <8 x i8> %add to double ; reinterpret the eight-lane sum as a double
 129   ret double %3
 130 }
 131 ; CHECK-LABEL: test9
 132 ; CHECK-NOT: movsd
 133 ; CHECK-NOT: punpcklbw
 134 ; CHECK: paddb
 135 ; CHECK-NEXT: ret
 136