; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE --check-prefix=ALL
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX --check-prefix=ALL

; Verify that fast-isel knows how to select aligned/unaligned vector loads.
; Also verify that the selected load instruction is in the correct domain.
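; Integer loads should stay in the integer domain (movdqa/movdqu, or the
; v-prefixed AVX forms) and float/double loads in the FP domain (movaps,
; movapd, and friends); a load in the wrong domain can incur a bypass delay
; on some microarchitectures.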

define <16 x i8> @test_v16i8(<16 x i8>* %V) {
; ALL-LABEL: test_v16i8:
; SSE: movdqa (%rdi), %xmm0
; AVX: vmovdqa (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V, align 16
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16(<8 x i16>* %V) {
; ALL-LABEL: test_v8i16:
; SSE: movdqa (%rdi), %xmm0
; AVX: vmovdqa (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V, align 16
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32(<4 x i32>* %V) {
; ALL-LABEL: test_v4i32:
; SSE: movdqa (%rdi), %xmm0
; AVX: vmovdqa (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V, align 16
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64(<2 x i64>* %V) {
; ALL-LABEL: test_v2i64:
; SSE: movdqa (%rdi), %xmm0
; AVX: vmovdqa (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V, align 16
  ret <2 x i64> %0
}
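
; The loads below are only 4-byte aligned, which is less than the 16-byte
; natural alignment of these vector types, so fast-isel must select the
; unaligned forms.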

define <16 x i8> @test_v16i8_unaligned(<16 x i8>* %V) {
; ALL-LABEL: test_v16i8_unaligned:
; SSE: movdqu (%rdi), %xmm0
; AVX: vmovdqu (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V, align 4
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16_unaligned(<8 x i16>* %V) {
; ALL-LABEL: test_v8i16_unaligned:
; SSE: movdqu (%rdi), %xmm0
; AVX: vmovdqu (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V, align 4
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32_unaligned(<4 x i32>* %V) {
; ALL-LABEL: test_v4i32_unaligned:
; SSE: movdqu (%rdi), %xmm0
; AVX: vmovdqu (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V, align 4
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64_unaligned(<2 x i64>* %V) {
; ALL-LABEL: test_v2i64_unaligned:
; SSE: movdqu (%rdi), %xmm0
; AVX: vmovdqu (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V, align 4
  ret <2 x i64> %0
}
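
; Floating-point vector loads should stay in the FP domain: movaps/movapd
; (and their unaligned counterparts) rather than the integer-domain movdqa.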

define <4 x float> @test_v4f32(<4 x float>* %V) {
; ALL-LABEL: test_v4f32:
; SSE: movaps (%rdi), %xmm0
; AVX: vmovaps (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <4 x float>, <4 x float>* %V, align 16
  ret <4 x float> %0
}

define <2 x double> @test_v2f64(<2 x double>* %V) {
; ALL-LABEL: test_v2f64:
; SSE: movapd (%rdi), %xmm0
; AVX: vmovapd (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <2 x double>, <2 x double>* %V, align 16
  ret <2 x double> %0
}

define <4 x float> @test_v4f32_unaligned(<4 x float>* %V) {
; ALL-LABEL: test_v4f32_unaligned:
; SSE: movups (%rdi), %xmm0
; AVX: vmovups (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <4 x float>, <4 x float>* %V, align 4
  ret <4 x float> %0
}

define <2 x double> @test_v2f64_unaligned(<2 x double>* %V) {
; ALL-LABEL: test_v2f64_unaligned:
; SSE: movupd (%rdi), %xmm0
; AVX: vmovupd (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <2 x double>, <2 x double>* %V, align 4
  ret <2 x double> %0
}
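
; The loads below have no explicit alignment, so they get the ABI alignment
; of the vector type, which is 16 bytes for these 128-bit vectors on x86-64;
; the aligned forms are therefore expected.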

define <16 x i8> @test_v16i8_abi_alignment(<16 x i8>* %V) {
; ALL-LABEL: test_v16i8_abi_alignment:
; SSE: movdqa (%rdi), %xmm0
; AVX: vmovdqa (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16_abi_alignment(<8 x i16>* %V) {
; ALL-LABEL: test_v8i16_abi_alignment:
; SSE: movdqa (%rdi), %xmm0
; AVX: vmovdqa (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32_abi_alignment(<4 x i32>* %V) {
; ALL-LABEL: test_v4i32_abi_alignment:
; SSE: movdqa (%rdi), %xmm0
; AVX: vmovdqa (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64_abi_alignment(<2 x i64>* %V) {
; ALL-LABEL: test_v2i64_abi_alignment:
; SSE: movdqa (%rdi), %xmm0
; AVX: vmovdqa (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V
  ret <2 x i64> %0
}

define <4 x float> @test_v4f32_abi_alignment(<4 x float>* %V) {
; ALL-LABEL: test_v4f32_abi_alignment:
; SSE: movaps (%rdi), %xmm0
; AVX: vmovaps (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <4 x float>, <4 x float>* %V
  ret <4 x float> %0
}

define <2 x double> @test_v2f64_abi_alignment(<2 x double>* %V) {
; ALL-LABEL: test_v2f64_abi_alignment:
; SSE: movapd (%rdi), %xmm0
; AVX: vmovapd (%rdi), %xmm0
; ALL-NEXT: retq
entry:
  %0 = load <2 x double>, <2 x double>* %V
  ret <2 x double> %0
}