test/CodeGen/AArch64/aarch64-interleaved-accesses.ll

   1 ; RUN: llc -march=aarch64 -aarch64-neon-syntax=generic -lower-interleaved-accesses=true < %s | FileCheck %s
   2
   3 ; CHECK-LABEL: load_factor2:
   4 ; CHECK: ld2 { v0.8b, v1.8b }, [x0]
     ; Factor-2 interleaved load: a <16 x i8> load is split into its even-indexed
     ; and odd-indexed elements, which are then added. Expected to select ld2.
   5 define <8 x i8> @load_factor2(<16 x i8>* %ptr) {
   6   %wide = load <16 x i8>, <16 x i8>* %ptr, align 4
       ; Elements 0, 2, ..., 14 of the wide vector.
   7   %even = shufflevector <16 x i8> %wide, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
       ; Elements 1, 3, ..., 15 of the wide vector.
   8   %odd = shufflevector <16 x i8> %wide, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
   9   %sum = add nsw <8 x i8> %even, %odd
  10   ret <8 x i8> %sum
  11 }
  12
  13 ; CHECK-LABEL: load_factor3:
  14 ; CHECK: ld3 { v0.4s, v1.4s, v2.4s }, [x0]
     ; Factor-3 interleaved load: a <12 x i32> load of which only the stride-3
     ; sequences starting at elements 2 and 1 are used. Expected to select ld3.
  15 define <4 x i32> @load_factor3(i32* %ptr) {
  16   %vec.ptr = bitcast i32* %ptr to <12 x i32>*
  17   %wide = load <12 x i32>, <12 x i32>* %vec.ptr, align 4
       ; Elements 2, 5, 8, 11.
  18   %lane2 = shufflevector <12 x i32> %wide, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
       ; Elements 1, 4, 7, 10.
  19   %lane1 = shufflevector <12 x i32> %wide, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
  20   %sum = add nsw <4 x i32> %lane2, %lane1
  21   ret <4 x i32> %sum
  22 }
  23
  24 ; CHECK-LABEL: load_factor4:
  25 ; CHECK: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
     ; Factor-4 interleaved load: a <16 x i32> load of which only the stride-4
     ; sequences starting at elements 0 and 2 are used. Expected to select ld4.
  26 define <4 x i32> @load_factor4(i32* %ptr) {
  27   %vec.ptr = bitcast i32* %ptr to <16 x i32>*
  28   %wide = load <16 x i32>, <16 x i32>* %vec.ptr, align 4
       ; Elements 0, 4, 8, 12.
  29   %lane0 = shufflevector <16 x i32> %wide, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
       ; Elements 2, 6, 10, 14.
  30   %lane2 = shufflevector <16 x i32> %wide, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
  31   %sum = add nsw <4 x i32> %lane0, %lane2
  32   ret <4 x i32> %sum
  33 }
  34
  35 ; CHECK-LABEL: store_factor2:
  36 ; CHECK: st2 { v0.8b, v1.8b }, [x0]
     ; Factor-2 interleaved store: %v0 and %v1 are zipped element by element
     ; (v0[0], v1[0], v0[1], v1[1], ...) and stored. Expected to select st2.
  37 define void @store_factor2(<16 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1) {
       ; Mask indices 0-7 pick from %v0, 8-15 pick from %v1.
  38   %zipped = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  39   store <16 x i8> %zipped, <16 x i8>* %ptr, align 4
  40   ret void
  41 }
  42
  43 ; CHECK-LABEL: store_factor3:
  44 ; CHECK: st3 { v0.4s, v1.4s, v2.4s }, [x0]
     ; Factor-3 interleaved store: %v0, %v1 and %v2 are concatenated and then
     ; re-interleaved with stride 3 before the store. Expected to select st3.
  45 define void @store_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
  46   %vec.ptr = bitcast i32* %ptr to <12 x i32>*
       ; Concatenate %v0 and %v1 into one 8-element vector.
  47   %v01 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
       ; Widen %v2 to 8 elements; the upper four are undef padding.
  48   %v2.wide = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
       ; Indices 0-3 = %v0, 4-7 = %v1, 8-11 = %v2.
  49   %zipped = shufflevector <8 x i32> %v01, <8 x i32> %v2.wide, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
  50   store <12 x i32> %zipped, <12 x i32>* %vec.ptr, align 4
  51   ret void
  52 }
  53
  54 ; CHECK-LABEL: store_factor4:
  55 ; CHECK: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
     ; Factor-4 interleaved store: %v0..%v3 are concatenated pairwise and then
     ; re-interleaved with stride 4 before the store. Expected to select st4.
  56 define void @store_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
  57   %vec.ptr = bitcast i32* %ptr to <16 x i32>*
       ; Concatenate %v0,%v1 and %v2,%v3 into two 8-element vectors.
  58   %v01 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  59   %v23 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
       ; Indices 0-3 = %v0, 4-7 = %v1, 8-11 = %v2, 12-15 = %v3.
  60   %zipped = shufflevector <8 x i32> %v01, <8 x i32> %v23, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
  61   store <16 x i32> %zipped, <16 x i32>* %vec.ptr, align 4
  62   ret void
  63 }
  64
  65 ; The following cases test that interleaved accesses of pointer vectors can be
  66 ; matched to ldN/stN instructions.
  67
  68 ; CHECK-LABEL: load_ptrvec_factor2:
  69 ; CHECK: ld2 { v0.2d, v1.2d }, [x0]
     ; Factor-2 interleaved load of a pointer vector: only the even-indexed
     ; pointers of the <4 x i32*> load are returned. Expected to select ld2.
  70 define <2 x i32*> @load_ptrvec_factor2(i32** %ptr) {
  71   %vec.ptr = bitcast i32** %ptr to <4 x i32*>*
  72   %wide = load <4 x i32*>, <4 x i32*>* %vec.ptr, align 4
       ; Elements 0 and 2.
  73   %lane0 = shufflevector <4 x i32*> %wide, <4 x i32*> undef, <2 x i32> <i32 0, i32 2>
  74   ret <2 x i32*> %lane0
  75 }
  76
  77 ; CHECK-LABEL: load_ptrvec_factor3:
  78 ; CHECK: ld3 { v0.2d, v1.2d, v2.2d }, [x0]
     ; Factor-3 interleaved load of a pointer vector: the stride-3 sequences
     ; starting at elements 2 and 1 are extracted and written to %ptr1 and %ptr2.
     ; Expected to select ld3.
  79 define void @load_ptrvec_factor3(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) {
  80   %vec.ptr = bitcast i32** %ptr to <6 x i32*>*
  81   %wide = load <6 x i32*>, <6 x i32*>* %vec.ptr, align 4
       ; Elements 2 and 5.
  82   %lane2 = shufflevector <6 x i32*> %wide, <6 x i32*> undef, <2 x i32> <i32 2, i32 5>
  83   store <2 x i32*> %lane2, <2 x i32*>* %ptr1
       ; Elements 1 and 4.
  84   %lane1 = shufflevector <6 x i32*> %wide, <6 x i32*> undef, <2 x i32> <i32 1, i32 4>
  85   store <2 x i32*> %lane1, <2 x i32*>* %ptr2
  86   ret void
  87 }
  88
  89 ; CHECK-LABEL: load_ptrvec_factor4:
  90 ; CHECK: ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
     ; Factor-4 interleaved load of a pointer vector: the stride-4 sequences
     ; starting at elements 1 and 3 are extracted and written to %ptr1 and %ptr2.
     ; Expected to select ld4.
  91 define void @load_ptrvec_factor4(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) {
  92   %vec.ptr = bitcast i32** %ptr to <8 x i32*>*
  93   %wide = load <8 x i32*>, <8 x i32*>* %vec.ptr, align 4
       ; Elements 1 and 5.
  94   %lane1 = shufflevector <8 x i32*> %wide, <8 x i32*> undef, <2 x i32> <i32 1, i32 5>
       ; Elements 3 and 7.
  95   %lane3 = shufflevector <8 x i32*> %wide, <8 x i32*> undef, <2 x i32> <i32 3, i32 7>
  96   store <2 x i32*> %lane1, <2 x i32*>* %ptr1
  97   store <2 x i32*> %lane3, <2 x i32*>* %ptr2
  98   ret void
  99 }
 100
 101 ; CHECK-LABEL: store_ptrvec_factor2:
 102 ; CHECK: st2 { v0.2d, v1.2d }, [x0]
     ; Factor-2 interleaved store of pointer vectors: %v0 and %v1 are zipped
     ; (v0[0], v1[0], v0[1], v1[1]) and stored. Expected to select st2.
 103 define void @store_ptrvec_factor2(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1) {
 104   %vec.ptr = bitcast i32** %ptr to <4 x i32*>*
       ; Mask indices 0-1 pick from %v0, 2-3 pick from %v1.
 105   %zipped = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
 106   store <4 x i32*> %zipped, <4 x i32*>* %vec.ptr, align 4
 107   ret void
 108 }
 109
 110 ; CHECK-LABEL: store_ptrvec_factor3:
 111 ; CHECK: st3 { v0.2d, v1.2d, v2.2d }, [x0]
     ; Factor-3 interleaved store of pointer vectors: %v0, %v1 and %v2 are
     ; concatenated and then re-interleaved with stride 3 before the store.
     ; Expected to select st3.
 112 define void @store_ptrvec_factor3(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2) {
 113   %vec.ptr = bitcast i32** %ptr to <6 x i32*>*
       ; Concatenate %v0 and %v1 into one 4-element vector.
 114   %v01 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
       ; Widen %v2 to 4 elements; the upper two are undef padding.
 115   %v2.wide = shufflevector <2 x i32*> %v2, <2 x i32*> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
       ; Indices 0-1 = %v0, 2-3 = %v1, 4-5 = %v2.
 116   %zipped = shufflevector <4 x i32*> %v01, <4 x i32*> %v2.wide, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
 117   store <6 x i32*> %zipped, <6 x i32*>* %vec.ptr, align 4
 118   ret void
 119 }
 120
 121 ; CHECK-LABEL: store_ptrvec_factor4:
 122 ; CHECK: st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
     ; Factor-4 interleaved store of pointer vectors: %v0..%v3 are concatenated
     ; pairwise and then re-interleaved with stride 4 before the store.
     ; Expected to select st4.
     ; NOTE(review): %ptr is i32* here while the other ptrvec tests in this file
     ; take i32**; the bitcast makes either form valid - confirm whether the
     ; difference is intentional.
 123 define void @store_ptrvec_factor4(i32* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2, <2 x i32*> %v3) {
 124   %vec.ptr = bitcast i32* %ptr to <8 x i32*>*
       ; Concatenate %v0,%v1 and %v2,%v3 into two 4-element vectors.
 125   %v01 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 126   %v23 = shufflevector <2 x i32*> %v2, <2 x i32*> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
       ; Indices 0-1 = %v0, 2-3 = %v1, 4-5 = %v2, 6-7 = %v3.
 127   %zipped = shufflevector <4 x i32*> %v01, <4 x i32*> %v23, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
 128   store <8 x i32*> %zipped, <8 x i32*>* %vec.ptr, align 4
 129   ret void
 130 }
 131
 132 ; The following cases check that shuffle masks with undef indices can be matched
 133 ; into ldN/stN instructions.
 134
 135 ; CHECK-LABEL: load_undef_mask_factor2:
 136 ; CHECK: ld2 { v0.4s, v1.4s }, [x0]
     ; Factor-2 interleaved load where both strided masks have undef in
     ; positions 0 and 2. Expected to still select ld2.
 137 define <4 x i32> @load_undef_mask_factor2(i32* %ptr) {
 138   %vec.ptr = bitcast i32* %ptr to <8 x i32>*
 139   %wide = load <8 x i32>, <8 x i32>* %vec.ptr, align 4
       ; Even-element mask (0, 2, 4, 6) with entries 0 and 2 left undef.
 140   %even = shufflevector <8 x i32> %wide, <8 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 6>
       ; Odd-element mask (1, 3, 5, 7) with entries 0 and 2 left undef.
 141   %odd = shufflevector <8 x i32> %wide, <8 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 7>
 142   %sum = add nsw <4 x i32> %even, %odd
 143   ret <4 x i32> %sum
 144 }
 145
 146 ; CHECK-LABEL: load_undef_mask_factor3:
 147 ; CHECK: ld3 { v0.4s, v1.4s, v2.4s }, [x0]
     ; Factor-3 interleaved load where one strided mask defines only its first
     ; entry and the other is fully defined. Expected to still select ld3.
 148 define <4 x i32> @load_undef_mask_factor3(i32* %ptr) {
 149   %vec.ptr = bitcast i32* %ptr to <12 x i32>*
 150   %wide = load <12 x i32>, <12 x i32>* %vec.ptr, align 4
       ; Only the leading index (2) is given; the remaining three are undef.
 151   %lane2 = shufflevector <12 x i32> %wide, <12 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
       ; Elements 1, 4, 7, 10.
 152   %lane1 = shufflevector <12 x i32> %wide, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
 153   %sum = add nsw <4 x i32> %lane2, %lane1
 154   ret <4 x i32> %sum
 155 }
 156
 157 ; CHECK-LABEL: load_undef_mask_factor4:
 158 ; CHECK: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
     ; Factor-4 interleaved load where both strided masks leave their last two
     ; entries undef. Expected to still select ld4.
 159 define <4 x i32> @load_undef_mask_factor4(i32* %ptr) {
 160   %vec.ptr = bitcast i32* %ptr to <16 x i32>*
 161   %wide = load <16 x i32>, <16 x i32>* %vec.ptr, align 4
       ; Indices 0 and 4 given; the trailing two entries are undef.
 162   %lane0 = shufflevector <16 x i32> %wide, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
       ; Indices 2 and 6 given; the trailing two entries are undef.
 163   %lane2 = shufflevector <16 x i32> %wide, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 undef, i32 undef>
 164   %sum = add nsw <4 x i32> %lane0, %lane2
 165   ret <4 x i32> %sum
 166 }
 167
 168 ; CHECK-LABEL: store_undef_mask_factor2:
 169 ; CHECK: st2 { v0.4s, v1.4s }, [x0]
     ; Factor-2 interleaved store where the first four entries of the interleave
     ; mask are undef. Expected to still select st2.
 170 define void @store_undef_mask_factor2(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1) {
 171   %vec.ptr = bitcast i32* %ptr to <8 x i32>*
       ; Only the tail of the zip pattern (2, 6, 3, 7) is specified.
 172   %zipped = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7>
 173   store <8 x i32> %zipped, <8 x i32>* %vec.ptr, align 4
 174   ret void
 175 }
 176
 177 ; CHECK-LABEL: store_undef_mask_factor3:
 178 ; CHECK: st3 { v0.4s, v1.4s, v2.4s }, [x0]
     ; Factor-3 interleaved store where entries 2 and 4 of the interleave mask
     ; are undef. Expected to still select st3.
 179 define void @store_undef_mask_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
 180   %vec.ptr = bitcast i32* %ptr to <12 x i32>*
       ; Concatenate %v0 and %v1 into one 8-element vector.
 181   %v01 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
       ; Widen %v2 to 8 elements; the upper four are undef padding.
 182   %v2.wide = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
       ; Stride-3 zip pattern with two of its entries replaced by undef.
 183   %zipped = shufflevector <8 x i32> %v01, <8 x i32> %v2.wide, <12 x i32> <i32 0, i32 4, i32 undef, i32 1, i32 undef, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
 184   store <12 x i32> %zipped, <12 x i32>* %vec.ptr, align 4
 185   ret void
 186 }
 187
 188 ; CHECK-LABEL: store_undef_mask_factor4:
 189 ; CHECK: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
     ; Factor-4 interleaved store where entries 3 and 4 of the interleave mask
     ; are undef. Expected to still select st4.
 190 define void @store_undef_mask_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
 191   %vec.ptr = bitcast i32* %ptr to <16 x i32>*
       ; Concatenate %v0,%v1 and %v2,%v3 into two 8-element vectors.
 192   %v01 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 193   %v23 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
       ; Stride-4 zip pattern with two of its entries replaced by undef.
 194   %zipped = shufflevector <8 x i32> %v01, <8 x i32> %v23, <16 x i32> <i32 0, i32 4, i32 8, i32 undef, i32 undef, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
 195   store <16 x i32> %zipped, <16 x i32>* %vec.ptr, align 4
 196   ret void
 197 }