test/CodeGen/AArch64/arm64-rev.ll

   1 ; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
   2
   3 define i32 @test_rev_w(i32 %a) nounwind { ; 32-bit byte swap; expected to select the w-register form of rev
   4 entry:
   5 ; CHECK-LABEL: test_rev_w:
   6 ; CHECK: rev w0, w0
   7   %0 = tail call i32 @llvm.bswap.i32(i32 %a) ; reverse the four bytes of %a
   8   ret i32 %0
   9 }
  10
  11 define i64 @test_rev_x(i64 %a) nounwind { ; 64-bit byte swap; expected to select the x-register form of rev
  12 entry:
  13 ; CHECK-LABEL: test_rev_x:
  14 ; CHECK: rev x0, x0
  15   %0 = tail call i64 @llvm.bswap.i64(i64 %a) ; reverse the eight bytes of %a
  16   ret i64 %0
  17 }
  18
  19 declare i32 @llvm.bswap.i32(i32) nounwind readnone ; byte-swap intrinsic called by test_rev_w
  20 declare i64 @llvm.bswap.i64(i64) nounwind readnone ; byte-swap intrinsic called by test_rev_x, test_rev16_x and test_rev32_x
  21
  22 define i32 @test_rev16_w(i32 %X) nounwind { ; open-coded swap of the two bytes inside each 16-bit half; expected to select rev16
  23 entry:
  24 ; CHECK-LABEL: test_rev16_w:
  25 ; CHECK: rev16 w0, w0
  26   %tmp1 = lshr i32 %X, 8 ; every byte moved down one byte position
  27   %X15 = bitcast i32 %X to i32 ; same-type cast, value unchanged
  28   %tmp4 = shl i32 %X15, 8 ; every byte moved up one byte position
  29   %tmp2 = and i32 %tmp1, 16711680 ; mask 0x00FF0000: original byte 3 now sits in byte 2
  30   %tmp5 = and i32 %tmp4, -16777216 ; mask 0xFF000000: original byte 2 now sits in byte 3
  31   %tmp9 = and i32 %tmp1, 255 ; mask 0x000000FF: original byte 1 now sits in byte 0
  32   %tmp13 = and i32 %tmp4, 65280 ; mask 0x0000FF00: original byte 0 now sits in byte 1
  33   %tmp6 = or i32 %tmp5, %tmp2 ; upper half-word with its two bytes exchanged
  34   %tmp10 = or i32 %tmp6, %tmp13 ; add new byte 1
  35   %tmp14 = or i32 %tmp10, %tmp9 ; add new byte 0
  36   ret i32 %tmp14
  37 }
  38
  39 ; 64-bit REV16 is *not* a swap then a 16-bit rotation:
  40 ;   01234567 ->(bswap) 76543210 ->(rotr) 10765432
  41 ;   01234567 ->(rev16) 10325476
  42 define i64 @test_rev16_x(i64 %a) nounwind { ; negative test: bswap plus 16-bit rotate must not be emitted as rev16 on x0
  43 entry:
  44 ; CHECK-LABEL: test_rev16_x:
  45 ; CHECK-NOT: rev16 x0, x0
  46   %0 = tail call i64 @llvm.bswap.i64(i64 %a) ; reverse all eight bytes
  47   %1 = lshr i64 %0, 16 ; low 48 bits of the rotated value
  48   %2 = shl i64 %0, 48 ; bottom 16 bits wrapped round to the top
  49   %3 = or i64 %1, %2 ; together: rotate right by 16 of the swapped value
  50   ret i64 %3
  51 }
  52
  53 define i64 @test_rev32_x(i64 %a) nounwind { ; bswap followed by swapping the two 32-bit halves; expected to select rev32 on x0
  54 entry:
  55 ; CHECK-LABEL: test_rev32_x:
  56 ; CHECK: rev32 x0, x0
  57   %0 = tail call i64 @llvm.bswap.i64(i64 %a) ; reverse all eight bytes
  58   %1 = lshr i64 %0, 32 ; upper word moved to the lower word
  59   %2 = shl i64 %0, 32 ; lower word moved to the upper word
  60   %3 = or i64 %1, %2 ; together: the two 32-bit halves exchanged (rotate by 32)
  61   ret i64 %3
  62 }
  63
  64 define <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind { ; reverse all eight bytes of a 64-bit vector; expects rev64.8b
  65 ;CHECK-LABEL: test_vrev64D8:
  66 ;CHECK: rev64.8b
  67         %tmp1 = load <8 x i8>, <8 x i8>* %A
  68         %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; mask 7..0: lanes in reverse order
  69         ret <8 x i8> %tmp2
  70 }
  71
  72 define <4 x i16> @test_vrev64D16(<4 x i16>* %A) nounwind { ; reverse the four 16-bit lanes of a 64-bit vector; expects rev64.4h
  73 ;CHECK-LABEL: test_vrev64D16:
  74 ;CHECK: rev64.4h
  75         %tmp1 = load <4 x i16>, <4 x i16>* %A
  76         %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; mask 3..0: lanes in reverse order
  77         ret <4 x i16> %tmp2
  78 }
  79
  80 define <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind { ; exchange the two 32-bit lanes of a 64-bit vector; expects rev64.2s
  81 ;CHECK-LABEL: test_vrev64D32:
  82 ;CHECK: rev64.2s
  83         %tmp1 = load <2 x i32>, <2 x i32>* %A
  84         %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> <i32 1, i32 0> ; mask 1,0: the two lanes swapped
  85         ret <2 x i32> %tmp2
  86 }
  87
  88 define <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind { ; float variant of the two-lane swap; expects the same rev64.2s
  89 ;CHECK-LABEL: test_vrev64Df:
  90 ;CHECK: rev64.2s
  91         %tmp1 = load <2 x float>, <2 x float>* %A
  92         %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> <i32 1, i32 0> ; mask 1,0: the two lanes swapped
  93         ret <2 x float> %tmp2
  94 }
  95
  96 define <16 x i8> @test_vrev64Q8(<16 x i8>* %A) nounwind { ; reverse the bytes inside each 64-bit half of a 128-bit vector; expects rev64.16b
  97 ;CHECK-LABEL: test_vrev64Q8:
  98 ;CHECK: rev64.16b
  99         %tmp1 = load <16 x i8>, <16 x i8>* %A
 100         %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8> ; mask 7..0 then 15..8
 101         ret <16 x i8> %tmp2
 102 }
 103
 104 define <8 x i16> @test_vrev64Q16(<8 x i16>* %A) nounwind { ; reverse the 16-bit lanes inside each 64-bit half; expects rev64.8h
 105 ;CHECK-LABEL: test_vrev64Q16:
 106 ;CHECK: rev64.8h
 107         %tmp1 = load <8 x i16>, <8 x i16>* %A
 108         %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> ; mask 3..0 then 7..4
 109         ret <8 x i16> %tmp2
 110 }
 111
 112 define <4 x i32> @test_vrev64Q32(<4 x i32>* %A) nounwind { ; swap the two 32-bit lanes inside each 64-bit half; expects rev64.4s
 113 ;CHECK-LABEL: test_vrev64Q32:
 114 ;CHECK: rev64.4s
 115         %tmp1 = load <4 x i32>, <4 x i32>* %A
 116         %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> ; mask 1,0 then 3,2
 117         ret <4 x i32> %tmp2
 118 }
 119
 120 define <4 x float> @test_vrev64Qf(<4 x float>* %A) nounwind { ; float variant of the pairwise lane swap; expects the same rev64.4s
 121 ;CHECK-LABEL: test_vrev64Qf:
 122 ;CHECK: rev64.4s
 123         %tmp1 = load <4 x float>, <4 x float>* %A
 124         %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> ; mask 1,0 then 3,2
 125         ret <4 x float> %tmp2
 126 }
 127
 128 define <8 x i8> @test_vrev32D8(<8 x i8>* %A) nounwind { ; reverse the bytes inside each group of four; expects rev32.8b
 129 ;CHECK-LABEL: test_vrev32D8:
 130 ;CHECK: rev32.8b
 131         %tmp1 = load <8 x i8>, <8 x i8>* %A
 132         %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> ; mask 3..0 then 7..4
 133         ret <8 x i8> %tmp2
 134 }
 135
 136 define <4 x i16> @test_vrev32D16(<4 x i16>* %A) nounwind { ; swap the two 16-bit lanes inside each pair; expects rev32.4h
 137 ;CHECK-LABEL: test_vrev32D16:
 138 ;CHECK: rev32.4h
 139         %tmp1 = load <4 x i16>, <4 x i16>* %A
 140         %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> ; mask 1,0 then 3,2
 141         ret <4 x i16> %tmp2
 142 }
 143
 144 define <16 x i8> @test_vrev32Q8(<16 x i8>* %A) nounwind { ; reverse the bytes inside each of the four 4-byte groups; expects rev32.16b
 145 ;CHECK-LABEL: test_vrev32Q8:
 146 ;CHECK: rev32.16b
 147         %tmp1 = load <16 x i8>, <16 x i8>* %A
 148         %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12> ; mask 3..0, 7..4, 11..8, 15..12
 149         ret <16 x i8> %tmp2
 150 }
 151
 152 define <8 x i16> @test_vrev32Q16(<8 x i16>* %A) nounwind { ; swap each adjacent pair of 16-bit lanes; expects rev32.8h
 153 ;CHECK-LABEL: test_vrev32Q16:
 154 ;CHECK: rev32.8h
 155         %tmp1 = load <8 x i16>, <8 x i16>* %A
 156         %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> ; mask 1,0, 3,2, 5,4, 7,6
 157         ret <8 x i16> %tmp2
 158 }
 159
 160 define <8 x i8> @test_vrev16D8(<8 x i8>* %A) nounwind { ; swap each adjacent pair of bytes in a 64-bit vector; expects rev16.8b
 161 ;CHECK-LABEL: test_vrev16D8:
 162 ;CHECK: rev16.8b
 163         %tmp1 = load <8 x i8>, <8 x i8>* %A
 164         %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> ; mask 1,0, 3,2, 5,4, 7,6
 165         ret <8 x i8> %tmp2
 166 }
 167
 168 define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind { ; swap each adjacent pair of bytes in a 128-bit vector; expects rev16.16b
 169 ;CHECK-LABEL: test_vrev16Q8:
 170 ;CHECK: rev16.16b
 171         %tmp1 = load <16 x i8>, <16 x i8>* %A
 172         %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14> ; every even/odd lane pair exchanged
 173         ret <16 x i8> %tmp2
 174 }
 175
 176 ; Undef shuffle indices should not prevent matching to VREV:
 177
 178 define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind { ; full byte reversal with two mask entries left undef; still expects rev64.8b
 179 ;CHECK-LABEL: test_vrev64D8_undef:
 180 ;CHECK: rev64.8b
 181         %tmp1 = load <8 x i8>, <8 x i8>* %A
 182         %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0> ; the 7..0 mask with result lanes 1 and 2 undef
 183         ret <8 x i8> %tmp2
 184 }
 185
 186 define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind { ; pairwise 16-bit lane swap with three mask entries left undef; still expects rev32.8h
 187 ;CHECK-LABEL: test_vrev32Q16_undef:
 188 ;CHECK: rev32.8h
 189         %tmp1 = load <8 x i16>, <8 x i16>* %A
 190         %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef> ; the 1,0,3,2,5,4,7,6 mask with result lanes 0, 2 and 7 undef
 191         ret <8 x i16> %tmp2
 192 }
 193
 194 ; vrev <4 x i16> should use REV32 and not REV64
 195 define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst) nounwind ssp { ; stores lanes 6 and 5 of a loaded <8 x i16> as a <2 x i16>
 196 ; CHECK-LABEL: test_vrev64:
 197 ; CHECK: ldr [[DEST:q[0-9]+]],
 198 ; CHECK: st1.h
 199 ; CHECK: st1.h
 200 entry: ; NOTE(review): the directives above only look for a q-register load and two st1.h stores; none of them mentions a rev instruction - confirm that is intended
 201   %0 = bitcast <4 x i16>* %source to <8 x i16>* ; reinterpret the pointer so a full 128-bit vector is loaded
 202   %tmp2 = load <8 x i16>, <8 x i16>* %0, align 4
 203   %tmp3 = extractelement <8 x i16> %tmp2, i32 6 ; source lane 6 ...
 204   %tmp5 = insertelement <2 x i16> undef, i16 %tmp3, i32 0 ; ... becomes result lane 0
 205   %tmp9 = extractelement <8 x i16> %tmp2, i32 5 ; source lane 5 ...
 206   %tmp11 = insertelement <2 x i16> %tmp5, i16 %tmp9, i32 1 ; ... becomes result lane 1
 207   store <2 x i16> %tmp11, <2 x i16>* %dst, align 4
 208   ret void
 209 }
 210
 211 ; Test vrev of float4
 212 define void @float_vrev64(float* nocapture %source, <4 x float>* nocapture %dest) nounwind noinline ssp { ; two-input float shuffle; expects a q-register load followed by rev64.4s
 213 ; CHECK-LABEL: float_vrev64:
 214 ; CHECK: ldr [[DEST:q[0-9]+]],
 215 ; CHECK: rev64.4s
 216 entry:
 217   %0 = bitcast float* %source to <4 x float>* ; treat the float pointer as a pointer to a 4-lane vector
 218   %tmp2 = load <4 x float>, <4 x float>* %0, align 4
 219   %tmp5 = shufflevector <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x float> %tmp2, <4 x i32> <i32 0, i32 7, i32 0, i32 0> ; lane 1 = lane 3 of the loaded vector (index 7), lanes 0, 2 and 3 = constant 0.0
 220   %arrayidx8 = getelementptr inbounds <4 x float>, <4 x float>* %dest, i32 11 ; address of the vector element at index 11 from %dest
 221   store <4 x float> %tmp5, <4 x float>* %arrayidx8, align 4
 222   ret void
 223 }
 224
 225
 226 define <4 x i32> @test_vrev32_bswap(<4 x i32> %source) nounwind { ; vector bswap on four i32 lanes; expects one rev32.16b and no further rev before ret
 227 ; CHECK-LABEL: test_vrev32_bswap:
 228 ; CHECK: rev32.16b
 229 ; CHECK-NOT: rev
 230 ; CHECK: ret
 231   %bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %source) ; byte-reverse each 32-bit lane
 232   ret <4 x i32> %bswap
 233 }
 234
 235 declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone ; vector byte-swap intrinsic called by test_vrev32_bswap