test/CodeGen/X86/combine-vec-shuffle-3.ll

   1 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
   2
   3 define <4 x float> @test1(<4 x float> %a, <4 x float> %b) {
   4   %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
   5   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   6   ret <4 x float> %2
   7 }
   8 ; CHECK-LABEL: test1
   9 ; Mask: [4,5,6,7]  (%1 = <b0,a1,b2,a3>; %2 = <b0,b1,b2,b3>, every lane from %b)
  10 ; CHECK: movaps
  11 ; CHECK: ret
  12
  13 define <4 x float> @test2(<4 x float> %a, <4 x float> %b) {
  14   %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  15   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  16   ret <4 x float> %2
  17 }
  18 ; CHECK-LABEL: test2
  19 ; Mask: [0,5,6,7]  (%1 = <a0,b1,a2,b3>; %2 = <a0,b1,b2,b3>, only lane 0 from %a)
  20 ; CHECK: movss
  21 ; CHECK: ret
  22
  23 define <4 x float> @test3(<4 x float> %a, <4 x float> %b) {
  24   %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
  25   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
  26   ret <4 x float> %2
  27 }
  28 ; CHECK-LABEL: test3
  29 ; Mask: [0,1,4,5]  (%1 = <a0,b1,a1,b3>; %2 = <a0,a1,b0,b1>, low halves of %a and %b)
  30 ; CHECK: movlhps
  31 ; CHECK: ret
  32
  33 define <4 x float> @test4(<4 x float> %a, <4 x float> %b) {
  34   %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
  35   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
  36   ret <4 x float> %2
  37 }
  38 ; FIXME: this should be lowered as a single movhlps. However, the backend
  39 ; wrongly thinks that shuffle mask [6,7,2,3] is not legal. Therefore, we
  40 ; end up with the sub-optimal sequence 'shufps, palignr'.
  41 ; CHECK-LABEL: test4
  42 ; Mask: [6,7,2,3]
     ; NOTE(review): composing the two masks as written gives %1 = <a0,a1,b1,b1>
     ; and %2 = <b2,b3,a0,a1>, i.e. [6,7,0,1] rather than [6,7,2,3]. Confirm
     ; whether the first mask was meant to start with <2,3,...> before relying on
     ; the FIXME above.
  43 ; CHECK: shufps $84
  44 ; CHECK: palignr $8
  45 ; CHECK: ret
  46
  47 define <4 x float> @test5(<4 x float> %a, <4 x float> %b) {
  48   %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  49   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  50   ret <4 x float> %2
  51 }
  52 ; CHECK-LABEL: test5
  53 ; Mask: [4,1,6,7]  (%1 = <b0,a1,b2,a3>; %2 = <b0,a1,b2,b3>, only lane 1 from %a)
  54 ; CHECK: blendps $13
  55 ; CHECK: ret
  56
  57
  58 define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b) {
  59   %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  60   %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  61   ret <4 x i32> %2
  62 }
  63 ; CHECK-LABEL: test6
  64 ; Mask: [4,5,6,7]  (integer variant of test1: %1 = <b0,a1,b2,a3>; %2 = <b0,b1,b2,b3>)
  65 ; CHECK: movaps
  66 ; CHECK: ret
  67
  68 define <4 x i32> @test7(<4 x i32> %a, <4 x i32> %b) {
  69   %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  70   %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  71   ret <4 x i32> %2
  72 }
  73 ; CHECK-LABEL: test7
  74 ; Mask: [0,5,6,7]  (integer variant of test2: %1 = <a0,b1,a2,b3>; %2 = <a0,b1,b2,b3>)
  75 ; CHECK: movss
  76 ; CHECK: ret
  77
  78 define <4 x i32> @test8(<4 x i32> %a, <4 x i32> %b) {
  79   %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
  80   %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
  81   ret <4 x i32> %2
  82 }
  83 ; CHECK-LABEL: test8
  84 ; Mask: [0,1,4,5]  (integer variant of test3: %1 = <a0,b1,a1,b3>; %2 = <a0,a1,b0,b1>)
  85 ; CHECK: movlhps
  86 ; CHECK: ret
  87
  88 define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) {
  89   %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
  90   %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
  91   ret <4 x i32> %2
  92 }
  93 ; FIXME: this should be lowered as a single movhlps. However, the backend thinks that
  94 ; shuffle mask [6,7,2,3] is not legal.
  95 ; CHECK-LABEL: test9
  96 ; Mask: [6,7,2,3]
     ; NOTE(review): composing the two masks as written gives %1 = <a0,a1,b1,b1>
     ; and %2 = <b2,b3,a0,a1>, i.e. [6,7,0,1] rather than [6,7,2,3]. Confirm
     ; whether the first mask was meant to start with <2,3,...> before relying on
     ; the FIXME above.
  97 ; CHECK: shufps $84
  98 ; CHECK: palignr $8
  99 ; CHECK: ret
 100
 101 define <4 x i32> @test10(<4 x i32> %a, <4 x i32> %b) {
 102   %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
 103   %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
 104   ret <4 x i32> %2
 105 }
 106 ; CHECK-LABEL: test10
 107 ; Mask: [4,1,6,7]  (integer variant of test5: %1 = <b0,a1,b2,a3>; %2 = <b0,a1,b2,b3>)
     ; NOTE(review): unlike test5 and test20, the blendps immediate is not pinned
     ; here -- presumably it is also $13; confirm before tightening.
 108 ; CHECK: blendps
 109 ; CHECK: ret
 110
 111 define <4 x float> @test11(<4 x float> %a, <4 x float> %b) {
 112   %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
 113   %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
 114   ret <4 x float> %2
 115 }
 116 ; CHECK-LABEL: test11
 117 ; Mask: [0,1,2,3]  (second shuffle blends with %a: %1 = <b0,a1,b2,a3>; %2 = <a0,a1,a2,a3>)
     ; The result is %a unchanged, so neither a register move nor a blend is expected.
 118 ; CHECK-NOT: movaps
 119 ; CHECK-NOT: blendps
 120 ; CHECK: ret
 121
 122 define <4 x float> @test12(<4 x float> %a, <4 x float> %b) {
 123   %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 124   %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
 125   ret <4 x float> %2
 126 }
 127 ; CHECK-LABEL: test12
 128 ; Mask: [0,5,6,7]  (second shuffle blends with %a: %1 = <a0,b1,b2,b3>; %2 = <a0,b1,b2,b3>)
 129 ; CHECK: movss
 130 ; CHECK: ret
 131
 132 define <4 x float> @test13(<4 x float> %a, <4 x float> %b) {
 133   %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 134   %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
 135   ret <4 x float> %2
 136 }
 137 ; CHECK-LABEL: test13
 138 ; Mask: [0,1,4,5]  (second shuffle blends with %a: %1 = <a0,a1,b0,b1>; %2 = <a0,a1,b0,b1>)
 139 ; CHECK: movlhps
 140 ; CHECK: ret
 141
 142 define <4 x float> @test14(<4 x float> %a, <4 x float> %b) {
 143   %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 5, i32 5>
 144   %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
 145   ret <4 x float> %2
 146 }
 147 ; FIXME: this should be lowered as a single movhlps. However, the backend
 148 ; wrongly thinks that shuffle mask [6,7,2,3] is not legal. Therefore, we
 149 ; end up with the sub-optimal sequence 'pshufd, blendps'.
 150 ; CHECK-LABEL: test14
 151 ; Mask: [6,7,2,3]  (second shuffle blends with %a: %1 = <b2,b3,b1,b1>; %2 = <b2,b3,a2,a3>)
 152 ; CHECK: pshufd $94
 153 ; CHECK: blendps $12
 154 ; CHECK: ret
 155
 156 define <4 x float> @test15(<4 x float> %a, <4 x float> %b) {
 157   %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
 158   %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
 159   ret <4 x float> %2
 160 }
 161 ; CHECK-LABEL: test15
 162 ; Mask: [4,1,6,7]  (second shuffle blends with %a: %1 = <b0,a1,b2,b3>; %2 = <b0,a1,b2,b3>)
 163 ; CHECK: blendps $13
 164 ; CHECK: ret
 165
 166 define <4 x i32> @test16(<4 x i32> %a, <4 x i32> %b) {
 167   %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
 168   %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
 169   ret <4 x i32> %2
 170 }
 171 ; CHECK-LABEL: test16
 172 ; Mask: [0,1,2,3]  (integer variant of test11: %1 = <b0,a1,b2,a3>; %2 = <a0,a1,a2,a3>)
     ; The result is %a unchanged, so neither a register move nor a blend is expected.
 173 ; CHECK-NOT: movaps
 174 ; CHECK-NOT: blendps
 175 ; CHECK: ret
 176
 177 define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) {
 178   %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
 179   %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
 180   ret <4 x i32> %2
 181 }
 182 ; CHECK-LABEL: test17
 183 ; Mask: [0,5,6,7]  (integer variant of test12: %1 = <a0,b1,b2,b3>; %2 = <a0,b1,b2,b3>)
 184 ; CHECK: movss
 185 ; CHECK: ret
 186
 187 define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) {
 188   %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 189   %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
 190   ret <4 x i32> %2
 191 }
 192 ; CHECK-LABEL: test18
 193 ; Mask: [0,1,4,5]  (integer variant of test13: %1 = <a0,a1,b0,b1>; %2 = <a0,a1,b0,b1>)
 194 ; CHECK: movlhps
 195 ; CHECK: ret
 196
 197 define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
 198   %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 5, i32 5>
 199   %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
 200   ret <4 x i32> %2
 201 }
 202 ; FIXME: this should be lowered as a single movhlps. However, the backend
 203 ; wrongly thinks that shuffle mask [6,7,2,3] is not legal. Therefore, we
 204 ; end up with the sub-optimal sequence 'pshufd, blendps'.
 205 ; CHECK-LABEL: test19
 206 ; Mask: [6,7,2,3]  (integer variant of test14: %1 = <b2,b3,b1,b1>; %2 = <b2,b3,a2,a3>)
 207 ; CHECK: pshufd $94
 208 ; CHECK: blendps $12
 209 ; CHECK: ret
 210
 211 define <4 x i32> @test20(<4 x i32> %a, <4 x i32> %b) {
 212   %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
 213   %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
 214   ret <4 x i32> %2
 215 }
 216 ; CHECK-LABEL: test20
 217 ; Mask: [4,1,6,7]  (integer variant of test15: %1 = <b0,a1,b2,b3>; %2 = <b0,a1,b2,b3>)
 218 ; CHECK: blendps $13
 219 ; CHECK: ret
 220
 221 ; Check some negative cases.
 222 define <4 x float> @test1b(<4 x float> %a, <4 x float> %b) {
 223   %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
 224   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 0>
 225   ret <4 x float> %2
 226 }
 227 ; CHECK-LABEL: test1b
     ; %1 = <b0,a1,b2,a3>; %2 = <b0,b1,b2,b0> (lane 3 re-reads lane 0 of %1).
     ; The expected output still contains two shuffle instructions.
 228 ; CHECK: shufps
 229 ; CHECK: shufps
 230 ; CHECK: ret
 231
 232 define <4 x float> @test2b(<4 x float> %a, <4 x float> %b) {
 233   %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
 234   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 0, i32 5>
 235   ret <4 x float> %2
 236 }
 237 ; CHECK-LABEL: test2b
     ; %1 = <b0,a1,b2,a3>; %2 = <b0,b1,b0,b1> (lanes 0 and 1 are each used twice).
     ; The expected output still contains two shuffle instructions.
 238 ; CHECK: shufps
 239 ; CHECK: pshufd
 240 ; CHECK: ret
 241
 242 define <4 x float> @test3b(<4 x float> %a, <4 x float> %b) {
 243   %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 6, i32 3>
 244   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 7, i32 2, i32 7>
 245   ret <4 x float> %2
 246 }
 247 ; CHECK-LABEL: test3b
     ; %1 = <a0,a0,b2,a3>; %2 = <a0,b3,b2,b3> (b3 is used in two lanes).
     ; The expected output still contains two shuffle instructions.
 248 ; CHECK: shufps
 249 ; CHECK: shufps
 250 ; CHECK: ret
 251
 252 define <4 x float> @test4b(<4 x float> %a, <4 x float> %b) {
 253   %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
 254   %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 5, i32 5, i32 2, i32 7>
 255   ret <4 x float> %2
 256 }
 257 ; CHECK-LABEL: test4b
     ; %1 = <b0,a1,b2,a3>; %2 = <b1,b1,b2,b3> (b1 is used in two lanes).
     ; The expected output still contains two shuffle instructions.
 258 ; CHECK: shufps
 259 ; CHECK: shufps
 260 ; CHECK: ret
 261
 262
 263 ; Verify that we correctly fold shuffles even when we use illegal vector types.
 264 define <4 x i8> @test1c(<4 x i8>* %a, <4 x i8>* %b) {
 265   %A = load <4 x i8>* %a
 266   %B = load <4 x i8>* %b
 267   %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
 268   %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
 269   ret <4 x i8> %2
 270 }
 271 ; CHECK-LABEL: test1c
 272 ; Mask: [0,5,6,7]  (operands are loaded <4 x i8>: %1 = <A0,B1,A2,B3>; %2 = <A0,B1,B2,B3>)
 273 ; CHECK: movss
 274 ; CHECK-NEXT: ret
 275
 276 define <4 x i8> @test2c(<4 x i8>* %a, <4 x i8>* %b) {
 277   %A = load <4 x i8>* %a
 278   %B = load <4 x i8>* %b
 279   %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 0, i32 5, i32 1, i32 5>
 280   %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
 281   ret <4 x i8> %2
 282 }
 283 ; CHECK-LABEL: test2c
 284 ; Mask: [0,1,4,5]  (operands are loaded <4 x i8>: %1 = <A0,B1,A1,B1>; %2 = <A0,A1,B0,B1>)
 285 ; CHECK: movlhps
 286 ; CHECK-NEXT: ret
 287
 288 define <4 x i8> @test3c(<4 x i8>* %a, <4 x i8>* %b) {
 289   %A = load <4 x i8>* %a
 290   %B = load <4 x i8>* %b
 291   %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 0, i32 1, i32 5, i32 5>
 292   %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
 293   ret <4 x i8> %2
 294 }
 295 ; FIXME: this should be lowered as a single movhlps. However, the backend
 296 ; wrongly thinks that shuffle mask [6,7,2,3] is not legal. Therefore, we end up
 297 ; with a sub-optimal sequence of 'shufps+palignr'.
 298
 299 ; CHECK-LABEL: test3c
 300 ; Mask: [6,7,2,3]
     ; NOTE(review): composing the two masks as written gives %1 = <A0,A1,B1,B1>
     ; and %2 = <B2,B3,A0,A1>, i.e. [6,7,0,1] rather than [6,7,2,3]. Confirm
     ; whether the first mask was meant to start with <2,3,...> before relying on
     ; the FIXME above.
 301 ; CHECK: shufps $84
 302 ; CHECK: palignr $8
 303 ; CHECK: ret
 304
 305 define <4 x i8> @test4c(<4 x i8>* %a, <4 x i8>* %b) {
 306   %A = load <4 x i8>* %a
 307   %B = load <4 x i8>* %b
 308   %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
 309   %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
 310   ret <4 x i8> %2
 311 }
 312 ; CHECK-LABEL: test4c
 313 ; Mask: [4,1,6,7]  (operands are loaded <4 x i8>: %1 = <B0,A1,B2,A3>; %2 = <B0,A1,B2,B3>)
 314 ; CHECK: blendps $13
 315 ; CHECK: ret
 316
 317 ; The following test cases are generated from this C++ code
 318 ;
 319 ;__m128 blend_01(__m128 a, __m128 b)
 320 ;{
 321 ;  __m128 s = a;
 322 ;  s = _mm_blend_ps( s, b, 1<<0 );
 323 ;  s = _mm_blend_ps( s, b, 1<<1 );
 324 ;  return s;
 325 ;}
 326 ;
 327 ;__m128 blend_02(__m128 a, __m128 b)
 328 ;{
 329 ;  __m128 s = a;
 330 ;  s = _mm_blend_ps( s, b, 1<<0 );
 331 ;  s = _mm_blend_ps( s, b, 1<<2 );
 332 ;  return s;
 333 ;}
 334 ;
 335 ;__m128 blend_123(__m128 a, __m128 b)
 336 ;{
 337 ;  __m128 s = a;
 338 ;  s = _mm_blend_ps( s, b, 1<<1 );
 339 ;  s = _mm_blend_ps( s, b, 1<<2 );
 340 ;  s = _mm_blend_ps( s, b, 1<<3 );
 341 ;  return s;
 342 ;}
 343
 344 ; Ideally, we should collapse the following shuffles into a single one.
 345
 346 define <4 x float> @blend_01(<4 x float> %a, <4 x float> %b) {
 347   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 undef, i32 2, i32 3>
 348   %shuffle6 = shufflevector <4 x float> %shuffle, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
 349   ret <4 x float> %shuffle6
 350 }
 351 ; CHECK-LABEL: blend_01
     ; %shuffle = <b0,undef,a2,a3>; %shuffle6 = <b0,b1,a2,a3>, i.e. the low two
     ; lanes come from %b and the high two from %a.
 352 ; CHECK: movsd
 353 ; CHECK-NEXT: ret
 354
 355 define <4 x float> @blend_02(<4 x float> %a, <4 x float> %b) {
 356   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 undef, i32 3>
 357   %shuffle6 = shufflevector <4 x float> %shuffle, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
 358   ret <4 x float> %shuffle6
 359 }
 360 ; CHECK-LABEL: blend_02
     ; %shuffle = <b0,a1,undef,a3>; %shuffle6 = <b0,a1,b2,a3>, i.e. lanes 0 and 2
     ; come from %b and lanes 1 and 3 from %a.
 361 ; CHECK: blendps $5
 362 ; CHECK-NEXT: ret
 363
 364 define <4 x float> @blend_123(<4 x float> %a, <4 x float> %b) {
 365   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
 366   %shuffle6 = shufflevector <4 x float> %shuffle, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
 367   %shuffle12 = shufflevector <4 x float> %shuffle6, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
 368   ret <4 x float> %shuffle12
 369 }
 370 ; CHECK-LABEL: blend_123
     ; Three chained shuffles, each filling one more lane from %b:
     ; <a0,b1,undef,undef> -> <a0,b1,b2,undef> -> <a0,b1,b2,b3>. Only lane 0 of
     ; the final value comes from %a.
 371 ; CHECK: movss
 372 ; CHECK: ret
 373