1 ; RUN: opt -slp-vectorizer < %s -S | FileCheck %s
3 ; Verify that the SLP vectorizer recognizes that commutativity makes it
4 ; possible to splat/broadcast %c, which in turn makes it profitable
5 ; to vectorize this case.
8 ; ModuleID = 'bugpoint-reduced-simplified.bc'
; Module-level target description: the datalayout string begins with `e`
; (little-endian) and uses `m:o` (Mach-O name mangling); the triple selects
; x86-64 macOS.
9 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
10 target triple = "x86_64-apple-macosx10.11.0"
; External, 16-byte-aligned destination buffers for the stores in the tests
; below: @cle holds 32 x i8 (written by @splat), @cle32 holds 32 x i32
; (written by @same_opcode_on_one_side).
12 @cle = external unnamed_addr global [32 x i8], align 16
13 @cle32 = external unnamed_addr global [32 x i32], align 16
16 ; Check that we correctly detect a splat/broadcast by leveraging the
17 ; commutativity property of `xor`.
20 ; CHECK: store <16 x i8>
21 define void @splat(i8 %a, i8 %b, i8 %c) {
23 store i8 %1, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 0), align 16
25 store i8 %2, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 1)
27 store i8 %3, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 2)
29 store i8 %4, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 3)
31 store i8 %5, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 4)
33 store i8 %6, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 5)
35 store i8 %7, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 6)
37 store i8 %8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 7)
39 store i8 %9, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 8)
41 store i8 %10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 9)
43 store i8 %11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 10)
45 store i8 %12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 11)
47 store i8 %13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 12)
49 store i8 %14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 13)
51 store i8 %15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 14)
53 store i8 %16, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 15)
59 ; Check that we correctly detect that every lane can have the same opcode
60 ; (`add`) on the same operand side by leveraging the commutativity property of `xor`.
62 ; CHECK-LABEL: @same_opcode_on_one_side
63 ; CHECK: store <4 x i32>
64 define void @same_opcode_on_one_side(i32 %a, i32 %b, i32 %c) {
65 %add1 = add i32 %c, %a
66 %add2 = add i32 %c, %a
67 %add3 = add i32 %a, %c
68 %add4 = add i32 %c, %a
69 %1 = xor i32 %add1, %a
70 store i32 %1, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 0), align 16
71 %2 = xor i32 %b, %add2
72 store i32 %2, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 1)
73 %3 = xor i32 %c, %add3
74 store i32 %3, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 2)
75 %4 = xor i32 %a, %add4
76 store i32 %4, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 3)