-; RUN: opt -S < %s -loop-vectorize 2>&1 | FileCheck %s
+; RUN: opt -S < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 | FileCheck %s
; RUN: opt -S < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 | FileCheck %s --check-prefix=FORCE-VEC
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
; }
; CHECK-LABEL: @ind_plus2(
-; CHECK: load <4 x i32>*
-; CHECK: load <4 x i32>*
+; CHECK: load <4 x i32>, <4 x i32>*
+; CHECK: load <4 x i32>, <4 x i32>*
; CHECK: mul nsw <4 x i32>
; CHECK: mul nsw <4 x i32>
; CHECK: add nsw <4 x i32>
; CHECK: icmp eq i64 %index.next, 512
; FORCE-VEC-LABEL: @ind_plus2(
-; FORCE-VEC: %wide.load = load <2 x i32>*
+; FORCE-VEC: %wide.load = load <2 x i32>, <2 x i32>*
; FORCE-VEC: mul nsw <2 x i32>
; FORCE-VEC: add nsw <2 x i32>
; FORCE-VEC: %index.next = add i64 %index, 2
%i = phi i32 [ 0, %entry ], [ %add1, %for.body ]
%sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
%inc.ptr = getelementptr inbounds i32, i32* %A.addr, i64 1
- %0 = load i32* %A.addr, align 4
+ %0 = load i32, i32* %A.addr, align 4
%mul = mul nsw i32 %0, %i
%add = add nsw i32 %mul, %sum
%add1 = add nsw i32 %i, 2
; }
; CHECK-LABEL: @ind_minus2(
-; CHECK: load <4 x i32>*
-; CHECK: load <4 x i32>*
+; CHECK: load <4 x i32>, <4 x i32>*
+; CHECK: load <4 x i32>, <4 x i32>*
; CHECK: mul nsw <4 x i32>
; CHECK: mul nsw <4 x i32>
; CHECK: add nsw <4 x i32>
; CHECK: icmp eq i64 %index.next, 512
; FORCE-VEC-LABEL: @ind_minus2(
-; FORCE-VEC: %wide.load = load <2 x i32>*
+; FORCE-VEC: %wide.load = load <2 x i32>, <2 x i32>*
; FORCE-VEC: mul nsw <2 x i32>
; FORCE-VEC: add nsw <2 x i32>
; FORCE-VEC: %index.next = add i64 %index, 2
%i = phi i32 [ 1024, %entry ], [ %sub, %for.body ]
%sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
%inc.ptr = getelementptr inbounds i32, i32* %A.addr, i64 1
- %0 = load i32* %A.addr, align 4
+ %0 = load i32, i32* %A.addr, align 4
%mul = mul nsw i32 %0, %i
%add = add nsw i32 %mul, %sum
%sub = add nsw i32 %i, -2
; }
; CHECK-LABEL: @ptr_ind_plus2(
-; CHECK: load i32*
-; CHECK: load i32*
-; CHECK: load i32*
-; CHECK: load i32*
-; CHECK: mul nsw i32
-; CHECK: mul nsw i32
-; CHECK: add nsw i32
-; CHECK: add nsw i32
-; CHECK: %index.next = add i64 %index, 2
-; CHECK: %21 = icmp eq i64 %index.next, 1024
+; CHECK: %[[V0:.*]] = load <8 x i32>
+; CHECK: shufflevector <8 x i32> %[[V0]], <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK: shufflevector <8 x i32> %[[V0]], <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK: %[[V1:.*]] = load <8 x i32>
+; CHECK: shufflevector <8 x i32> %[[V1]], <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK: shufflevector <8 x i32> %[[V1]], <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK: mul nsw <4 x i32>
+; CHECK: mul nsw <4 x i32>
+; CHECK: add nsw <4 x i32>
+; CHECK: add nsw <4 x i32>
+; CHECK: %index.next = add i64 %index, 8
+; CHECK: icmp eq i64 %index.next, 1024
; FORCE-VEC-LABEL: @ptr_ind_plus2(
-; FORCE-VEC: load i32*
-; FORCE-VEC: insertelement <2 x i32>
-; FORCE-VEC: load i32*
-; FORCE-VEC: insertelement <2 x i32>
-; FORCE-VEC: load i32*
-; FORCE-VEC: insertelement <2 x i32>
-; FORCE-VEC: load i32*
-; FORCE-VEC: insertelement <2 x i32>
+; FORCE-VEC: %[[V:.*]] = load <4 x i32>
+; FORCE-VEC: shufflevector <4 x i32> %[[V]], <4 x i32> undef, <2 x i32> <i32 0, i32 2>
+; FORCE-VEC: shufflevector <4 x i32> %[[V]], <4 x i32> undef, <2 x i32> <i32 1, i32 3>
; FORCE-VEC: mul nsw <2 x i32>
; FORCE-VEC: add nsw <2 x i32>
; FORCE-VEC: %index.next = add i64 %index, 2
%sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
%i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%inc.ptr = getelementptr inbounds i32, i32* %A.addr, i64 1
- %0 = load i32* %A.addr, align 4
+ %0 = load i32, i32* %A.addr, align 4
%inc.ptr1 = getelementptr inbounds i32, i32* %A.addr, i64 2
- %1 = load i32* %inc.ptr, align 4
+ %1 = load i32, i32* %inc.ptr, align 4
%mul = mul nsw i32 %1, %0
%add = add nsw i32 %mul, %sum
%inc = add nsw i32 %i, 1