From 561088eb5d0aa7a2cfdbcaca23d60a26d6b3859e Mon Sep 17 00:00:00 2001
From: Chandler Carruth <chandlerc@gmail.com>
Date: Sun, 11 Jan 2015 01:36:20 +0000
Subject: [PATCH] [x86] Remove some windows line endings that snuck into the
 tests here.

Folks on Windows, remember to set up your subversion to strip these when
submitting...

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225593 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/X86/avx-intrinsics-x86.ll     |  60 +++---
 test/CodeGen/X86/avx2-intrinsics-x86.ll    |  60 +++---
 test/CodeGen/X86/fold-tied-op.ll           | 168 ++++++++--------
 test/CodeGen/X86/movtopush.ll              | 224 ++++++++++-----------
 test/CodeGen/X86/pr22103.ll                |  38 ++--
 test/CodeGen/X86/sse2-intrinsics-x86.ll    |  60 +++---
 test/CodeGen/X86/vec_extract-avx.ll        | 164 +++++++--------
 test/CodeGen/X86/vector-shuffle-128-v16.ll | 152 +++++++-------
 8 files changed, 463 insertions(+), 463 deletions(-)

diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index ef3e83fc7ad..bb9354cff03 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -455,21 +455,21 @@ define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
   ret <4 x i32> %res
 }
 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
-  ; CHECK: vpslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-  %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
-  ; CHECK: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
-  %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %res
-}
+
+
+define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
+  ; CHECK: vpslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+  %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
+  ; CHECK: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
+  %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
 declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
 
 
@@ -551,21 +551,21 @@ define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
   ret <4 x i32> %res
 }
 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
-  ; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
-  ; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
-  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %res
-}
+
+
+define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
+  ; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
+  ; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
+  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
 declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone
 
 
diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll
index 84b22b76bf6..79a3361bfe8 100644
--- a/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -158,21 +158,21 @@ define <8 x i32> @test_x86_avx2_psll_d(<8 x i32> %a0, <4 x i32> %a1) {
   ret <8 x i32> %res
 }
 declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone
-
-
-define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
-  ; CHECK: vpslldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
-  %res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
-  ret <4 x i64> %res
-}
-declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone
-
-
-define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
-  ; CHECK: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8],zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24]
-  %res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
-  ret <4 x i64> %res
-}
+
+
+define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
+  ; CHECK: vpslldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
+  %res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
+  ; CHECK: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8],zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24]
+  %res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
 declare <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64>, i32) nounwind readnone
 
 
@@ -254,21 +254,21 @@ define <8 x i32> @test_x86_avx2_psrl_d(<8 x i32> %a0, <4 x i32> %a1) {
   ret <8 x i32> %res
 }
 declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone
-
-
-define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
-  ; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
-  %res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
-  ret <4 x i64> %res
-}
-declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
-
-
-define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
-  ; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,ymm0[23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero
-  %res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
-  ret <4 x i64> %res
-}
+
+
+define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
+  ; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
+  %res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
+  ; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,ymm0[23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero
+  %res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
 declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone
 
 
diff --git a/test/CodeGen/X86/fold-tied-op.ll b/test/CodeGen/X86/fold-tied-op.ll
index 65b8aeb74cb..5bf5dbd1a9c 100644
--- a/test/CodeGen/X86/fold-tied-op.ll
+++ b/test/CodeGen/X86/fold-tied-op.ll
@@ -1,84 +1,84 @@
-; RUN: llc -verify-machineinstrs -mtriple=i386--netbsd < %s | FileCheck %s
-; Regression test for http://reviews.llvm.org/D5701
-
-; ModuleID = 'xxhash.i'
-target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
-target triple = "i386--netbsd"
-
-; CHECK-LABEL: fn1
-; CHECK:       shldl {{.*#+}} 4-byte Folded Spill
-; CHECK:       orl   {{.*#+}} 4-byte Folded Reload
-; CHECK:       shldl {{.*#+}} 4-byte Folded Spill
-; CHECK:       orl   {{.*#+}} 4-byte Folded Reload
-; CHECK:       addl  {{.*#+}} 4-byte Folded Reload
-; CHECK:       imull {{.*#+}} 4-byte Folded Reload
-; CHECK:       orl   {{.*#+}} 4-byte Folded Reload
-; CHECK:       retl
-
-%struct.XXH_state64_t = type { i32, i32, i64, i64, i64 }
-
-@a = common global i32 0, align 4
-@b = common global i64 0, align 8
-
-; Function Attrs: nounwind uwtable
-define i64 @fn1() #0 {
-entry:
-  %0 = load i32* @a, align 4, !tbaa !1
-  %1 = inttoptr i32 %0 to %struct.XXH_state64_t*
-  %total_len = getelementptr inbounds %struct.XXH_state64_t* %1, i32 0, i32 0
-  %2 = load i32* %total_len, align 4, !tbaa !5
-  %tobool = icmp eq i32 %2, 0
-  br i1 %tobool, label %if.else, label %if.then
-
-if.then:                                          ; preds = %entry
-  %v3 = getelementptr inbounds %struct.XXH_state64_t* %1, i32 0, i32 3
-  %3 = load i64* %v3, align 4, !tbaa !8
-  %v4 = getelementptr inbounds %struct.XXH_state64_t* %1, i32 0, i32 4
-  %4 = load i64* %v4, align 4, !tbaa !9
-  %v2 = getelementptr inbounds %struct.XXH_state64_t* %1, i32 0, i32 2
-  %5 = load i64* %v2, align 4, !tbaa !10
-  %shl = shl i64 %5, 1
-  %or = or i64 %shl, %5
-  %shl2 = shl i64 %3, 2
-  %shr = lshr i64 %3, 1
-  %or3 = or i64 %shl2, %shr
-  %add = add i64 %or, %or3
-  %mul = mul i64 %4, -4417276706812531889
-  %shl4 = mul i64 %4, -8834553413625063778
-  %shr5 = ashr i64 %mul, 3
-  %or6 = or i64 %shr5, %shl4
-  %mul7 = mul nsw i64 %or6, 1400714785074694791
-  %xor = xor i64 %add, %mul7
-  store i64 %xor, i64* @b, align 8, !tbaa !11
-  %mul8 = mul nsw i64 %xor, 1400714785074694791
-  br label %if.end
-
-if.else:                                          ; preds = %entry
-  %6 = load i64* @b, align 8, !tbaa !11
-  %xor10 = xor i64 %6, -4417276706812531889
-  %mul11 = mul nsw i64 %xor10, 400714785074694791
-  br label %if.end
-
-if.end:                                           ; preds = %if.else, %if.then
-  %storemerge.in = phi i64 [ %mul11, %if.else ], [ %mul8, %if.then ]
-  %storemerge = add i64 %storemerge.in, -8796714831421723037
-  store i64 %storemerge, i64* @b, align 8, !tbaa !11
-  ret i64 undef
-}
-
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-!llvm.ident = !{!0}
-
-!0 = !{!"clang version 3.6 (trunk 219587)"}
-!1 = !{!2, !2, i64 0}
-!2 = !{!"int", !3, i64 0}
-!3 = !{!"omnipotent char", !4, i64 0}
-!4 = !{!"Simple C/C++ TBAA"}
-!5 = !{!6, !2, i64 0}
-!6 = !{!"XXH_state64_t", !2, i64 0, !2, i64 4, !7, i64 8, !7, i64 16, !7, i64 24}
-!7 = !{!"long long", !3, i64 0}
-!8 = !{!6, !7, i64 16}
-!9 = !{!6, !7, i64 24}
-!10 = !{!6, !7, i64 8}
-!11 = !{!7, !7, i64 0}
+; RUN: llc -verify-machineinstrs -mtriple=i386--netbsd < %s | FileCheck %s
+; Regression test for http://reviews.llvm.org/D5701
+
+; ModuleID = 'xxhash.i'
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+target triple = "i386--netbsd"
+
+; CHECK-LABEL: fn1
+; CHECK:       shldl {{.*#+}} 4-byte Folded Spill
+; CHECK:       orl   {{.*#+}} 4-byte Folded Reload
+; CHECK:       shldl {{.*#+}} 4-byte Folded Spill
+; CHECK:       orl   {{.*#+}} 4-byte Folded Reload
+; CHECK:       addl  {{.*#+}} 4-byte Folded Reload
+; CHECK:       imull {{.*#+}} 4-byte Folded Reload
+; CHECK:       orl   {{.*#+}} 4-byte Folded Reload
+; CHECK:       retl
+
+%struct.XXH_state64_t = type { i32, i32, i64, i64, i64 }
+
+@a = common global i32 0, align 4
+@b = common global i64 0, align 8
+
+; Function Attrs: nounwind uwtable
+define i64 @fn1() #0 {
+entry:
+  %0 = load i32* @a, align 4, !tbaa !1
+  %1 = inttoptr i32 %0 to %struct.XXH_state64_t*
+  %total_len = getelementptr inbounds %struct.XXH_state64_t* %1, i32 0, i32 0
+  %2 = load i32* %total_len, align 4, !tbaa !5
+  %tobool = icmp eq i32 %2, 0
+  br i1 %tobool, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  %v3 = getelementptr inbounds %struct.XXH_state64_t* %1, i32 0, i32 3
+  %3 = load i64* %v3, align 4, !tbaa !8
+  %v4 = getelementptr inbounds %struct.XXH_state64_t* %1, i32 0, i32 4
+  %4 = load i64* %v4, align 4, !tbaa !9
+  %v2 = getelementptr inbounds %struct.XXH_state64_t* %1, i32 0, i32 2
+  %5 = load i64* %v2, align 4, !tbaa !10
+  %shl = shl i64 %5, 1
+  %or = or i64 %shl, %5
+  %shl2 = shl i64 %3, 2
+  %shr = lshr i64 %3, 1
+  %or3 = or i64 %shl2, %shr
+  %add = add i64 %or, %or3
+  %mul = mul i64 %4, -4417276706812531889
+  %shl4 = mul i64 %4, -8834553413625063778
+  %shr5 = ashr i64 %mul, 3
+  %or6 = or i64 %shr5, %shl4
+  %mul7 = mul nsw i64 %or6, 1400714785074694791
+  %xor = xor i64 %add, %mul7
+  store i64 %xor, i64* @b, align 8, !tbaa !11
+  %mul8 = mul nsw i64 %xor, 1400714785074694791
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %6 = load i64* @b, align 8, !tbaa !11
+  %xor10 = xor i64 %6, -4417276706812531889
+  %mul11 = mul nsw i64 %xor10, 400714785074694791
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %storemerge.in = phi i64 [ %mul11, %if.else ], [ %mul8, %if.then ]
+  %storemerge = add i64 %storemerge.in, -8796714831421723037
+  store i64 %storemerge, i64* @b, align 8, !tbaa !11
+  ret i64 undef
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.6 (trunk 219587)"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{!6, !2, i64 0}
+!6 = !{!"XXH_state64_t", !2, i64 0, !2, i64 4, !7, i64 8, !7, i64 16, !7, i64 24}
+!7 = !{!"long long", !3, i64 0}
+!8 = !{!6, !7, i64 16}
+!9 = !{!6, !7, i64 24}
+!10 = !{!6, !7, i64 8}
+!11 = !{!7, !7, i64 0}
diff --git a/test/CodeGen/X86/movtopush.ll b/test/CodeGen/X86/movtopush.ll
index fd4094a7a1e..cb48ed747be 100644
--- a/test/CodeGen/X86/movtopush.ll
+++ b/test/CodeGen/X86/movtopush.ll
@@ -1,112 +1,112 @@
-; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=NORMAL
-; RUN: llc < %s -mtriple=i686-windows -force-align-stack -stack-alignment=32 | FileCheck %s -check-prefix=ALIGNED 
-declare void @good(i32 %a, i32 %b, i32 %c, i32 %d)
-declare void @inreg(i32 %a, i32 inreg %b, i32 %c, i32 %d)
-
-; Here, we should have a reserved frame, so we don't expect pushes
-; NORMAL-LABEL: test1
-; NORMAL: subl    $16, %esp
-; NORMAL-NEXT: movl    $4, 12(%esp)
-; NORMAL-NEXT: movl    $3, 8(%esp)
-; NORMAL-NEXT: movl    $2, 4(%esp)
-; NORMAL-NEXT: movl    $1, (%esp)
-; NORMAL-NEXT: call
-define void @test1() {
-entry:
-  call void @good(i32 1, i32 2, i32 3, i32 4)
-  ret void
-}
-
-; Here, we expect a sequence of 4 immediate pushes
-; NORMAL-LABEL: test2
-; NORMAL-NOT: subl {{.*}} %esp
-; NORMAL: pushl   $4
-; NORMAL-NEXT: pushl   $3
-; NORMAL-NEXT: pushl   $2
-; NORMAL-NEXT: pushl   $1
-; NORMAL-NEXT: call
-define void @test2(i32 %k) {
-entry:
-  %a = alloca i32, i32 %k
-  call void @good(i32 1, i32 2, i32 3, i32 4)
-  ret void
-}
-
-; Again, we expect a sequence of 4 immediate pushes
-; Checks that we generate the right pushes for >8bit immediates
-; NORMAL-LABEL: test2b
-; NORMAL-NOT: subl {{.*}} %esp
-; NORMAL: pushl   $4096
-; NORMAL-NEXT: pushl   $3072
-; NORMAL-NEXT: pushl   $2048
-; NORMAL-NEXT: pushl   $1024
-; NORMAL-NEXT: call
-define void @test2b(i32 %k) {
-entry:
-  %a = alloca i32, i32 %k
-  call void @good(i32 1024, i32 2048, i32 3072, i32 4096)
-  ret void
-}
-
-; The first push should push a register
-; NORMAL-LABEL: test3
-; NORMAL-NOT: subl {{.*}} %esp
-; NORMAL: pushl   $4
-; NORMAL-NEXT: pushl   $3
-; NORMAL-NEXT: pushl   $2
-; NORMAL-NEXT: pushl   %e{{..}}
-; NORMAL-NEXT: call
-define void @test3(i32 %k) {
-entry:
-  %a = alloca i32, i32 %k
-  call void @good(i32 %k, i32 2, i32 3, i32 4)
-  ret void
-}
-
-; We don't support weird calling conventions
-; NORMAL-LABEL: test4
-; NORMAL: subl    $12, %esp
-; NORMAL-NEXT: movl    $4, 8(%esp)
-; NORMAL-NEXT: movl    $3, 4(%esp)
-; NORMAL-NEXT: movl    $1, (%esp)
-; NORMAL-NEXT: movl    $2, %eax
-; NORMAL-NEXT: call
-define void @test4(i32 %k) {
-entry:
-  %a = alloca i32, i32 %k
-  call void @inreg(i32 1, i32 2, i32 3, i32 4)
-  ret void
-}
-
-; Check that additional alignment is added when the pushes
-; don't add up to the required alignment.
-; ALIGNED-LABEL: test5
-; ALIGNED: subl    $16, %esp
-; ALIGNED-NEXT: pushl   $4
-; ALIGNED-NEXT: pushl   $3
-; ALIGNED-NEXT: pushl   $2
-; ALIGNED-NEXT: pushl   $1
-; ALIGNED-NEXT: call
-define void @test5(i32 %k) {
-entry:
-  %a = alloca i32, i32 %k
-  call void @good(i32 1, i32 2, i32 3, i32 4)
-  ret void
-}
-
-; Check that pushing the addresses of globals (Or generally, things that 
-; aren't exactly immediates) isn't broken.
-; Fixes PR21878.
-; NORMAL-LABEL: test6
-; NORMAL: pushl    $_ext
-; NORMAL-NEXT: call
-declare void @f(i8*)
-@ext = external constant i8
-
-define void @test6() {
-  call void @f(i8* @ext)
-  br label %bb
-bb:
-  alloca i32
-  ret void
-}
+; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=NORMAL
+; RUN: llc < %s -mtriple=i686-windows -force-align-stack -stack-alignment=32 | FileCheck %s -check-prefix=ALIGNED 
+declare void @good(i32 %a, i32 %b, i32 %c, i32 %d)
+declare void @inreg(i32 %a, i32 inreg %b, i32 %c, i32 %d)
+
+; Here, we should have a reserved frame, so we don't expect pushes
+; NORMAL-LABEL: test1
+; NORMAL: subl    $16, %esp
+; NORMAL-NEXT: movl    $4, 12(%esp)
+; NORMAL-NEXT: movl    $3, 8(%esp)
+; NORMAL-NEXT: movl    $2, 4(%esp)
+; NORMAL-NEXT: movl    $1, (%esp)
+; NORMAL-NEXT: call
+define void @test1() {
+entry:
+  call void @good(i32 1, i32 2, i32 3, i32 4)
+  ret void
+}
+
+; Here, we expect a sequence of 4 immediate pushes
+; NORMAL-LABEL: test2
+; NORMAL-NOT: subl {{.*}} %esp
+; NORMAL: pushl   $4
+; NORMAL-NEXT: pushl   $3
+; NORMAL-NEXT: pushl   $2
+; NORMAL-NEXT: pushl   $1
+; NORMAL-NEXT: call
+define void @test2(i32 %k) {
+entry:
+  %a = alloca i32, i32 %k
+  call void @good(i32 1, i32 2, i32 3, i32 4)
+  ret void
+}
+
+; Again, we expect a sequence of 4 immediate pushes
+; Checks that we generate the right pushes for >8bit immediates
+; NORMAL-LABEL: test2b
+; NORMAL-NOT: subl {{.*}} %esp
+; NORMAL: pushl   $4096
+; NORMAL-NEXT: pushl   $3072
+; NORMAL-NEXT: pushl   $2048
+; NORMAL-NEXT: pushl   $1024
+; NORMAL-NEXT: call
+define void @test2b(i32 %k) {
+entry:
+  %a = alloca i32, i32 %k
+  call void @good(i32 1024, i32 2048, i32 3072, i32 4096)
+  ret void
+}
+
+; The first push should push a register
+; NORMAL-LABEL: test3
+; NORMAL-NOT: subl {{.*}} %esp
+; NORMAL: pushl   $4
+; NORMAL-NEXT: pushl   $3
+; NORMAL-NEXT: pushl   $2
+; NORMAL-NEXT: pushl   %e{{..}}
+; NORMAL-NEXT: call
+define void @test3(i32 %k) {
+entry:
+  %a = alloca i32, i32 %k
+  call void @good(i32 %k, i32 2, i32 3, i32 4)
+  ret void
+}
+
+; We don't support weird calling conventions
+; NORMAL-LABEL: test4
+; NORMAL: subl    $12, %esp
+; NORMAL-NEXT: movl    $4, 8(%esp)
+; NORMAL-NEXT: movl    $3, 4(%esp)
+; NORMAL-NEXT: movl    $1, (%esp)
+; NORMAL-NEXT: movl    $2, %eax
+; NORMAL-NEXT: call
+define void @test4(i32 %k) {
+entry:
+  %a = alloca i32, i32 %k
+  call void @inreg(i32 1, i32 2, i32 3, i32 4)
+  ret void
+}
+
+; Check that additional alignment is added when the pushes
+; don't add up to the required alignment.
+; ALIGNED-LABEL: test5
+; ALIGNED: subl    $16, %esp
+; ALIGNED-NEXT: pushl   $4
+; ALIGNED-NEXT: pushl   $3
+; ALIGNED-NEXT: pushl   $2
+; ALIGNED-NEXT: pushl   $1
+; ALIGNED-NEXT: call
+define void @test5(i32 %k) {
+entry:
+  %a = alloca i32, i32 %k
+  call void @good(i32 1, i32 2, i32 3, i32 4)
+  ret void
+}
+
+; Check that pushing the addresses of globals (Or generally, things that 
+; aren't exactly immediates) isn't broken.
+; Fixes PR21878.
+; NORMAL-LABEL: test6
+; NORMAL: pushl    $_ext
+; NORMAL-NEXT: call
+declare void @f(i8*)
+@ext = external constant i8
+
+define void @test6() {
+  call void @f(i8* @ext)
+  br label %bb
+bb:
+  alloca i32
+  ret void
+}
diff --git a/test/CodeGen/X86/pr22103.ll b/test/CodeGen/X86/pr22103.ll
index 3af1d44987f..77c0751e219 100644
--- a/test/CodeGen/X86/pr22103.ll
+++ b/test/CodeGen/X86/pr22103.ll
@@ -1,19 +1,19 @@
-; RUN: llc < %s | FileCheck %s
-; Don't try to emit a direct call through a TLS global.
-; This fixes PR22103
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-@a = external thread_local global i64
-
-; Function Attrs: nounwind
-define void @_Z1fv() {
-; CHECK-NOT: callq *$a
-; CHECK: movq %fs:0, [[RAX:%r..]]
-; CHECK-NEXT: addq    a@GOTTPOFF(%rip), [[RAX]]
-; CHECK-NEXT: callq *[[RAX]]
-entry:
-  call void bitcast (i64* @a to void ()*)()
-  ret void
-}
+; RUN: llc < %s | FileCheck %s
+; Don't try to emit a direct call through a TLS global.
+; This fixes PR22103
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = external thread_local global i64
+
+; Function Attrs: nounwind
+define void @_Z1fv() {
+; CHECK-NOT: callq *$a
+; CHECK: movq %fs:0, [[RAX:%r..]]
+; CHECK-NEXT: addq    a@GOTTPOFF(%rip), [[RAX]]
+; CHECK-NEXT: callq *[[RAX]]
+entry:
+  call void bitcast (i64* @a to void ()*)()
+  ret void
+}
diff --git a/test/CodeGen/X86/sse2-intrinsics-x86.ll b/test/CodeGen/X86/sse2-intrinsics-x86.ll
index c4d9e6d7e28..ddb04211ec7 100644
--- a/test/CodeGen/X86/sse2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/sse2-intrinsics-x86.ll
@@ -408,21 +408,21 @@ define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
   ret <4 x i32> %res
 }
 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
-  ; CHECK: pslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-  %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
-  ; CHECK: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
-  %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %res
-}
+
+
+define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
+  ; CHECK: pslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+  %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
+  ; CHECK: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
+  %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
 declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
 
 
@@ -504,21 +504,21 @@ define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
   ret <4 x i32> %res
 }
 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
-  ; CHECK: psrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
-  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
-  ; CHECK: psrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
-  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
-  ret <2 x i64> %res
-}
+
+
+define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
+  ; CHECK: psrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
+  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
+  ; CHECK: psrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
+  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
 declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone
 
 
diff --git a/test/CodeGen/X86/vec_extract-avx.ll b/test/CodeGen/X86/vec_extract-avx.ll
index e5e79375fff..fbb84170dc8 100644
--- a/test/CodeGen/X86/vec_extract-avx.ll
+++ b/test/CodeGen/X86/vec_extract-avx.ll
@@ -1,82 +1,82 @@
-target triple = "x86_64-unknown-unknown"
-
-; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s
-
-; When extracting multiple consecutive elements from a larger
-; vector into a smaller one, do it efficiently. We should use
-; an EXTRACT_SUBVECTOR node internally rather than a bunch of
-; single element extractions. 
-
-; Extracting the low elements only requires using the right kind of store.
-define void @low_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
-  %ext0 = extractelement <8 x float> %v, i32 0
-  %ext1 = extractelement <8 x float> %v, i32 1
-  %ext2 = extractelement <8 x float> %v, i32 2
-  %ext3 = extractelement <8 x float> %v, i32 3
-  %ins0 = insertelement <4 x float> undef, float %ext0, i32 0
-  %ins1 = insertelement <4 x float> %ins0, float %ext1, i32 1
-  %ins2 = insertelement <4 x float> %ins1, float %ext2, i32 2
-  %ins3 = insertelement <4 x float> %ins2, float %ext3, i32 3
-  store <4 x float> %ins3, <4 x float>* %ptr, align 16
-  ret void
-
-; CHECK-LABEL: low_v8f32_to_v4f32
-; CHECK: vmovaps
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
-}
-
-; Extracting the high elements requires just one AVX instruction. 
-define void @high_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
-  %ext0 = extractelement <8 x float> %v, i32 4
-  %ext1 = extractelement <8 x float> %v, i32 5
-  %ext2 = extractelement <8 x float> %v, i32 6
-  %ext3 = extractelement <8 x float> %v, i32 7
-  %ins0 = insertelement <4 x float> undef, float %ext0, i32 0
-  %ins1 = insertelement <4 x float> %ins0, float %ext1, i32 1
-  %ins2 = insertelement <4 x float> %ins1, float %ext2, i32 2
-  %ins3 = insertelement <4 x float> %ins2, float %ext3, i32 3
-  store <4 x float> %ins3, <4 x float>* %ptr, align 16
-  ret void
-
-; CHECK-LABEL: high_v8f32_to_v4f32
-; CHECK: vextractf128
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
-}
-
-; Make sure element type doesn't alter the codegen. Note that
-; if we were actually using the vector in this function and
-; have AVX2, we should generate vextracti128 (the int version).
-define void @high_v8i32_to_v4i32(<8 x i32> %v, <4 x i32>* %ptr) {
-  %ext0 = extractelement <8 x i32> %v, i32 4
-  %ext1 = extractelement <8 x i32> %v, i32 5
-  %ext2 = extractelement <8 x i32> %v, i32 6
-  %ext3 = extractelement <8 x i32> %v, i32 7
-  %ins0 = insertelement <4 x i32> undef, i32 %ext0, i32 0
-  %ins1 = insertelement <4 x i32> %ins0, i32 %ext1, i32 1
-  %ins2 = insertelement <4 x i32> %ins1, i32 %ext2, i32 2
-  %ins3 = insertelement <4 x i32> %ins2, i32 %ext3, i32 3
-  store <4 x i32> %ins3, <4 x i32>* %ptr, align 16
-  ret void
-
-; CHECK-LABEL: high_v8i32_to_v4i32
-; CHECK: vextractf128
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
-}
-
-; Make sure that element size doesn't alter the codegen.
-define void @high_v4f64_to_v2f64(<4 x double> %v, <2 x double>* %ptr) {
-  %ext0 = extractelement <4 x double> %v, i32 2
-  %ext1 = extractelement <4 x double> %v, i32 3
-  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
-  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
-  store <2 x double> %ins1, <2 x double>* %ptr, align 16
-  ret void
-
-; CHECK-LABEL: high_v4f64_to_v2f64
-; CHECK: vextractf128
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
-}
+target triple = "x86_64-unknown-unknown"
+
+; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s
+
+; When extracting multiple consecutive elements from a larger
+; vector into a smaller one, do it efficiently. We should use
+; an EXTRACT_SUBVECTOR node internally rather than a bunch of
+; single element extractions. 
+
+; Extracting the low elements only requires using the right kind of store.
+define void @low_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
+  %ext0 = extractelement <8 x float> %v, i32 0
+  %ext1 = extractelement <8 x float> %v, i32 1
+  %ext2 = extractelement <8 x float> %v, i32 2
+  %ext3 = extractelement <8 x float> %v, i32 3
+  %ins0 = insertelement <4 x float> undef, float %ext0, i32 0
+  %ins1 = insertelement <4 x float> %ins0, float %ext1, i32 1
+  %ins2 = insertelement <4 x float> %ins1, float %ext2, i32 2
+  %ins3 = insertelement <4 x float> %ins2, float %ext3, i32 3
+  store <4 x float> %ins3, <4 x float>* %ptr, align 16
+  ret void
+
+; CHECK-LABEL: low_v8f32_to_v4f32
+; CHECK: vmovaps
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+}
+
+; Extracting the high elements requires just one AVX instruction. 
+define void @high_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
+  %ext0 = extractelement <8 x float> %v, i32 4
+  %ext1 = extractelement <8 x float> %v, i32 5
+  %ext2 = extractelement <8 x float> %v, i32 6
+  %ext3 = extractelement <8 x float> %v, i32 7
+  %ins0 = insertelement <4 x float> undef, float %ext0, i32 0
+  %ins1 = insertelement <4 x float> %ins0, float %ext1, i32 1
+  %ins2 = insertelement <4 x float> %ins1, float %ext2, i32 2
+  %ins3 = insertelement <4 x float> %ins2, float %ext3, i32 3
+  store <4 x float> %ins3, <4 x float>* %ptr, align 16
+  ret void
+
+; CHECK-LABEL: high_v8f32_to_v4f32
+; CHECK: vextractf128
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+}
+
+; Make sure element type doesn't alter the codegen. Note that
+; if we were actually using the vector in this function and
+; have AVX2, we should generate vextracti128 (the int version).
+define void @high_v8i32_to_v4i32(<8 x i32> %v, <4 x i32>* %ptr) {
+  %ext0 = extractelement <8 x i32> %v, i32 4
+  %ext1 = extractelement <8 x i32> %v, i32 5
+  %ext2 = extractelement <8 x i32> %v, i32 6
+  %ext3 = extractelement <8 x i32> %v, i32 7
+  %ins0 = insertelement <4 x i32> undef, i32 %ext0, i32 0
+  %ins1 = insertelement <4 x i32> %ins0, i32 %ext1, i32 1
+  %ins2 = insertelement <4 x i32> %ins1, i32 %ext2, i32 2
+  %ins3 = insertelement <4 x i32> %ins2, i32 %ext3, i32 3
+  store <4 x i32> %ins3, <4 x i32>* %ptr, align 16
+  ret void
+
+; CHECK-LABEL: high_v8i32_to_v4i32
+; CHECK: vextractf128
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+}
+
+; Make sure that element size doesn't alter the codegen.
+define void @high_v4f64_to_v2f64(<4 x double> %v, <2 x double>* %ptr) {
+  %ext0 = extractelement <4 x double> %v, i32 2
+  %ext1 = extractelement <4 x double> %v, i32 3
+  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
+  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
+  store <2 x double> %ins1, <2 x double>* %ptr, align 16
+  ret void
+
+; CHECK-LABEL: high_v4f64_to_v2f64
+; CHECK: vextractf128
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+}
diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll
index fd25cc96330..14058c91286 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -467,23 +467,23 @@ define <16 x i8> @PR20540(<8 x i8> %a) {
 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
 ; SSE2-NEXT:    packuswb %xmm1, %xmm0
 ; SSE2-NEXT:    retq
-;
-; SSSE3-LABEL: PR20540:
-; SSSE3:       # BB#0:
-; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
-; SSSE3-NEXT:    retq
-;
-; SSE41-LABEL: PR20540:
-; SSE41:       # BB#0:
-; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
-; SSE41-NEXT:    retq
-;
-; AVX-LABEL: PR20540:
-; AVX:       # BB#0:
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
-; AVX-NEXT:    retq
-  %shuffle = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
-  ret <16 x i8> %shuffle
+;
+; SSSE3-LABEL: PR20540:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: PR20540:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: PR20540:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+  ret <16 x i8> %shuffle
 }
 
 define <16 x i8> @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
@@ -493,25 +493,25 @@ define <16 x i8> @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(
 ; SSE2-NEXT:    movd %eax, %xmm0
 ; SSE2-NEXT:    retq
 ;
-; SSSE3-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; SSSE3:       # BB#0:
-; SSSE3-NEXT:    movd %edi, %xmm0
-; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSSE3-NEXT:    retq
-;
-; SSE41-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; SSE41:       # BB#0:
-; SSE41-NEXT:    movd %edi, %xmm0
-; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSE41-NEXT:    retq
-;
-; AVX-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; AVX:       # BB#0:
-; AVX-NEXT:    vmovd %edi, %xmm0
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX-NEXT:    retq
-  %a = insertelement <16 x i8> undef, i8 %i, i32 0
-  %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSSE3-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movd %edi, %xmm0
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movd %edi, %xmm0
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovd %edi, %xmm0
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT:    retq
+  %a = insertelement <16 x i8> undef, i8 %i, i32 0
+  %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   ret <16 x i8> %shuffle
 }
 
@@ -523,25 +523,25 @@ define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(
 ; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
 ; SSE2-NEXT:    retq
 ;
-; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; SSSE3:       # BB#0:
-; SSSE3-NEXT:    movd %edi, %xmm0
-; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSSE3-NEXT:    retq
-;
-; SSE41-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; SSE41:       # BB#0:
-; SSE41-NEXT:    movd %edi, %xmm0
-; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSE41-NEXT:    retq
-;
-; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; AVX:       # BB#0:
-; AVX-NEXT:    vmovd %edi, %xmm0
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX-NEXT:    retq
-  %a = insertelement <16 x i8> undef, i8 %i, i32 0
-  %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movd %edi, %xmm0
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movd %edi, %xmm0
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovd %edi, %xmm0
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT:    retq
+  %a = insertelement <16 x i8> undef, i8 %i, i32 0
+  %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   ret <16 x i8> %shuffle
 }
 
@@ -571,27 +571,27 @@ define <16 x i8> @shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; SSSE3:       # BB#0:
-; SSSE3-NEXT:    movd %edi, %xmm0
-; SSSE3-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
-; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSSE3-NEXT:    retq
-;
-; SSE41-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; SSE41:       # BB#0:
-; SSE41-NEXT:    movd %edi, %xmm0
-; SSE41-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
-; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; SSE41-NEXT:    retq
-;
-; AVX-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
-; AVX:       # BB#0:
-; AVX-NEXT:    vmovd %edi, %xmm0
-; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
-; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX-NEXT:    retq
-  %a = insertelement <16 x i8> undef, i8 %i, i32 3
-  %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movd %edi, %xmm0
+; SSSE3-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movd %edi, %xmm0
+; SSE41-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovd %edi, %xmm0
+; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT:    retq
+  %a = insertelement <16 x i8> undef, i8 %i, i32 3
+  %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   ret <16 x i8> %shuffle
 }
 
-- 
2.34.1