+; Alternating subtract/add pattern on <16 x float>.
+; The IR computes both %A + %B and %A - %B, then a shufflevector builds the
+; result from even lanes of the difference and odd lanes of the sum.
+; The assertions below expect four addsubps on xmm registers for the SSE
+; prefix and two vaddsubps on ymm registers for the AVX1 prefix. For the
+; AVX512 prefix they expect a separate vaddps and vsubps on zmm registers
+; that are merged by vpermt2ps using an index vector equal to the IR mask.
+define <16 x float> @test5(<16 x float> %A, <16 x float> %B) {
+; SSE-LABEL: test5:
+; SSE: # BB#0:
+; SSE-NEXT: addsubps %xmm4, %xmm0
+; SSE-NEXT: addsubps %xmm5, %xmm1
+; SSE-NEXT: addsubps %xmm6, %xmm2
+; SSE-NEXT: addsubps %xmm7, %xmm3
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test5:
+; AVX1: # BB#0:
+; AVX1-NEXT: vaddsubps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vaddsubps %ymm3, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: test5:
+; AVX512: # BB#0:
+; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm2
+; AVX512-NEXT: vsubps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vmovdqa32 {{.*#+}} zmm1 = [0,17,2,19,4,21,6,23,8,25,10,27,12,29,14,31]
+; AVX512-NEXT: vpermt2ps %zmm2, %zmm1, %zmm0
+; AVX512-NEXT: retq
+ ; Full-width sum and difference of the two inputs.
+ %add = fadd <16 x float> %A, %B
+ %sub = fsub <16 x float> %A, %B
+ ; Mask indices 0,2,...,14 pick the even lanes of the first operand (%sub);
+ ; indices 17,19,...,31 pick lanes 1,3,...,15 of the second operand (%add).
+ %vecinit2 = shufflevector <16 x float> %sub, <16 x float> %add, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
+ ret <16 x float> %vecinit2
+}
+
+; Alternating subtract/add pattern on <8 x double>.
+; The IR computes both %A + %B and %A - %B, then a shufflevector builds the
+; result from even lanes of the difference and odd lanes of the sum.
+; The assertions below expect four addsubpd on xmm registers for the SSE
+; prefix and two vaddsubpd on ymm registers for the AVX1 prefix. For the
+; AVX512 prefix they expect a separate vaddpd and vsubpd on zmm registers
+; that are merged by vpermt2pd using an index vector equal to the IR mask.
+define <8 x double> @test6(<8 x double> %A, <8 x double> %B) {
+; SSE-LABEL: test6:
+; SSE: # BB#0:
+; SSE-NEXT: addsubpd %xmm4, %xmm0
+; SSE-NEXT: addsubpd %xmm5, %xmm1
+; SSE-NEXT: addsubpd %xmm6, %xmm2
+; SSE-NEXT: addsubpd %xmm7, %xmm3
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: test6:
+; AVX1: # BB#0:
+; AVX1-NEXT: vaddsubpd %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vaddsubpd %ymm3, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: test6:
+; AVX512: # BB#0:
+; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm2
+; AVX512-NEXT: vsubpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,9,2,11,4,13,6,15]
+; AVX512-NEXT: vpermt2pd %zmm2, %zmm1, %zmm0
+; AVX512-NEXT: retq
+ ; Full-width sum and difference of the two inputs.
+ %add = fadd <8 x double> %A, %B
+ %sub = fsub <8 x double> %A, %B
+ ; Mask indices 0,2,4,6 pick the even lanes of the first operand (%sub);
+ ; indices 9,11,13,15 pick lanes 1,3,5,7 of the second operand (%add).
+ %vecinit2 = shufflevector <8 x double> %sub, <8 x double> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+ ret <8 x double> %vecinit2
+}
+