+
+; Assembles <A[0], 0.0, B[2], 0.0> one lane at a time. Under both check
+; prefixes the expected output is a single insertps followed by the return.
+define <4 x float> @insertps_4(<4 x float> %A, <4 x float> %B) {
+; X32-LABEL: insertps_4:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[2],zero
+; X32-NEXT: retl
+;
+; X64-LABEL: insertps_4:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[2],zero
+; X64-NEXT: retq
+entry:
+ %a0 = extractelement <4 x float> %A, i32 0
+ %lane0 = insertelement <4 x float> undef, float %a0, i32 0
+ %lane01 = insertelement <4 x float> %lane0, float 0.000000e+00, i32 1
+ %b2 = extractelement <4 x float> %B, i32 2
+ %lane012 = insertelement <4 x float> %lane01, float %b2, i32 2
+ %result = insertelement <4 x float> %lane012, float 0.000000e+00, i32 3
+ ret <4 x float> %result
+}
+
+; Assembles <A[0], B[1], 0.0, 0.0>: the two low lanes come from the inputs
+; and the two high lanes are zeroed. Expected output is a single insertps.
+define <4 x float> @insertps_5(<4 x float> %A, <4 x float> %B) {
+; X32-LABEL: insertps_5:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[1],zero,zero
+; X32-NEXT: retl
+;
+; X64-LABEL: insertps_5:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[1],zero,zero
+; X64-NEXT: retq
+entry:
+ %a0 = extractelement <4 x float> %A, i32 0
+ %lane0 = insertelement <4 x float> undef, float %a0, i32 0
+ %b1 = extractelement <4 x float> %B, i32 1
+ %lane01 = insertelement <4 x float> %lane0, float %b1, i32 1
+ %lane012 = insertelement <4 x float> %lane01, float 0.000000e+00, i32 2
+ %result = insertelement <4 x float> %lane012, float 0.000000e+00, i32 3
+ ret <4 x float> %result
+}
+
+; Assembles <0.0, A[1], B[2], 0.0>. Lane 0 is already zero in the constant
+; base vector; lanes 1..3 are filled by the inserts below. Expected output is
+; a single insertps.
+define <4 x float> @insertps_6(<4 x float> %A, <4 x float> %B) {
+; X32-LABEL: insertps_6:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[1],xmm1[2],zero
+; X32-NEXT: retl
+;
+; X64-LABEL: insertps_6:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[1],xmm1[2],zero
+; X64-NEXT: retq
+entry:
+ %a1 = extractelement <4 x float> %A, i32 1
+ %lane01 = insertelement <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, float %a1, i32 1
+ %b2 = extractelement <4 x float> %B, i32 2
+ %lane012 = insertelement <4 x float> %lane01, float %b2, i32 2
+ %result = insertelement <4 x float> %lane012, float 0.000000e+00, i32 3
+ ret <4 x float> %result
+}
+
+; Assembles <A[0], 0.0, B[1], 0.0>. Unlike insertps_4, the element taken
+; from %B changes position: source lane 1 lands in destination lane 2.
+; Expected output is a single insertps.
+define <4 x float> @insertps_7(<4 x float> %A, <4 x float> %B) {
+; X32-LABEL: insertps_7:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[1],zero
+; X32-NEXT: retl
+;
+; X64-LABEL: insertps_7:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[1],zero
+; X64-NEXT: retq
+entry:
+ %a0 = extractelement <4 x float> %A, i32 0
+ %lane0 = insertelement <4 x float> undef, float %a0, i32 0
+ %lane01 = insertelement <4 x float> %lane0, float 0.000000e+00, i32 1
+ %b1 = extractelement <4 x float> %B, i32 1
+ %lane012 = insertelement <4 x float> %lane01, float %b1, i32 2
+ %result = insertelement <4 x float> %lane012, float 0.000000e+00, i32 3
+ ret <4 x float> %result
+}
+
+; Assembles <A[0], B[0], 0.0, 0.0>: lane 0 of each input fills the two low
+; lanes and the two high lanes are zeroed. Expected output is a single
+; insertps.
+define <4 x float> @insertps_8(<4 x float> %A, <4 x float> %B) {
+; X32-LABEL: insertps_8:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; X32-NEXT: retl
+;
+; X64-LABEL: insertps_8:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; X64-NEXT: retq
+entry:
+ %a0 = extractelement <4 x float> %A, i32 0
+ %lane0 = insertelement <4 x float> undef, float %a0, i32 0
+ %b0 = extractelement <4 x float> %B, i32 0
+ %lane01 = insertelement <4 x float> %lane0, float %b0, i32 1
+ %lane012 = insertelement <4 x float> %lane01, float 0.000000e+00, i32 2
+ %result = insertelement <4 x float> %lane012, float 0.000000e+00, i32 3
+ ret <4 x float> %result
+}
+
+; Assembles <0.0, A[0], B[2], 0.0>. The element from %B keeps its lane while
+; the element from %A moves from lane 0 to lane 1. The expected output
+; performs the insertps into xmm1 and then copies the result to xmm0 with
+; movaps.
+define <4 x float> @insertps_9(<4 x float> %A, <4 x float> %B) {
+; X32-LABEL: insertps_9:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: insertps {{.*#+}} xmm1 = zero,xmm0[0],xmm1[2],zero
+; X32-NEXT: movaps %xmm1, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: insertps_9:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: insertps {{.*#+}} xmm1 = zero,xmm0[0],xmm1[2],zero
+; X64-NEXT: movaps %xmm1, %xmm0
+; X64-NEXT: retq
+entry:
+ %a0 = extractelement <4 x float> %A, i32 0
+ %lane01 = insertelement <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, float %a0, i32 1
+ %b2 = extractelement <4 x float> %B, i32 2
+ %lane012 = insertelement <4 x float> %lane01, float %b2, i32 2
+ %result = insertelement <4 x float> %lane012, float 0.000000e+00, i32 3
+ ret <4 x float> %result
+}
+
+; Single-input case: starts from an all-zero constant vector and writes A[0]
+; into lanes 0 and 2, giving <A[0], 0.0, A[0], 0.0>. Expected output is a
+; single insertps whose sources are both xmm0. The body has no explicit
+; block label, so the BB#0 check lines carry no block-name comment.
+define <4 x float> @insertps_10(<4 x float> %A)
+; X32-LABEL: insertps_10:
+; X32: ## BB#0:
+; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[0],zero
+; X32-NEXT: retl
+;
+; X64-LABEL: insertps_10:
+; X64: ## BB#0:
+; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[0],zero
+; X64-NEXT: retq
+{
+ %a0 = extractelement <4 x float> %A, i32 0
+ %lane0 = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %a0, i32 0
+ %result = insertelement <4 x float> %lane0, float %a0, i32 2
+ ret <4 x float> %result
+}
+
+; Produces <0.0, A[1], 0.0, A[3]>: A[1] is inserted into a zero vector, lane 2
+; is (re)written with zero, and the final shufflevector pulls lane 3 from %A
+; (mask index 7 selects element 3 of the second operand). Expected output is
+; an xorps to materialize zero followed by one blendps.
+define <4 x float> @build_vector_to_shuffle_1(<4 x float> %A) {
+; X32-LABEL: build_vector_to_shuffle_1:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: xorps %xmm1, %xmm1
+; X32-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
+; X32-NEXT: retl
+;
+; X64-LABEL: build_vector_to_shuffle_1:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: xorps %xmm1, %xmm1
+; X64-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
+; X64-NEXT: retq
+entry:
+ %a1 = extractelement <4 x float> %A, i32 1
+ %with_a1 = insertelement <4 x float> zeroinitializer, float %a1, i32 1
+ %zeroed2 = insertelement <4 x float> %with_a1, float 0.0, i32 2
+ %result = shufflevector <4 x float> %zeroed2, <4 x float> %A, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+ ret <4 x float> %result
+}
+
+; Produces <0.0, A[1], 0.0, 0.0>: A[1] is inserted into a zero vector and
+; lane 2 is (re)written with zero. Expected output is an xorps to materialize
+; zero followed by one blendps that keeps only lane 1 from xmm0.
+define <4 x float> @build_vector_to_shuffle_2(<4 x float> %A) {
+; X32-LABEL: build_vector_to_shuffle_2:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: xorps %xmm1, %xmm1
+; X32-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
+; X32-NEXT: retl
+;
+; X64-LABEL: build_vector_to_shuffle_2:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: xorps %xmm1, %xmm1
+; X64-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
+; X64-NEXT: retq
+entry:
+ %a1 = extractelement <4 x float> %A, i32 1
+ %with_a1 = insertelement <4 x float> zeroinitializer, float %a1, i32 1
+ %result = insertelement <4 x float> %with_a1, float 0.0, i32 2
+ ret <4 x float> %result
+}