-; SSSE3-NEXT: movaps {{.*#+}} xmm4 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
-; SSSE3-NEXT: andps %xmm4, %xmm2
-; SSSE3-NEXT: movaps {{.*#+}} xmm5 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
-; SSSE3-NEXT: andps %xmm5, %xmm0
-; SSSE3-NEXT: orps %xmm2, %xmm0
-; SSSE3-NEXT: andps %xmm4, %xmm3
-; SSSE3-NEXT: andps %xmm5, %xmm1
-; SSSE3-NEXT: orps %xmm3, %xmm1
+; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [0,1,128,3,128,128,128,7,128,128,128,128,128,128,128,128]
+; SSSE3-NEXT: pshufb %xmm4, %xmm2
+; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [128,128,2,128,4,5,6,128,8,9,10,11,12,13,14,15]
+; SSSE3-NEXT: pshufb %xmm5, %xmm0
+; SSSE3-NEXT: por %xmm2, %xmm0
+; SSSE3-NEXT: pshufb %xmm4, %xmm3
+; SSSE3-NEXT: pshufb %xmm5, %xmm1
+; SSSE3-NEXT: por %xmm3, %xmm1