Added support for MMX shift and unpack instructions.
authorBill Wendling <isanbard@gmail.com>
Thu, 22 Mar 2007 18:42:45 +0000 (18:42 +0000)
committerBill Wendling <isanbard@gmail.com>
Thu, 22 Mar 2007 18:42:45 +0000 (18:42 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@35266 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/IntrinsicsX86.td
lib/Target/X86/README-MMX.txt [new file with mode: 0644]
lib/Target/X86/README-SSE.txt
lib/Target/X86/X86ISelLowering.cpp
lib/Target/X86/X86InstrMMX.td

index 3af8f9f48c4eee79270ee2dcabb58d38ca43f3bc..2984f79a5a84d9ded28219881935ea1e52cc5615 100644 (file)
@@ -585,3 +585,41 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               Intrinsic<[llvm_v2i32_ty, llvm_v4i16_ty,
                          llvm_v4i16_ty], [IntrNoMem]>;
 }
+
+// Integer shift ops.
+let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
+  // Shift left logical. NOTE(review): element types look inconsistent — psllw/psraw are declared on v8i8 but word shifts operate on v4i16, and pslld/psrad are declared on v4i16 but dword shifts operate on v2i32; verify these intrinsic signatures.
+  def int_x86_mmx_psll_w : GCCBuiltin<"__builtin_ia32_psllw">,
+              Intrinsic<[llvm_v8i8_ty, llvm_v8i8_ty,
+                         llvm_v2i32_ty], [IntrNoMem]>;
+  def int_x86_mmx_psll_d : GCCBuiltin<"__builtin_ia32_pslld">,
+              Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
+                         llvm_v2i32_ty], [IntrNoMem]>;
+  def int_x86_mmx_psll_q : GCCBuiltin<"__builtin_ia32_psllq">,
+              Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
+                         llvm_v2i32_ty], [IntrNoMem]>;
+
+  def int_x86_mmx_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw">,
+              Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
+                         llvm_v2i32_ty], [IntrNoMem]>;
+  def int_x86_mmx_psrl_d : GCCBuiltin<"__builtin_ia32_psrld">,
+              Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
+                         llvm_v2i32_ty], [IntrNoMem]>;
+  def int_x86_mmx_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq">,
+              Intrinsic<[llvm_v2i32_ty,   llvm_v2i32_ty,
+                         llvm_v2i32_ty], [IntrNoMem]>;
+
+  def int_x86_mmx_psra_w : GCCBuiltin<"__builtin_ia32_psraw">,
+              Intrinsic<[llvm_v8i8_ty, llvm_v8i8_ty,
+                         llvm_v2i32_ty], [IntrNoMem]>;
+  def int_x86_mmx_psra_d : GCCBuiltin<"__builtin_ia32_psrad">,
+              Intrinsic<[llvm_v4i16_ty, llvm_v4i16_ty,
+                         llvm_v2i32_ty], [IntrNoMem]>;
+}
+
+// Vector pack/unpack ops (currently only punpckhdq is defined).
+let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
+  def int_x86_mmx_punpckh_dq : GCCBuiltin<"__builtin_ia32_punpckhdq">,
+              Intrinsic<[llvm_v2i32_ty, llvm_v2i32_ty,
+                         llvm_v2i32_ty], [IntrNoMem]>;
+}
diff --git a/lib/Target/X86/README-MMX.txt b/lib/Target/X86/README-MMX.txt
new file mode 100644 (file)
index 0000000..b4886ae
--- /dev/null
@@ -0,0 +1,59 @@
+//===---------------------------------------------------------------------===//
+// Random ideas for the X86 backend: MMX-specific stuff.
+//===---------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------===//
+
+We should compile 
+
+#include <mmintrin.h>
+
+extern __m64 C;
+
+void baz(__v2si *A, __v2si *B)
+{
+  *A = __builtin_ia32_psllq(*B, C);
+  _mm_empty();
+}
+
+to:
+
+.globl _baz
+_baz:
+       call    L3
+"L00000000001$pb":
+L3:
+       popl    %ecx
+       subl    $12, %esp
+       movl    20(%esp), %eax
+       movq    (%eax), %mm0
+       movl    L_C$non_lazy_ptr-"L00000000001$pb"(%ecx), %eax
+       movq    (%eax), %mm1
+       movl    16(%esp), %eax
+       psllq   %mm1, %mm0
+       movq    %mm0, (%eax)
+       emms
+       addl    $12, %esp
+       ret
+
+not:
+
+_baz:
+       subl $12, %esp
+       call "L1$pb"
+"L1$pb":
+       popl %eax
+       movl L_C$non_lazy_ptr-"L1$pb"(%eax), %eax
+       movl (%eax), %ecx
+       movl %ecx, (%esp)
+       movl 4(%eax), %eax
+       movl %eax, 4(%esp)
+       movl 20(%esp), %eax
+       movq (%eax), %mm0
+       movq (%esp), %mm1
+       psllq %mm1, %mm0
+       movl 16(%esp), %eax
+       movq %mm0, (%eax)
+       emms
+       addl $12, %esp
+       ret
index 2b7f9ae4392e362e37d3ff2e8e22ceed8204006c..96e6aab24387a4807ca61f0de47ca42f867b5ce5 100644 (file)
@@ -571,4 +571,44 @@ swizzle:
         movaps %xmm0, (%eax)
         ret
 
+//===---------------------------------------------------------------------===//
+
+We should compile this:
+
+#include <xmmintrin.h>
+
+void foo(__m128i *A, __m128i *B) {
+  *A = _mm_sll_epi16 (*A, *B);
+}
+
+to: 
+
+_foo:
+       subl    $12, %esp
+       movl    16(%esp), %edx
+       movl    20(%esp), %eax
+       movdqa  (%edx), %xmm1
+       movdqa  (%eax), %xmm0
+       psllw   %xmm0, %xmm1
+       movdqa  %xmm1, (%edx)
+       addl    $12, %esp
+       ret
 
+not:
+
+_foo:
+       movl 8(%esp), %eax
+       movdqa (%eax), %xmm0
+       #IMPLICIT_DEF %eax
+       pinsrw $2, %eax, %xmm0
+       xorl %ecx, %ecx
+       pinsrw $3, %ecx, %xmm0
+       pinsrw $4, %eax, %xmm0
+       pinsrw $5, %ecx, %xmm0
+       pinsrw $6, %eax, %xmm0
+       pinsrw $7, %ecx, %xmm0
+       movl 4(%esp), %eax
+       movdqa (%eax), %xmm1
+       psllw %xmm0, %xmm1
+       movdqa %xmm1, (%eax)
+       ret
index e178646e992e61ee6d99a356ea25492bd44db294..2f9763d6f965514accf9b17c677c13bf1f9711a9 100644 (file)
@@ -355,6 +355,10 @@ X86TargetLowering::X86TargetLowering(TargetMachine &TM)
     setOperationAction(ISD::BUILD_VECTOR,       MVT::v8i8,  Expand);
     setOperationAction(ISD::BUILD_VECTOR,       MVT::v4i16, Expand);
     setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i32, Expand);
+
+    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v8i8,  Custom);
+    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4i16, Custom);
+    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i32, Custom);
   }
 
   if (Subtarget->hasSSE1()) {
@@ -2312,7 +2316,7 @@ static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
   return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
 }
 
-/// LowerBuildVectorv16i8 - Custom lower build_vector of v8i16.
+/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
 ///
 static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
                                        unsigned NumNonZero, unsigned NumZero,
index 1d3c127532edcb57d3b6c562b26a2b4fb2cc2fa9..f6c74f7cc7c2fb277d41ac307cb309d0c64c3625 100644 (file)
@@ -44,6 +44,10 @@ def : Pat<(v2i32 (undef)), (IMPLICIT_DEF_VR64)>;
 
 def loadv2i32 : PatFrag<(ops node:$ptr), (v2i32 (load node:$ptr))>;
 
+def bc_v8i8  : PatFrag<(ops node:$in), (v8i8  (bitconvert node:$in))>;
+def bc_v4i16 : PatFrag<(ops node:$in), (v4i16 (bitconvert node:$in))>;
+def bc_v2i32 : PatFrag<(ops node:$in), (v2i32 (bitconvert node:$in))>;
+
 //===----------------------------------------------------------------------===//
 // MMX Multiclasses
 //===----------------------------------------------------------------------===//
@@ -94,13 +98,28 @@ let isTwoAddress = 1 in {
                   [(set VR64:$dst,
                     (OpNode VR64:$src1,(loadv2i32 addr:$src2)))]>;
   }
+
+  multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
+                                string OpcodeStr, Intrinsic IntId> {
+    def rr : MMXI<opc, MRMSrcReg, (ops VR64:$dst, VR64:$src1, VR64:$src2),
+                  !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+                  [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]>;
+    def rm : MMXI<opc, MRMSrcMem, (ops VR64:$dst, VR64:$src1, i64mem:$src2),
+                  !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+                  [(set VR64:$dst, (IntId VR64:$src1,
+                                    (bitconvert (loadv2i32 addr:$src2))))]>;
+    def ri : MMXIi8<opc2, ImmForm, (ops VR64:$dst, VR64:$src1, i32i8imm:$src2),
+                    !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+                    [(set VR64:$dst, (IntId VR64:$src1,
+                                      (scalar_to_vector (i32 imm:$src2))))]>;
+  }
 }
 
 //===----------------------------------------------------------------------===//
 // MMX EMMS Instruction
 //===----------------------------------------------------------------------===//
 
-def EMMS : MMXI<0x77, RawFrm, (ops), "emms", [(int_x86_mmx_emms)]>;
+def MMX_EMMS : MMXI<0x77, RawFrm, (ops), "emms", [(int_x86_mmx_emms)]>;
 
 //===----------------------------------------------------------------------===//
 // MMX Scalar Instructions
@@ -132,6 +151,53 @@ defm MMX_PMULLW  : MMXI_binop_rm<0xD5, "pmullw", mul, v4i16, 1>;
 defm MMX_PMULHW  : MMXI_binop_rm_int<0xE5, "pmulhw" , int_x86_mmx_pmulh_w , 1>;
 defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd, 1>;
 
+
+def MMX_UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{
+  return X86::isUNPCKHMask(N);
+}]>;
+
+let isTwoAddress = 1 in {
+def MMX_PUNPCKHBWrr : MMXI<0x68, MRMSrcReg, 
+                           (ops VR64:$dst, VR64:$src1, VR64:$src2),
+                           "punpckhbw {$src2, $dst|$dst, $src2}",
+                           [(set VR64:$dst,
+                             (v8i8 (vector_shuffle VR64:$src1, VR64:$src2,
+                                    MMX_UNPCKH_shuffle_mask)))]>;
+def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem, 
+                           (ops VR64:$dst, VR64:$src1, i64mem:$src2),
+                           "punpckhbw {$src2, $dst|$dst, $src2}",
+                           [(set VR64:$dst,
+                             (v8i8 (vector_shuffle VR64:$src1,
+                                    (bc_v8i8 (loadv2i32 addr:$src2)),
+                                    MMX_UNPCKH_shuffle_mask)))]>;
+def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg, 
+                           (ops VR64:$dst, VR64:$src1, VR64:$src2),
+                           "punpckhwd {$src2, $dst|$dst, $src2}",
+                           [(set VR64:$dst,
+                             (v4i16 (vector_shuffle VR64:$src1, VR64:$src2,
+                                     MMX_UNPCKH_shuffle_mask)))]>;
+def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem, 
+                           (ops VR64:$dst, VR64:$src1, i64mem:$src2),
+                           "punpckhwd {$src2, $dst|$dst, $src2}",
+                           [(set VR64:$dst,
+                             (v4i16 (vector_shuffle VR64:$src1,
+                                     (bc_v4i16 (loadv2i32 addr:$src2)),
+                                     MMX_UNPCKH_shuffle_mask)))]>;
+def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg, 
+                           (ops VR64:$dst, VR64:$src1, VR64:$src2),
+                           "punpckhdq {$src2, $dst|$dst, $src2}",
+                           [(set VR64:$dst,
+                             (v2i32 (vector_shuffle VR64:$src1, VR64:$src2,
+                                     MMX_UNPCKH_shuffle_mask)))]>;
+def MMX_PUNPCKHDQrm : MMXI<0x6A, MRMSrcMem,
+                           (ops VR64:$dst, VR64:$src1, i64mem:$src2),
+                           "punpckhdq {$src2, $dst|$dst, $src2}",
+                           [(set VR64:$dst,
+                             (v2i32 (vector_shuffle VR64:$src1,
+                                     (loadv2i32 addr:$src2),
+                                     MMX_UNPCKH_shuffle_mask)))]>;
+}
+
 // Logical Instructions
 defm MMX_PAND : MMXI_binop_rm_v2i32<0xDB, "pand", and, 1>;
 defm MMX_POR  : MMXI_binop_rm_v2i32<0xEB, "por" , or,  1>;
@@ -150,6 +216,26 @@ let isTwoAddress = 1 in {
                                                   (load addr:$src2))))]>;
 }
 
+// Shift Instructions
+defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
+                                    int_x86_mmx_psrl_w>;
+defm MMX_PSRLD : MMXI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
+                                    int_x86_mmx_psrl_d>;
+defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
+                                    int_x86_mmx_psrl_q>;
+
+defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
+                                    int_x86_mmx_psll_w>;
+defm MMX_PSLLD : MMXI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
+                                    int_x86_mmx_psll_d>;
+defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
+                                    int_x86_mmx_psll_q>;
+
+defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
+                                    int_x86_mmx_psra_w>;
+defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
+                                    int_x86_mmx_psra_d>;
+
 // Move Instructions
 def MOVD64rr : MMXI<0x6E, MRMSrcReg, (ops VR64:$dst, GR32:$src),
                     "movd {$src, $dst|$dst, $src}", []>;
@@ -225,3 +311,23 @@ def : Pat<(v4i16 (bitconvert (v2i32 VR64:$src))), (v4i16 VR64:$src)>;
 def : Pat<(v4i16 (bitconvert (v8i8  VR64:$src))), (v4i16 VR64:$src)>;
 def : Pat<(v2i32 (bitconvert (v4i16 VR64:$src))), (v2i32 VR64:$src)>;
 def : Pat<(v2i32 (bitconvert (v8i8  VR64:$src))), (v2i32 VR64:$src)>;
+
+// Splat v2i32
+let AddedComplexity = 10 in {
+  def : Pat<(vector_shuffle (v2i32 VR64:$src), (undef),
+             MMX_UNPCKH_shuffle_mask:$sm),
+            (MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>;
+}
+
+// FIXME: Temporary workaround because 2-wide shuffle is broken.
+def : Pat<(int_x86_mmx_punpckh_dq VR64:$src1, VR64:$src2),
+          (v2i32 (MMX_PUNPCKHDQrr VR64:$src1, VR64:$src2))>;
+def : Pat<(int_x86_mmx_punpckh_dq VR64:$src1, (load addr:$src2)),
+          (v2i32 (MMX_PUNPCKHDQrm VR64:$src1, addr:$src2))>;
+
+def MMX_X86s2vec : SDNode<"X86ISD::S2VEC",  SDTypeProfile<1, 1, []>, []>;
+
+// Scalar to v4i16 / v8i8. The source may be a GR32, but only the lower 8 or
+// 16 bits matter.
+def : Pat<(v4i16 (MMX_X86s2vec GR32:$src)), (MOVD64rr GR32:$src)>;
+def : Pat<(v8i8 (MMX_X86s2vec GR32:$src)), (MOVD64rr GR32:$src)>;