Implement sse4.2 string/text processing instructions:
author Eric Christopher <echristo@apple.com>
Tue, 18 Aug 2009 22:50:32 +0000 (22:50 +0000)
committer Eric Christopher <echristo@apple.com>
Tue, 18 Aug 2009 22:50:32 +0000 (22:50 +0000)
Add patterns and instruction encoding information.
Add custom lowering to deal with hardwired return register of
uncertain type (xmm0).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@79377 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/X86/X86ISelLowering.cpp
lib/Target/X86/X86ISelLowering.h
lib/Target/X86/X86InstrFormats.td
lib/Target/X86/X86InstrSSE.td

index cb5a74fa40fe57bf79ac22297cbb818df1892d8e..91b48138f44ada32e6a086c1f1ebc7e5947a91cc 100644 (file)
@@ -7595,6 +7595,43 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
   return nextMBB;
 }
 
+/// EmitPCMP - Expand one of the PCMP{I,E}STRM128{REG,MEM} pseudo instructions.
+///
+/// The real pcmpistrm/pcmpestrm instructions are declared with no explicit
+/// result and with XMM0 among their implicit defs, so the pseudo is replaced
+/// by the real instruction followed by a MOVAPSrr that copies XMM0 into the
+/// pseudo's destination register (operand 0 of MI).
+///
+/// \param MI      the pseudo instruction to expand; it is deleted before
+///                returning.
+/// \param BB      the block the replacement instructions are emitted into.
+/// \param numArgs 3 selects the PCMPISTRM opcodes; any other value selects
+///                the PCMPESTRM opcodes.
+/// \param memArg  true selects the "rm" opcode, false the "rr" opcode.
+/// \returns BB.
+MachineBasicBlock *
+X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
+                           unsigned numArgs, bool memArg) const {
+
+  MachineFunction *F = BB->getParent();
+  DebugLoc dl = MI->getDebugLoc();
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+  unsigned Opc;
+
+  if (memArg) {
+    Opc = numArgs == 3 ?
+      X86::PCMPISTRM128rm :
+      X86::PCMPESTRM128rm;
+  } else {
+    Opc = numArgs == 3 ?
+      X86::PCMPISTRM128rr :
+      X86::PCMPESTRM128rr;
+  }
+
+  MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(Opc));
+
+  // Forward every operand after the result (operand 0).  The bound is the
+  // pseudo's actual operand count rather than numArgs: in the "rm" forms the
+  // memory reference occupies several machine operands, so a fixed count of
+  // 3 or 5 would cut the address short and drop the trailing immediate.
+  // Implicit register operands are skipped so that they are not duplicated
+  // on the real instruction, which carries its own implicit uses and defs.
+  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &Op = MI->getOperand(i);
+
+    if (!(Op.isReg() && Op.isImplicit()))
+      MIB.addOperand(Op);
+  }
+
+  // The result is produced in XMM0; copy it into the pseudo's destination.
+  BuildMI(BB, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg())
+    .addReg(X86::XMM0);
+
+  F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
+
+  return BB;
+}
+
 MachineBasicBlock *
 X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
                                                  MachineInstr *MI,
@@ -7804,6 +7841,17 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
     return BB;
   }
+    // String/text processing lowering.
+  case X86::PCMPISTRM128REG:
+    return EmitPCMP(MI, BB, 3, false /* in-mem */);
+  case X86::PCMPISTRM128MEM:
+    return EmitPCMP(MI, BB, 3, true /* in-mem */);
+  case X86::PCMPESTRM128REG:
+    return EmitPCMP(MI, BB, 5, false /* in mem */);
+  case X86::PCMPESTRM128MEM:
+    return EmitPCMP(MI, BB, 5, true /* in mem */);
+
+    // Atomic Lowering.
   case X86::ATOMAND32:
     return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
                                                X86::AND32ri, X86::MOV32rm,
index f3f09f5ac93dc6a6ef38ef4c405928fb73429ca5..1c612a13a25c847ee1c87a6fb1bc362510773a63 100644 (file)
@@ -693,6 +693,14 @@ namespace llvm {
                                     const Value *DstSV, uint64_t DstSVOff,
                                     const Value *SrcSV, uint64_t SrcSVOff);
     
+    /// Utility function to emit string processing sse4.2 instructions
+    /// that return in xmm0.
+    // This takes the pseudo instruction to expand, the associated machine
+    // basic block, the argument count used to pick the instruction (3 selects
+    // the pcmpistrm opcodes, anything else the pcmpestrm opcodes), and a flag
+    // selecting the memory-operand ("rm") opcode instead of the register
+    // ("rr") one.  The pseudo is deleted and the block is returned.
+    MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB,
+                               unsigned argNum, bool inMem) const;
+
     /// Utility function to emit atomic bitwise operations (and, or, xor).
     // It takes the bitwise instruction to expand, the associated machine basic
     // block, and the associated X86 opcodes for reg/reg and reg/imm.
index 6f5941cf153046b8a125324fe65963d076839321..ddc8654359e89f83347b44e6a3b7ef1fbdf220f9 100644 (file)
@@ -235,6 +235,11 @@ class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
               list<dag> pattern>
       : I<o, F, outs, ins, asm, pattern>, TF, Requires<[HasSSE42]>;
       
+//   SS42AI = SSE 4.2 instructions with TA prefix.
+//   Built on Ii8 (like the MMXID/MMXIS templates in this file) rather than
+//   on I, because the SSE 4.2 string-compare instructions using this
+//   template take a trailing 8-bit immediate control byte that has to be
+//   recorded as part of the instruction's encoding information.
+class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
+            list<dag> pattern>
+      : Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE42]>;
+
 // X86-64 Instruction templates...
 //
 
@@ -288,4 +293,3 @@ class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> patter
       : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX]>;
 class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
       : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>;
-
index f5965271ae2616a1bd54744545d0ca892e59429b..966833f44adc7a7f118cf61cc203e6cefc7e5ca6 100644 (file)
@@ -3657,6 +3657,11 @@ def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                        "movntdqa\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>;
 
+
+//===----------------------------------------------------------------------===//
+// SSE4.2 Instructions
+//===----------------------------------------------------------------------===//
+
 /// SS42I_binop_rm_int - Simple SSE 4.2 binary operator
 let Constraints = "$src1 = $dst" in {
   multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
@@ -3739,3 +3744,115 @@ let Constraints = "$src1 = $dst" in {
                          (int_x86_sse42_crc32_64 GR64:$src1, GR64:$src2))]>,
                          OpSize, REX_W;
 }
+
+// String/text processing instructions.
+//
+// Pseudo instructions selected for int_x86_sse42_pcmpistrm128, with the
+// second operand either in a register (REG) or loaded from memory (MEM).
+// They are marked usesCustomDAGSchedInserter and are rewritten by
+// X86TargetLowering::EmitPCMP into PCMPISTRM128rr/rm followed by a copy of
+// XMM0 into $dst.
+let Defs = [EFLAGS], usesCustomDAGSchedInserter = 1 in {
+def PCMPISTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
+                       (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+                   "#PCMPISTRM128rr PSEUDO!",
+                   [(set VR128:$dst,
+                       (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
+                                                   imm:$src3))]>, OpSize;
+def PCMPISTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
+                       (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+                   "#PCMPISTRM128rm PSEUDO!",
+                   [(set VR128:$dst,
+                       (int_x86_sse42_pcmpistrm128 VR128:$src1,
+                                                   (load addr:$src2),
+                                                   imm:$src3))]>, OpSize;
+}
+
+// Real pcmpistrm instructions (opcode 0x62, register and memory source
+// forms).  They have no explicit result: XMM0 and EFLAGS are implicit defs.
+// Their pattern lists are empty, so they are not selected directly; the
+// PCMPISTRM128REG/MEM pseudos are expanded into them.
+let Defs = [XMM0, EFLAGS] in {
+def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
+                           (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+                    "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}",
+                    []>, OpSize;
+def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
+                           (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+                    "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}",
+                    []>, OpSize;
+}
+
+// Pseudo instructions selected for int_x86_sse42_pcmpestrm128.  The
+// intrinsic's second and fourth operands are matched as the physical
+// registers EAX and EDX, which are also listed in Uses; only the two vector
+// operands and the immediate are explicit inputs.  Like the PCMPISTRM128
+// pseudos, these are rewritten by X86TargetLowering::EmitPCMP into the real
+// instruction plus a copy of XMM0 into $dst.
+let Defs = [EFLAGS], Uses = [EAX, EDX],
+       usesCustomDAGSchedInserter = 1 in {
+def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
+                       (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+                   "#PCMPESTRM128rr PSEUDO!",
+                   [(set VR128:$dst,
+                       (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX,
+                                                   VR128:$src3,
+                                                   EDX, imm:$src5))]>, OpSize;
+def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
+                       (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+                   "#PCMPESTRM128rm PSEUDO!",
+                   [(set VR128:$dst,
+                       (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX,
+                                                   (load addr:$src3),
+                                                   EDX, imm:$src5))]>, OpSize;
+}
+
+// Real pcmpestrm instructions (register and memory source forms).  They
+// have no explicit result: XMM0 and EFLAGS are implicit defs, and the
+// explicit string lengths are read from EAX and EDX.  The pattern lists are
+// empty; the PCMPESTRM128REG/MEM pseudos are expanded into these.
+//
+// The opcode byte is 0x60 (66 0F 3A 60 /r ib); 0x62 is pcmpistrm.
+let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
+def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
+                           (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+                    "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}",
+                    []>, OpSize;
+def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
+                           (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+                    "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}",
+                    []>, OpSize;
+}
+
+// SS42AI_pcmpistri - Defines the register ("rr") and memory ("rm") forms of
+// pcmpistri (opcode 0x63) matching the intrinsic IntId128.  The pattern
+// models the intrinsic's result as being produced in ECX, with EFLAGS as an
+// additional implicit result; there is no explicit output operand.
+let Defs = [ECX, EFLAGS] in {
+  multiclass SS42AI_pcmpistri<Intrinsic IntId128> {
+    def rr : SS42AI<0x63, MRMSrcReg, (outs),
+               (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+               "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}",
+               [(set ECX,
+                  (IntId128 VR128:$src1, VR128:$src2, imm:$src3)),
+                (implicit EFLAGS)]>,
+               OpSize;
+    def rm : SS42AI<0x63, MRMSrcMem, (outs),
+               (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+               "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}",
+               [(set ECX,
+                 (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)),
+                (implicit EFLAGS)]>,
+               OpSize;
+  }
+}
+
+// One rr/rm pair per pcmpistri-family intrinsic.  All six instantiations
+// share the same opcode and assembly string and differ only in the
+// intrinsic they match.
+// NOTE(review): every pattern takes the intrinsic's value from ECX.  The
+// a/c/o/s/z variants presumably return a condition derived from EFLAGS
+// rather than the index left in ECX -- confirm the intended semantics.
+defm PCMPISTRI  : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128>;
+defm PCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128>;
+defm PCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128>;
+defm PCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128>;
+defm PCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128>;
+defm PCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128>;
+
+// SS42AI_pcmpestri - Defines the register ("rr") and memory ("rm") forms of
+// pcmpestri (opcode 0x61) matching the intrinsic IntId128.  The intrinsic's
+// second and fourth operands are matched as the physical registers EAX and
+// EDX (listed in Uses); the result is modelled as being produced in ECX,
+// with EFLAGS as an additional implicit result.
+let Defs = [ECX, EFLAGS] in {
+let Uses = [EAX, EDX] in {
+  multiclass SS42AI_pcmpestri<Intrinsic IntId128> {
+    def rr : SS42AI<0x61, MRMSrcReg, (outs),
+               (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+               "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}",
+               [(set ECX,
+                  (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)),
+                (implicit EFLAGS)]>,
+               OpSize;
+    def rm : SS42AI<0x61, MRMSrcMem, (outs),
+               (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+               "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}",
+               [(set ECX,
+                 (IntId128 VR128:$src1, EAX, (load addr:$src3),
+                   EDX, imm:$src5)),
+                (implicit EFLAGS)]>,
+               OpSize;
+  }
+}
+}
+
+// One rr/rm pair per pcmpestri-family intrinsic.  All six instantiations
+// share the same opcode and assembly string and differ only in the
+// intrinsic they match.
+// NOTE(review): every pattern takes the intrinsic's value from ECX.  The
+// a/c/o/s/z variants presumably return a condition derived from EFLAGS
+// rather than the index left in ECX -- confirm the intended semantics.
+defm PCMPESTRI  : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128>;
+defm PCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128>;
+defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>;
+defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>;
+defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>;
+defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>;