From: Eric Christopher Date: Tue, 18 Aug 2009 22:50:32 +0000 (+0000) Subject: Implement sse4.2 string/text processing instructions: X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=b120ab4057fc66ce11ee1f108af9dbbeafa3fed9;p=oota-llvm.git Implement sse4.2 string/text processing instructions: Add patterns and instruction encoding information. Add custom lowering to deal with hardwired return register of uncertain type (xmm0). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@79377 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index cb5a74fa40f..91b48138f44 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -7595,6 +7595,43 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, return nextMBB; } +MachineBasicBlock * +X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, + unsigned numArgs, bool memArg) const { + + MachineFunction *F = BB->getParent(); + DebugLoc dl = MI->getDebugLoc(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + unsigned Opc; + + if (memArg) { + Opc = numArgs == 3 ? + X86::PCMPISTRM128rm : + X86::PCMPESTRM128rm; + } else { + Opc = numArgs == 3 ? + X86::PCMPISTRM128rr : + X86::PCMPESTRM128rr; + } + + MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(Opc)); + + for (unsigned i = 0; i < numArgs; ++i) { + MachineOperand &Op = MI->getOperand(i+1); + + if (!(Op.isReg() && Op.isImplicit())) + MIB.addOperand(Op); + } + + BuildMI(BB, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg()) + .addReg(X86::XMM0); + + F->DeleteMachineInstr(MI); + + return BB; +} + MachineBasicBlock * X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( MachineInstr *MI, @@ -7804,6 +7841,17 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. 
return BB; } + // String/text processing lowering. + case X86::PCMPISTRM128REG: + return EmitPCMP(MI, BB, 3, false /* in-mem */); + case X86::PCMPISTRM128MEM: + return EmitPCMP(MI, BB, 3, true /* in-mem */); + case X86::PCMPESTRM128REG: + return EmitPCMP(MI, BB, 5, false /* in-mem */); + case X86::PCMPESTRM128MEM: + return EmitPCMP(MI, BB, 5, true /* in-mem */); + + // Atomic Lowering. case X86::ATOMAND32: return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr, X86::AND32ri, X86::MOV32rm, diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index f3f09f5ac93..1c612a13a25 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -693,6 +693,14 @@ namespace llvm { const Value *DstSV, uint64_t DstSVOff, const Value *SrcSV, uint64_t SrcSVOff); + /// Utility function to emit string processing sse4.2 instructions + /// that return in xmm0. + // This takes the instruction to expand, the associated machine basic + // block, the number of args, and whether the second arg is + // in memory. + MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB, + unsigned argNum, bool inMem) const; + /// Utility function to emit atomic bitwise operations (and, or, xor). // It takes the bitwise instruction to expand, the associated machine basic // block, and the associated X86 opcodes for reg/reg and reg/imm. diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 6f5941cf153..ddc8654359e 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -235,6 +235,11 @@ class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> : I<o, F, outs, ins, asm, pattern>, TF, Requires<[HasSSE42]>; +// SS42AI = SSE 4.2 instructions with TA prefix +class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : I<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE42]>; + // X86-64 Instruction templates...
// @@ -288,4 +293,3 @@ class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> patter : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX]>; class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>; - diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index f5965271ae2..966833f44ad 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3657,6 +3657,11 @@ def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movntdqa\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>; + +//===----------------------------------------------------------------------===// +// SSE4.2 Instructions +//===----------------------------------------------------------------------===// + /// SS42I_binop_rm_int - Simple SSE 4.2 binary operator let Constraints = "$src1 = $dst" in { multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr, @@ -3739,3 +3744,115 @@ let Constraints = "$src1 = $dst" in { (int_x86_sse42_crc32_64 GR64:$src1, GR64:$src2))]>, OpSize, REX_W; } + +// String/text processing instructions.
+let Defs = [EFLAGS], usesCustomDAGSchedInserter = 1 in { +def PCMPISTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "#PCMPISTRM128rr PSEUDO!", + [(set VR128:$dst, + (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2, + imm:$src3))]>, OpSize; +def PCMPISTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + "#PCMPISTRM128rm PSEUDO!", + [(set VR128:$dst, + (int_x86_sse42_pcmpistrm128 VR128:$src1, + (load addr:$src2), + imm:$src3))]>, OpSize; +} + +let Defs = [XMM0, EFLAGS] in { +def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", + []>, OpSize; +def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", + []>, OpSize; +} + +let Defs = [EFLAGS], Uses = [EAX, EDX], + usesCustomDAGSchedInserter = 1 in { +def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + "#PCMPESTRM128rr PSEUDO!", + [(set VR128:$dst, + (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX, + VR128:$src3, + EDX, imm:$src5))]>, OpSize; +def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + "#PCMPESTRM128rm PSEUDO!", + [(set VR128:$dst, + (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX, + (load addr:$src3), + EDX, imm:$src5))]>, OpSize; +} + +let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in { +def PCMPESTRM128rr : SS42AI<0x62, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", + []>, OpSize; +def PCMPESTRM128rm : SS42AI<0x62, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", + []>, OpSize; +} + +let Defs = [ECX, EFLAGS] in { + multiclass SS42AI_pcmpistri<Intrinsic IntId128> { + def
rr : SS42AI<0x63, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}", + [(set ECX, + (IntId128 VR128:$src1, VR128:$src2, imm:$src3)), + (implicit EFLAGS)]>, + OpSize; + def rm : SS42AI<0x63, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}", + [(set ECX, + (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)), + (implicit EFLAGS)]>, + OpSize; + } +} + +defm PCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128>; +defm PCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128>; +defm PCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128>; +defm PCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128>; +defm PCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128>; +defm PCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128>; + +let Defs = [ECX, EFLAGS] in { +let Uses = [EAX, EDX] in { + multiclass SS42AI_pcmpestri<Intrinsic IntId128> { + def rr : SS42AI<0x61, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}", + [(set ECX, + (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)), + (implicit EFLAGS)]>, + OpSize; + def rm : SS42AI<0x61, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}", + [(set ECX, + (IntId128 VR128:$src1, EAX, (load addr:$src3), + EDX, imm:$src5)), + (implicit EFLAGS)]>, + OpSize; + } +} +} + +defm PCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128>; +defm PCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128>; +defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>; +defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>; +defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>; +defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>;