Fix "the the" and similar typos.

[oota-llvm.git] / lib / Target / X86 / X86InstrInfo.td
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td

index 4d922a54ec2cd8f2f463beb5713d1e5dba9aaa82..f0b423923b4f225723d47993ccecf2ab577e4b71 100644 (file)
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -160,15 +160,21 @@ def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
  def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET, 
                          [SDNPHasChain,  SDNPOptInFlag]>;
  
-def X86add_flag  : SDNode<"X86ISD::ADD",  SDTBinaryArithWithFlags>;
+def X86add_flag  : SDNode<"X86ISD::ADD",  SDTBinaryArithWithFlags,
+                          [SDNPCommutative]>;
  def X86sub_flag  : SDNode<"X86ISD::SUB",  SDTBinaryArithWithFlags>;
-def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags>;
-def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags>;
+def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags,
+                          [SDNPCommutative]>;
+def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags,
+                          [SDNPCommutative]>;
  def X86inc_flag  : SDNode<"X86ISD::INC",  SDTUnaryArithWithFlags>;
  def X86dec_flag  : SDNode<"X86ISD::DEC",  SDTUnaryArithWithFlags>;
-def X86or_flag   : SDNode<"X86ISD::OR",   SDTBinaryArithWithFlags>;
-def X86xor_flag  : SDNode<"X86ISD::XOR",  SDTBinaryArithWithFlags>;
-def X86and_flag  : SDNode<"X86ISD::AND",  SDTBinaryArithWithFlags>;
+def X86or_flag   : SDNode<"X86ISD::OR",   SDTBinaryArithWithFlags,
+                          [SDNPCommutative]>;
+def X86xor_flag  : SDNode<"X86ISD::XOR",  SDTBinaryArithWithFlags,
+                          [SDNPCommutative]>;
+def X86and_flag  : SDNode<"X86ISD::AND",  SDTBinaryArithWithFlags,
+                          [SDNPCommutative]>;
  
  def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
  
@@ -176,10 +182,6 @@ def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
  // X86 Operand Definitions.
  //
  
-def i32imm_pcrel : Operand<i32> {
-  let PrintMethod = "print_pcrel_imm";
-}
-
  // A version of ptr_rc which excludes SP, ESP, and RSP. This is used for
  // the index operand of an address, to conform to x86 encoding restrictions.
  def ptr_rc_nosp : PointerLikeRegClass<1>;
@@ -190,6 +192,14 @@ def X86MemAsmOperand : AsmOperandClass {
    let Name = "Mem";
    let SuperClass = ?;
  }
+def X86AbsMemAsmOperand : AsmOperandClass {
+  let Name = "AbsMem";
+  let SuperClass = X86MemAsmOperand;
+}
+def X86NoSegMemAsmOperand : AsmOperandClass {
+  let Name = "NoSegMem";
+  let SuperClass = X86MemAsmOperand;
+}
  class X86MemOperand<string printMethod> : Operand<iPTR> {
    let PrintMethod = printMethod;
    let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
@@ -201,11 +211,6 @@ def opaque48mem : X86MemOperand<"printopaquemem">;
  def opaque80mem : X86MemOperand<"printopaquemem">;
  def opaque512mem : X86MemOperand<"printopaquemem">;
  
-def offset8 : Operand<i64>  { let PrintMethod = "print_pcrel_imm"; }
-def offset16 : Operand<i64> { let PrintMethod = "print_pcrel_imm"; }
-def offset32 : Operand<i64> { let PrintMethod = "print_pcrel_imm"; }
-def offset64 : Operand<i64> { let PrintMethod = "print_pcrel_imm"; }
-
  def i8mem   : X86MemOperand<"printi8mem">;
  def i16mem  : X86MemOperand<"printi16mem">;
  def i32mem  : X86MemOperand<"printi32mem">;
@@ -229,7 +234,22 @@ def i8mem_NOREX : Operand<i64> {
  def lea32mem : Operand<i32> {
    let PrintMethod = "printlea32mem";
    let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm);
-  let ParserMatchClass = X86MemAsmOperand;
+  let ParserMatchClass = X86NoSegMemAsmOperand;
+}
+
+let ParserMatchClass = X86AbsMemAsmOperand,
+    PrintMethod = "print_pcrel_imm" in {
+def i32imm_pcrel : Operand<i32>;
+
+def offset8 : Operand<i64>;
+def offset16 : Operand<i64>;
+def offset32 : Operand<i64>;
+def offset64 : Operand<i64>;
+
+// Branch targets have OtherVT type and print as pc-relative values.
+def brtarget : Operand<OtherVT>;
+def brtarget8 : Operand<OtherVT>;
+
  }
  
  def SSECC : Operand<i8> {
@@ -251,15 +271,6 @@ def i32i8imm  : Operand<i32> {
    let ParserMatchClass = ImmSExt8AsmOperand;
  }
  
-// Branch targets have OtherVT type and print as pc-relative values.
-def brtarget : Operand<OtherVT> {
-  let PrintMethod = "print_pcrel_imm";
-}
-
-def brtarget8 : Operand<OtherVT> {
-  let PrintMethod = "print_pcrel_imm";
-}
-
  //===----------------------------------------------------------------------===//
  // X86 Complex Pattern Definitions.
  //
@@ -487,6 +498,21 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
    return N->hasOneUse();
  }]>;
  
+// Treat an 'or' node is as an 'add' if the or'ed bits are known to be zero.
+def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+    return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
+  else {
+    unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+    APInt Mask = APInt::getAllOnesValue(BitWidth);
+    APInt KnownZero0, KnownOne0;
+    CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
+    APInt KnownZero1, KnownOne1;
+    CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
+    return (~KnownZero0 & ~KnownZero1) == 0;
+  }
+}]>;
+
  // 'shld' and 'shrd' instruction patterns. Note that even though these have
  // the srl and shl in their patterns, the C++ code must still check for them,
  // because predicates are tested before children nodes are explored.
@@ -736,14 +762,15 @@ def TCRETURNri : I<0, Pseudo, (outs),
                   []>;
  
  let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-  def TAILJMPd : IBr<0xE9, (ins i32imm_pcrel:$dst), "jmp\t$dst  # TAILCALL",
+  def TAILJMPd : IBr<0xE9, (ins i32imm_pcrel:$dst, variable_ops),
+                 "jmp\t$dst  # TAILCALL",
                   []>;
  let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-  def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32:$dst), 
+  def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32:$dst, variable_ops), 
                     "jmp{l}\t{*}$dst  # TAILCALL",
                   []>;     
  let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-  def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem:$dst),
+  def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem:$dst, variable_ops),
                     "jmp\t{*}$dst  # TAILCALL", []>;
  
  //===----------------------------------------------------------------------===//
@@ -856,7 +883,7 @@ def LEA32r   : I<0x8D, MRMSrcMem,
                   "lea{l}\t{$src|$dst}, {$dst|$src}",
                   [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>;
  
-let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI] in {
+let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
  def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
                    [(X86rep_movs i8)]>, REP;
  def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
@@ -865,16 +892,31 @@ def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
                    [(X86rep_movs i32)]>, REP;
  }
  
-let Defs = [ECX,EDI], Uses = [AL,ECX,EDI] in
+// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI
+let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in {
+def MOVSB : I<0xA4, RawFrm, (outs), (ins), "{movsb}", []>;
+def MOVSW : I<0xA5, RawFrm, (outs), (ins), "{movsw}", []>, OpSize;
+def MOVSD : I<0xA5, RawFrm, (outs), (ins), "{movsl|movsd}", []>;
+}
+
+let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in
  def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
                    [(X86rep_stos i8)]>, REP;
-let Defs = [ECX,EDI], Uses = [AX,ECX,EDI] in
+let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in
  def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
                    [(X86rep_stos i16)]>, REP, OpSize;
-let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI] in
+let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in
  def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
                    [(X86rep_stos i32)]>, REP;
  
+// These uses the DF flag in the EFLAGS register to inc or dec EDI and ESI
+let Defs = [EDI], Uses = [AL,EDI,EFLAGS] in
+def STOSB : I<0xAA, RawFrm, (outs), (ins), "{stosb}", []>;
+let Defs = [EDI], Uses = [AX,EDI,EFLAGS] in
+def STOSW : I<0xAB, RawFrm, (outs), (ins), "{stosw}", []>, OpSize;
+let Defs = [EDI], Uses = [EAX,EDI,EFLAGS] in
+def STOSD : I<0xAB, RawFrm, (outs), (ins), "{stosl|stosd}", []>;
+
  def SCAS8 : I<0xAE, RawFrm, (outs), (ins), "scas{b}", []>;
  def SCAS16 : I<0xAF, RawFrm, (outs), (ins), "scas{w}", []>, OpSize;
  def SCAS32 : I<0xAF, RawFrm, (outs), (ins), "scas{l}", []>;
@@ -975,6 +1017,7 @@ def MOV32ri : Ii32<0xB8, AddRegFrm, (outs GR32:$dst), (ins i32imm:$src),
                     "mov{l}\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, imm:$src)]>;
  }
+
  def MOV8mi  : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src),
                     "mov{b}\t{$src, $dst|$dst, $src}",
                     [(store (i8 imm:$src), addr:$dst)]>;
@@ -2985,8 +3028,8 @@ let isTwoAddress = 0 in {
    def SBB32mr  : I<0x19, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2), 
                     "sbb{l}\t{$src2, $dst|$dst, $src2}",
                     [(store (sube (load addr:$dst), GR32:$src2), addr:$dst)]>;
-  def SBB8mi  : Ii32<0x80, MRM3m, (outs), (ins i8mem:$dst, i8imm:$src2), 
-                      "sbb{b}\t{$src2, $dst|$dst, $src2}",
+  def SBB8mi  : Ii8<0x80, MRM3m, (outs), (ins i8mem:$dst, i8imm:$src2), 
+                    "sbb{b}\t{$src2, $dst|$dst, $src2}",
                     [(store (sube (loadi8 addr:$dst), imm:$src2), addr:$dst)]>;
    def SBB16mi  : Ii16<0x81, MRM3m, (outs), (ins i16mem:$dst, i16imm:$src2), 
                        "sbb{w}\t{$src2, $dst|$dst, $src2}",
@@ -3213,17 +3256,18 @@ def LAHF     : I<0x9F, RawFrm, (outs),  (ins), "lahf", []>;  // AH = flags
  
  let Uses = [EFLAGS] in {
  // Use sbb to materialize carry bit.
-
  let Defs = [EFLAGS], isCodeGenOnly = 1 in {
-def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins),
-                 "sbb{b}\t$dst, $dst",
+// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
+// However, Pat<> can't replicate the destination reg into the inputs of the
+// result.
+// FIXME: Change these to have encoding Pseudo when X86MCCodeEmitter replaces
+// X86CodeEmitter.
+def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), "",
                   [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
-def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins),
-                  "sbb{w}\t$dst, $dst",
+def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "",
                   [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>,
                  OpSize;
-def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins),
-                  "sbb{l}\t$dst, $dst",
+def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "",
                   [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
  } // isCodeGenOnly
  
@@ -3660,7 +3704,7 @@ def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
                     "movz{wl|x}\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB;
  
-// These are the same as the regular regular MOVZX32rr8 and MOVZX32rm8
+// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
  // except that they use GR32_NOREX for the output operand register class
  // instead of GR32. This allows them to operate on h registers on x86-64.
  def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
@@ -3695,23 +3739,26 @@ let neverHasSideEffects = 1 in {
  
  // Alias instructions that map movr0 to xor.
  // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
+// FIXME: Set encoding to pseudo.
  let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
      isCodeGenOnly = 1 in {
-def MOV8r0   : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins),
-                 "xor{b}\t$dst, $dst",
+def MOV8r0   : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
                   [(set GR8:$dst, 0)]>;
+
+// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
+// encoding and avoids a partial-register update sometimes, but doing so
+// at isel time interferes with rematerialization in the current register
+// allocator. For now, this is rewritten when the instruction is lowered
+// to an MCInst.
+def MOV16r0   : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
+                 "",
+                 [(set GR16:$dst, 0)]>, OpSize;
                   
-def MOV32r0  : I<0x31, MRMInitReg, (outs GR32:$dst), (ins),
-                 "xor{l}\t$dst, $dst",
+// FIXME: Set encoding to pseudo.
+def MOV32r0  : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
                   [(set GR32:$dst, 0)]>;
  }
  
-// Use xorl instead of xorw since we don't care about the high 16 bits,
-// it's smaller, and it avoids a partial-register update.
-let AddedComplexity = 1 in
-def : Pat<(i16 0),
-          (EXTRACT_SUBREG (MOV32r0), x86_subreg_16bit)>;
-
  //===----------------------------------------------------------------------===//
  // Thread Local Storage Instructions
  //
@@ -3792,7 +3839,7 @@ def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap),
                 [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK;
  }
  let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in {
-def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i32mem:$ptr),
+def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
                 "lock\n\t"
                 "cmpxchg8b\t$ptr",
                 [(X86cas8 addr:$ptr)]>, TB, LOCK;
@@ -3858,6 +3905,7 @@ def CMPXCHG16rm  : I<0xB1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
  def CMPXCHG32rm  : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                       "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB;
  
+let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in
  def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
                    "cmpxchg8b\t$dst", []>, TB;
  
@@ -4130,6 +4178,26 @@ def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src),
  def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
                  "lldt{w}\t$src", []>, TB;
                  
+// Lock instruction prefix
+def LOCK_PREFIX : I<0xF0, RawFrm, (outs),  (ins), "lock", []>;
+
+// Repeat string operation instruction prefixes
+// These uses the DF flag in the EFLAGS register to inc or dec ECX
+let Defs = [ECX], Uses = [ECX,EFLAGS] in {
+// Repeat (used with INS, OUTS, MOVS, LODS and STOS)
+def REP_PREFIX : I<0xF3, RawFrm, (outs),  (ins), "rep", []>;
+// Repeat while not equal (used with CMPS and SCAS)
+def REPNE_PREFIX : I<0xF2, RawFrm, (outs),  (ins), "repne", []>;
+}
+
+// Segment override instruction prefixes
+def CS_PREFIX : I<0x2E, RawFrm, (outs),  (ins), "cs", []>;
+def SS_PREFIX : I<0x36, RawFrm, (outs),  (ins), "ss", []>;
+def DS_PREFIX : I<0x3E, RawFrm, (outs),  (ins), "ds", []>;
+def ES_PREFIX : I<0x26, RawFrm, (outs),  (ins), "es", []>;
+def FS_PREFIX : I<0x64, RawFrm, (outs),  (ins), "fs", []>;
+def GS_PREFIX : I<0x65, RawFrm, (outs),  (ins), "gs", []>;
+
  // String manipulation instructions
  
  def LODSB : I<0xAC, RawFrm, (outs), (ins), "lodsb", []>;
@@ -4466,7 +4534,7 @@ def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
            (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
                            x86_subreg_8bit_hi)>,
        Requires<[In32BitMode]>;
-def : Pat<(srl_su GR16:$src, (i8 8)),
+def : Pat<(srl GR16:$src, (i8 8)),
            (EXTRACT_SUBREG
              (MOVZX32rr8
                (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
@@ -4640,6 +4708,28 @@ def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
  def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
            (SETB_C32r)>;
  
+// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
+let AddedComplexity = 5 in { // Try this before the selecting to OR
+def : Pat<(parallel (or_is_add GR16:$src1, imm:$src2),
+                    (implicit EFLAGS)),
+          (ADD16ri GR16:$src1, imm:$src2)>;
+def : Pat<(parallel (or_is_add GR32:$src1, imm:$src2),
+                    (implicit EFLAGS)),
+          (ADD32ri GR32:$src1, imm:$src2)>;
+def : Pat<(parallel (or_is_add GR16:$src1, i16immSExt8:$src2),
+                    (implicit EFLAGS)),
+          (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
+def : Pat<(parallel (or_is_add GR32:$src1, i32immSExt8:$src2),
+                    (implicit EFLAGS)),
+          (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
+def : Pat<(parallel (or_is_add GR16:$src1, GR16:$src2),
+                    (implicit EFLAGS)),
+          (ADD16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(parallel (or_is_add GR32:$src1, GR32:$src2),
+                    (implicit EFLAGS)),
+          (ADD32rr GR32:$src1, GR32:$src2)>;
+} // AddedComplexity
+
  //===----------------------------------------------------------------------===//
  // EFLAGS-defining Patterns
  //===----------------------------------------------------------------------===//
@@ -5133,6 +5223,12 @@ include "X86InstrFPStack.td"
  
  include "X86Instr64bit.td"
  
+//===----------------------------------------------------------------------===//
+// SIMD support (SSE, MMX and AVX)
+//===----------------------------------------------------------------------===//
+
+include "X86InstrFragmentsSIMD.td"
+
  //===----------------------------------------------------------------------===//
  // XMM Floating point support (requires SSE / SSE2)
  //===----------------------------------------------------------------------===//