lib/Target/X86/X86InstrCompiler.td

   1 //===- X86InstrCompiler.td - Compiler Pseudos and Patterns -*- tablegen -*-===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file describes the various pseudo instructions used by the compiler,
  11 // as well as Pat patterns used during instruction selection.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 // PIC base construction.  This expands to code that looks like this:
  16 //     call  $next_inst
  17 //     popl %destreg
  18 let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in
  19   def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label),
  20                       "", []>;
  21
  22
  23 // ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into
  24 // a stack adjustment and the codegen must know that they may modify the stack
  25 // pointer before prolog-epilog rewriting occurs.
  26 // Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
  27 // sub / add which can clobber EFLAGS.
  28 let Defs = [ESP, EFLAGS], Uses = [ESP] in {
  29 def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt),
  30                            "#ADJCALLSTACKDOWN",
  31                            [(X86callseq_start timm:$amt)]>,
  32                           Requires<[In32BitMode]>;
  33 def ADJCALLSTACKUP32   : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
  34                            "#ADJCALLSTACKUP",
  35                            [(X86callseq_end timm:$amt1, timm:$amt2)]>,
  36                           Requires<[In32BitMode]>;
  37 }
  38
  39 // ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
  40 // a stack adjustment and the codegen must know that they may modify the stack
  41 // pointer before prolog-epilog rewriting occurs.
  42 // Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
  43 // sub / add which can clobber EFLAGS.
  44 let Defs = [RSP, EFLAGS], Uses = [RSP] in {
  45 def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt),
  46                            "#ADJCALLSTACKDOWN",
  47                            [(X86callseq_start timm:$amt)]>,
  48                           Requires<[In64BitMode]>;
  49 def ADJCALLSTACKUP64   : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
  50                            "#ADJCALLSTACKUP",
  51                            [(X86callseq_end timm:$amt1, timm:$amt2)]>,
  52                           Requires<[In64BitMode]>;
  53 }
  54
  55
  56
  57 // x86-64 va_start lowering magic.
  58 let usesCustomInserter = 1 in {
  59 def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
  60                               (outs),
  61                               (ins GR8:$al,
  62                                    i64imm:$regsavefi, i64imm:$offset,
  63                                    variable_ops),
  64                               "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
  65                               [(X86vastart_save_xmm_regs GR8:$al,
  66                                                          imm:$regsavefi,
  67                                                          imm:$offset)]>;
  68
  69 // Dynamic stack allocation yields an _alloca call for Cygwin/Mingw targets.
  70 // Calls to _alloca are needed to probe the stack when allocating more than 4k
  71 // bytes in one go. Touching the stack at 4K increments is necessary to ensure
  72 // that the guard pages used by the OS virtual memory manager are allocated in
  73 // the correct sequence.
  74 // The main point of having a separate instruction is the extra effects that
  75 // ordinary calls do not model, such as the stack pointer change.
  76
  77 let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
  78   def MINGW_ALLOCA : I<0, Pseudo, (outs), (ins),
  79                        "# dynamic stack allocation",
  80                        [(X86MingwAlloca)]>;
  81 }
  82
  83
  84
  85 //===----------------------------------------------------------------------===//
  86 // EH Pseudo Instructions
  87 //
  88 let isTerminator = 1, isReturn = 1, isBarrier = 1,
  89     hasCtrlDep = 1, isCodeGenOnly = 1 in {
  90 def EH_RETURN   : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
  91                     "ret\t#eh_return, addr: $addr",
  92                     [(X86ehret GR32:$addr)]>;
  93
  94 }
  95
  96 let isTerminator = 1, isReturn = 1, isBarrier = 1,
  97     hasCtrlDep = 1, isCodeGenOnly = 1 in {
  98 def EH_RETURN64   : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
  99                      "ret\t#eh_return, addr: $addr",
 100                      [(X86ehret GR64:$addr)]>;
 101
 102 }
 103
 104 //===----------------------------------------------------------------------===//
 105 // Alias Instructions
 106 //===----------------------------------------------------------------------===//
 107
 108 // Alias instructions that map movr0 to xor.
 109 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
 110 // FIXME: Set encoding to pseudo.
 111 let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
 112     isCodeGenOnly = 1 in {
 113 def MOV8r0   : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
 114                  [(set GR8:$dst, 0)]>;
 115
 116 // We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
 117 // encoding and avoids a partial-register update sometimes, but doing so
 118 // at isel time interferes with rematerialization in the current register
 119 // allocator. For now, this is rewritten when the instruction is lowered
 120 // to an MCInst.
 121 def MOV16r0   : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
 122                  "",
 123                  [(set GR16:$dst, 0)]>, OpSize;
 124
 125 // FIXME: Set encoding to pseudo.
 126 def MOV32r0  : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
 127                  [(set GR32:$dst, 0)]>;
 128 }
 129
 130 //===----------------------------------------------------------------------===//
 131 // Thread Local Storage Instructions
 132 //
 133
 134 // ELF TLS Support
 135 // All calls clobber the non-callee saved registers. ESP is marked as
 136 // a use to prevent stack-pointer assignments that appear immediately
 137 // before calls from potentially appearing dead.
 138 let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
 139             MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
 140             XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
 141             XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
 142     Uses = [ESP] in
 143 def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
 144                   "leal\t$sym, %eax; "
 145                   "call\t___tls_get_addr@PLT",
 146                   [(X86tlsaddr tls32addr:$sym)]>,
 147                   Requires<[In32BitMode]>;
 148
 149 // All calls clobber the non-callee saved registers. RSP is marked as
 150 // a use to prevent stack-pointer assignments that appear immediately
 151 // before calls from potentially appearing dead.
 152 let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
 153             FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
 154             MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
 155             XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
 156             XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
 157     Uses = [RSP] in
 158 def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
 159                    ".byte\t0x66; "
 160                    "leaq\t$sym(%rip), %rdi; "
 161                    ".word\t0x6666; "
 162                    "rex64; "
 163                    "call\t__tls_get_addr@PLT",
 164                   [(X86tlsaddr tls64addr:$sym)]>,
 165                   Requires<[In64BitMode]>;
 166
 167 // Darwin TLS Support
 168 // For i386, the address of the thunk is passed on the stack, on return the
 169 // address of the variable is in %eax.  %ecx is trashed during the function
 170 // call.  All other registers are preserved.
 171 let Defs = [EAX, ECX],
 172     Uses = [ESP],
 173     usesCustomInserter = 1 in
 174 def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
 175                 "# TLSCall_32",
 176                 [(X86TLSCall addr:$sym)]>,
 177                 Requires<[In32BitMode]>;
 178
 179 // For x86_64, the address of the thunk is passed in %rdi, on return
 180 // the address of the variable is in %rax.  All other registers are preserved.
 181 let Defs = [RAX],
 182     Uses = [RDI],
 183     usesCustomInserter = 1 in
 184 def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
 185                   "# TLSCall_64",
 186                   [(X86TLSCall addr:$sym)]>,
 187                   Requires<[In64BitMode]>;
 188
 189 //===----------------------------------------------------------------------===//
 190 // Non-Instruction Patterns
 191 //===----------------------------------------------------------------------===//
 192
 193 // ConstantPool GlobalAddress, ExternalSymbol, and JumpTable
 194 def : Pat<(i32 (X86Wrapper tconstpool  :$dst)), (MOV32ri tconstpool  :$dst)>;
 195 def : Pat<(i32 (X86Wrapper tjumptable  :$dst)), (MOV32ri tjumptable  :$dst)>;
 196 def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>;
 197 def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
 198 def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
 199 def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>;
 200
 201 def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),
 202           (ADD32ri GR32:$src1, tconstpool:$src2)>;
 203 def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)),
 204           (ADD32ri GR32:$src1, tjumptable:$src2)>;
 205 def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),
 206           (ADD32ri GR32:$src1, tglobaladdr:$src2)>;
 207 def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
 208           (ADD32ri GR32:$src1, texternalsym:$src2)>;
 209 def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)),
 210           (ADD32ri GR32:$src1, tblockaddress:$src2)>;
 211
 212 def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
 213           (MOV32mi addr:$dst, tglobaladdr:$src)>;
 214 def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
 215           (MOV32mi addr:$dst, texternalsym:$src)>;
 216 def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
 217           (MOV32mi addr:$dst, tblockaddress:$src)>;
 218
 219
 220
 221 // ConstantPool GlobalAddress, ExternalSymbol, and JumpTable when not in small
 222 // code model mode, should use 'movabs'.  FIXME: This is really a hack, the
 223 //  'movabs' predicate should handle this sort of thing.
 224 def : Pat<(i64 (X86Wrapper tconstpool  :$dst)),
 225           (MOV64ri tconstpool  :$dst)>, Requires<[FarData]>;
 226 def : Pat<(i64 (X86Wrapper tjumptable  :$dst)),
 227           (MOV64ri tjumptable  :$dst)>, Requires<[FarData]>;
 228 def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
 229           (MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;
 230 def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
 231           (MOV64ri texternalsym:$dst)>, Requires<[FarData]>;
 232 def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
 233           (MOV64ri tblockaddress:$dst)>, Requires<[FarData]>;
 234
 235 // In static codegen with small code model, we can get the address of a label
 236 // into a register with 'movl'.  FIXME: This is a hack, the 'imm' predicate of
 237 // the MOV64ri64i32 should accept these.
 238 def : Pat<(i64 (X86Wrapper tconstpool  :$dst)),
 239           (MOV64ri64i32 tconstpool  :$dst)>, Requires<[SmallCode]>;
 240 def : Pat<(i64 (X86Wrapper tjumptable  :$dst)),
 241           (MOV64ri64i32 tjumptable  :$dst)>, Requires<[SmallCode]>;
 242 def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
 243           (MOV64ri64i32 tglobaladdr :$dst)>, Requires<[SmallCode]>;
 244 def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
 245           (MOV64ri64i32 texternalsym:$dst)>, Requires<[SmallCode]>;
 246 def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
 247           (MOV64ri64i32 tblockaddress:$dst)>, Requires<[SmallCode]>;
 248
 249 // In kernel code model, we can get the address of a label
 250 // into a register with 'movq'.  FIXME: This is a hack, the 'imm' predicate of
 251 // the MOV64ri32 should accept these.
 252 def : Pat<(i64 (X86Wrapper tconstpool  :$dst)),
 253           (MOV64ri32 tconstpool  :$dst)>, Requires<[KernelCode]>;
 254 def : Pat<(i64 (X86Wrapper tjumptable  :$dst)),
 255           (MOV64ri32 tjumptable  :$dst)>, Requires<[KernelCode]>;
 256 def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
 257           (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
 258 def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
 259           (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
 260 def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
 261           (MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>;
 262
 263 // If we have small model and -static mode, it is safe to store global addresses
 264 // directly as immediates.  FIXME: This is really a hack, the 'imm' predicate
 265 // for MOV64mi32 should handle this sort of thing.
 266 def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
 267           (MOV64mi32 addr:$dst, tconstpool:$src)>,
 268           Requires<[NearData, IsStatic]>;
 269 def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),
 270           (MOV64mi32 addr:$dst, tjumptable:$src)>,
 271           Requires<[NearData, IsStatic]>;
 272 def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
 273           (MOV64mi32 addr:$dst, tglobaladdr:$src)>,
 274           Requires<[NearData, IsStatic]>;
 275 def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
 276           (MOV64mi32 addr:$dst, texternalsym:$src)>,
 277           Requires<[NearData, IsStatic]>;
 278 def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),
 279           (MOV64mi32 addr:$dst, tblockaddress:$src)>,
 280           Requires<[NearData, IsStatic]>;
 281
 282
 283
 284 // Calls
 285
 286 // tls has some funny stuff here...
 287 // This corresponds to movabs $foo@tpoff, %rax
 288 def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
 289           (MOV64ri tglobaltlsaddr :$dst)>;
 290 // This corresponds to add $foo@tpoff, %rax
 291 def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
 292           (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
 293 // This corresponds to mov foo@tpoff(%rbx), %eax
 294 def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))),
 295           (MOV64rm tglobaltlsaddr :$dst)>;
 296
 297
 298 // Direct PC relative function call for small code model. 32-bit displacement
 299 // sign extended to 64-bit.
 300 def : Pat<(X86call (i64 tglobaladdr:$dst)),
 301           (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[NotWin64]>;
 302 def : Pat<(X86call (i64 texternalsym:$dst)),
 303           (CALL64pcrel32 texternalsym:$dst)>, Requires<[NotWin64]>;
 304
 305 def : Pat<(X86call (i64 tglobaladdr:$dst)),
 306           (WINCALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsWin64]>;
 307 def : Pat<(X86call (i64 texternalsym:$dst)),
 308           (WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>;
 309
 310 // Tail calls: select the TCRETURN* form matching the callee operand kind.
 311 def : Pat<(X86tcret GR32_TC:$dst, imm:$off),
 312           (TCRETURNri GR32_TC:$dst, imm:$off)>,
 313           Requires<[In32BitMode]>;
 314
 315 // FIXME: This is disabled for 32-bit PIC mode because the global base
 316 // register which is part of the address mode may be assigned a
 317 // callee-saved register.
 318 def : Pat<(X86tcret (load addr:$dst), imm:$off),
 319           (TCRETURNmi addr:$dst, imm:$off)>,
 320           Requires<[In32BitMode, IsNotPIC]>;
 321
 322 def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
 323           (TCRETURNdi tglobaladdr:$dst, imm:$off)>,
 324           Requires<[In32BitMode]>;
 325
 326 def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
 327           (TCRETURNdi texternalsym:$dst, imm:$off)>,
 328           Requires<[In32BitMode]>;
 329
 330 def : Pat<(X86tcret GR64_TC:$dst, imm:$off),
 331           (TCRETURNri64 GR64_TC:$dst, imm:$off)>,
 332           Requires<[In64BitMode]>;
 333
 334 def : Pat<(X86tcret (load addr:$dst), imm:$off),
 335           (TCRETURNmi64 addr:$dst, imm:$off)>,
 336           Requires<[In64BitMode]>;
 337
 338 def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
 339           (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
 340           Requires<[In64BitMode]>;
 341
 342 def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
 343           (TCRETURNdi64 texternalsym:$dst, imm:$off)>,
 344           Requires<[In64BitMode]>;
 345
 346 // Normal calls, with various flavors of addresses.
 347 def : Pat<(X86call (i32 tglobaladdr:$dst)),
 348           (CALLpcrel32 tglobaladdr:$dst)>;
 349 def : Pat<(X86call (i32 texternalsym:$dst)),
 350           (CALLpcrel32 texternalsym:$dst)>;
 351 def : Pat<(X86call (i32 imm:$dst)),
 352           (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>;
 353
 354 // X86 specific add/sub which produce a flag (addc/subc selection patterns).
 355 def : Pat<(addc GR32:$src1, GR32:$src2),
 356           (ADD32rr GR32:$src1, GR32:$src2)>;
 357 def : Pat<(addc GR32:$src1, (load addr:$src2)),
 358           (ADD32rm GR32:$src1, addr:$src2)>;
 359 def : Pat<(addc GR32:$src1, imm:$src2),
 360           (ADD32ri GR32:$src1, imm:$src2)>;
 361 def : Pat<(addc GR32:$src1, i32immSExt8:$src2),
 362           (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
 363
 364 def : Pat<(addc GR64:$src1, GR64:$src2),
 365           (ADD64rr GR64:$src1, GR64:$src2)>;
 366 def : Pat<(addc GR64:$src1, (load addr:$src2)),
 367           (ADD64rm GR64:$src1, addr:$src2)>;
 368 def : Pat<(addc GR64:$src1, i64immSExt8:$src2),
 369           (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
 370 def : Pat<(addc GR64:$src1, i64immSExt32:$src2),
 371           (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
 372
 373 def : Pat<(subc GR32:$src1, GR32:$src2),
 374           (SUB32rr GR32:$src1, GR32:$src2)>;
 375 def : Pat<(subc GR32:$src1, (load addr:$src2)),
 376           (SUB32rm GR32:$src1, addr:$src2)>;
 377 def : Pat<(subc GR32:$src1, imm:$src2),
 378           (SUB32ri GR32:$src1, imm:$src2)>;
 379 def : Pat<(subc GR32:$src1, i32immSExt8:$src2),
 380           (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
 381
 382 def : Pat<(subc GR64:$src1, GR64:$src2),
 383           (SUB64rr GR64:$src1, GR64:$src2)>;
 384 def : Pat<(subc GR64:$src1, (load addr:$src2)),
 385           (SUB64rm GR64:$src1, addr:$src2)>;
 386 def : Pat<(subc GR64:$src1, i64immSExt8:$src2),
 387           (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
 388 def : Pat<(subc GR64:$src1, i64immSExt32:$src2),
 389           (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
 390
 391 // Comparisons.
 392
 393 // TEST R,R is smaller than CMP R,0
 394 def : Pat<(X86cmp GR8:$src1, 0),
 395           (TEST8rr GR8:$src1, GR8:$src1)>;
 396 def : Pat<(X86cmp GR16:$src1, 0),
 397           (TEST16rr GR16:$src1, GR16:$src1)>;
 398 def : Pat<(X86cmp GR32:$src1, 0),
 399           (TEST32rr GR32:$src1, GR32:$src1)>;
 400 def : Pat<(X86cmp GR64:$src1, 0),
 401           (TEST64rr GR64:$src1, GR64:$src1)>;
 402
 403 // Conditional moves with folded loads with operands swapped and conditions
 404 // inverted.
 405 def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_B, EFLAGS),
 406           (CMOVAE16rm GR16:$src2, addr:$src1)>;
 407 def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_B, EFLAGS),
 408           (CMOVAE32rm GR32:$src2, addr:$src1)>;
 409 def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_AE, EFLAGS),
 410           (CMOVB16rm GR16:$src2, addr:$src1)>;
 411 def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_AE, EFLAGS),
 412           (CMOVB32rm GR32:$src2, addr:$src1)>;
 413 def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_E, EFLAGS),
 414           (CMOVNE16rm GR16:$src2, addr:$src1)>;
 415 def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_E, EFLAGS),
 416           (CMOVNE32rm GR32:$src2, addr:$src1)>;
 417 def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NE, EFLAGS),
 418           (CMOVE16rm GR16:$src2, addr:$src1)>;
 419 def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NE, EFLAGS),
 420           (CMOVE32rm GR32:$src2, addr:$src1)>;
 421 def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_BE, EFLAGS),
 422           (CMOVA16rm GR16:$src2, addr:$src1)>;
 423 def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_BE, EFLAGS),
 424           (CMOVA32rm GR32:$src2, addr:$src1)>;
 425 def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_A, EFLAGS),
 426           (CMOVBE16rm GR16:$src2, addr:$src1)>;
 427 def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_A, EFLAGS),
 428           (CMOVBE32rm GR32:$src2, addr:$src1)>;
 429 def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_L, EFLAGS),
 430           (CMOVGE16rm GR16:$src2, addr:$src1)>;
 431 def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_L, EFLAGS),
 432           (CMOVGE32rm GR32:$src2, addr:$src1)>;
 433 def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_GE, EFLAGS),
 434           (CMOVL16rm GR16:$src2, addr:$src1)>;
 435 def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_GE, EFLAGS),
 436           (CMOVL32rm GR32:$src2, addr:$src1)>;
 437 def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_LE, EFLAGS),
 438           (CMOVG16rm GR16:$src2, addr:$src1)>;
 439 def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_LE, EFLAGS),
 440           (CMOVG32rm GR32:$src2, addr:$src1)>;
 441 def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_G, EFLAGS),
 442           (CMOVLE16rm GR16:$src2, addr:$src1)>;
 443 def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_G, EFLAGS),
 444           (CMOVLE32rm GR32:$src2, addr:$src1)>;
 445 def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_P, EFLAGS),
 446           (CMOVNP16rm GR16:$src2, addr:$src1)>;
 447 def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_P, EFLAGS),
 448           (CMOVNP32rm GR32:$src2, addr:$src1)>;
 449 def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NP, EFLAGS),
 450           (CMOVP16rm GR16:$src2, addr:$src1)>;
 451 def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NP, EFLAGS),
 452           (CMOVP32rm GR32:$src2, addr:$src1)>;
 453 def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_S, EFLAGS),
 454           (CMOVNS16rm GR16:$src2, addr:$src1)>;
 455 def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_S, EFLAGS),
 456           (CMOVNS32rm GR32:$src2, addr:$src1)>;
 457 def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NS, EFLAGS),
 458           (CMOVS16rm GR16:$src2, addr:$src1)>;
 459 def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NS, EFLAGS),
 460           (CMOVS32rm GR32:$src2, addr:$src1)>;
 461 def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_O, EFLAGS),
 462           (CMOVNO16rm GR16:$src2, addr:$src1)>;
 463 def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_O, EFLAGS),
 464           (CMOVNO32rm GR32:$src2, addr:$src1)>;
 465 def : Pat<(X86cmov (loadi16 addr:$src1), GR16:$src2, X86_COND_NO, EFLAGS),
 466           (CMOVO16rm GR16:$src2, addr:$src1)>;
 467 def : Pat<(X86cmov (loadi32 addr:$src1), GR32:$src2, X86_COND_NO, EFLAGS),
 468           (CMOVO32rm GR32:$src2, addr:$src1)>;
 469
 470 def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_B, EFLAGS),
 471           (CMOVAE64rm GR64:$src2, addr:$src1)>;
 472 def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_AE, EFLAGS),
 473           (CMOVB64rm GR64:$src2, addr:$src1)>;
 474 def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_E, EFLAGS),
 475           (CMOVNE64rm GR64:$src2, addr:$src1)>;
 476 def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NE, EFLAGS),
 477           (CMOVE64rm GR64:$src2, addr:$src1)>;
 478 def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_BE, EFLAGS),
 479           (CMOVA64rm GR64:$src2, addr:$src1)>;
 480 def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_A, EFLAGS),
 481           (CMOVBE64rm GR64:$src2, addr:$src1)>;
 482 def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_L, EFLAGS),
 483           (CMOVGE64rm GR64:$src2, addr:$src1)>;
 484 def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_GE, EFLAGS),
 485           (CMOVL64rm GR64:$src2, addr:$src1)>;
 486 def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_LE, EFLAGS),
 487           (CMOVG64rm GR64:$src2, addr:$src1)>;
 488 def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_G, EFLAGS),
 489           (CMOVLE64rm GR64:$src2, addr:$src1)>;
 490 def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_P, EFLAGS),
 491           (CMOVNP64rm GR64:$src2, addr:$src1)>;
 492 def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NP, EFLAGS),
 493           (CMOVP64rm GR64:$src2, addr:$src1)>;
 494 def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_S, EFLAGS),
 495           (CMOVNS64rm GR64:$src2, addr:$src1)>;
 496 def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NS, EFLAGS),
 497           (CMOVS64rm GR64:$src2, addr:$src1)>;
 498 def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_O, EFLAGS),
 499           (CMOVNO64rm GR64:$src2, addr:$src1)>;
 500 def : Pat<(X86cmov (loadi64 addr:$src1), GR64:$src2, X86_COND_NO, EFLAGS),
 501           (CMOVO64rm GR64:$src2, addr:$src1)>;
 502
 503
 504 // zextload bool -> zextload byte
 505 def : Pat<(zextloadi8i1  addr:$src), (MOV8rm     addr:$src)>;
 506 def : Pat<(zextloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
 507 def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
 508 def : Pat<(zextloadi64i1 addr:$src), (MOVZX64rm8 addr:$src)>;
 509
 510 // extload bool -> extload byte
 511 // When extloading from 16-bit and smaller memory locations into 64-bit
 512 // registers, use zero-extending loads so that the entire 64-bit register is
 513 // defined, avoiding partial-register updates.
 514
 515 def : Pat<(extloadi8i1 addr:$src),   (MOV8rm      addr:$src)>;
 516 def : Pat<(extloadi16i1 addr:$src),  (MOVZX16rm8  addr:$src)>;
 517 def : Pat<(extloadi32i1 addr:$src),  (MOVZX32rm8  addr:$src)>;
 518 def : Pat<(extloadi16i8 addr:$src),  (MOVZX16rm8  addr:$src)>;
 519 def : Pat<(extloadi32i8 addr:$src),  (MOVZX32rm8  addr:$src)>;
 520 def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
 521
 522 def : Pat<(extloadi64i1 addr:$src),  (MOVZX64rm8  addr:$src)>;
 523 def : Pat<(extloadi64i8 addr:$src),  (MOVZX64rm8  addr:$src)>;
 524 def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
 525 // For other extloads, use subregs, since the high contents of the register are
 526 // defined after an extload.
 527 def : Pat<(extloadi64i32 addr:$src),
 528           (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src),
 529                          sub_32bit)>;
 530
 531 // anyext. Define these to do an explicit zero-extend to
 532 // avoid partial-register updates.
 533 def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8  GR8 :$src)>;
 534 def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8  GR8 :$src)>;
 535
 536 // Except for i16 -> i32, since isel expects i16 ops to be promoted to i32.
 537 def : Pat<(i32 (anyext GR16:$src)),
 538           (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;
 539
 540 def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8  GR8  :$src)>;
 541 def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>;
 542 def : Pat<(i64 (anyext GR32:$src)),
 543           (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
 544
 545 //===----------------------------------------------------------------------===//
 546 // Some peepholes
 547 //===----------------------------------------------------------------------===//
 548
 549 // Odd encoding trick: -128 fits into an 8-bit immediate field while
 550 // +128 doesn't, so in this special case use a sub instead of an add.
 551 def : Pat<(add GR16:$src1, 128),
 552           (SUB16ri8 GR16:$src1, -128)>;
 553 def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),
 554           (SUB16mi8 addr:$dst, -128)>;
 555
 556 def : Pat<(add GR32:$src1, 128),
 557           (SUB32ri8 GR32:$src1, -128)>;
 558 def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
 559           (SUB32mi8 addr:$dst, -128)>;
 560
 561 def : Pat<(add GR64:$src1, 128),
 562           (SUB64ri8 GR64:$src1, -128)>;
 563 def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),
 564           (SUB64mi8 addr:$dst, -128)>;
 565
 566 // The same trick applies for 32-bit immediate fields in 64-bit
 567 // instructions: +2^31 is not a sign-extended imm32, but -2^31 is.
 568 def : Pat<(add GR64:$src1, 0x0000000080000000),
 569           (SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
 570 def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst),
 571           (SUB64mi32 addr:$dst, 0xffffffff80000000)>;
 572
 573 // Use a 32-bit and with implicit zero-extension instead of a 64-bit and if it
 574 // has an immediate with at least 32 bits of leading zeros, to avoid needing to
 575 // materialize that immediate in a register first.
 576 def : Pat<(and GR64:$src, i64immZExt32:$imm),
 577           (SUBREG_TO_REG
 578             (i64 0),
 579             (AND32ri
 580               (EXTRACT_SUBREG GR64:$src, sub_32bit),
 581               (i32 (GetLo32XForm imm:$imm))),
 582             sub_32bit)>;
 583
 584
 585 // r & (2^16-1) ==> movz
 586 def : Pat<(and GR32:$src1, 0xffff),
 587           (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;
 588 // r & (2^8-1) ==> movz (32-bit mode: source is first copied to GR32_ABCD)
 589 def : Pat<(and GR32:$src1, 0xff),
 590           (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1,
 591                                                              GR32_ABCD)),
 592                                       sub_8bit))>,
 593       Requires<[In32BitMode]>;
 594 // r & (2^8-1) ==> movz (32-bit mode: source is first copied to GR16_ABCD)
 595 def : Pat<(and GR16:$src1, 0xff),
 596           (MOVZX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src1,
 597                                                              GR16_ABCD)),
 598                                       sub_8bit))>,
 599       Requires<[In32BitMode]>;
 600
 601 // r & (2^32-1) ==> movz
 602 def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
 603           (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
 604 // r & (2^16-1) ==> movz
 605 def : Pat<(and GR64:$src, 0xffff),
 606           (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit)))>;
 607 // r & (2^8-1) ==> movz
 608 def : Pat<(and GR64:$src, 0xff),
 609           (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit)))>;
 610 // r & (2^8-1) ==> movz (64-bit mode: sub_8bit is taken from GR32 directly)
 611 def : Pat<(and GR32:$src1, 0xff),
 612            (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,
 613       Requires<[In64BitMode]>;
 614 // r & (2^8-1) ==> movz (64-bit mode: sub_8bit is taken from GR16 directly)
 615 def : Pat<(and GR16:$src1, 0xff),
 616            (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, sub_8bit)))>,
 617       Requires<[In64BitMode]>;
 618
 619
 620 // sext_inreg patterns: select MOVSX on the matching subregister of the source
 621 def : Pat<(sext_inreg GR32:$src, i16),
 622           (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>;
 623 def : Pat<(sext_inreg GR32:$src, i8),
 624           (MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
 625                                                              GR32_ABCD)),
 626                                       sub_8bit))>,
 627       Requires<[In32BitMode]>;
 628 def : Pat<(sext_inreg GR16:$src, i8),
 629           (MOVSX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
 630                                                              GR16_ABCD)),
 631                                       sub_8bit))>,
 632       Requires<[In32BitMode]>;
 633
 634 def : Pat<(sext_inreg GR64:$src, i32),
 635           (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
 636 def : Pat<(sext_inreg GR64:$src, i16),
 637           (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>;
 638 def : Pat<(sext_inreg GR64:$src, i8),
 639           (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>;
 640 def : Pat<(sext_inreg GR32:$src, i8),
 641           (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>,
 642       Requires<[In64BitMode]>;
 643 def : Pat<(sext_inreg GR16:$src, i8),
 644           (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, sub_8bit)))>,
 645       Requires<[In64BitMode]>;
 646
 647
 648 // trunc patterns: a truncate is selected as a plain EXTRACT_SUBREG
 649 def : Pat<(i16 (trunc GR32:$src)),
 650           (EXTRACT_SUBREG GR32:$src, sub_16bit)>;
 651 def : Pat<(i8 (trunc GR32:$src)),
 652           (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
 653                           sub_8bit)>,
 654       Requires<[In32BitMode]>;
 655 def : Pat<(i8 (trunc GR16:$src)),
 656           (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
 657                           sub_8bit)>,
 658       Requires<[In32BitMode]>;
 659 def : Pat<(i32 (trunc GR64:$src)),
 660           (EXTRACT_SUBREG GR64:$src, sub_32bit)>;
 661 def : Pat<(i16 (trunc GR64:$src)),
 662           (EXTRACT_SUBREG GR64:$src, sub_16bit)>;
 663 def : Pat<(i8 (trunc GR64:$src)),
 664           (EXTRACT_SUBREG GR64:$src, sub_8bit)>;
 665 def : Pat<(i8 (trunc GR32:$src)),
 666           (EXTRACT_SUBREG GR32:$src, sub_8bit)>,
 667       Requires<[In64BitMode]>;
 668 def : Pat<(i8 (trunc GR16:$src)),
 669           (EXTRACT_SUBREG GR16:$src, sub_8bit)>,
 670       Requires<[In64BitMode]>;
 671
 672 // h-register tricks (32-bit mode): a shift right by 8 reads sub_8bit_hi
 673 def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
 674           (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
 675                           sub_8bit_hi)>,
 676       Requires<[In32BitMode]>;
 677 def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
 678           (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
 679                           sub_8bit_hi)>,
 680       Requires<[In32BitMode]>;
 681 def : Pat<(srl GR16:$src, (i8 8)),
 682           (EXTRACT_SUBREG
 683             (MOVZX32rr8
 684               (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
 685                               sub_8bit_hi)),
 686             sub_16bit)>,
 687       Requires<[In32BitMode]>;
 688 def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
 689           (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
 690                                                              GR16_ABCD)),
 691                                       sub_8bit_hi))>,
 692       Requires<[In32BitMode]>;
 693 def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
 694           (MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
 695                                                              GR16_ABCD)),
 696                                       sub_8bit_hi))>,
 697       Requires<[In32BitMode]>;
 698 def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
 699           (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
 700                                                              GR32_ABCD)),
 701                                       sub_8bit_hi))>,
 702       Requires<[In32BitMode]>;
 703 def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
 704           (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
 705                                                              GR32_ABCD)),
 706                                       sub_8bit_hi))>,
 707       Requires<[In32BitMode]>;
 708
 709 // h-register tricks for x86-64.
 710 // For now, be conservative on x86-64 and use an h-register extract only if the
 711 // value is immediately zero-extended or stored, which are somewhat common
 712 // cases. This uses the _NOREX instruction variants to prevent a register
 713 // requiring a REX prefix from being allocated in the same instruction as the
 714 // h register, as there's currently no way to describe this to the allocator.
 715
 716 // h-register extract and zero-extend (via MOVZX32_NOREXrr8).
 717 def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
 718           (SUBREG_TO_REG
 719             (i64 0),
 720             (MOVZX32_NOREXrr8
 721               (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
 722                               sub_8bit_hi)),
 723             sub_32bit)>;
 724 def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
 725           (MOVZX32_NOREXrr8
 726             (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
 727                             sub_8bit_hi))>,
 728       Requires<[In64BitMode]>;
 729 def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
 730           (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
 731                                                                    GR32_ABCD)),
 732                                              sub_8bit_hi))>,
 733       Requires<[In64BitMode]>;
 734 def : Pat<(srl GR16:$src, (i8 8)),
 735           (EXTRACT_SUBREG
 736             (MOVZX32_NOREXrr8
 737               (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
 738                               sub_8bit_hi)),
 739             sub_16bit)>,
 740       Requires<[In64BitMode]>;
 741 def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
 742           (MOVZX32_NOREXrr8
 743             (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
 744                             sub_8bit_hi))>,
 745       Requires<[In64BitMode]>;
 746 def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
 747           (MOVZX32_NOREXrr8
 748             (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
 749                             sub_8bit_hi))>,
 750       Requires<[In64BitMode]>;
 751 def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
 752           (SUBREG_TO_REG
 753             (i64 0),
 754             (MOVZX32_NOREXrr8
 755               (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
 756                               sub_8bit_hi)),
 757             sub_32bit)>;
 758 def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
 759           (SUBREG_TO_REG
 760             (i64 0),
 761             (MOVZX32_NOREXrr8
 762               (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
 763                               sub_8bit_hi)),
 764             sub_32bit)>;
 765
 766 // h-register extract and store (via MOV8mr_NOREX).
 767 def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
 768           (MOV8mr_NOREX
 769             addr:$dst,
 770             (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
 771                             sub_8bit_hi))>;
 772 def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
 773           (MOV8mr_NOREX
 774             addr:$dst,
 775             (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
 776                             sub_8bit_hi))>,
 777       Requires<[In64BitMode]>;
 778 def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
 779           (MOV8mr_NOREX
 780             addr:$dst,
 781             (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
 782                             sub_8bit_hi))>,
 783       Requires<[In64BitMode]>;
 784
 785
 786 // (shl x, 1) ==> (add x, x), for 8/16/32/64-bit registers
 787 def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr  GR8 :$src1, GR8 :$src1)>;
 788 def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
 789 def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
 790 def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
 791
 792 // (shl x (and y, 31)) ==> (shl x, y); likewise srl/sra, reg and load-op-store
 793 def : Pat<(shl GR8:$src1, (and CL, 31)),
 794           (SHL8rCL GR8:$src1)>;
 795 def : Pat<(shl GR16:$src1, (and CL, 31)),
 796           (SHL16rCL GR16:$src1)>;
 797 def : Pat<(shl GR32:$src1, (and CL, 31)),
 798           (SHL32rCL GR32:$src1)>;
 799 def : Pat<(store (shl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
 800           (SHL8mCL addr:$dst)>;
 801 def : Pat<(store (shl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
 802           (SHL16mCL addr:$dst)>;
 803 def : Pat<(store (shl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
 804           (SHL32mCL addr:$dst)>;
 805
 806 def : Pat<(srl GR8:$src1, (and CL, 31)),
 807           (SHR8rCL GR8:$src1)>;
 808 def : Pat<(srl GR16:$src1, (and CL, 31)),
 809           (SHR16rCL GR16:$src1)>;
 810 def : Pat<(srl GR32:$src1, (and CL, 31)),
 811           (SHR32rCL GR32:$src1)>;
 812 def : Pat<(store (srl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
 813           (SHR8mCL addr:$dst)>;
 814 def : Pat<(store (srl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
 815           (SHR16mCL addr:$dst)>;
 816 def : Pat<(store (srl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
 817           (SHR32mCL addr:$dst)>;
 818
 819 def : Pat<(sra GR8:$src1, (and CL, 31)),
 820           (SAR8rCL GR8:$src1)>;
 821 def : Pat<(sra GR16:$src1, (and CL, 31)),
 822           (SAR16rCL GR16:$src1)>;
 823 def : Pat<(sra GR32:$src1, (and CL, 31)),
 824           (SAR32rCL GR32:$src1)>;
 825 def : Pat<(store (sra (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
 826           (SAR8mCL addr:$dst)>;
 827 def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
 828           (SAR16mCL addr:$dst)>;
 829 def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
 830           (SAR32mCL addr:$dst)>;
 831
 832 // (shl x (and y, 63)) ==> (shl x, y); likewise srl/sra, for 64-bit operands
 833 def : Pat<(shl GR64:$src1, (and CL, 63)),
 834           (SHL64rCL GR64:$src1)>;
 835 def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
 836           (SHL64mCL addr:$dst)>;
 837
 838 def : Pat<(srl GR64:$src1, (and CL, 63)),
 839           (SHR64rCL GR64:$src1)>;
 840 def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
 841           (SHR64mCL addr:$dst)>;
 842
 843 def : Pat<(sra GR64:$src1, (and CL, 63)),
 844           (SAR64rCL GR64:$src1)>;
 845 def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
 846           (SAR64mCL addr:$dst)>;
 847
 848
 849 // (anyext (setcc_carry)) -> (setcc_carry) selected at the wider result width
 850 def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
 851           (SETB_C16r)>;
 852 def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
 853           (SETB_C32r)>;
 854 def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
 855           (SETB_C32r)>;
 856
 857 // (or x1, x2) -> (add x1, x2) if the two operands are known not to share bits.
 858 let AddedComplexity = 5 in { // Try this before selecting OR
 859 def : Pat<(or_is_add GR16:$src1, imm:$src2),
 860           (ADD16ri GR16:$src1, imm:$src2)>;
 861 def : Pat<(or_is_add GR32:$src1, imm:$src2),
 862           (ADD32ri GR32:$src1, imm:$src2)>;
 863 def : Pat<(or_is_add GR16:$src1, i16immSExt8:$src2),
 864           (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
 865 def : Pat<(or_is_add GR32:$src1, i32immSExt8:$src2),
 866           (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
 867 def : Pat<(or_is_add GR16:$src1, GR16:$src2),
 868           (ADD16rr GR16:$src1, GR16:$src2)>;
 869 def : Pat<(or_is_add GR32:$src1, GR32:$src2),
 870           (ADD32rr GR32:$src1, GR32:$src2)>;
 871 def : Pat<(or_is_add GR64:$src1, i64immSExt8:$src2),
 872           (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
 873 def : Pat<(or_is_add GR64:$src1, i64immSExt32:$src2),
 874           (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
 875 def : Pat<(or_is_add GR64:$src1, GR64:$src2),
 876           (ADD64rr GR64:$src1, GR64:$src2)>;
 877 } // AddedComplexity
 878
 879 //===----------------------------------------------------------------------===//
 880 // EFLAGS-defining Patterns
 881 //===----------------------------------------------------------------------===//
 882
 883 // add reg, reg
 884 def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr  GR8 :$src1, GR8 :$src2)>;
 885 def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>;
 886 def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>;
 887
 888 // add reg, mem
 889 def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),
 890           (ADD8rm GR8:$src1, addr:$src2)>;
 891 def : Pat<(add GR16:$src1, (loadi16 addr:$src2)),
 892           (ADD16rm GR16:$src1, addr:$src2)>;
 893 def : Pat<(add GR32:$src1, (loadi32 addr:$src2)),
 894           (ADD32rm GR32:$src1, addr:$src2)>;
 895
 896 // add reg, imm
 897 def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri  GR8:$src1 , imm:$src2)>;
 898 def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>;
 899 def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>;
 900 def : Pat<(add GR16:$src1, i16immSExt8:$src2),
 901           (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
 902 def : Pat<(add GR32:$src1, i32immSExt8:$src2),
 903           (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
 904
 905 // sub reg, reg
 906 def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr  GR8 :$src1, GR8 :$src2)>;
 907 def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>;
 908 def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>;
 909
 910 // sub reg, mem
 911 def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)),
 912           (SUB8rm GR8:$src1, addr:$src2)>;
 913 def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)),
 914           (SUB16rm GR16:$src1, addr:$src2)>;
 915 def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)),
 916           (SUB32rm GR32:$src1, addr:$src2)>;
 917
 918 // sub reg, imm
 919 def : Pat<(sub GR8:$src1, imm:$src2),
 920           (SUB8ri GR8:$src1, imm:$src2)>;
 921 def : Pat<(sub GR16:$src1, imm:$src2),
 922           (SUB16ri GR16:$src1, imm:$src2)>;
 923 def : Pat<(sub GR32:$src1, imm:$src2),
 924           (SUB32ri GR32:$src1, imm:$src2)>;
 925 def : Pat<(sub GR16:$src1, i16immSExt8:$src2),
 926           (SUB16ri8 GR16:$src1, i16immSExt8:$src2)>;
 927 def : Pat<(sub GR32:$src1, i32immSExt8:$src2),
 928           (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
 929
 930 // mul reg, reg
 931 def : Pat<(mul GR16:$src1, GR16:$src2),
 932           (IMUL16rr GR16:$src1, GR16:$src2)>;
 933 def : Pat<(mul GR32:$src1, GR32:$src2),
 934           (IMUL32rr GR32:$src1, GR32:$src2)>;
 935
 936 // mul reg, mem
 937 def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)),
 938           (IMUL16rm GR16:$src1, addr:$src2)>;
 939 def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)),
 940           (IMUL32rm GR32:$src1, addr:$src2)>;
 941
 942 // mul reg, imm
 943 def : Pat<(mul GR16:$src1, imm:$src2),
 944           (IMUL16rri GR16:$src1, imm:$src2)>;
 945 def : Pat<(mul GR32:$src1, imm:$src2),
 946           (IMUL32rri GR32:$src1, imm:$src2)>;
 947 def : Pat<(mul GR16:$src1, i16immSExt8:$src2),
 948           (IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>;
 949 def : Pat<(mul GR32:$src1, i32immSExt8:$src2),
 950           (IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>;
 951
 952 // reg = mul mem, imm
 953 def : Pat<(mul (loadi16 addr:$src1), imm:$src2),
 954           (IMUL16rmi addr:$src1, imm:$src2)>;
 955 def : Pat<(mul (loadi32 addr:$src1), imm:$src2),
 956           (IMUL32rmi addr:$src1, imm:$src2)>;
 957 def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2),
 958           (IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>;
 959 def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2),
 960           (IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>;
 961
 962 // Optimize multiply by 2 with EFLAGS result.
 963 let AddedComplexity = 2 in {
 964 def : Pat<(X86smul_flag GR16:$src1, 2), (ADD16rr GR16:$src1, GR16:$src1)>;
 965 def : Pat<(X86smul_flag GR32:$src1, 2), (ADD32rr GR32:$src1, GR32:$src1)>;
 966 }
 967
 968 // Patterns for nodes that do not produce flags, for instructions that do.
 969
 970 // addition (64-bit)
 971 def : Pat<(add GR64:$src1, GR64:$src2),
 972           (ADD64rr GR64:$src1, GR64:$src2)>;
 973 def : Pat<(add GR64:$src1, i64immSExt8:$src2),
 974           (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
 975 def : Pat<(add GR64:$src1, i64immSExt32:$src2),
 976           (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
 977 def : Pat<(add GR64:$src1, (loadi64 addr:$src2)),
 978           (ADD64rm GR64:$src1, addr:$src2)>;
 979
 980 // subtraction (64-bit)
 981 def : Pat<(sub GR64:$src1, GR64:$src2),
 982           (SUB64rr GR64:$src1, GR64:$src2)>;
 983 def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)),
 984           (SUB64rm GR64:$src1, addr:$src2)>;
 985 def : Pat<(sub GR64:$src1, i64immSExt8:$src2),
 986           (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
 987 def : Pat<(sub GR64:$src1, i64immSExt32:$src2),
 988           (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
 989
 990 // multiplication (64-bit), including the reg = mul mem, imm forms
 991 def : Pat<(mul GR64:$src1, GR64:$src2),
 992           (IMUL64rr GR64:$src1, GR64:$src2)>;
 993 def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)),
 994           (IMUL64rm GR64:$src1, addr:$src2)>;
 995 def : Pat<(mul GR64:$src1, i64immSExt8:$src2),
 996           (IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>;
 997 def : Pat<(mul GR64:$src1, i64immSExt32:$src2),
 998           (IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>;
 999 def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2),
1000           (IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>;
1001 def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
1002           (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
1003
1004 // Increment reg: (add x, 1) ==> inc. 16/32-bit forms are chosen by mode.
1005 def : Pat<(add GR8 :$src, 1), (INC8r     GR8 :$src)>;
1006 def : Pat<(add GR16:$src, 1), (INC16r    GR16:$src)>, Requires<[In32BitMode]>;
1007 def : Pat<(add GR16:$src, 1), (INC64_16r GR16:$src)>, Requires<[In64BitMode]>;
1008 def : Pat<(add GR32:$src, 1), (INC32r    GR32:$src)>, Requires<[In32BitMode]>;
1009 def : Pat<(add GR32:$src, 1), (INC64_32r GR32:$src)>, Requires<[In64BitMode]>;
1010 def : Pat<(add GR64:$src, 1), (INC64r    GR64:$src)>;
1011
1012 // Decrement reg: (add x, -1) ==> dec. 16/32-bit forms are chosen by mode.
1013 def : Pat<(add GR8 :$src, -1), (DEC8r     GR8 :$src)>;
1014 def : Pat<(add GR16:$src, -1), (DEC16r    GR16:$src)>, Requires<[In32BitMode]>;
1015 def : Pat<(add GR16:$src, -1), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>;
1016 def : Pat<(add GR32:$src, -1), (DEC32r    GR32:$src)>, Requires<[In32BitMode]>;
1017 def : Pat<(add GR32:$src, -1), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>;
1018 def : Pat<(add GR64:$src, -1), (DEC64r    GR64:$src)>;
1019
1020 // or reg/reg
1021 def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr  GR8 :$src1, GR8 :$src2)>;
1022 def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>;
1023 def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>;
1024 def : Pat<(or GR64:$src1, GR64:$src2), (OR64rr GR64:$src1, GR64:$src2)>;
1025
1026 // or reg/mem
1027 def : Pat<(or GR8:$src1, (loadi8 addr:$src2)),
1028           (OR8rm GR8:$src1, addr:$src2)>;
1029 def : Pat<(or GR16:$src1, (loadi16 addr:$src2)),
1030           (OR16rm GR16:$src1, addr:$src2)>;
1031 def : Pat<(or GR32:$src1, (loadi32 addr:$src2)),
1032           (OR32rm GR32:$src1, addr:$src2)>;
1033 def : Pat<(or GR64:$src1, (loadi64 addr:$src2)),
1034           (OR64rm GR64:$src1, addr:$src2)>;
1035
1036 // or reg/imm (64-bit has only the sign-extended ri8 and ri32 forms)
1037 def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri  GR8 :$src1, imm:$src2)>;
1038 def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>;
1039 def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>;
1040 def : Pat<(or GR16:$src1, i16immSExt8:$src2),
1041           (OR16ri8 GR16:$src1, i16immSExt8:$src2)>;
1042 def : Pat<(or GR32:$src1, i32immSExt8:$src2),
1043           (OR32ri8 GR32:$src1, i32immSExt8:$src2)>;
1044 def : Pat<(or GR64:$src1, i64immSExt8:$src2),
1045           (OR64ri8 GR64:$src1, i64immSExt8:$src2)>;
1046 def : Pat<(or GR64:$src1, i64immSExt32:$src2),
1047           (OR64ri32 GR64:$src1, i64immSExt32:$src2)>;
1048
1049 // xor reg/reg
1050 def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr  GR8 :$src1, GR8 :$src2)>;
1051 def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>;
1052 def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>;
1053 def : Pat<(xor GR64:$src1, GR64:$src2), (XOR64rr GR64:$src1, GR64:$src2)>;
1054
1055 // xor reg/mem
1056 def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)),
1057           (XOR8rm GR8:$src1, addr:$src2)>;
1058 def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)),
1059           (XOR16rm GR16:$src1, addr:$src2)>;
1060 def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)),
1061           (XOR32rm GR32:$src1, addr:$src2)>;
1062 def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)),
1063           (XOR64rm GR64:$src1, addr:$src2)>;
1064
1065 // xor reg/imm (64-bit has only the sign-extended ri8 and ri32 forms)
1066 def : Pat<(xor GR8:$src1, imm:$src2),
1067           (XOR8ri GR8:$src1, imm:$src2)>;
1068 def : Pat<(xor GR16:$src1, imm:$src2),
1069           (XOR16ri GR16:$src1, imm:$src2)>;
1070 def : Pat<(xor GR32:$src1, imm:$src2),
1071           (XOR32ri GR32:$src1, imm:$src2)>;
1072 def : Pat<(xor GR16:$src1, i16immSExt8:$src2),
1073           (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>;
1074 def : Pat<(xor GR32:$src1, i32immSExt8:$src2),
1075           (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>;
1076 def : Pat<(xor GR64:$src1, i64immSExt8:$src2),
1077           (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>;
1078 def : Pat<(xor GR64:$src1, i64immSExt32:$src2),
1079           (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
1080
1081 // and reg/reg
1082 def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr  GR8 :$src1, GR8 :$src2)>;
1083 def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>;
1084 def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>;
1085 def : Pat<(and GR64:$src1, GR64:$src2), (AND64rr GR64:$src1, GR64:$src2)>;
1086
1087 // and reg/mem
1088 def : Pat<(and GR8:$src1, (loadi8 addr:$src2)),
1089           (AND8rm GR8:$src1, addr:$src2)>;
1090 def : Pat<(and GR16:$src1, (loadi16 addr:$src2)),
1091           (AND16rm GR16:$src1, addr:$src2)>;
1092 def : Pat<(and GR32:$src1, (loadi32 addr:$src2)),
1093           (AND32rm GR32:$src1, addr:$src2)>;
1094 def : Pat<(and GR64:$src1, (loadi64 addr:$src2)),
1095           (AND64rm GR64:$src1, addr:$src2)>;
1096
1097 // and reg/imm (64-bit has only the sign-extended ri8 and ri32 forms)
1098 def : Pat<(and GR8:$src1, imm:$src2),
1099           (AND8ri GR8:$src1, imm:$src2)>;
1100 def : Pat<(and GR16:$src1, imm:$src2),
1101           (AND16ri GR16:$src1, imm:$src2)>;
1102 def : Pat<(and GR32:$src1, imm:$src2),
1103           (AND32ri GR32:$src1, imm:$src2)>;
1104 def : Pat<(and GR16:$src1, i16immSExt8:$src2),
1105           (AND16ri8 GR16:$src1, i16immSExt8:$src2)>;
1106 def : Pat<(and GR32:$src1, i32immSExt8:$src2),
1107           (AND32ri8 GR32:$src1, i32immSExt8:$src2)>;
1108 def : Pat<(and GR64:$src1, i64immSExt8:$src2),
1109           (AND64ri8 GR64:$src1, i64immSExt8:$src2)>;
1110 def : Pat<(and GR64:$src1, i64immSExt32:$src2),
1111           (AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
1112
1113
1114