From 11ba3b1af67796dabec10c70e16a93685e130719 Mon Sep 17 00:00:00 2001
From: Dan Gohman
Date: Wed, 30 Jul 2008 18:09:17 +0000
Subject: [PATCH] Reapply r54147 with a constraint to only use the 8-bit subreg
 form on x86-64, to avoid the problem with x86-32 having GPRs that don't have
 8-bit subregs. Also, change several 16-bit instructions to use equivalent
 32-bit instructions. These have a smaller encoding and avoid partial-register
 updates.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@54223 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86Instr64bit.td  | 54 +++++++++++++++++++++++---------
 lib/Target/X86/X86InstrInfo.td   | 32 +++++++++++++------
 test/CodeGen/X86/zext-inreg-0.ll | 51 ++++++++++++++++++++++++++++++
 test/CodeGen/X86/zext-inreg-1.ll | 13 ++++++++
 test/CodeGen/X86/zext-inreg-2.ll | 28 +++++++++++++++++
 5 files changed, 153 insertions(+), 25 deletions(-)
 create mode 100644 test/CodeGen/X86/zext-inreg-0.ll
 create mode 100644 test/CodeGen/X86/zext-inreg-1.ll
 create mode 100644 test/CodeGen/X86/zext-inreg-2.ll

diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 23a403068bf..2e676f204b0 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -241,18 +241,22 @@
 def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
                     "movs{lq|xd}\t{$src, $dst|$dst, $src}",
                     [(set GR64:$dst, (sextloadi64i32 addr:$src))]>;
-def MOVZX64rr8 : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
-                    "movz{bq|x}\t{$src, $dst|$dst, $src}",
-                    [(set GR64:$dst, (zext GR8:$src))]>, TB;
-def MOVZX64rm8 : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
-                    "movz{bq|x}\t{$src, $dst|$dst, $src}",
-                    [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB;
-def MOVZX64rr16: RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
-                    "movz{wq|x}\t{$src, $dst|$dst, $src}",
-                    [(set GR64:$dst, (zext GR16:$src))]>, TB;
-def MOVZX64rm16: RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
-                    "movz{wq|x}\t{$src, $dst|$dst, $src}",
-                    [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB;
+// Use movzbl instead of movzbq when the destination is a register; it's
+// equivalent due to implicit zero-extending, and it has a smaller encoding.
+def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
+                   "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+                   [(set GR64:$dst, (zext GR8:$src))]>, TB;
+def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
+                   "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+                   [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB;
+// Use movzwl instead of movzwq when the destination is a register; it's
+// equivalent due to implicit zero-extending, and it has a smaller encoding.
+def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
+                   "movz{wl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+                   [(set GR64:$dst, (zext GR16:$src))]>, TB;
+def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+                   "movz{wl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+                   [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB;
 
 let neverHasSideEffects = 1 in {
 let Defs = [RAX], Uses = [EAX] in
@@ -1093,9 +1097,9 @@ def Int_CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src)
 // when we have a better way to specify isel priority.
 let Defs = [EFLAGS], AddedComplexity = 1,
     isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MOV64r0 : RI<0x31, MRMInitReg, (outs GR64:$dst), (ins),
-                 "xor{l}\t${dst:subreg32}, ${dst:subreg32}",
-                 [(set GR64:$dst, 0)]>;
+def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins),
+                "xor{l}\t${dst:subreg32}, ${dst:subreg32}",
+                [(set GR64:$dst, 0)]>;
 
 // Materialize i64 constant where top 32-bits are zero.
 let AddedComplexity = 1, isReMaterializable = 1 in
@@ -1240,6 +1244,26 @@
 def : Pat<(and GR64:$src, i64immFFFFFFFF),
           (SUBREG_TO_REG (i64 0),
             (i32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)), x86_subreg_32bit)>;
+// r & (2^16-1) ==> movz
+def : Pat<(and GR64:$src, 0xffff),
+          (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)))>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR64:$src, 0xff),
+          (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)))>;
+
+// TODO: The following two patterns could be adapted to apply to x86-32, except
+// that they'll need some way to deal with the fact that in x86-32 not all GPRs
+// have 8-bit subregs. The GR32_ and GR16_ classes are a step in this direction,
+// but they aren't ready for this purpose yet.
+
+// r & (2^8-1) ==> movz
+def : Pat<(and GR32:$src1, 0xff),
+          (MOVZX32rr8 (i8 (EXTRACT_SUBREG GR32:$src1, x86_subreg_8bit)))>,
+      Requires<[In64BitMode]>;
+// r & (2^8-1) ==> movz
+def : Pat<(and GR16:$src1, 0xff),
+          (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, x86_subreg_8bit)))>,
+      Requires<[In64BitMode]>;
 
 // (shl x, 1) ==> (add x, x)
 def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 4f3a3af0f97..a09a0431d54 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -2424,12 +2424,15 @@ def CMP32ri8 : Ii8<0x83, MRM7r,
 } // Defs = [EFLAGS]
 
 // Sign/Zero extenders
+// Use movsbl instead of movsbw; we don't care about the high 16 bits
+// of the register here. This has a smaller encoding and avoids a
+// partial-register update.
 def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
-                   "movs{bw|x}\t{$src, $dst|$dst, $src}",
-                   [(set GR16:$dst, (sext GR8:$src))]>, TB, OpSize;
+                   "movs{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+                   [(set GR16:$dst, (sext GR8:$src))]>, TB;
 def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
-                   "movs{bw|x}\t{$src, $dst|$dst, $src}",
-                   [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB, OpSize;
+                   "movs{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+                   [(set GR16:$dst, (sextloadi16i8 addr:$src))]>, TB;
 def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
                    "movs{bl|x}\t{$src, $dst|$dst, $src}",
                    [(set GR32:$dst, (sext GR8:$src))]>, TB;
@@ -2443,12 +2446,15 @@
 def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
                    "movs{wl|x}\t{$src, $dst|$dst, $src}",
                    [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB;
+// Use movzbl instead of movzbw; we don't care about the high 16 bits
+// of the register here. This has a smaller encoding and avoids a
+// partial-register update.
 def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8 :$src),
-                   "movz{bw|x}\t{$src, $dst|$dst, $src}",
-                   [(set GR16:$dst, (zext GR8:$src))]>, TB, OpSize;
+                   "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+                   [(set GR16:$dst, (zext GR8:$src))]>, TB;
 def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem :$src),
-                   "movz{bw|x}\t{$src, $dst|$dst, $src}",
-                   [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB, OpSize;
+                   "movz{bl|x}\t{$src, ${dst:subreg32}|${dst:subreg32}, $src}",
+                   [(set GR16:$dst, (zextloadi16i8 addr:$src))]>, TB;
 def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
                    "movz{bl|x}\t{$src, $dst|$dst, $src}",
                    [(set GR32:$dst, (zext GR8:$src))]>, TB;
@@ -2488,9 +2494,11 @@ let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1 in {
 def MOV8r0  : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins),
                 "xor{b}\t$dst, $dst",
                 [(set GR8:$dst, 0)]>;
+// Use xorl instead of xorw since we don't care about the high 16 bits,
+// it's smaller, and it avoids a partial-register update.
 def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
-                "xor{w}\t$dst, $dst",
-                [(set GR16:$dst, 0)]>, OpSize;
+                "xor{l}\t${dst:subreg32}, ${dst:subreg32}",
+                [(set GR16:$dst, 0)]>;
 def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins),
                 "xor{l}\t$dst, $dst",
                 [(set GR32:$dst, 0)]>;
@@ -2763,6 +2771,10 @@ def : Pat<(i32 (and (loadi32 addr:$src), (i32 65535))),(MOVZX32rm16 addr:$src)>;
 // Some peepholes
 //===----------------------------------------------------------------------===//
 
+// r & (2^16-1) ==> movz
+def : Pat<(and GR32:$src1, 0xffff),
+          (MOVZX32rr16 (i16 (EXTRACT_SUBREG GR32:$src1, x86_subreg_16bit)))>;
+
 // (shl x, 1) ==> (add x, x)
 def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
 def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
diff --git a/test/CodeGen/X86/zext-inreg-0.ll b/test/CodeGen/X86/zext-inreg-0.ll
new file mode 100644
index 00000000000..62c651c8350
--- /dev/null
+++ b/test/CodeGen/X86/zext-inreg-0.ll
@@ -0,0 +1,51 @@
+; RUN: llvm-as < %s | llc -march=x86 | not grep and
+; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: not grep and %t
+; RUN: not grep movzbq %t
+; RUN: not grep movzwq %t
+; RUN: not grep movzlq %t
+
+; These should use movzbl instead of 'and 255'.
+; This is related to not having a ZERO_EXTEND_REG opcode.
+
+define i32 @c(i32 %d) nounwind {
+  %e = add i32 %d, 1
+  %retval = and i32 %e, 65535
+  ret i32 %retval
+}
+define i64 @e(i64 %d) nounwind {
+  %e = add i64 %d, 1
+  %retval = and i64 %e, 65535
+  ret i64 %retval
+}
+define i64 @f(i64 %d) nounwind {
+  %e = add i64 %d, 1
+  %retval = and i64 %e, 4294967295
+  ret i64 %retval
+}
+
+define i32 @g(i8 %d) nounwind {
+  %e = add i8 %d, 1
+  %retval = zext i8 %e to i32
+  ret i32 %retval
+}
+define i32 @h(i16 %d) nounwind {
+  %e = add i16 %d, 1
+  %retval = zext i16 %e to i32
+  ret i32 %retval
+}
+define i64 @i(i8 %d) nounwind {
+  %e = add i8 %d, 1
+  %retval = zext i8 %e to i64
+  ret i64 %retval
+}
+define i64 @j(i16 %d) nounwind {
+  %e = add i16 %d, 1
+  %retval = zext i16 %e to i64
+  ret i64 %retval
+}
+define i64 @k(i32 %d) nounwind {
+  %e = add i32 %d, 1
+  %retval = zext i32 %e to i64
+  ret i64 %retval
+}
diff --git a/test/CodeGen/X86/zext-inreg-1.ll b/test/CodeGen/X86/zext-inreg-1.ll
new file mode 100644
index 00000000000..6a678b2e3b2
--- /dev/null
+++ b/test/CodeGen/X86/zext-inreg-1.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-as < %s | llc -march=x86 | not grep and
+
+; These tests differ from the ones in zext-inreg-0.ll in that
+; on x86-64 they do require 'and' instructions.
+
+; These should use movzbl instead of 'and 255'.
+; This is related to not having a ZERO_EXTEND_REG node.
+
+define i64 @h(i64 %d) nounwind {
+  %e = add i64 %d, 1
+  %retval = and i64 %e, 281474976710655
+  ret i64 %retval
+}
diff --git a/test/CodeGen/X86/zext-inreg-2.ll b/test/CodeGen/X86/zext-inreg-2.ll
new file mode 100644
index 00000000000..1209dac7f6f
--- /dev/null
+++ b/test/CodeGen/X86/zext-inreg-2.ll
@@ -0,0 +1,28 @@
+; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: not grep and %t
+; RUN: not grep movzbq %t
+; RUN: not grep movzwq %t
+; RUN: not grep movzlq %t
+
+; These should use movzbl instead of 'and 255'.
+; This is related to not having a ZERO_EXTEND_REG opcode.
+
+; This test was split out of zext-inreg-0.ll because these
+; cases don't yet work on x86-32 due to the 8-bit subreg
+; issue.
+
+define i32 @a(i32 %d) nounwind {
+  %e = add i32 %d, 1
+  %retval = and i32 %e, 255
+  ret i32 %retval
+}
+define i32 @b(float %d) nounwind {
+  %tmp12 = fptoui float %d to i8
+  %retval = zext i8 %tmp12 to i32
+  ret i32 %retval
+}
+define i64 @d(i64 %d) nounwind {
+  %e = add i64 %d, 1
+  %retval = and i64 %e, 255
+  ret i64 %retval
+}
--
2.34.1
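
For illustration, a minimal sketch (not taken from the patch's test files) of the codegen this change aims for on x86-64; the function name below is made up, and the exact registers depend on the calling convention and register allocator:

; With the new (and GR64:$src, 0xffff) pattern, the mask below selects
; MOVZX64rr16, which now prints as a 32-bit movzwl (roughly movzwl %ax, %eax);
; writing the 32-bit subregister implicitly zeroes the upper 32 bits, so no
; 64-bit 'and' immediate and no movzwq are needed.
define i64 @mask16(i64 %d) nounwind {
  %e = add i64 %d, 1
  %masked = and i64 %e, 65535
  ret i64 %masked
}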