From: Dan Gohman
Date: Wed, 26 Aug 2009 14:59:13 +0000 (+0000)
Subject: Don't use INSERT_SUBREG to model anyext operations on x86-64, as it
X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=af70e5c676140a55ec619f552c8fd76b7edf3c6d;p=oota-llvm.git

Don't use INSERT_SUBREG to model anyext operations on x86-64, as it
leads to partial-register definitions. To help avoid redundant
zero-extensions, also teach the h-register matching patterns that
use movzbl to match anyext as well as zext.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@80099 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 7eaf15d0f40..00dfc048dd1 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -1599,30 +1599,15 @@ def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
 // For other extloads, use subregs, since the high contents of the register are
 // defined after an extload.
 def : Pat<(extloadi64i32 addr:$src),
-          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (MOV32rm addr:$src),
+          (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src),
                          x86_subreg_32bit)>;
-def : Pat<(extloadi16i1 addr:$src),
-          (INSERT_SUBREG (i16 (IMPLICIT_DEF)), (MOV8rm addr:$src),
-                         x86_subreg_8bit)>,
-      Requires<[In64BitMode]>;
-def : Pat<(extloadi16i8 addr:$src),
-          (INSERT_SUBREG (i16 (IMPLICIT_DEF)), (MOV8rm addr:$src),
-                         x86_subreg_8bit)>,
-      Requires<[In64BitMode]>;
-
-// anyext
-def : Pat<(i64 (anyext GR8:$src)),
-          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src, x86_subreg_8bit)>;
-def : Pat<(i64 (anyext GR16:$src)),
-          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>;
-def : Pat<(i64 (anyext GR32:$src)),
-          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, x86_subreg_32bit)>;
-def : Pat<(i16 (anyext GR8:$src)),
-          (INSERT_SUBREG (i16 (IMPLICIT_DEF)), GR8:$src, x86_subreg_8bit)>,
-      Requires<[In64BitMode]>;
-def : Pat<(i32 (anyext GR8:$src)),
-          (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, x86_subreg_8bit)>,
-      Requires<[In64BitMode]>;
+
+// anyext. Define these to do an explicit zero-extend to
+// avoid partial-register updates.
+def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8 GR8 :$src)>;
+def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>;
+def : Pat<(i64 (anyext GR32:$src)),
+          (SUBREG_TO_REG (i64 0), GR32:$src, x86_subreg_32bit)>;
 
 //===----------------------------------------------------------------------===//
 // Some peepholes
@@ -1720,6 +1705,11 @@ def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
             (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
                             x86_subreg_8bit_hi))>,
       Requires<[In64BitMode]>;
+def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
+          (MOVZX32_NOREXrr8
+            (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+                            x86_subreg_8bit_hi))>,
+      Requires<[In64BitMode]>;
 def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
           (SUBREG_TO_REG
             (i64 0),
@@ -1727,6 +1717,13 @@ def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
               (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
                               x86_subreg_8bit_hi)),
             x86_subreg_32bit)>;
+def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
+          (SUBREG_TO_REG
+            (i64 0),
+            (MOVZX32_NOREXrr8
+              (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+                              x86_subreg_8bit_hi)),
+            x86_subreg_32bit)>;
 
 // h-register extract and store.
 def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index f13102640a9..f5661272f99 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -3630,21 +3630,17 @@ def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
 
 // extload bool -> extload byte
 def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
-def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>,
-      Requires<[In32BitMode]>;
+def : Pat<(extloadi16i1 addr:$src), (MOVZX16rm8 addr:$src)>;
 def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
-def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>,
-      Requires<[In32BitMode]>;
+def : Pat<(extloadi16i8 addr:$src), (MOVZX16rm8 addr:$src)>;
 def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
 def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
 
-// anyext
-def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>,
-      Requires<[In32BitMode]>;
-def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>,
-      Requires<[In32BitMode]>;
-def : Pat<(i32 (anyext GR16:$src)),
-          (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>;
+// anyext. Define these to do an explicit zero-extend to
+// avoid partial-register updates.
+def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>;
+def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
+def : Pat<(i32 (anyext GR16:$src)), (MOVZX32rr16 GR16:$src)>;
 
 // (and (i32 load), 255) -> (zextload i8)
 def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 255))),
@@ -3725,6 +3721,10 @@ def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
           (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
                                       x86_subreg_8bit_hi))>,
       Requires<[In32BitMode]>;
+def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
+          (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR16:$src, GR16_ABCD),
+                                      x86_subreg_8bit_hi))>,
+      Requires<[In32BitMode]>;
 def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
           (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_REGCLASS GR32:$src, GR32_ABCD),
                                       x86_subreg_8bit_hi))>,
diff --git a/test/CodeGen/X86/anyext.ll b/test/CodeGen/X86/anyext.ll
new file mode 100644
index 00000000000..c5f03abee71
--- /dev/null
+++ b/test/CodeGen/X86/anyext.ll
@@ -0,0 +1,18 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep movzbl | count 2
+
+; Use movzbl to avoid partial-register updates.
+
+define i32 @foo(i32 %p, i8 zeroext %x) nounwind {
+  %q = trunc i32 %p to i8
+  %r = udiv i8 %q, %x
+  %s = zext i8 %r to i32
+  %t = and i32 %s, 1
+  ret i32 %t
+}
+define i32 @bar(i32 %p, i16 zeroext %x) nounwind {
+  %q = trunc i32 %p to i16
+  %r = udiv i16 %q, %x
+  %s = zext i16 %r to i32
+  %t = and i32 %s, 1
+  ret i32 %t
+}
diff --git a/test/CodeGen/X86/ins_subreg_coalesce-1.ll b/test/CodeGen/X86/ins_subreg_coalesce-1.ll
index 863cda94c5f..a0347ea80d9 100644
--- a/test/CodeGen/X86/ins_subreg_coalesce-1.ll
+++ b/test/CodeGen/X86/ins_subreg_coalesce-1.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 2
+; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 3
 
 define fastcc i32 @sqlite3ExprResolveNames() nounwind {
 entry: