let AddedComplexity = 20;
}
+let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode],
+ AddedComplexity = 1 in {
+ // Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC,
+ // which only require 3 bytes compared to MOV32ri which requires 5.
+ let Defs = [EFLAGS], isReMaterializable = 1, isPseudo = 1 in {
+ def MOV32r1 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
+ [(set GR32:$dst, 1)]>;
+ def MOV32r_1 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
+ [(set GR32:$dst, -1)]>;
+ }
+
+ // MOV16ri is 4 bytes, so the instructions above are smaller.
+ def : Pat<(i16 1), (EXTRACT_SUBREG (MOV32r1), sub_16bit)>;
+ def : Pat<(i16 -1), (EXTRACT_SUBREG (MOV32r_1), sub_16bit)>;
+}
+
// Materialize i64 constant where top 32-bits are zero. This could theoretically
// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
// that would make it more difficult to rematerialize.
unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig,
const TargetRegisterInfo &TRI) const {
- // MOV32r0 is implemented with a xor which clobbers condition code.
- // Re-materialize it as movri instructions to avoid side effects.
- unsigned Opc = Orig->getOpcode();
- if (Opc == X86::MOV32r0 && !isSafeToClobberEFLAGS(MBB, I)) {
+ bool ClobbersEFLAGS = false;
+ for (const MachineOperand &MO : Orig->operands()) {
+ if (MO.isReg() && MO.isDef() && MO.getReg() == X86::EFLAGS) {
+ ClobbersEFLAGS = true;
+ break;
+ }
+ }
+
+ if (ClobbersEFLAGS && !isSafeToClobberEFLAGS(MBB, I)) {
+ // The instruction clobbers EFLAGS. Re-materialize as MOV32ri to avoid side
+ // effects.
+ int Value;
+ switch (Orig->getOpcode()) {
+ case X86::MOV32r0: Value = 0; break;
+ case X86::MOV32r1: Value = 1; break;
+ case X86::MOV32r_1: Value = -1; break;
+ default:
+ llvm_unreachable("Unexpected instruction!");
+ }
+
DebugLoc DL = Orig->getDebugLoc();
BuildMI(MBB, I, DL, get(X86::MOV32ri)).addOperand(Orig->getOperand(0))
- .addImm(0);
+ .addImm(Value);
} else {
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
MBB.insert(I, MI);
return true;
}
+static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII,
+ bool MinusOne) {
+ MachineBasicBlock &MBB = *MIB->getParent();
+ DebugLoc DL = MIB->getDebugLoc();
+ unsigned Reg = MIB->getOperand(0).getReg();
+
+ // Insert the XOR.
+ BuildMI(MBB, MIB.getInstr(), DL, TII.get(X86::XOR32rr), Reg)
+ .addReg(Reg, RegState::Undef)
+ .addReg(Reg, RegState::Undef);
+
+ // Turn the pseudo into an INC or DEC.
+ MIB->setDesc(TII.get(MinusOne ? X86::DEC32r : X86::INC32r));
+ MIB.addReg(Reg);
+
+ return true;
+}
+
// LoadStackGuard has so far only been implemented for 64-bit MachO. Different
// code sequence is needed for other targets.
static void expandLoadStackGuard(MachineInstrBuilder &MIB,
switch (MI->getOpcode()) {
case X86::MOV32r0:
return Expand2AddrUndef(MIB, get(X86::XOR32rr));
+ case X86::MOV32r1:
+ return expandMOV32r1(MIB, *this, /*MinusOne=*/ false);
+ case X86::MOV32r_1:
+ return expandMOV32r1(MIB, *this, /*MinusOne=*/ true);
case X86::SETB_C8r:
return Expand2AddrUndef(MIB, get(X86::SBB8rr));
case X86::SETB_C16r:
--- /dev/null
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECK32
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECK64
+
+define i32 @one32() optsize {
+entry:
+ ret i32 1
+
+; CHECK32-LABEL: one32
+; CHECK32: xorl %eax, %eax
+; CHECK32-NEXT: incl %eax
+; CHECK32-NEXT: ret
+
+; FIXME: Figure out the best approach in 64-bit mode.
+; CHECK64-LABEL: one32
+; CHECK64: movl $1, %eax
+; CHECK64-NEXT: retq
+}
+
+define i32 @minus_one32() optsize {
+entry:
+ ret i32 -1
+
+; CHECK32-LABEL: minus_one32
+; CHECK32: xorl %eax, %eax
+; CHECK32-NEXT: decl %eax
+; CHECK32-NEXT: ret
+}
+
+define i16 @one16() optsize {
+entry:
+ ret i16 1
+
+; CHECK32-LABEL: one16
+; CHECK32: xorl %eax, %eax
+; CHECK32-NEXT: incl %eax
+; CHECK32-NEXT: retl
+}
+
+define i16 @minus_one16() optsize {
+entry:
+ ret i16 -1
+
+; CHECK32-LABEL: minus_one16
+; CHECK32: xorl %eax, %eax
+; CHECK32-NEXT: decl %eax
+; CHECK32-NEXT: retl
+}
+
+define i32 @test_rematerialization() optsize {
+entry:
+ ; Materialize -1 (thiscall forces it into %ecx).
+ tail call x86_thiscallcc void @f(i32 -1)
+
+ ; Clobber all registers except %esp, leaving nowhere to store the -1 besides
+ ; spilling it to the stack.
+ tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
+
+ ; -1 should be re-materialized here instead of getting spilled above.
+ ret i32 -1
+
+; CHECK32-LABEL: test_rematerialization
+; CHECK32: xorl %ecx, %ecx
+; CHECK32-NEXT: decl %ecx
+; CHECK32: calll
+; CHECK32: xorl %eax, %eax
+; CHECK32-NEXT: decl %eax
+; CHECK32-NOT: %eax
+; CHECK32: retl
+}
+
+define i32 @test_rematerialization2(i32 %x) optsize {
+entry:
+ ; Materialize -1 (thiscall forces it into %ecx).
+ tail call x86_thiscallcc void @f(i32 -1)
+
+ ; Clobber all registers except %esp, leaving nowhere to store the -1 besides
+ ; spilling it to the stack.
+ tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
+
+ ; Define eflags.
+ %a = icmp ne i32 %x, 123
+ %b = zext i1 %a to i32
+ ; Cause -1 to be rematerialized right in front of the cmov, which needs eflags.
+ ; It must therefore not use the xor-dec lowering.
+ %c = select i1 %a, i32 %b, i32 -1
+ ret i32 %c
+
+; CHECK32-LABEL: test_rematerialization2
+; CHECK32: xorl %ecx, %ecx
+; CHECK32-NEXT: decl %ecx
+; CHECK32: calll
+; CHECK32: cmpl
+; CHECK32: setne
+; CHECK32-NOT: xorl
+; CHECK32: movl $-1
+; CHECK32: cmov
+; CHECK32: retl
+}
+
+declare x86_thiscallcc void @f(i32)