Codegen signed mod by 2 or -2 more efficiently. Instead of generating:
t:
mov %EDX, DWORD PTR [%ESP + 4]
mov %ECX, 2
mov %EAX, %EDX
sar %EDX, 31
idiv %ECX
mov %EAX, %EDX
ret
Generate:
t:
mov %ECX, DWORD PTR [%ESP + 4]
*** mov %EAX, %ECX
cdq
and %ECX, 1
xor %ECX, %EDX
sub %ECX, %EDX
*** mov %EAX, %ECX
ret
Note that the two marked moves are redundant, and should be eliminated by the
register allocator, but aren't.
Compare this to GCC, which generates:
t:
mov %eax, DWORD PTR [%esp+4]
mov %edx, %eax
shr %edx, 31
lea %ecx, [%edx+%eax]
and %ecx, -2
sub %eax, %ecx
ret
or ICC 8.0, which generates:
t:
movl 4(%esp), %ecx #3.5
movl $-2147483647, %eax #3.25
imull %ecx #3.25
movl %ecx, %eax #3.25
sarl $31, %eax #3.25
addl %ecx, %edx #3.25
subl %edx, %eax #3.25
addl %eax, %eax #3.25
negl %eax #3.25
subl %eax, %ecx #3.25
movl %ecx, %eax #3.25
ret #3.25
We would be in great shape if not for the moves.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@16763 91177308-0d34-0410-b5e6-96231b3b80d8