* Add support for compiling functions in both ARM and Thumb mode, then taking
the smallest.
+
* Add support for compiling individual basic blocks in thumb mode, when in a
larger ARM function. This can be used for presumed cold code, like paths
to abort (failure path of asserts), EH handling code, etc.
mov r1, #PCRELV0
add r1, pc
ldr r0, [r0, r1]
- cpy pc, r0
+ mov pc, r0
.align 2
LJTI1_0_0:
.long LBB1_3
LPCRELL0:
add r1, LJTI1_0_0
ldr r0, [r0, r1]
- cpy pc, r0
+ mov pc, r0
.align 2
LJTI1_0_0:
.long LBB1_3
//===---------------------------------------------------------------------===//
-We compiles the following using a jump table.
+We compile the following:
define i16 @func_entry_2E_ce(i32 %i) {
-newFuncRoot:
- br label %entry.ce
-
-bb12.exitStub: ; preds = %entry.ce
- ret i16 0
-
-bb4.exitStub: ; preds = %entry.ce, %entry.ce, %entry.ce
- ret i16 1
-
-bb9.exitStub: ; preds = %entry.ce, %entry.ce, %entry.ce
- ret i16 2
-
-bb.exitStub: ; preds = %entry.ce
- ret i16 3
-
-entry.ce: ; preds = %newFuncRoot
switch i32 %i, label %bb12.exitStub [
i32 0, label %bb4.exitStub
i32 1, label %bb9.exitStub
i32 8, label %bb.exitStub
i32 9, label %bb9.exitStub
]
+
+bb12.exitStub:
+ ret i16 0
+
+bb4.exitStub:
+ ret i16 1
+
+bb9.exitStub:
+ ret i16 2
+
+bb.exitStub:
+ ret i16 3
}
+into:
+
+_func_entry_2E_ce:
+ mov r2, #1
+ lsl r2, r0
+ cmp r0, #9
+ bhi LBB1_4 @bb12.exitStub
+LBB1_1: @newFuncRoot
+ mov r1, #13
+ tst r2, r1
+ bne LBB1_5 @bb4.exitStub
+LBB1_2: @newFuncRoot
+ ldr r1, LCPI1_0
+ tst r2, r1
+ bne LBB1_6 @bb9.exitStub
+LBB1_3: @newFuncRoot
+ mov r1, #1
+ lsl r1, r1, #8
+ tst r2, r1
+ bne LBB1_7 @bb.exitStub
+LBB1_4: @bb12.exitStub
+ mov r0, #0
+ bx lr
+LBB1_5: @bb4.exitStub
+ mov r0, #1
+ bx lr
+LBB1_6: @bb9.exitStub
+ mov r0, #2
+ bx lr
+LBB1_7: @bb.exitStub
+ mov r0, #3
+ bx lr
+LBB1_8:
+ .align 2
+LCPI1_0:
+ .long 642
+
+
gcc compiles to:
cmp r0, #9
.align 2
L11:
.long 642
+
+
+GCC is doing a couple of clever things here:
+ 1. It is predicating one of the returns. This isn't a clear win though: in
+ cases where that return isn't taken, it is replacing one condbranch with
+ two 'ne' predicated instructions.
+ 2. It is sinking the shift of "1 << i" into the tst, and using ands instead of
+ tst. This will probably require whole function isel.
+ 3. GCC emits:
+ tst r1, #256
+ we emit:
+ mov r1, #1
+ lsl r1, r1, #8
+ tst r2, r1
//===---------------------------------------------------------------------===//
//===---------------------------------------------------------------------===//
-We are reserving R3 as a scratch register under thumb mode. So if it is live in
-to the function, we save / restore R3 to / from R12. Until register scavenging
-is done, we should save R3 to a high callee saved reg at emitPrologue time
-(when hasFP is true or stack size is large) and restore R3 from that register
-instead. This allows us to at least get rid of the save to r12 everytime it is
-used.
-
-//===---------------------------------------------------------------------===//
-
Poor codegen test/CodeGen/ARM/select.ll f7:
ldr r5, LCPI1_0
add r5, pc
ldr r6, LCPI1_1
ldr r2, LCPI1_2
- cpy r3, r6
- cpy lr, pc
+ mov r3, r6
+ mov lr, pc
bx r5
//===---------------------------------------------------------------------===//
Make register allocator / spiller smarter so we can re-materialize "mov r, imm",
etc. Almost all Thumb instructions clobber condition code.
+
+//===---------------------------------------------------------------------===//
+
+Thumb load / store address mode offsets are scaled. The values kept in the
+instruction operands are pre-scale values. This probably ought to be changed
+to avoid extra work when we convert Thumb2 instructions to Thumb1 instructions.
+
+//===---------------------------------------------------------------------===//
+
+We need to make (some of the) Thumb1 instructions predicable. That will allow
+shrinking of predicated Thumb2 instructions. To allow this, we need to be able
+to toggle the 's' bit since they do not set CPSR when they are inside IT blocks.
+
+//===---------------------------------------------------------------------===//
+
+Make use of hi register variants of cmp: tCMPhir / tCMPZhir.
+
+//===---------------------------------------------------------------------===//
+
+Thumb1 immediate field sometimes keep pre-scaled values. See
+Thumb1RegisterInfo::eliminateFrameIndex. This is inconsistent from ARM and
+Thumb2.
+
+//===---------------------------------------------------------------------===//
+
+Rather than having tBR_JTr print a ".align 2" and constant island pass pad it,
+add a target specific ALIGN instruction instead. That way, GetInstSizeInBytes
+won't have to over-estimate. It can also be used for loop alignment pass.
+
+//===---------------------------------------------------------------------===//
+
+We generate conditional code for icmp when we don't need to. This code:
+
+ int foo(int s) {
+ return s == 1;
+ }
+
+produces:
+
+foo:
+ cmp r0, #1
+ mov.w r0, #0
+ it eq
+ moveq r0, #1
+ bx lr
+
+when it could use subs + adcs. This is GCC PR46975.