Make ARM and Thumb2 32-bit immediate materialization into a single 32-bit pseudo instruction

author Evan Cheng <evan.cheng@apple.com>

Mon, 28 Sep 2009 09:14:39 +0000 (09:14 +0000)

committer Evan Cheng <evan.cheng@apple.com>

Mon, 28 Sep 2009 09:14:39 +0000 (09:14 +0000)
author Evan Cheng <evan.cheng@apple.com>
Mon, 28 Sep 2009 09:14:39 +0000 (09:14 +0000)
committer Evan Cheng <evan.cheng@apple.com>
Mon, 28 Sep 2009 09:14:39 +0000 (09:14 +0000)
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp

index 52af9786954b4f8df1ff92c1c6db8d01d6d94c4c..a228945f657183f3cb0f8e63148224b4e3f3fc82 100644 (file)
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -864,7 +864,8 @@ ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
  /// getInstrPredicate - If instruction is predicated, returns its predicate
  /// condition, otherwise returns AL. It also returns the condition code
  /// register by reference.
-ARMCC::CondCodes llvm::getInstrPredicate(MachineInstr *MI, unsigned &PredReg) {
+ARMCC::CondCodes
+llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
    int PIdx = MI->findFirstPredOperandIdx();
    if (PIdx == -1) {
      PredReg = 0;
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h

index 3632450ededd135157906489185b8734b81db7f0..a13155b9fd0d1b525fef1bf00df48fd02fb9b687 100644 (file)
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -296,7 +296,7 @@ bool isJumpTableBranchOpcode(int Opc) {
  /// getInstrPredicate - If instruction is predicated, returns its predicate
  /// condition, otherwise returns AL. It also returns the condition code
  /// register by reference.
-ARMCC::CondCodes getInstrPredicate(MachineInstr *MI, unsigned &PredReg);
+ARMCC::CondCodes getInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
  
  int getMatchingCondBranchOpcode(int Opc);
  
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp

index 57f7d38ae01152170be66deb85ebba59dc4bc3cf..5e0c11e5da873e797c8eeaaaa944aef77291fa42 100644 (file)
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -596,7 +596,8 @@ void Emitter<CodeEmitter>::emitPseudoInstruction(const MachineInstr &MI) {
    unsigned Opcode = MI.getDesc().Opcode;
    switch (Opcode) {
    default:
-    llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction");//FIXME:
+    llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction");
+  // FIXME: Add support for MOVi32imm.
    case TargetInstrInfo::INLINEASM: {
      // We allow inline assembler nodes with empty bodies - they can
      // implicitly define registers, which is ok for JIT.
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td

index 5388197c3be9218e8b37010c54dddb264e85d848..b3c00287c86d798a26965ea71fa34fed1ec4e7a5 100644 (file)
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -984,6 +984,11 @@ class T2JTI<dag oops, dag iops, InstrItinClass itin,
              string asm, list<dag> pattern>
    : Thumb2XI<oops, iops, AddrModeNone, SizeSpecial, itin, asm, "", pattern>;
  
+class T2Ix2<dag oops, dag iops, InstrItinClass itin,
+          string opc, string asm, list<dag> pattern>
+  : Thumb2I<oops, iops, AddrModeNone, Size8Bytes, itin, opc, asm, "", pattern>;
+
+
  // T2Iidxldst - Thumb2 indexed load / store instructions.
  class T2Iidxldst<dag oops, dag iops, AddrMode am, IndexMode im,
                   InstrItinClass itin,
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp

index 2ab355b190d5ed3b43b7d87c6ff522ee8fa9f4e6..4c92891c82bd6fca4a7313aa4287ff5c8d04a570 100644 (file)
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -96,7 +96,6 @@ reMaterialize(MachineBasicBlock &MBB,
  
    MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
    MI->getOperand(0).setReg(DestReg);
-
    MBB.insert(I, MI);
  }
  
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td

index 4bc62733d6876b8ce7a8ab45ac5ca6d5c73040ce..1dcc4d61a9ede7ff5862bc5120dcb26ba0e42970 100644 (file)
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -100,6 +100,7 @@ def HasV5T    : Predicate<"Subtarget->hasV5TOps()">;
  def HasV5TE   : Predicate<"Subtarget->hasV5TEOps()">;
  def HasV6     : Predicate<"Subtarget->hasV6Ops()">;
  def HasV6T2   : Predicate<"Subtarget->hasV6T2Ops()">;
+def NoV6T2    : Predicate<"!Subtarget->hasV6T2Ops()">;
  def HasV7     : Predicate<"Subtarget->hasV7Ops()">;
  def HasVFP2   : Predicate<"Subtarget->hasVFP2()">;
  def HasVFP3   : Predicate<"Subtarget->hasVFP3()">;
@@ -938,7 +939,7 @@ def MOVi16 : AI1<0b1000, (outs GPR:$dst), (ins i32imm:$src),
    let Inst{25} = 1;
  }
  
-let isAsCheapAsAMove = 1, Constraints = "$src = $dst" in
+let Constraints = "$src = $dst" in
  def MOVTi16 : AI1<0b1010, (outs GPR:$dst), (ins GPR:$src, i32imm:$imm),
                    DPFrm, IIC_iMOVi,
                    "movt", " $dst, $imm", 
@@ -1510,7 +1511,8 @@ let isReMaterializable = 1 in
  def MOVi2pieces : AI1x2<(outs GPR:$dst), (ins so_imm2part:$src), 
                           Pseudo, IIC_iMOVi,
                           "mov", " $dst, $src",
-                         [(set GPR:$dst, so_imm2part:$src)]>;
+                         [(set GPR:$dst, so_imm2part:$src)]>,
+                  Requires<[IsARM, NoV6T2]>;
  
  def : ARMPat<(or GPR:$LHS, so_imm2part:$RHS),
               (ORRri (ORRri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
@@ -1519,9 +1521,14 @@ def : ARMPat<(xor GPR:$LHS, so_imm2part:$RHS),
               (EORri (EORri GPR:$LHS, (so_imm2part_1 imm:$RHS)),
                      (so_imm2part_2 imm:$RHS))>;
  
-def : ARMPat<(i32 imm:$src),
-             (MOVTi16 (MOVi16 (lo16 imm:$src)), (hi16 imm:$src))>,
-       Requires<[IsARM, HasV6T2]>;
+// 32-bit immediate using movw + movt.
+// This is a single pseudo instruction to make it re-materializable. Remove
+// when we can do generalized remat.
+let isReMaterializable = 1 in
+def MOVi32imm : AI1x2<(outs GPR:$dst), (ins i32imm:$src), Pseudo, IIC_iMOVi,
+                     "movw", " $dst, ${src:lo16}\n\tmovt${p} $dst, ${src:hi16}",
+                     [(set GPR:$dst, (i32 imm:$src))]>,
+               Requires<[IsARM, HasV6T2]>;
  
  // TODO: add,sub,and, 3-instr forms?
  
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td

index d0fe432963433fd960a311ab85dfbd97c00496dc..27c4ef0dbb3fd2c12a26f51e37056233614d49e7 100644 (file)
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -649,7 +649,8 @@ let neverHasSideEffects = 1 in
  def t2MOVr : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr,
                     "mov", ".w $dst, $src", []>;
  
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+// AddedComplexity to ensure isel tries t2MOVi before t2MOVi16.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, AddedComplexity = 1 in
  def t2MOVi : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
                     "mov", ".w $dst, $src",
                     [(set GPR:$dst, t2_so_imm:$src)]>;
@@ -660,10 +661,10 @@ def t2MOVi16 : T2I<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi,
                     [(set GPR:$dst, imm0_65535:$src)]>;
  
  let Constraints = "$src = $dst" in
-def t2MOVTi16 : T2sI<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), IIC_iMOVi,
-                     "movt", " $dst, $imm",
-                     [(set GPR:$dst,
-                           (or (and GPR:$src, 0xffff), lo16AllZero:$imm))]>;
+def t2MOVTi16 : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), IIC_iMOVi,
+                    "movt", " $dst, $imm",
+                    [(set GPR:$dst,
+                          (or (and GPR:$src, 0xffff), lo16AllZero:$imm))]>;
  
  //===----------------------------------------------------------------------===//
  //  Extend Instructions.
@@ -1127,7 +1128,10 @@ def : T2Pat<(ARMWrapper  tconstpool  :$dst), (t2LEApcrel tconstpool  :$dst)>;
  def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
              (t2LEApcrelJT tjumptable:$dst, imm:$id)>;
  
-// Large immediate handling.
-
-def : T2Pat<(i32 imm:$src),
-            (t2MOVTi16 (t2MOVi16 (lo16 imm:$src)), (hi16 imm:$src))>;
+// 32-bit immediate using movw + movt.
+// This is a single pseudo instruction to make it re-materializable. Remove
+// when we can do generalized remat.
+let isReMaterializable = 1 in
+def t2MOVi32imm : T2Ix2<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi,
+                     "movw", " $dst, ${src:lo16}\n\tmovt${p} $dst, ${src:hi16}",
+                     [(set GPR:$dst, (i32 imm:$src))]>;
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp

index fa6720f2cdef4bdc21d3fcfb926174dd9bf79a5a..df349e33b471b52d8fd001b2fd5cbaf5fff2ca28 100644 (file)
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -329,7 +329,14 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
      break;
    }
    case MachineOperand::MO_Immediate: {
-    O << '#' << MO.getImm();
+    int64_t Imm = MO.getImm();
+    if (Modifier) {
+      if (strcmp(Modifier, "lo16") == 0)
+        Imm = Imm & 0xffffLL;
+      else if (strcmp(Modifier, "hi16") == 0)
+        Imm = (Imm & 0xffff0000LL) >> 16;
+    }
+    O << '#' << Imm;
      break;
    }
    case MachineOperand::MO_MachineBasicBlock:
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt

index 08435c57ebb6803121141213a652965bc6483249..8fb1da30088f92fb23c972b976abf7c08d491a4a 100644 (file)
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -592,3 +592,11 @@ conditional move:
  it saves an instruction and a register.
  
  //===---------------------------------------------------------------------===//
+
+add/sub/and/or + i32 imm can be simplified by folding part of the immediate
+into the operation.
+
+//===---------------------------------------------------------------------===//
+
+It might be profitable to cse MOVi16 if there are lots of 32-bit immediates
+with the same bottom half.
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp

index e74a526afaef62b9783d03477d90585ed33436ad..a06ee8eca87a4c58bf182247cff8e48469b1bdce 100644 (file)
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -40,12 +40,11 @@ namespace {
    char Thumb2ITBlockPass::ID = 0;
  }
  
-static ARMCC::CondCodes getPredicate(const MachineInstr *MI,
-                                     const Thumb2InstrInfo *TII) {
+static ARMCC::CondCodes getPredicate(const MachineInstr *MI, unsigned &PredReg){
    unsigned Opc = MI->getOpcode();
    if (Opc == ARM::tBcc || Opc == ARM::t2Bcc)
      return ARMCC::AL;
-  return TII->getPredicate(MI);
+  return llvm::getInstrPredicate(MI, PredReg);
  }
  
  bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) {
@@ -54,14 +53,39 @@ bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) {
    MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
    while (MBBI != E) {
      MachineInstr *MI = &*MBBI;
-    ARMCC::CondCodes CC = getPredicate(MI, TII);
+    DebugLoc dl = MI->getDebugLoc();
+    unsigned PredReg = 0;
+    ARMCC::CondCodes CC = getPredicate(MI, PredReg);
+
+    // Split t2MOVi32imm into a t2MOVi16 + t2MOVTi16 pair here. The only
+    // reason it was a single instruction was so that it could be
+    // re-materialized. It must be split before IT block insertion and the
+    // Thumb2 size reduction pass so that the IT mask is correct and width
+    // reduction opportunities are exposed. It doesn't make sense to do this
+    // in a separate pass, so it is done here.
+    if (MI->getOpcode() == ARM::t2MOVi32imm) {
+      unsigned DstReg = MI->getOperand(0).getReg();
+      bool DstDead = MI->getOperand(0).isDead(); // Is this possible?
+      unsigned Imm = MI->getOperand(1).getImm();
+      unsigned Lo16 = Imm & 0xffff;
+      unsigned Hi16 = (Imm >> 16) & 0xffff;
+      BuildMI(MBB, MBBI, dl, TII->get(ARM::t2MOVi16), DstReg)
+        .addImm(Lo16).addImm(CC).addReg(PredReg);
+      BuildMI(MBB, MBBI, dl, TII->get(ARM::t2MOVTi16))
+        .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead))
+        .addReg(DstReg).addImm(Hi16).addImm(CC).addReg(PredReg);
+      --MBBI;
+      --MBBI;
+      MI->eraseFromParent();
+      continue;
+    }
+
      if (CC == ARMCC::AL) {
        ++MBBI;
        continue;
      }
  
      // Insert an IT instruction.
-    DebugLoc dl = MI->getDebugLoc();
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT))
        .addImm(CC);
      ++MBBI;
@@ -70,7 +94,8 @@ bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) {
      ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
      unsigned Mask = 0, Pos = 3;
      while (MBBI != E && Pos) {
-      ARMCC::CondCodes NCC = getPredicate(&*MBBI, TII);
+      unsigned Dummy = 0;
+      ARMCC::CondCodes NCC = getPredicate(&*MBBI, Dummy);
        if (NCC == OCC) {
          Mask |= (1 << Pos);
        } else if (NCC != CC)
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp

index e1fb3585f74a470e47da7ab71ac01527308596bc..b8879d2ed1fd0eff221ca496ed1bcba938cf1dc1 100644 (file)
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -79,6 +79,7 @@ namespace {
      { ARM::t2LSRri, ARM::tLSRri,  0,             5,   0,    1,   0,  0,0, 0 },
      { ARM::t2LSRrr, 0,            ARM::tLSRrr,   0,   0,    0,   1,  0,0, 0 },
      { ARM::t2MOVi,  ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 0 },
+    { ARM::t2MOVi16,ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 0 },
      // FIXME: Do we need the 16-bit 'S' variant?
      { ARM::t2MOVr,ARM::tMOVgpr2gpr,0,            0,   0,    0,   0,  1,0, 0 },
      { ARM::t2MOVCCr,0,            ARM::tMOVCCr,  0,   0,    0,   0,  0,1, 0 },
diff --git a/test/CodeGen/Thumb2/thumb2-mov2.ll b/test/CodeGen/Thumb2/thumb2-mov2.ll

index f45defe6eeb88a3aa922d6a4a8a73efb1e50963a..a02f4f087365ee3ad2cb786a3af683f80d0d865e 100644 (file)
--- a/test/CodeGen/Thumb2/thumb2-mov2.ll
+++ b/test/CodeGen/Thumb2/thumb2-mov2.ll
@@ -1,10 +1,11 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep movt | grep #1234
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep movt | grep #1234
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep movt | grep #1234
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep movt | grep #1234
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep mov  | grep movt
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
  
  define i32 @t2MOVTi16_ok_1(i32 %a) {
+; CHECK: t2MOVTi16_ok_1:
+; CHECK:      movs r1, #0
+; CHECK-NEXT: movt r1, #1234
+; CHECK:      movw r1, #65535
+; CHECK-NEXT: movt r1, #1234
      %1 = and i32 %a, 65535
      %2 = shl i32 1234, 16
      %3 = or  i32 %1, %2
@@ -13,6 +14,11 @@ define i32 @t2MOVTi16_ok_1(i32 %a) {
  }
  
  define i32 @t2MOVTi16_test_1(i32 %a) {
+; CHECK: t2MOVTi16_test_1:
+; CHECK:      movs r1, #0
+; CHECK-NEXT: movt r1, #1234
+; CHECK:      movw r1, #65535
+; CHECK-NEXT: movt r1, #1234
      %1 = shl i32  255,   8
      %2 = shl i32 1234,   8
      %3 = or  i32   %1, 255  ; This give us 0xFFFF in %3
@@ -24,6 +30,11 @@ define i32 @t2MOVTi16_test_1(i32 %a) {
  }
  
  define i32 @t2MOVTi16_test_2(i32 %a) {
+; CHECK: t2MOVTi16_test_2:
+; CHECK:      movs r1, #0
+; CHECK-NEXT: movt r1, #1234
+; CHECK:      movw r1, #65535
+; CHECK-NEXT: movt r1, #1234
      %1 = shl i32  255,   8
      %2 = shl i32 1234,   8
      %3 = or  i32   %1, 255  ; This give us 0xFFFF in %3
@@ -36,6 +47,11 @@ define i32 @t2MOVTi16_test_2(i32 %a) {
  }
  
  define i32 @t2MOVTi16_test_3(i32 %a) {
+; CHECK: t2MOVTi16_test_3:
+; CHECK:      movs r1, #0
+; CHECK-NEXT: movt r1, #1234
+; CHECK:      movw r1, #65535
+; CHECK-NEXT: movt r1, #1234
      %1 = shl i32  255,   8
      %2 = shl i32 1234,   8
      %3 = or  i32   %1, 255  ; This give us 0xFFFF in %3
@@ -50,6 +66,11 @@ define i32 @t2MOVTi16_test_3(i32 %a) {
  }
  
  define i32 @t2MOVTi16_test_nomatch_1(i32 %a) {
+; CHECK: t2MOVTi16_test_nomatch_1:
+; CHECK:      movw r1, #16384
+; CHECK-NEXT: movt r1, #154
+; CHECK:      movw r1, #65535
+; CHECK-NEXT: movt r1, #154
      %1 = shl i32  255,   8
      %2 = shl i32 1234,   8
      %3 = or  i32   %1, 255  ; This give us 0xFFFF in %3
@@ -58,7 +79,6 @@ define i32 @t2MOVTi16_test_nomatch_1(i32 %a) {
      %6 = shl i32   %4,   2  ; This gives us (1234 << 16) in %6
      %7 = lshr i32  %6,   3
      %8 = or  i32   %5,  %7
-
      ret i32 %8
  }
author	Evan Cheng <evan.cheng@apple.com>
	Mon, 28 Sep 2009 09:14:39 +0000 (09:14 +0000)
committer	Evan Cheng <evan.cheng@apple.com>
	Mon, 28 Sep 2009 09:14:39 +0000 (09:14 +0000)
lib/Target/ARM/ARMBaseInstrInfo.cpp		patch \| blob \| history
lib/Target/ARM/ARMBaseInstrInfo.h		patch \| blob \| history
lib/Target/ARM/ARMCodeEmitter.cpp		patch \| blob \| history
lib/Target/ARM/ARMInstrFormats.td		patch \| blob \| history
lib/Target/ARM/ARMInstrInfo.cpp		patch \| blob \| history
lib/Target/ARM/ARMInstrInfo.td		patch \| blob \| history
lib/Target/ARM/ARMInstrThumb2.td		patch \| blob \| history
lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp		patch \| blob \| history
lib/Target/ARM/README.txt		patch \| blob \| history
lib/Target/ARM/Thumb2ITBlockPass.cpp		patch \| blob \| history
lib/Target/ARM/Thumb2SizeReduction.cpp		patch \| blob \| history
test/CodeGen/Thumb2/thumb2-mov2.ll		patch \| blob \| history