Implement x86 h-register extract support.

author Dan Gohman <gohman@apple.com>

Mon, 13 Apr 2009 16:09:41 +0000 (16:09 +0000)

committer Dan Gohman <gohman@apple.com>

Mon, 13 Apr 2009 16:09:41 +0000 (16:09 +0000)
author Dan Gohman <gohman@apple.com>
Mon, 13 Apr 2009 16:09:41 +0000 (16:09 +0000)
committer Dan Gohman <gohman@apple.com>
Mon, 13 Apr 2009 16:09:41 +0000 (16:09 +0000)
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp

index 2cfa719d73b07cc53d096c0359a0252fe39965e4..42844562020addfaee15b50a12eca9282148b548 100644 (file)
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -997,7 +997,7 @@ bool X86FastISel::X86SelectTrunc(Instruction *I) {
      return false;
  
    // First issue a copy to GR16_ or GR32_.
-  unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16to16_ : X86::MOV32to32_;
+  unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16rr : X86::MOV32rr;
    const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
      ? X86::GR16_RegisterClass : X86::GR32_RegisterClass;
    unsigned CopyReg = createResultReg(CopyRC);
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp

index 6fd9d00e66172a6bc707c1919e2a70e7bb2f594b..41a3c416f85eeff91313bb6bd08292616f7b4eb4 100644 (file)
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1019,21 +1019,69 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
      break;
        
    case ISD::AND: {
-    // Handle "(x << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
-    // allows us to fold the shift into this addressing mode.
+    // Perform some heroic transforms on an and of a constant-count shift
+    // with a constant to enable use of the scaled offset field.
+
      SDValue Shift = N.getOperand(0);
-    if (Shift.getOpcode() != ISD::SHL) break;
+    if (Shift.getNumOperands() != 2) break;
  
      // Scale must not be used already.
      if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;
  
      // Not when RIP is used as the base.
      if (AM.isRIPRel) break;
-      
+
+    SDValue X = Shift.getOperand(0);
      ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
      ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
      if (!C1 || !C2) break;
  
+    // Handle "(X >> (8-S)) & (0xff << S)" as "((X >> 8) & 0xff) << S" for a
+    // scale log S in [1,3] (x86 scales are 1, 2, 4, 8). This turns the shift
+    // and mask into an h-register extract plus a scaled index.
+    if (Shift.getOpcode() == ISD::SRL && Shift.hasOneUse()) {
+      unsigned ScaleLog = 8 - C1->getZExtValue();
+      if (ScaleLog > 0 && ScaleLog < 4 &&
+          C2->getZExtValue() == (UINT64_C(0xff) << ScaleLog)) {
+        SDValue Eight = CurDAG->getConstant(8, MVT::i8);
+        SDValue Mask = CurDAG->getConstant(0xff, N.getValueType());
+        SDValue Srl = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
+                                      X, Eight);
+        SDValue And = CurDAG->getNode(ISD::AND, dl, N.getValueType(),
+                                      Srl, Mask);
+
+        // Insert the new nodes into the topological ordering.
+        if (Eight.getNode()->getNodeId() == -1 ||
+            Eight.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+          CurDAG->RepositionNode(X.getNode(), Eight.getNode());
+          Eight.getNode()->setNodeId(X.getNode()->getNodeId());
+        }
+        if (Mask.getNode()->getNodeId() == -1 ||
+            Mask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
+          CurDAG->RepositionNode(X.getNode(), Mask.getNode());
+          Mask.getNode()->setNodeId(X.getNode()->getNodeId());
+        }
+        if (Srl.getNode()->getNodeId() == -1 ||
+            Srl.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
+          CurDAG->RepositionNode(Shift.getNode(), Srl.getNode());
+          Srl.getNode()->setNodeId(Shift.getNode()->getNodeId());
+        }
+        if (And.getNode()->getNodeId() == -1 ||
+            And.getNode()->getNodeId() > N.getNode()->getNodeId()) {
+          CurDAG->RepositionNode(N.getNode(), And.getNode());
+          And.getNode()->setNodeId(N.getNode()->getNodeId());
+        }
+        CurDAG->ReplaceAllUsesWith(N, And);
+        AM.IndexReg = And;
+        AM.Scale = (1 << ScaleLog);
+        return false;
+      }
+    }
+
+    // Handle "(X << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
+    // allows us to fold the shift into this addressing mode.
+    if (Shift.getOpcode() != ISD::SHL) break;
+
      // Not likely to be profitable if either the AND or SHIFT node has more
      // than one use (unless all uses are for address computation). Besides,
      // isel mechanism requires their node ids to be reused.
@@ -1046,7 +1094,6 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
        break;
      
      // Get the new AND mask, this folds to a constant.
-    SDValue X = Shift.getOperand(0);
      SDValue NewANDMask = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
                                           SDValue(C2, 0), SDValue(C1, 0));
      SDValue NewAND = CurDAG->getNode(ISD::AND, dl, N.getValueType(), X, 
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td

index 10e66e88beedf479f0a25130fce17207fc7a3570..05bccabc304b073646ec4b735da7a426c689354e 100644 (file)
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -1522,7 +1522,7 @@ def : Pat<(store (add (loadi64 addr:$dst), 0x00000000800000000), addr:$dst),
  
  // r & (2^32-1) ==> movz
  def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
-          (MOVZX64rr32 (i32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)))>;
+          (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>;
  // r & (2^16-1) ==> movz
  def : Pat<(and GR64:$src, 0xffff),
            (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)))>;
@@ -1531,7 +1531,7 @@ def : Pat<(and GR64:$src, 0xff),
            (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)))>;
  // r & (2^8-1) ==> movz
  def : Pat<(and GR32:$src1, 0xff),
-           (MOVZX32rr8 (i8 (EXTRACT_SUBREG GR32:$src1, x86_subreg_8bit)))>,
+           (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, x86_subreg_8bit))>,
        Requires<[In64BitMode]>;
  // r & (2^8-1) ==> movz
  def : Pat<(and GR16:$src1, 0xff),
@@ -1540,13 +1540,13 @@ def : Pat<(and GR16:$src1, 0xff),
  
  // sext_inreg patterns
  def : Pat<(sext_inreg GR64:$src, i32),
-          (MOVSX64rr32 (i32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)))>;
+          (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>;
  def : Pat<(sext_inreg GR64:$src, i16),
-          (MOVSX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)))>;
+          (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit))>;
  def : Pat<(sext_inreg GR64:$src, i8),
-          (MOVSX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)))>;
+          (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit))>;
  def : Pat<(sext_inreg GR32:$src, i8),
-          (MOVSX32rr8 (i8 (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit)))>,
+          (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit))>,
        Requires<[In64BitMode]>;
  def : Pat<(sext_inreg GR16:$src, i8),
            (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit)))>,
@@ -1554,16 +1554,63 @@ def : Pat<(sext_inreg GR16:$src, i8),
  
  // trunc patterns
  def : Pat<(i32 (trunc GR64:$src)),
-          (i32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>;
+          (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)>;
  def : Pat<(i16 (trunc GR64:$src)),
-          (i16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit))>;
+          (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)>;
  def : Pat<(i8 (trunc GR64:$src)),
-          (i8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit))>;
+          (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)>;
  def : Pat<(i8 (trunc GR32:$src)),
-          (i8 (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit))>,
+          (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit)>,
        Requires<[In64BitMode]>;
  def : Pat<(i8 (trunc GR16:$src)),
-          (i8 (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit))>,
+          (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit)>,
+      Requires<[In64BitMode]>;
+
+// h-register tricks.
+// For now, be conservative and only do the extract if the value is immediately
+// zero-extended or stored, which are somewhat common cases. This uses a bunch
+// of code to prevent a register requiring a REX prefix from being allocated in
+// the same instruction as the h register, as there's currently no way to
+// describe this requirement to the register allocator.
+
+// h-register extract and zero-extend.
+def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
+          (SUBREG_TO_REG
+            (i64 0),
+            (MOVZX32_NOREXrr8
+              (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR64:$src, GR64_),
+                              x86_subreg_8bit_hi)),
+            x86_subreg_32bit)>;
+def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
+          (MOVZX32_NOREXrr8
+            (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR32:$src, GR32_),
+                            x86_subreg_8bit_hi))>,
+      Requires<[In64BitMode]>;
+def : Pat<(srl_su GR16:$src, (i8 8)),
+          (EXTRACT_SUBREG
+            (MOVZX32_NOREXrr8
+              (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR16:$src, GR16_),
+                              x86_subreg_8bit_hi)),
+            x86_subreg_16bit)>,
+      Requires<[In64BitMode]>;
+
+// h-register extract and store.
+def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
+          (MOV8mr_NOREX
+            addr:$dst,
+            (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR64:$src, GR64_),
+                            x86_subreg_8bit_hi))>;
+def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
+          (MOV8mr_NOREX
+            addr:$dst,
+            (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR32:$src, GR32_),
+                            x86_subreg_8bit_hi))>,
+      Requires<[In64BitMode]>;
+def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
+          (MOV8mr_NOREX
+            addr:$dst,
+            (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR16:$src, GR16_),
+                            x86_subreg_8bit_hi))>,
        Requires<[In64BitMode]>;
  
  // (shl x, 1) ==> (add x, x)
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp

index 77320587cb48e20984d76f82eb401f31c8ad45cb..77955a6a42619a3c77e9400b926f49bdb1ea4a1b 100644 (file)
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -258,10 +258,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
      { X86::JMP64r,      X86::JMP64m, 1 },
      { X86::MOV16ri,     X86::MOV16mi, 0 },
      { X86::MOV16rr,     X86::MOV16mr, 0 },
-    { X86::MOV16to16_,  X86::MOV16_mr, 0 },
      { X86::MOV32ri,     X86::MOV32mi, 0 },
      { X86::MOV32rr,     X86::MOV32mr, 0 },
-    { X86::MOV32to32_,  X86::MOV32_mr, 0 },
      { X86::MOV64ri32,   X86::MOV64mi32, 0 },
      { X86::MOV64rr,     X86::MOV64mr, 0 },
      { X86::MOV8ri,      X86::MOV8mi, 0 },
@@ -372,9 +370,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
      { X86::Int_UCOMISDrr,   X86::Int_UCOMISDrm },
      { X86::Int_UCOMISSrr,   X86::Int_UCOMISSrm },
      { X86::MOV16rr,         X86::MOV16rm },
-    { X86::MOV16to16_,      X86::MOV16_rm },
      { X86::MOV32rr,         X86::MOV32rm },
-    { X86::MOV32to32_,      X86::MOV32_rm },
      { X86::MOV64rr,         X86::MOV64rm },
      { X86::MOV64toPQIrr,    X86::MOVQI2PQIrm },
      { X86::MOV64toSDrr,     X86::MOV64toSDrm },
@@ -404,6 +400,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
      { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm },
      { X86::MOVZX16rr8,      X86::MOVZX16rm8 },
      { X86::MOVZX32rr16,     X86::MOVZX32rm16 },
+    { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8 },
      { X86::MOVZX32rr8,      X86::MOVZX32rm8 },
      { X86::MOVZX64rr16,     X86::MOVZX64rm16 },
      { X86::MOVZX64rr32,     X86::MOVZX64rm32 },
@@ -672,8 +669,6 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
    case X86::MOV16rr:
    case X86::MOV32rr: 
    case X86::MOV64rr:
-  case X86::MOV16to16_:
-  case X86::MOV32to32_:
    case X86::MOVSSrr:
    case X86::MOVSDrr:
  
@@ -710,9 +705,7 @@ unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
    default: break;
    case X86::MOV8rm:
    case X86::MOV16rm:
-  case X86::MOV16_rm:
    case X86::MOV32rm:
-  case X86::MOV32_rm:
    case X86::MOV64rm:
    case X86::LD_Fp64m:
    case X86::MOVSSrm:
@@ -741,9 +734,7 @@ unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
    default: break;
    case X86::MOV8mr:
    case X86::MOV16mr:
-  case X86::MOV16_mr:
    case X86::MOV32mr:
-  case X86::MOV32_mr:
    case X86::MOV64mr:
    case X86::ST_FpP64m:
    case X86::MOVSSmr:
@@ -795,9 +786,7 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const {
    default: break;
      case X86::MOV8rm:
      case X86::MOV16rm:
-    case X86::MOV16_rm:
      case X86::MOV32rm:
-    case X86::MOV32_rm:
      case X86::MOV64rm:
      case X86::LD_Fp64m:
      case X86::MOVSSrm:
@@ -1670,10 +1659,22 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
        Opc = X86::MOV16rr;
      } else if (DestRC == &X86::GR8RegClass) {
        Opc = X86::MOV8rr;
+    } else if (DestRC == &X86::GR64_RegClass) {
+      Opc = X86::MOV64rr;
      } else if (DestRC == &X86::GR32_RegClass) {
-      Opc = X86::MOV32_rr;
+      Opc = X86::MOV32rr;
      } else if (DestRC == &X86::GR16_RegClass) {
-      Opc = X86::MOV16_rr;
+      Opc = X86::MOV16rr;
+    } else if (DestRC == &X86::GR8_RegClass) {
+      Opc = X86::MOV8rr;
+    } else if (DestRC == &X86::GR64_NOREXRegClass) {
+      Opc = X86::MOV64rr;
+    } else if (DestRC == &X86::GR32_NOREXRegClass) {
+      Opc = X86::MOV32rr;
+    } else if (DestRC == &X86::GR16_NOREXRegClass) {
+      Opc = X86::MOV16rr;
+    } else if (DestRC == &X86::GR8_NOREXRegClass) {
+      Opc = X86::MOV8rr;
      } else if (DestRC == &X86::RFP32RegClass) {
        Opc = X86::MOV_Fp3232;
      } else if (DestRC == &X86::RFP64RegClass || DestRC == &X86::RSTRegClass) {
@@ -1721,7 +1722,7 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
        return true;
      }
    }
-  
+
    // Moving from ST(0) turns into FpGET_ST0_32 etc.
    if (SrcRC == &X86::RSTRegClass) {
      // Copying from ST(0)/ST(1).
@@ -1779,10 +1780,22 @@ static unsigned getStoreRegOpcode(const TargetRegisterClass *RC,
      Opc = X86::MOV16mr;
    } else if (RC == &X86::GR8RegClass) {
      Opc = X86::MOV8mr;
+  } else if (RC == &X86::GR64_RegClass) {
+    Opc = X86::MOV64mr;
    } else if (RC == &X86::GR32_RegClass) {
-    Opc = X86::MOV32_mr;
+    Opc = X86::MOV32mr;
    } else if (RC == &X86::GR16_RegClass) {
-    Opc = X86::MOV16_mr;
+    Opc = X86::MOV16mr;
+  } else if (RC == &X86::GR8_RegClass) {
+    Opc = X86::MOV8mr;
+  } else if (RC == &X86::GR64_NOREXRegClass) {
+    Opc = X86::MOV64mr;
+  } else if (RC == &X86::GR32_NOREXRegClass) {
+    Opc = X86::MOV32mr;
+  } else if (RC == &X86::GR16_NOREXRegClass) {
+    Opc = X86::MOV16mr;
+  } else if (RC == &X86::GR8_NOREXRegClass) {
+    Opc = X86::MOV8mr;
    } else if (RC == &X86::RFP80RegClass) {
      Opc = X86::ST_FpP80m;   // pops
    } else if (RC == &X86::RFP64RegClass) {
@@ -1847,10 +1860,22 @@ static unsigned getLoadRegOpcode(const TargetRegisterClass *RC,
      Opc = X86::MOV16rm;
    } else if (RC == &X86::GR8RegClass) {
      Opc = X86::MOV8rm;
+  } else if (RC == &X86::GR64_RegClass) {
+    Opc = X86::MOV64rm;
    } else if (RC == &X86::GR32_RegClass) {
-    Opc = X86::MOV32_rm;
+    Opc = X86::MOV32rm;
    } else if (RC == &X86::GR16_RegClass) {
-    Opc = X86::MOV16_rm;
+    Opc = X86::MOV16rm;
+  } else if (RC == &X86::GR8_RegClass) {
+    Opc = X86::MOV8rm;
+  } else if (RC == &X86::GR64_NOREXRegClass) {
+    Opc = X86::MOV64rm;
+  } else if (RC == &X86::GR32_NOREXRegClass) {
+    Opc = X86::MOV32rm;
+  } else if (RC == &X86::GR16_NOREXRegClass) {
+    Opc = X86::MOV16rm;
+  } else if (RC == &X86::GR8_NOREXRegClass) {
+    Opc = X86::MOV8rm;
    } else if (RC == &X86::RFP80RegClass) {
      Opc = X86::LD_Fp80m;
    } else if (RC == &X86::RFP64RegClass) {
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td

index bef6b72c80103df6d42507ca47cf457da29f13d4..830796e3a38e11544f96e4189a108563ced480d2 100644 (file)
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -181,6 +181,13 @@ def f64mem  : X86MemOperand<"printf64mem">;
  def f80mem  : X86MemOperand<"printf80mem">;
  def f128mem : X86MemOperand<"printf128mem">;
  
+// A version of i8mem for use on x86-64 that uses GR64_NOREX instead of
+// plain GR64, so that it doesn't potentially require a REX prefix.
+def i8mem_NOREX : Operand<i64> {
+  let PrintMethod = "printi8mem";
+  let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX, i32imm, i8imm);
+}
+
  def lea32mem : Operand<i32> {
    let PrintMethod = "printlea32mem";
    let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm);
@@ -398,6 +405,14 @@ def extloadi32i16  : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>;
  def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
    return N->hasOneUse();
  }]>;
+// An 'srl' node with a single use.
+def srl_su : PatFrag<(ops node:$lhs, node:$rhs), (srl node:$lhs, node:$rhs), [{
+  return N->hasOneUse();
+}]>;
+// A 'trunc' node with a single use.
+def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
+  return N->hasOneUse();
+}]>;
  
  // 'shld' and 'shrd' instruction patterns. Note that even though these have
  // the srl and shl in their patterns, the C++ code must still check for them,
@@ -767,7 +782,12 @@ def MOV16mr : I<0x89, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
  def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                  "mov{l}\t{$src, $dst|$dst, $src}",
                  [(store GR32:$src, addr:$dst)]>;
-                
+
+// A version of MOV8mr that uses i8mem_NOREX so that it can be used for
+// storing h registers, which can't be encoded when a REX prefix is present.
+def MOV8mr_NOREX : I<0x88, MRMDestMem, (outs), (ins i8mem_NOREX:$dst, GR8:$src),
+                   "mov{b}\t{$src, $dst|$dst, $src}  # NOREX", []>;
+
  //===----------------------------------------------------------------------===//
  //  Fixed-Register Multiplication and Division Instructions...
  //
@@ -2899,6 +2919,18 @@ def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
                     "movz{wl|x}\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB;
  
+// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
+// except that they use GR32_NOREX for the output operand register class
+// instead of GR32. This allows them to operate on h registers on x86-64.
+def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
+                         (outs GR32_NOREX:$dst), (ins GR8:$src),
+                         "movz{bl|x}\t{$src, $dst|$dst, $src}  # NOREX",
+                         []>, TB;
+def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
+                         (outs GR32_NOREX:$dst), (ins i8mem:$src),
+                         "movz{bl|x}\t{$src, $dst|$dst, $src}  # NOREX",
+                         []>, TB;
+
  let neverHasSideEffects = 1 in {
    let Defs = [AX], Uses = [AL] in
    def CBW : I<0x98, RawFrm, (outs), (ins),
@@ -2935,33 +2967,6 @@ def MOV32r0  : I<0x31, MRMInitReg,  (outs GR32:$dst), (ins),
                   [(set GR32:$dst, 0)]>;
  }
  
-// Basic operations on GR16 / GR32 subclasses GR16_ and GR32_ which contains only
-// those registers that have GR8 sub-registers (i.e. AX - DX, EAX - EDX).
-let neverHasSideEffects = 1, isAsCheapAsAMove = 1 in {
-def MOV16to16_ : I<0x89, MRMDestReg, (outs GR16_:$dst), (ins GR16:$src),
-                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def MOV32to32_ : I<0x89, MRMDestReg, (outs GR32_:$dst), (ins GR32:$src),
-                "mov{l}\t{$src, $dst|$dst, $src}", []>;
-                
-def MOV16_rr : I<0x89, MRMDestReg, (outs GR16_:$dst), (ins GR16_:$src),
-                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def MOV32_rr : I<0x89, MRMDestReg, (outs GR32_:$dst), (ins GR32_:$src),
-                "mov{l}\t{$src, $dst|$dst, $src}", []>;
-} // neverHasSideEffects
-
-let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in {
-def MOV16_rm : I<0x8B, MRMSrcMem, (outs GR16_:$dst), (ins i16mem:$src),
-                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def MOV32_rm : I<0x8B, MRMSrcMem, (outs GR32_:$dst), (ins i32mem:$src),
-                "mov{l}\t{$src, $dst|$dst, $src}", []>;
-}
-let mayStore = 1, neverHasSideEffects = 1 in {
-def MOV16_mr : I<0x89, MRMDestMem, (outs), (ins i16mem:$dst, GR16_:$src),
-                "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
-def MOV32_mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32_:$src),
-                "mov{l}\t{$src, $dst|$dst, $src}", []>;
-}
-
  //===----------------------------------------------------------------------===//
  // Thread Local Storage Instructions
  //
@@ -3341,38 +3346,61 @@ def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
  
  // r & (2^16-1) ==> movz
  def : Pat<(and GR32:$src1, 0xffff),
-          (MOVZX32rr16 (i16 (EXTRACT_SUBREG GR32:$src1, x86_subreg_16bit)))>;
+          (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, x86_subreg_16bit))>;
  // r & (2^8-1) ==> movz
  def : Pat<(and GR32:$src1, 0xff),
-          (MOVZX32rr8 (i8 (EXTRACT_SUBREG (MOV32to32_ GR32:$src1),
-                                          x86_subreg_8bit)))>,
+          (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR32:$src1, GR32_),
+                                      x86_subreg_8bit))>,
        Requires<[In32BitMode]>;
  // r & (2^8-1) ==> movz
  def : Pat<(and GR16:$src1, 0xff),
-          (MOVZX16rr8 (i8 (EXTRACT_SUBREG (MOV16to16_ GR16:$src1),
-                                          x86_subreg_8bit)))>,
+          (MOVZX16rr8 (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR16:$src1, GR16_),
+                                      x86_subreg_8bit))>,
        Requires<[In32BitMode]>;
  
  // sext_inreg patterns
  def : Pat<(sext_inreg GR32:$src, i16),
-          (MOVSX32rr16 (i16 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit)))>;
+          (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>;
  def : Pat<(sext_inreg GR32:$src, i8),
-          (MOVSX32rr8 (i8 (EXTRACT_SUBREG (MOV32to32_ GR32:$src),
-                                          x86_subreg_8bit)))>,
+          (MOVSX32rr8 (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR32:$src, GR32_),
+                                      x86_subreg_8bit))>,
        Requires<[In32BitMode]>;
  def : Pat<(sext_inreg GR16:$src, i8),
-          (MOVSX16rr8 (i8 (EXTRACT_SUBREG (MOV16to16_ GR16:$src),
-                                          x86_subreg_8bit)))>,
+          (MOVSX16rr8 (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR16:$src, GR16_),
+                                      x86_subreg_8bit))>,
        Requires<[In32BitMode]>;
  
  // trunc patterns
  def : Pat<(i16 (trunc GR32:$src)),
-          (i16 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>;
+          (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit)>;
  def : Pat<(i8 (trunc GR32:$src)),
-          (i8 (EXTRACT_SUBREG (MOV32to32_ GR32:$src), x86_subreg_8bit))>,
+          (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR32:$src, GR32_),
+                          x86_subreg_8bit)>,
        Requires<[In32BitMode]>;
  def : Pat<(i8 (trunc GR16:$src)),
-          (i8 (EXTRACT_SUBREG (MOV16to16_ GR16:$src), x86_subreg_8bit))>,
+          (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR16:$src, GR16_),
+                          x86_subreg_8bit)>,
+      Requires<[In32BitMode]>;
+
+// h-register tricks
+def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
+          (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR16:$src, GR16_),
+                          x86_subreg_8bit_hi)>,
+      Requires<[In32BitMode]>;
+def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
+          (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR32:$src, GR32_),
+                          x86_subreg_8bit_hi)>,
+      Requires<[In32BitMode]>;
+def : Pat<(srl_su GR16:$src, (i8 8)),
+          (EXTRACT_SUBREG
+            (MOVZX32rr8
+              (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR16:$src, GR16_),
+                              x86_subreg_8bit_hi)),
+            x86_subreg_16bit)>,
+      Requires<[In32BitMode]>;
+def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
+          (MOVZX32rr8 (EXTRACT_SUBREG (COPY_TO_SUBCLASS GR32:$src, GR32_),
+                                      x86_subreg_8bit_hi))>,
        Requires<[In32BitMode]>;
  
  // (shl x, 1) ==> (add x, x)
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h

index 4856e2346dede8b14604d0563fcf81913337cb31..33b9f5edc73a6ed1daa179836bc41635d87c40ac 100644 (file)
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -35,7 +35,7 @@ namespace X86 {
    /// these indices must be kept in sync with the class indices in the 
    /// X86RegisterInfo.td file.
    enum SubregIndex {
-    SUBREG_8BIT = 1, SUBREG_16BIT = 2, SUBREG_32BIT = 3
+    SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4
    };
  }
  
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td

index a7b0f88963b0765b92a239f828349518ae120180..b323e78cfaba6cc938b9a1bd418b79b4f0308c5c 100644 (file)
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -49,7 +49,8 @@ let Namespace = "X86" in {
    def R14B : Register<"r14b">, DwarfRegNum<[14, -2, -2]>;
    def R15B : Register<"r15b">, DwarfRegNum<[15, -2, -2]>;
  
-  // High registers X86-32 only
+  // High registers. On x86-64, these cannot be used in any instruction
+  // with a REX prefix.
    def AH : Register<"ah">, DwarfRegNum<[0, 0, 0]>;
    def DH : Register<"dh">, DwarfRegNum<[1, 2, 2]>;
    def CH : Register<"ch">, DwarfRegNum<[2, 1, 1]>;
@@ -185,41 +186,45 @@ let Namespace = "X86" in {
  //
  
  def x86_subreg_8bit    : PatLeaf<(i32 1)>;
-def x86_subreg_16bit   : PatLeaf<(i32 2)>;
-def x86_subreg_32bit   : PatLeaf<(i32 3)>;
+def x86_subreg_8bit_hi : PatLeaf<(i32 2)>;
+def x86_subreg_16bit   : PatLeaf<(i32 3)>;
+def x86_subreg_32bit   : PatLeaf<(i32 4)>;
  
  def : SubRegSet<1, [AX, CX, DX, BX, SP,  BP,  SI,  DI,  
                      R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W],
                     [AL, CL, DL, BL, SPL, BPL, SIL, DIL, 
                      R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
  
-// It's unclear if this subreg set is safe, given that not all registers
-// in the class have an 'H' subreg.
-// def : SubRegSet<2, [AX, CX, DX, BX],
-//                    [AH, CH, DH, BH]>;
+def : SubRegSet<2, [AX, CX, DX, BX],
+                   [AH, CH, DH, BH]>;
  
  def : SubRegSet<1, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,  
                      R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D],
                     [AL, CL, DL, BL, SPL, BPL, SIL, DIL, 
                      R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
  
-def : SubRegSet<2, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,  
+def : SubRegSet<2, [EAX, ECX, EDX, EBX],
+                   [AH, CH, DH, BH]>;
+
+def : SubRegSet<3, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
                      R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D],
                     [AX,  CX,  DX,  BX,  SP,  BP,  SI,  DI, 
                      R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>;
  
-
  def : SubRegSet<1, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,  
                      R8,  R9,  R10, R11, R12, R13, R14, R15],
                     [AL, CL, DL, BL, SPL, BPL, SIL, DIL, 
                      R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
  
-def : SubRegSet<2, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,  
+def : SubRegSet<2, [RAX, RCX, RDX, RBX],
+                   [AH, CH, DH, BH]>;
+
+def : SubRegSet<3, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
                      R8,  R9,  R10, R11, R12, R13, R14, R15],
                     [AX,  CX,  DX,  BX,  SP,  BP,  SI,  DI, 
                      R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>;
-                    
-def : SubRegSet<3, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,  
+
+def : SubRegSet<4, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
                      R8,  R9,  R10, R11, R12, R13, R14, R15],
                     [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, 
                      R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>;
@@ -236,7 +241,11 @@ def : SubRegSet<3, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
  // R8B, ... R15B. 
  // Allocate R12 and R13 last, as these require an extra byte when
  // encoded in x86_64 instructions.
-// FIXME: Allow AH, CH, DH, BH in 64-mode for non-REX instructions,
+// FIXME: Allow AH, CH, DH, BH to be used as general-purpose registers in
+// 64-bit mode. The main complication is that they cannot be encoded in an
+// instruction requiring a REX prefix, while SIL, DIL, BPL, R8D, etc.
+// require a REX prefix. For example, "addb %ah, %dil" and "movzbl %ah, %r8d"
+// cannot be encoded.
  def GR8 : RegisterClass<"X86", [i8],  8,
                          [AL, CL, DL, BL, AH, CH, DH, BH, SIL, DIL, BPL, SPL,
                           R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B]> {
@@ -295,7 +304,7 @@ def GR8 : RegisterClass<"X86", [i8],  8,
  def GR16 : RegisterClass<"X86", [i16], 16,
                           [AX, CX, DX, SI, DI, BX, BP, SP,
                            R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W]> {
-  let SubRegClassList = [GR8];
+  let SubRegClassList = [GR8, GR8];
    let MethodProtos = [{
      iterator allocation_order_begin(const MachineFunction &MF) const;
      iterator allocation_order_end(const MachineFunction &MF) const;
@@ -363,7 +372,7 @@ def GR16 : RegisterClass<"X86", [i16], 16,
  def GR32 : RegisterClass<"X86", [i32], 32, 
                           [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
                            R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
-  let SubRegClassList = [GR8, GR16];
+  let SubRegClassList = [GR8, GR8, GR16];
    let MethodProtos = [{
      iterator allocation_order_begin(const MachineFunction &MF) const;
      iterator allocation_order_end(const MachineFunction &MF) const;
@@ -431,7 +440,7 @@ def GR32 : RegisterClass<"X86", [i32], 32,
  def GR64 : RegisterClass<"X86", [i64], 64, 
                           [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
                            RBX, R14, R15, R12, R13, RBP, RSP]> {
-  let SubRegClassList = [GR8, GR16, GR32];
+  let SubRegClassList = [GR8, GR8, GR16, GR32];
    let MethodProtos = [{
      iterator allocation_order_end(const MachineFunction &MF) const;
    }];
@@ -452,13 +461,118 @@ def GR64 : RegisterClass<"X86", [i64], 64,
  }
  
  
-// GR16, GR32 subclasses which contain registers that have GR8 sub-registers.
-// These should only be used for 32-bit mode.
+// GR8_, GR16_, GR32_, GR64_ - Subclasses of GR8, GR16, GR32, and GR64
+// which contain just the "a", "b", "c", and "d" registers. On x86-32,
+// GR16_ and GR32_ are classes for registers that support 8-bit subreg
+// operations. On x86-64, GR16_, GR32_, and GR64_ are classes for registers
+// that support 8-bit h-register operations.
+def GR8_ : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]> {
+}
  def GR16_ : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]> {
-  let SubRegClassList = [GR8];
+  let SubRegClassList = [GR8_, GR8_];
  }
  def GR32_ : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]> {
-  let SubRegClassList = [GR8, GR16];
+  let SubRegClassList = [GR8_, GR8_, GR16_];
+}
+def GR64_ : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> {
+  let SubRegClassList = [GR8_, GR8_, GR16_, GR32_];
+}
+
+// GR8_NOREX, GR16_NOREX, GR32_NOREX, GR64_NOREX - Subclasses of
+// GR8, GR16, GR32, and GR64 which contain only the first 8 GPRs.
+// On x86-64, GR64_NOREX, GR32_NOREX and GR16_NOREX are the classes
+// of registers which do not by themselves require a REX prefix.
+def GR8_NOREX : RegisterClass<"X86", [i8], 8,
+                              [AL, CL, DL, SIL, DIL, BL, BPL, SPL]> {
+}
+def GR16_NOREX : RegisterClass<"X86", [i16], 16,
+                               [AX, CX, DX, SI, DI, BX, BP, SP]> {
+  let SubRegClassList = [GR8_NOREX, GR8_NOREX];
+}
+// GR32_NOREX - GR32 registers which do not require a REX prefix.
+def GR32_NOREX : RegisterClass<"X86", [i32], 32,
+                               [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]> {
+  let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX];
+  let MethodProtos = [{
+    iterator allocation_order_begin(const MachineFunction &MF) const;
+    iterator allocation_order_end(const MachineFunction &MF) const;
+  }];
+  let MethodBodies = [{
+    // Does the function dedicate RBP / EBP to being a frame ptr?
+    // If so, don't allocate ESP or EBP.
+    static const unsigned X86_GR32_NOREX_AO_fp[] = {
+      X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX
+    };
+    // If not, just don't allocate ESP.
+    static const unsigned X86_GR32_NOREX_AO[] = {
+      X86::EAX, X86::ECX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP
+    };
+
+    GR32_NOREXClass::iterator
+    GR32_NOREXClass::allocation_order_begin(const MachineFunction &MF) const {
+      const TargetMachine &TM = MF.getTarget();
+      const TargetRegisterInfo *RI = TM.getRegisterInfo();
+      if (RI->hasFP(MF))
+        return X86_GR32_NOREX_AO_fp;
+      else
+        return X86_GR32_NOREX_AO;
+    }
+
+    GR32_NOREXClass::iterator
+    GR32_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
+      const TargetMachine &TM = MF.getTarget();
+      const TargetRegisterInfo *RI = TM.getRegisterInfo();
+      if (RI->hasFP(MF))
+        return X86_GR32_NOREX_AO_fp +
+               (sizeof(X86_GR32_NOREX_AO_fp) / sizeof(unsigned));
+      else
+        return X86_GR32_NOREX_AO +
+               (sizeof(X86_GR32_NOREX_AO) / sizeof(unsigned));
+    }
+  }];
+}
+
+// GR64_NOREX - GR64 registers which do not require a REX prefix.
+def GR64_NOREX : RegisterClass<"X86", [i64], 64,
+                               [RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP]> {
+  let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX];
+  let MethodProtos = [{
+    iterator allocation_order_begin(const MachineFunction &MF) const;
+    iterator allocation_order_end(const MachineFunction &MF) const;
+  }];
+  let MethodBodies = [{
+    // Does the function dedicate RBP / EBP to being a frame ptr?
+    // If so, don't allocate RSP or RBP.
+    static const unsigned X86_GR64_NOREX_AO_fp[] = {
+      X86::RAX, X86::RCX, X86::RDX, X86::RSI, X86::RDI, X86::RBX
+    };
+    // If not, just don't allocate RSP.
+    static const unsigned X86_GR64_NOREX_AO[] = {
+      X86::RAX, X86::RCX, X86::RDX, X86::RSI, X86::RDI, X86::RBX, X86::RBP
+    };
+
+    GR64_NOREXClass::iterator
+    GR64_NOREXClass::allocation_order_begin(const MachineFunction &MF) const {
+      const TargetMachine &TM = MF.getTarget();
+      const TargetRegisterInfo *RI = TM.getRegisterInfo();
+      if (RI->hasFP(MF))
+        return X86_GR64_NOREX_AO_fp;
+      else
+        return X86_GR64_NOREX_AO;
+    }
+
+    GR64_NOREXClass::iterator
+    GR64_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
+      const TargetMachine &TM = MF.getTarget();
+      const TargetRegisterInfo *RI = TM.getRegisterInfo();
+      if (RI->hasFP(MF))
+        return X86_GR64_NOREX_AO_fp +
+               (sizeof(X86_GR64_NOREX_AO_fp) / sizeof(unsigned));
+      else
+        return X86_GR64_NOREX_AO +
+               (sizeof(X86_GR64_NOREX_AO) / sizeof(unsigned));
+    }
+  }];
  }
  
  // A class to support the 'A' assembler constraint: EAX then EDX.
diff --git a/test/CodeGen/X86/h-register-addressing-32.ll b/test/CodeGen/X86/h-register-addressing-32.ll

new file mode 100644 (file)

index 0000000..41d9128
--- /dev/null
+++ b/test/CodeGen/X86/h-register-addressing-32.ll
@@ -0,0 +1,53 @@
+; RUN: llvm-as < %s | llc -march=x86 | grep {movzbl    %\[abcd\]h,} | count 7
+
+; Use h-register extract and zero-extend.
+
+define double @foo8(double* nocapture inreg %p, i32 inreg %x) nounwind readonly {
+  %t0 = lshr i32 %x, 8
+  %t1 = and i32 %t0, 255
+  %t2 = getelementptr double* %p, i32 %t1
+  %t3 = load double* %t2, align 8
+  ret double %t3
+}
+define float @foo4(float* nocapture inreg %p, i32 inreg %x) nounwind readonly {
+  %t0 = lshr i32 %x, 8
+  %t1 = and i32 %t0, 255
+  %t2 = getelementptr float* %p, i32 %t1
+  %t3 = load float* %t2, align 8
+  ret float %t3
+}
+define i16 @foo2(i16* nocapture inreg %p, i32 inreg %x) nounwind readonly {
+  %t0 = lshr i32 %x, 8
+  %t1 = and i32 %t0, 255
+  %t2 = getelementptr i16* %p, i32 %t1
+  %t3 = load i16* %t2, align 8
+  ret i16 %t3
+}
+define i8 @foo1(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
+  %t0 = lshr i32 %x, 8
+  %t1 = and i32 %t0, 255
+  %t2 = getelementptr i8* %p, i32 %t1
+  %t3 = load i8* %t2, align 8
+  ret i8 %t3
+}
+define i8 @bar8(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
+  %t0 = lshr i32 %x, 5
+  %t1 = and i32 %t0, 2040
+  %t2 = getelementptr i8* %p, i32 %t1
+  %t3 = load i8* %t2, align 8
+  ret i8 %t3
+}
+define i8 @bar4(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
+  %t0 = lshr i32 %x, 6
+  %t1 = and i32 %t0, 1020
+  %t2 = getelementptr i8* %p, i32 %t1
+  %t3 = load i8* %t2, align 8
+  ret i8 %t3
+}
+define i8 @bar2(i8* nocapture inreg %p, i32 inreg %x) nounwind readonly {
+  %t0 = lshr i32 %x, 7
+  %t1 = and i32 %t0, 510
+  %t2 = getelementptr i8* %p, i32 %t1
+  %t3 = load i8* %t2, align 8
+  ret i8 %t3
+}
diff --git a/test/CodeGen/X86/h-register-addressing-64.ll b/test/CodeGen/X86/h-register-addressing-64.ll

new file mode 100644 (file)

index 0000000..b38e0e4
--- /dev/null
+++ b/test/CodeGen/X86/h-register-addressing-64.ll
@@ -0,0 +1,53 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep {movzbl %\[abcd\]h,} | count 7
+
+; Use h-register extract and zero-extend.
+
+define double @foo8(double* nocapture inreg %p, i64 inreg %x) nounwind readonly {
+  %t0 = lshr i64 %x, 8
+  %t1 = and i64 %t0, 255
+  %t2 = getelementptr double* %p, i64 %t1
+  %t3 = load double* %t2, align 8
+  ret double %t3
+}
+define float @foo4(float* nocapture inreg %p, i64 inreg %x) nounwind readonly {
+  %t0 = lshr i64 %x, 8
+  %t1 = and i64 %t0, 255
+  %t2 = getelementptr float* %p, i64 %t1
+  %t3 = load float* %t2, align 8
+  ret float %t3
+}
+define i16 @foo2(i16* nocapture inreg %p, i64 inreg %x) nounwind readonly {
+  %t0 = lshr i64 %x, 8
+  %t1 = and i64 %t0, 255
+  %t2 = getelementptr i16* %p, i64 %t1
+  %t3 = load i16* %t2, align 8
+  ret i16 %t3
+}
+define i8 @foo1(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
+  %t0 = lshr i64 %x, 8
+  %t1 = and i64 %t0, 255
+  %t2 = getelementptr i8* %p, i64 %t1
+  %t3 = load i8* %t2, align 8
+  ret i8 %t3
+}
+define i8 @bar8(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
+  %t0 = lshr i64 %x, 5
+  %t1 = and i64 %t0, 2040
+  %t2 = getelementptr i8* %p, i64 %t1
+  %t3 = load i8* %t2, align 8
+  ret i8 %t3
+}
+define i8 @bar4(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
+  %t0 = lshr i64 %x, 6
+  %t1 = and i64 %t0, 1020
+  %t2 = getelementptr i8* %p, i64 %t1
+  %t3 = load i8* %t2, align 8
+  ret i8 %t3
+}
+define i8 @bar2(i8* nocapture inreg %p, i64 inreg %x) nounwind readonly {
+  %t0 = lshr i64 %x, 7
+  %t1 = and i64 %t0, 510
+  %t2 = getelementptr i8* %p, i64 %t1
+  %t3 = load i8* %t2, align 8
+  ret i8 %t3
+}
diff --git a/test/CodeGen/X86/h-register-store.ll b/test/CodeGen/X86/h-register-store.ll

new file mode 100644 (file)

index 0000000..e867242
--- /dev/null
+++ b/test/CodeGen/X86/h-register-store.ll
@@ -0,0 +1,27 @@
+; RUN: llvm-as < %s | llc -march=x86-64 > %t
+; RUN: grep mov %t | count 6
+; RUN: grep {movb      %ah, (%rsi)} %t | count 3
+; RUN: llvm-as < %s | llc -march=x86 > %t
+; RUN: grep mov %t | count 3
+; RUN: grep {movb      %ah, (%e} %t | count 3
+
+; Use h-register extract and store.
+
+define void @foo16(i16 inreg %p, i8* inreg %z) nounwind {
+  %q = lshr i16 %p, 8
+  %t = trunc i16 %q to i8
+  store i8 %t, i8* %z
+  ret void
+}
+define void @foo32(i32 inreg %p, i8* inreg %z) nounwind {
+  %q = lshr i32 %p, 8
+  %t = trunc i32 %q to i8
+  store i8 %t, i8* %z
+  ret void
+}
+define void @foo64(i64 inreg %p, i8* inreg %z) nounwind {
+  %q = lshr i64 %p, 8
+  %t = trunc i64 %q to i8
+  store i8 %t, i8* %z
+  ret void
+}
diff --git a/test/CodeGen/X86/h-registers.ll b/test/CodeGen/X86/h-registers.ll

new file mode 100644 (file)

index 0000000..2777be9
--- /dev/null
+++ b/test/CodeGen/X86/h-registers.ll
@@ -0,0 +1,48 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep {movzbl %\[abcd\]h,} | count 4
+; RUN: llvm-as < %s | llc -march=x86    > %t
+; RUN: grep {incb      %ah} %t | count 3
+; RUN: grep {movzbl    %ah,} %t | count 3
+
+; Use h registers. On x86-64, codegen doesn't support general allocation
+; of h registers yet, due to x86 encoding complications.
+
+define void @bar64(i64 inreg %x, i8* inreg %p) nounwind {
+  %t0 = lshr i64 %x, 8
+  %t1 = trunc i64 %t0 to i8
+  %t2 = add i8 %t1, 1
+  store i8 %t2, i8* %p
+  ret void
+}
+
+define void @bar32(i32 inreg %x, i8* inreg %p) nounwind {
+  %t0 = lshr i32 %x, 8
+  %t1 = trunc i32 %t0 to i8
+  %t2 = add i8 %t1, 1
+  store i8 %t2, i8* %p
+  ret void
+}
+
+define void @bar16(i16 inreg %x, i8* inreg %p) nounwind {
+  %t0 = lshr i16 %x, 8
+  %t1 = trunc i16 %t0 to i8
+  %t2 = add i8 %t1, 1
+  store i8 %t2, i8* %p
+  ret void
+}
+
+define i64 @qux64(i64 inreg %x) nounwind {
+  %t0 = lshr i64 %x, 8
+  %t1 = and i64 %t0, 255
+  ret i64 %t1
+}
+
+define i32 @qux32(i32 inreg %x) nounwind {
+  %t0 = lshr i32 %x, 8
+  %t1 = and i32 %t0, 255
+  ret i32 %t1
+}
+
+define i16 @qux16(i16 inreg %x) nounwind {
+  %t0 = lshr i16 %x, 8
+  ret i16 %t0
+}
diff --git a/test/CodeGen/X86/inline-asm-out-regs.ll b/test/CodeGen/X86/inline-asm-out-regs.ll

index 3a84bad94d0d370415ae8f7afbef6b12b7bac2ba..01f1397830a4b511032f9d2a800e855fa3a0e73c 100644 (file)
--- a/test/CodeGen/X86/inline-asm-out-regs.ll
+++ b/test/CodeGen/X86/inline-asm-out-regs.ll
@@ -1,6 +1,4 @@
  ; RUN: llvm-as < %s | llc -mtriple=i386-unknown-linux-gnu
-; XFAIL: *
-; Expected to run out of registers during allocation.
  ; PR3391
  
  @pci_indirect = external global { }             ; <{ }*> [#uses=1]
author	Dan Gohman <gohman@apple.com>
	Mon, 13 Apr 2009 16:09:41 +0000 (16:09 +0000)
committer	Dan Gohman <gohman@apple.com>
	Mon, 13 Apr 2009 16:09:41 +0000 (16:09 +0000)
lib/Target/X86/X86FastISel.cpp		patch \| blob \| history
lib/Target/X86/X86ISelDAGToDAG.cpp		patch \| blob \| history
lib/Target/X86/X86Instr64bit.td		patch \| blob \| history
lib/Target/X86/X86InstrInfo.cpp		patch \| blob \| history
lib/Target/X86/X86InstrInfo.td		patch \| blob \| history
lib/Target/X86/X86RegisterInfo.h		patch \| blob \| history
lib/Target/X86/X86RegisterInfo.td		patch \| blob \| history
test/CodeGen/X86/h-register-addressing-32.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/X86/h-register-addressing-64.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/X86/h-register-store.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/X86/h-registers.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/X86/inline-asm-out-regs.ll		patch \| blob \| history