Add support for escaping {} in asm strings, based on patch from Nick Burns.

[oota-llvm.git] / utils / TableGen / AsmWriterEmitter.cpp
diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp

index 3ddda97a0297cfbc93eb1ad5caf032dcd76b40b2..4ba22758a600e17658e2a60ab9f5677e945d8fc8 100644 (file)
--- a/utils/TableGen/AsmWriterEmitter.cpp
+++ b/utils/TableGen/AsmWriterEmitter.cpp
@@ -2,8 +2,8 @@
  //
  //                     The LLVM Compiler Infrastructure
  //
-// This file was developed by the LLVM research group and is distributed under
-// the University of Illinois Open Source License. See LICENSE.TXT for details.
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
  //
  //===----------------------------------------------------------------------===//
  //
@@ -19,7 +19,6 @@
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/MathExtras.h"
  #include <algorithm>
-#include <ostream>
  using namespace llvm;
  
  static bool isIdentChar(char C) {
@@ -29,7 +28,8 @@ static bool isIdentChar(char C) {
           C == '_';
  }
  
-namespace {
+// This should be an anon namespace; using llvm works around a GCC warning.
+namespace llvm {  
    struct AsmWriterOperand {
      enum { isLiteralTextOperand, isMachineInstrOperand } OperandType;
  
@@ -69,7 +69,8 @@ namespace {
  }
  
  namespace llvm {
-  struct AsmWriterInst {
+  class AsmWriterInst {
+  public:
      std::vector<AsmWriterOperand> Operands;
      const CodeGenInstruction *CGI;
  
@@ -98,7 +99,9 @@ std::string AsmWriterOperand::getCode() const {
    if (OperandType == isLiteralTextOperand)
      return "O << \"" + Str + "\"; ";
  
-  std::string Result = Str + "(MI, " + utostr(MIOpNo);
+  std::string Result = Str + "(MI";
+  if (MIOpNo != ~0U)
+    Result += ", " + utostr(MIOpNo);
    if (!MiModifier.empty())
      Result += ", \"" + MiModifier + '"';
    return Result + "); ";
@@ -119,7 +122,7 @@ AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, unsigned Variant) {
    std::string::size_type LastEmitted = 0;
    while (LastEmitted != AsmString.size()) {
      std::string::size_type DollarPos =
-      AsmString.find_first_of("${|}", LastEmitted);
+      AsmString.find_first_of("${|}\\", LastEmitted);
      if (DollarPos == std::string::npos) DollarPos = AsmString.size();
  
      // Emit a constant string fragment.
@@ -129,6 +132,23 @@ AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, unsigned Variant) {
          AddLiteralString(std::string(AsmString.begin()+LastEmitted,
                                       AsmString.begin()+DollarPos));
        LastEmitted = DollarPos;
+    } else if (AsmString[DollarPos] == '\\') {
+      if (DollarPos+1 != AsmString.size() &&
+          (CurVariant == Variant || CurVariant == ~0U)) {
+        if (AsmString[DollarPos+1] == 'n') {
+          AddLiteralString("\\n");
+        } else if (AsmString[DollarPos+1] == 't') {
+          AddLiteralString("\\t");
+        } else if (std::string("${|}\\").find(AsmString[DollarPos+1]) 
+                   != std::string::npos) {
+          AddLiteralString(std::string(1, AsmString[DollarPos+1]));
+        } else {
+          throw "Non-supported escaped character found in instruction '" +
+            CGI.TheDef->getName() + "'!";
+        }
+        LastEmitted = DollarPos+2;
+        continue;
+      }
      } else if (AsmString[DollarPos] == '{') {
        if (CurVariant != ~0U)
          throw "Nested variants found for instruction '" +
@@ -172,7 +192,8 @@ AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, unsigned Variant) {
                            AsmString.begin()+VarEnd);
  
        // Modifier - Support ${foo:modifier} syntax, where "modifier" is passed
-      // into printOperand.
+      // into printOperand.  Also support ${:feature}, which is passed into
+      // PrintSpecial.
        std::string Modifier;
        
        // In order to avoid starting the next string at the terminating curly
@@ -204,26 +225,24 @@ AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, unsigned Variant) {
                  + CGI.TheDef->getName() + "'";
          ++VarEnd;
        }
-      if (VarName.empty())
+      if (VarName.empty() && Modifier.empty())
          throw "Stray '$' in '" + CGI.TheDef->getName() +
                "' asm string, maybe you want $$?";
  
-      unsigned OpNo = CGI.getOperandNamed(VarName);
-      CodeGenInstruction::OperandInfo OpInfo = CGI.OperandList[OpNo];
-
-      // If this is a two-address instruction and we are not accessing the
-      // 0th operand, remove an operand.
-      unsigned MIOp = OpInfo.MIOperandNo;
-      if (CGI.isTwoAddress && MIOp != 0) {
-        if (MIOp == 1)
-          throw "Should refer to operand #0 instead of #1 for two-address"
-            " instruction '" + CGI.TheDef->getName() + "'!";
-        --MIOp;
+      if (VarName.empty()) {
+        // Just a modifier, pass this into PrintSpecial.
+        Operands.push_back(AsmWriterOperand("PrintSpecial", ~0U, Modifier));
+      } else {
+        // Otherwise, normal operand.
+        unsigned OpNo = CGI.getOperandNamed(VarName);
+        CodeGenInstruction::OperandInfo OpInfo = CGI.OperandList[OpNo];
+
+        if (CurVariant == Variant || CurVariant == ~0U) {
+          unsigned MIOp = OpInfo.MIOperandNo;
+          Operands.push_back(AsmWriterOperand(OpInfo.PrinterMethodName, MIOp,
+                                              Modifier));
+        }
        }
-
-      if (CurVariant == Variant || CurVariant == ~0U) 
-        Operands.push_back(AsmWriterOperand(OpInfo.PrinterMethodName, MIOp,
-                                            Modifier));
        LastEmitted = VarEnd;
      }
    }
@@ -240,11 +259,12 @@ unsigned AsmWriterInst::MatchesAllButOneOp(const AsmWriterInst &Other)const{
  
    unsigned MismatchOperand = ~0U;
    for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
-    if (Operands[i] != Other.Operands[i])
+    if (Operands[i] != Other.Operands[i]) {
        if (MismatchOperand != ~0U)  // Already have one mismatch?
          return ~1U;
        else
          MismatchOperand = i;
+    }
    }
    return MismatchOperand;
  }
@@ -330,27 +350,30 @@ static void EmitInstructions(std::vector<AsmWriterInst> &Insts,
  
  void AsmWriterEmitter::
  FindUniqueOperandCommands(std::vector<std::string> &UniqueOperandCommands, 
-                          std::vector<unsigned> &InstIdxs, unsigned Op) const {
-  InstIdxs.clear();
-  InstIdxs.resize(NumberedInstructions.size());
+                          std::vector<unsigned> &InstIdxs,
+                          std::vector<unsigned> &InstOpsUsed) const {
+  InstIdxs.assign(NumberedInstructions.size(), ~0U);
    
    // This vector parallels UniqueOperandCommands, keeping track of which
    // instructions each case are used for.  It is a comma separated string of
    // enums.
    std::vector<std::string> InstrsForCase;
    InstrsForCase.resize(UniqueOperandCommands.size());
+  InstOpsUsed.assign(UniqueOperandCommands.size(), 0);
    
    for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) {
      const AsmWriterInst *Inst = getAsmWriterInstByID(i);
-    if (Inst == 0) continue;  // PHI, INLINEASM, etc.
+    if (Inst == 0) continue;  // PHI, INLINEASM, LABEL, etc.
      
      std::string Command;
-    if (Op > Inst->Operands.size())
+    if (Inst->Operands.empty())
        continue;   // Instruction already done.
-    else if (Op == Inst->Operands.size())
-      Command = "    return true;\n";
-    else
-      Command = "    " + Inst->Operands[Op].getCode() + "\n";
+
+    Command = "    " + Inst->Operands[0].getCode() + "\n";
+
+    // If this is the last operand, emit a return.
+    if (Inst->Operands.size() == 1)
+      Command += "    return true;\n";
      
      // Check to see if we already have 'Command' in UniqueOperandCommands.
      // If not, add it.
@@ -367,6 +390,60 @@ FindUniqueOperandCommands(std::vector<std::string> &UniqueOperandCommands,
        InstIdxs[i] = UniqueOperandCommands.size();
        UniqueOperandCommands.push_back(Command);
        InstrsForCase.push_back(Inst->CGI->TheDef->getName());
+
+      // This command matches one operand so far.
+      InstOpsUsed.push_back(1);
+    }
+  }
+  
+  // For each entry of UniqueOperandCommands, there is a set of instructions
+  // that uses it.  If the next command of every instruction in the set is
+  // identical, fold it into the command.
+  for (unsigned CommandIdx = 0, e = UniqueOperandCommands.size();
+       CommandIdx != e; ++CommandIdx) {
+    
+    for (unsigned Op = 1; ; ++Op) {
+      // Scan for the first instruction in the set.
+      std::vector<unsigned>::iterator NIT =
+        std::find(InstIdxs.begin(), InstIdxs.end(), CommandIdx);
+      if (NIT == InstIdxs.end()) break;  // No commonality.
+
+      // If this instruction has no more operands, there is nothing to merge
+      // into this command.
+      const AsmWriterInst *FirstInst = 
+        getAsmWriterInstByID(NIT-InstIdxs.begin());
+      if (!FirstInst || FirstInst->Operands.size() == Op)
+        break;
+
+      // Otherwise, scan to see if all of the other instructions in this command
+      // set share the operand.
+      bool AllSame = true;
+      
+      for (NIT = std::find(NIT+1, InstIdxs.end(), CommandIdx);
+           NIT != InstIdxs.end();
+           NIT = std::find(NIT+1, InstIdxs.end(), CommandIdx)) {
+        // Okay, found another instruction in this command set.  If the operand
+        // matches, we're ok, otherwise bail out.
+        const AsmWriterInst *OtherInst = 
+          getAsmWriterInstByID(NIT-InstIdxs.begin());
+        if (!OtherInst || OtherInst->Operands.size() == Op ||
+            OtherInst->Operands[Op] != FirstInst->Operands[Op]) {
+          AllSame = false;
+          break;
+        }
+      }
+      if (!AllSame) break;
+      
+      // Okay, everything in this command set has the same next operand.  Add it
+      // to UniqueOperandCommands and remember that it was consumed.
+      std::string Command = "    " + FirstInst->Operands[Op].getCode() + "\n";
+      
+      // If this is the last operand, emit a return after the code.
+      if (FirstInst->Operands.size() == Op+1)
+        Command += "    return true;\n";
+      
+      UniqueOperandCommands[CommandIdx] += Command;
+      InstOpsUsed[CommandIdx]++;
      }
    }
    
@@ -421,88 +498,113 @@ void AsmWriterEmitter::run(std::ostream &O) {
    // Build an aggregate string, and build a table of offsets into it.
    std::map<std::string, unsigned> StringOffset;
    std::string AggregateString;
-  AggregateString += '\0';
+  AggregateString.push_back(0);  // "\0"
+  AggregateString.push_back(0);  // "\0"
    
-  /// OpcodeInfo - The first value in the pair is the index into the string, the
-  /// second is an index used for operand printing information.
-  std::vector<std::pair<unsigned short, unsigned short> > OpcodeInfo;
+  /// OpcodeInfo - This encodes the index of the string to use for the first
+  /// chunk of the output as well as indices used for operand printing.
+  std::vector<unsigned> OpcodeInfo;
    
+  unsigned MaxStringIdx = 0;
    for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) {
      AsmWriterInst *AWI = CGIAWIMap[NumberedInstructions[i]];
      unsigned Idx;
-    if (AWI == 0 || AWI->Operands[0].Str.empty()) {
+    if (AWI == 0) {
        // Something not handled by the asmwriter printer.
        Idx = 0;
+    } else if (AWI->Operands[0].OperandType != 
+                        AsmWriterOperand::isLiteralTextOperand ||
+               AWI->Operands[0].Str.empty()) {
+      // Something handled by the asmwriter printer, but with no leading string.
+      Idx = 1;
      } else {
        unsigned &Entry = StringOffset[AWI->Operands[0].Str];
        if (Entry == 0) {
          // Add the string to the aggregate if this is the first time found.
-        Entry = AggregateString.size();
+        MaxStringIdx = Entry = AggregateString.size();
          std::string Str = AWI->Operands[0].Str;
          UnescapeString(Str);
          AggregateString += Str;
          AggregateString += '\0';
        }
        Idx = Entry;
-      assert(Entry < 65536 && "Must not use unsigned short for table idx!");
  
        // Nuke the string from the operand list.  It is now handled!
        AWI->Operands.erase(AWI->Operands.begin());
      }
-    OpcodeInfo.push_back(std::pair<unsigned short, unsigned short>(Idx,0));
+    OpcodeInfo.push_back(Idx);
    }
    
+  // Figure out how many bits we used for the string index.
+  unsigned AsmStrBits = Log2_32_Ceil(MaxStringIdx);
+  
    // To reduce code size, we compactify common instructions into a few bits
    // in the opcode-indexed table.
-  // 16 bits to play with.
-  unsigned BitsLeft = 16;
+  unsigned BitsLeft = 32-AsmStrBits;
  
    std::vector<std::vector<std::string> > TableDrivenOperandPrinters;
    
-  for (unsigned i = 0; ; ++i) {
+  bool isFirst = true;
+  while (1) {
      std::vector<std::string> UniqueOperandCommands;
  
-    // For the first operand check, add a default value that unhandled
-    // instructions will use.
-    if (i == 0)
-      UniqueOperandCommands.push_back("    return false;\n");
+    // For the first operand check, add a default value for instructions with
+    // just opcode strings to use.
+    if (isFirst) {
+      UniqueOperandCommands.push_back("    return true;\n");
+      isFirst = false;
+    }
      
      std::vector<unsigned> InstIdxs;
-    FindUniqueOperandCommands(UniqueOperandCommands, InstIdxs, i);
+    std::vector<unsigned> NumInstOpsHandled;
+    FindUniqueOperandCommands(UniqueOperandCommands, InstIdxs,
+                              NumInstOpsHandled);
      
      // If we ran out of operands to print, we're done.
      if (UniqueOperandCommands.empty()) break;
      
-    // FIXME: GROW THEM MAXIMALLY.
-
      // Compute the number of bits we need to represent these cases, this is
      // ceil(log2(numentries)).
      unsigned NumBits = Log2_32_Ceil(UniqueOperandCommands.size());
      
      // If we don't have enough bits for this operand, don't include it.
      if (NumBits > BitsLeft) {
-      DEBUG(std::cerr << "Not enough bits to densely encode " << NumBits
-                      << " more bits\n");
+      DOUT << "Not enough bits to densely encode " << NumBits
+           << " more bits\n";
        break;
      }
      
      // Otherwise, we can include this in the initial lookup table.  Add it in.
      BitsLeft -= NumBits;
      for (unsigned i = 0, e = InstIdxs.size(); i != e; ++i)
-      OpcodeInfo[i].second |= InstIdxs[i] << BitsLeft;
+      if (InstIdxs[i] != ~0U)
+        OpcodeInfo[i] |= InstIdxs[i] << (BitsLeft+AsmStrBits);
      
+    // Remove the info about this operand.
+    for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) {
+      if (AsmWriterInst *Inst = getAsmWriterInstByID(i))
+        if (!Inst->Operands.empty()) {
+          unsigned NumOps = NumInstOpsHandled[InstIdxs[i]];
+          assert(NumOps <= Inst->Operands.size() &&
+                 "Can't remove this many ops!");
+          Inst->Operands.erase(Inst->Operands.begin(),
+                               Inst->Operands.begin()+NumOps);
+        }
+    }
+    
+    // Remember the handlers for this set of operands.
      TableDrivenOperandPrinters.push_back(UniqueOperandCommands);
    }
    
    
    
-  O<<"  static const struct { unsigned short StrIdx, Bits; } OpInfo[] = {\n";
+  O<<"  static const unsigned OpInfo[] = {\n";
    for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) {
-    O << "    { " << OpcodeInfo[i].first << ", " << OpcodeInfo[i].second
-      << " },\t// " << NumberedInstructions[i]->TheDef->getName() << "\n";
+    O << "    " << OpcodeInfo[i] << "U,\t// "
+      << NumberedInstructions[i]->TheDef->getName() << "\n";
    }
    // Add a dummy entry so the array init doesn't end with a comma.
-  O << "    { 65535, 65535 }\n";
+  O << "    0U\n";
    O << "  };\n\n";
    
    // Emit the string itself.
@@ -536,17 +638,29 @@ void AsmWriterEmitter::run(std::ostream &O) {
    O << "\";\n\n";
  
    O << "  if (MI->getOpcode() == TargetInstrInfo::INLINEASM) {\n"
+    << "    O << \"\\t\";\n"
      << "    printInlineAsm(MI);\n"
      << "    return true;\n"
+    << "  } else if (MI->getOpcode() == TargetInstrInfo::LABEL) {\n"
+    << "    printLabel(MI);\n"
+    << "    return true;\n"
+    << "  } else if (MI->getOpcode() == TargetInstrInfo::DECLARE) {\n"
+    << "    printDeclare(MI);\n"
+    << "    return true;\n"
+    << "  } else if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) {\n"
+    << "    printImplicitDef(MI);\n"
+    << "    return true;\n"
      << "  }\n\n";
    
+  O << "  O << \"\\t\";\n\n";
+
    O << "  // Emit the opcode for the instruction.\n"
-    << "  O << AsmStrs+OpInfo[MI->getOpcode()].StrIdx;\n\n";
+    << "  unsigned Bits = OpInfo[MI->getOpcode()];\n"
+    << "  if (Bits == 0) return false;\n"
+    << "  O << AsmStrs+(Bits & " << (1 << AsmStrBits)-1 << ");\n\n";
  
    // Output the table driven operand information.
-  O << "  unsigned short Bits = OpInfo[MI->getOpcode()].Bits;\n";
-
-  BitsLeft = 16;
+  BitsLeft = 32-AsmStrBits;
    for (unsigned i = 0, e = TableDrivenOperandPrinters.size(); i != e; ++i) {
      std::vector<std::string> &Commands = TableDrivenOperandPrinters[i];
  
@@ -559,32 +673,38 @@ void AsmWriterEmitter::run(std::ostream &O) {
      BitsLeft -= NumBits;
      
      O << "\n  // Fragment " << i << " encoded into " << NumBits
-      << " bits for " << Commands.size() << " unique commands.\n"
-      << "  switch ((Bits >> " << BitsLeft << ") & " << ((1 << NumBits)-1)
-      << ") {\n"
-      << "  default:   // unreachable.\n";
+      << " bits for " << Commands.size() << " unique commands.\n";
      
-    // Print out all the cases.
-    for (unsigned i = 0, e = Commands.size(); i != e; ++i) {
-      O << "  case " << i << ":\n";
-      O << Commands[i];
-      O << "    break;\n";
+    if (Commands.size() == 2) {
+      // Emit two possibilities with if/else.
+      O << "  if ((Bits >> " << (BitsLeft+AsmStrBits) << ") & "
+        << ((1 << NumBits)-1) << ") {\n"
+        << Commands[1]
+        << "  } else {\n"
+        << Commands[0]
+        << "  }\n\n";
+    } else {
+      O << "  switch ((Bits >> " << (BitsLeft+AsmStrBits) << ") & "
+        << ((1 << NumBits)-1) << ") {\n"
+        << "  default:   // unreachable.\n";
+      
+      // Print out all the cases.
+      for (unsigned i = 0, e = Commands.size(); i != e; ++i) {
+        O << "  case " << i << ":\n";
+        O << Commands[i];
+        O << "    break;\n";
+      }
+      O << "  }\n\n";
      }
-    O << "  }\n\n";
    }
    
-  // Okay, go through and strip out the operand information that we just
-  // emitted.
-  unsigned NumOpsToRemove = TableDrivenOperandPrinters.size();
+  // Okay, delete instructions with no operand info left.
    for (unsigned i = 0, e = Instructions.size(); i != e; ++i) {
      // Entire instruction has been emitted?
      AsmWriterInst &Inst = Instructions[i];
-    if (Inst.Operands.size() <= NumOpsToRemove) {
+    if (Inst.Operands.empty()) {
        Instructions.erase(Instructions.begin()+i);
-      --i; --e;      
-    } else {
-      Inst.Operands.erase(Inst.Operands.begin(),
-                          Inst.Operands.begin()+NumOpsToRemove);
+      --i; --e;
      }
    }
  
@@ -593,12 +713,15 @@ void AsmWriterEmitter::run(std::ostream &O) {
    // elements in the vector.
    std::reverse(Instructions.begin(), Instructions.end());
    
-  // Find the opcode # of inline asm
-  O << "  switch (MI->getOpcode()) {\n";
-  while (!Instructions.empty())
-    EmitInstructions(Instructions, O);
-
-  O << "  }\n"
-       "  return true;\n"
-       "}\n";
+  if (!Instructions.empty()) {
+    // Find the opcode # of inline asm.
+    O << "  switch (MI->getOpcode()) {\n";
+    while (!Instructions.empty())
+      EmitInstructions(Instructions, O);
+
+    O << "  }\n";
+    O << "  return true;\n";
+  }
+  
+  O << "}\n";
  }