Change from llvm::SmallSet<std::string> to llvm::StringMap<char>.

[oota-llvm.git] / utils / TableGen / AsmWriterEmitter.cpp
diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp

index 016bac5c760fb896af791a1b4872e9e47a5d3dbd..8fbb2cfbb61c887136cc68d75dcedf7f43f32715 100644 (file)
--- a/utils/TableGen/AsmWriterEmitter.cpp
+++ b/utils/TableGen/AsmWriterEmitter.cpp
@@ -2,8 +2,8 @@
  //
  //                     The LLVM Compiler Infrastructure
  //
-// This file was developed by the LLVM research group and is distributed under
-// the University of Illinois Open Source License. See LICENSE.TXT for details.
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
  //
  //===----------------------------------------------------------------------===//
  //
@@ -19,7 +19,6 @@
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/MathExtras.h"
  #include <algorithm>
-#include <ostream>
  using namespace llvm;
  
  static bool isIdentChar(char C) {
@@ -29,7 +28,8 @@ static bool isIdentChar(char C) {
           C == '_';
  }
  
-namespace {
+// This should be an anonymous namespace; using 'llvm' works around a GCC warning.
+namespace llvm {  
    struct AsmWriterOperand {
      enum { isLiteralTextOperand, isMachineInstrOperand } OperandType;
  
@@ -69,7 +69,8 @@ namespace {
  }
  
  namespace llvm {
-  struct AsmWriterInst {
+  class AsmWriterInst {
+  public:
      std::vector<AsmWriterOperand> Operands;
      const CodeGenInstruction *CGI;
  
@@ -98,7 +99,9 @@ std::string AsmWriterOperand::getCode() const {
    if (OperandType == isLiteralTextOperand)
      return "O << \"" + Str + "\"; ";
  
-  std::string Result = Str + "(MI, " + utostr(MIOpNo);
+  std::string Result = Str + "(MI";
+  if (MIOpNo != ~0U)
+    Result += ", " + utostr(MIOpNo);
    if (!MiModifier.empty())
      Result += ", \"" + MiModifier + '"';
    return Result + "); ";
@@ -119,7 +122,7 @@ AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, unsigned Variant) {
    std::string::size_type LastEmitted = 0;
    while (LastEmitted != AsmString.size()) {
      std::string::size_type DollarPos =
-      AsmString.find_first_of("${|}", LastEmitted);
+      AsmString.find_first_of("${|}\\", LastEmitted);
      if (DollarPos == std::string::npos) DollarPos = AsmString.size();
  
      // Emit a constant string fragment.
@@ -129,6 +132,23 @@ AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, unsigned Variant) {
          AddLiteralString(std::string(AsmString.begin()+LastEmitted,
                                       AsmString.begin()+DollarPos));
        LastEmitted = DollarPos;
+    } else if (AsmString[DollarPos] == '\\') {
+      if (DollarPos+1 != AsmString.size() &&
+          (CurVariant == Variant || CurVariant == ~0U)) {
+        if (AsmString[DollarPos+1] == 'n') {
+          AddLiteralString("\\n");
+        } else if (AsmString[DollarPos+1] == 't') {
+          AddLiteralString("\\t");
+        } else if (std::string("${|}\\").find(AsmString[DollarPos+1]) 
+                   != std::string::npos) {
+          AddLiteralString(std::string(1, AsmString[DollarPos+1]));
+        } else {
+          throw "Non-supported escaped character found in instruction '" +
+            CGI.TheDef->getName() + "'!";
+        }
+        LastEmitted = DollarPos+2;
+        continue;
+      }
      } else if (AsmString[DollarPos] == '{') {
        if (CurVariant != ~0U)
          throw "Nested variants found for instruction '" +
@@ -172,7 +192,8 @@ AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, unsigned Variant) {
                            AsmString.begin()+VarEnd);
  
        // Modifier - Support ${foo:modifier} syntax, where "modifier" is passed
-      // into printOperand.
+      // into printOperand.  Also support ${:feature}, which is passed into
+      // PrintSpecial.
        std::string Modifier;
        
        // In order to avoid starting the next string at the terminating curly
@@ -204,26 +225,24 @@ AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, unsigned Variant) {
                  + CGI.TheDef->getName() + "'";
          ++VarEnd;
        }
-      if (VarName.empty())
+      if (VarName.empty() && Modifier.empty())
          throw "Stray '$' in '" + CGI.TheDef->getName() +
                "' asm string, maybe you want $$?";
  
-      unsigned OpNo = CGI.getOperandNamed(VarName);
-      CodeGenInstruction::OperandInfo OpInfo = CGI.OperandList[OpNo];
-
-      // If this is a two-address instruction and we are not accessing the
-      // 0th operand, remove an operand.
-      unsigned MIOp = OpInfo.MIOperandNo;
-      if (CGI.isTwoAddress && MIOp != 0) {
-        if (MIOp == 1)
-          throw "Should refer to operand #0 instead of #1 for two-address"
-            " instruction '" + CGI.TheDef->getName() + "'!";
-        --MIOp;
+      if (VarName.empty()) {
+        // Just a modifier, pass this into PrintSpecial.
+        Operands.push_back(AsmWriterOperand("PrintSpecial", ~0U, Modifier));
+      } else {
+        // Otherwise, normal operand.
+        unsigned OpNo = CGI.getOperandNamed(VarName);
+        CodeGenInstruction::OperandInfo OpInfo = CGI.OperandList[OpNo];
+
+        if (CurVariant == Variant || CurVariant == ~0U) {
+          unsigned MIOp = OpInfo.MIOperandNo;
+          Operands.push_back(AsmWriterOperand(OpInfo.PrinterMethodName, MIOp,
+                                              Modifier));
+        }
        }
-
-      if (CurVariant == Variant || CurVariant == ~0U) 
-        Operands.push_back(AsmWriterOperand(OpInfo.PrinterMethodName, MIOp,
-                                            Modifier));
        LastEmitted = VarEnd;
      }
    }
@@ -240,11 +259,12 @@ unsigned AsmWriterInst::MatchesAllButOneOp(const AsmWriterInst &Other)const{
  
    unsigned MismatchOperand = ~0U;
    for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
-    if (Operands[i] != Other.Operands[i])
+    if (Operands[i] != Other.Operands[i]) {
        if (MismatchOperand != ~0U)  // Already have one mismatch?
          return ~1U;
        else
          MismatchOperand = i;
+    }
    }
    return MismatchOperand;
  }
@@ -330,19 +350,20 @@ static void EmitInstructions(std::vector<AsmWriterInst> &Insts,
  
  void AsmWriterEmitter::
  FindUniqueOperandCommands(std::vector<std::string> &UniqueOperandCommands, 
-                          std::vector<unsigned> &InstIdxs) const {
-  InstIdxs.clear();
-  InstIdxs.resize(NumberedInstructions.size());
+                          std::vector<unsigned> &InstIdxs,
+                          std::vector<unsigned> &InstOpsUsed) const {
+  InstIdxs.assign(NumberedInstructions.size(), ~0U);
    
    // This vector parallels UniqueOperandCommands, keeping track of which
    // instructions each case is used for.  Each entry is a comma-separated
    // string of enums.
    std::vector<std::string> InstrsForCase;
    InstrsForCase.resize(UniqueOperandCommands.size());
+  InstOpsUsed.assign(UniqueOperandCommands.size(), 0);
    
    for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) {
      const AsmWriterInst *Inst = getAsmWriterInstByID(i);
-    if (Inst == 0) continue;  // PHI, INLINEASM, etc.
+    if (Inst == 0) continue;  // PHI, INLINEASM, LABEL, etc.
      
      std::string Command;
      if (Inst->Operands.empty())
@@ -369,6 +390,60 @@ FindUniqueOperandCommands(std::vector<std::string> &UniqueOperandCommands,
        InstIdxs[i] = UniqueOperandCommands.size();
        UniqueOperandCommands.push_back(Command);
        InstrsForCase.push_back(Inst->CGI->TheDef->getName());
+
+      // This command matches one operand so far.
+      InstOpsUsed.push_back(1);
+    }
+  }
+  
+  // For each entry of UniqueOperandCommands, there is a set of instructions
+  // that uses it.  If the next command of every instruction in the set is
+  // identical, fold it into the command.
+  for (unsigned CommandIdx = 0, e = UniqueOperandCommands.size();
+       CommandIdx != e; ++CommandIdx) {
+    
+    for (unsigned Op = 1; ; ++Op) {
+      // Scan for the first instruction in the set.
+      std::vector<unsigned>::iterator NIT =
+        std::find(InstIdxs.begin(), InstIdxs.end(), CommandIdx);
+      if (NIT == InstIdxs.end()) break;  // No commonality.
+
+      // If this instruction has no more operands, there isn't anything to
+      // merge into this command.
+      const AsmWriterInst *FirstInst = 
+        getAsmWriterInstByID(NIT-InstIdxs.begin());
+      if (!FirstInst || FirstInst->Operands.size() == Op)
+        break;
+
+      // Otherwise, scan to see if all of the other instructions in this command
+      // set share the operand.
+      bool AllSame = true;
+      
+      for (NIT = std::find(NIT+1, InstIdxs.end(), CommandIdx);
+           NIT != InstIdxs.end();
+           NIT = std::find(NIT+1, InstIdxs.end(), CommandIdx)) {
+        // Okay, found another instruction in this command set.  If the operand
+        // matches, we're ok, otherwise bail out.
+        const AsmWriterInst *OtherInst = 
+          getAsmWriterInstByID(NIT-InstIdxs.begin());
+        if (!OtherInst || OtherInst->Operands.size() == Op ||
+            OtherInst->Operands[Op] != FirstInst->Operands[Op]) {
+          AllSame = false;
+          break;
+        }
+      }
+      if (!AllSame) break;
+      
+      // Okay, everything in this command set has the same next operand.  Add it
+      // to UniqueOperandCommands and remember that it was consumed.
+      std::string Command = "    " + FirstInst->Operands[Op].getCode() + "\n";
+      
+      // If this is the last operand, emit a return after the code.
+      if (FirstInst->Operands.size() == Op+1)
+        Command += "    return true;\n";
+      
+      UniqueOperandCommands[CommandIdx] += Command;
+      InstOpsUsed[CommandIdx]++;
      }
    }
    
@@ -423,9 +498,10 @@ void AsmWriterEmitter::run(std::ostream &O) {
    // Build an aggregate string, and build a table of offsets into it.
    std::map<std::string, unsigned> StringOffset;
    std::string AggregateString;
-  AggregateString += '\0';
+  AggregateString.push_back(0);  // "\0"
+  AggregateString.push_back(0);  // "\0"
    
-  /// OpcodeInfo - Theis encodes the index of the string to use for the first
+  /// OpcodeInfo - This encodes the index of the string to use for the first
    /// chunk of the output as well as indices used for operand printing.
    std::vector<unsigned> OpcodeInfo;
    
@@ -433,9 +509,14 @@ void AsmWriterEmitter::run(std::ostream &O) {
    for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) {
      AsmWriterInst *AWI = CGIAWIMap[NumberedInstructions[i]];
      unsigned Idx;
-    if (AWI == 0 || AWI->Operands[0].Str.empty()) {
+    if (AWI == 0) {
        // Something not handled by the asmwriter printer.
        Idx = 0;
+    } else if (AWI->Operands[0].OperandType != 
+                        AsmWriterOperand::isLiteralTextOperand ||
+               AWI->Operands[0].Str.empty()) {
+      // Something handled by the asmwriter printer, but with no leading string.
+      Idx = 1;
      } else {
        unsigned &Entry = StringOffset[AWI->Operands[0].Str];
        if (Entry == 0) {
@@ -455,7 +536,7 @@ void AsmWriterEmitter::run(std::ostream &O) {
    }
    
    // Figure out how many bits we used for the string index.
-  unsigned AsmStrBits = Log2_32_Ceil(MaxStringIdx);
+  unsigned AsmStrBits = Log2_32_Ceil(MaxStringIdx+1);
    
    // To reduce code size, we compactify common instructions into a few bits
    // in the opcode-indexed table.
@@ -467,42 +548,48 @@ void AsmWriterEmitter::run(std::ostream &O) {
    while (1) {
      std::vector<std::string> UniqueOperandCommands;
  
-    // For the first operand check, add a default value that unhandled
-    // instructions will use.
+    // For the first operand check, add a default value for instructions with
+    // just opcode strings to use.
      if (isFirst) {
-      UniqueOperandCommands.push_back("    return false;\n");
+      UniqueOperandCommands.push_back("    return true;\n");
        isFirst = false;
      }
      
      std::vector<unsigned> InstIdxs;
-    FindUniqueOperandCommands(UniqueOperandCommands, InstIdxs);
+    std::vector<unsigned> NumInstOpsHandled;
+    FindUniqueOperandCommands(UniqueOperandCommands, InstIdxs,
+                              NumInstOpsHandled);
      
      // If we ran out of operands to print, we're done.
      if (UniqueOperandCommands.empty()) break;
      
-    // FIXME: GROW THEM MAXIMALLY.
-
      // Compute the number of bits we need to represent these cases, this is
      // ceil(log2(numentries)).
      unsigned NumBits = Log2_32_Ceil(UniqueOperandCommands.size());
      
      // If we don't have enough bits for this operand, don't include it.
      if (NumBits > BitsLeft) {
-      DEBUG(std::cerr << "Not enough bits to densely encode " << NumBits
-                      << " more bits\n");
+      DOUT << "Not enough bits to densely encode " << NumBits
+           << " more bits\n";
        break;
      }
      
      // Otherwise, we can include this in the initial lookup table.  Add it in.
      BitsLeft -= NumBits;
      for (unsigned i = 0, e = InstIdxs.size(); i != e; ++i)
-      OpcodeInfo[i] |= InstIdxs[i] << (BitsLeft+AsmStrBits);
+      if (InstIdxs[i] != ~0U)
+        OpcodeInfo[i] |= InstIdxs[i] << (BitsLeft+AsmStrBits);
      
      // Remove the info about this operand.
      for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) {
        if (AsmWriterInst *Inst = getAsmWriterInstByID(i))
-        if (!Inst->Operands.empty())
-          Inst->Operands.erase(Inst->Operands.begin());
+        if (!Inst->Operands.empty()) {
+          unsigned NumOps = NumInstOpsHandled[InstIdxs[i]];
+          assert(NumOps <= Inst->Operands.size() &&
+                 "Can't remove this many ops!");
+          Inst->Operands.erase(Inst->Operands.begin(),
+                               Inst->Operands.begin()+NumOps);
+        }
      }
      
      // Remember the handlers for this set of operands.
@@ -551,12 +638,25 @@ void AsmWriterEmitter::run(std::ostream &O) {
    O << "\";\n\n";
  
    O << "  if (MI->getOpcode() == TargetInstrInfo::INLINEASM) {\n"
+    << "    O << \"\\t\";\n"
      << "    printInlineAsm(MI);\n"
      << "    return true;\n"
+    << "  } else if (MI->getOpcode() == TargetInstrInfo::LABEL) {\n"
+    << "    printLabel(MI);\n"
+    << "    return true;\n"
+    << "  } else if (MI->getOpcode() == TargetInstrInfo::DECLARE) {\n"
+    << "    printDeclare(MI);\n"
+    << "    return true;\n"
+    << "  } else if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) {\n"
+    << "    printImplicitDef(MI);\n"
+    << "    return true;\n"
      << "  }\n\n";
    
+  O << "  O << \"\\t\";\n\n";
+
    O << "  // Emit the opcode for the instruction.\n"
      << "  unsigned Bits = OpInfo[MI->getOpcode()];\n"
+    << "  if (Bits == 0) return false;\n"
      << "  O << AsmStrs+(Bits & " << (1 << AsmStrBits)-1 << ");\n\n";
  
    // Output the table driven operand information.
@@ -575,10 +675,7 @@ void AsmWriterEmitter::run(std::ostream &O) {
      O << "\n  // Fragment " << i << " encoded into " << NumBits
        << " bits for " << Commands.size() << " unique commands.\n";
      
-    if (Commands.size() == 1) {
-      // Only one possibility, just emit it.
-      O << Commands[0];
-    } else if (Commands.size() == 2) {
+    if (Commands.size() == 2) {
      // Emit two possibilities with if/else.
        O << "  if ((Bits >> " << (BitsLeft+AsmStrBits) << ") & "
          << ((1 << NumBits)-1) << ") {\n"
@@ -623,8 +720,8 @@ void AsmWriterEmitter::run(std::ostream &O) {
        EmitInstructions(Instructions, O);
  
      O << "  }\n";
+    O << "  return true;\n";
    }
    
-  O << "  return true;\n"
-       "}\n";
+  O << "}\n";
  }