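Implement support for a new LLVM 1.3 bytecode format, which uses uint's to index into structure types and allows arbitrary 32- and 64-bit integer types to index into sequential types.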

author Chris Lattner <sabre@nondot.org>

Mon, 5 Apr 2004 01:27:26 +0000 (01:27 +0000)

committer Chris Lattner <sabre@nondot.org>

Mon, 5 Apr 2004 01:27:26 +0000 (01:27 +0000)
author Chris Lattner <sabre@nondot.org>
Mon, 5 Apr 2004 01:27:26 +0000 (01:27 +0000)
committer Chris Lattner <sabre@nondot.org>
Mon, 5 Apr 2004 01:27:26 +0000 (01:27 +0000)
diff --git a/lib/Bytecode/Reader/ConstantReader.cpp b/lib/Bytecode/Reader/ConstantReader.cpp

index b4a219df6b2005ee148413c88198a3f4bb79d140..8691b26544b3876ee325377fd10b26e6e202d2f5 100644 (file)
--- a/lib/Bytecode/Reader/ConstantReader.cpp
+++ b/lib/Bytecode/Reader/ConstantReader.cpp
@@ -15,6 +15,7 @@
  #include "ReaderInternals.h"
  #include "llvm/Module.h"
  #include "llvm/Constants.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
  #include <algorithm>
  using namespace llvm;
  
@@ -164,6 +165,20 @@ Constant *BytecodeParser::parseConstantValue(const unsigned char *&Buf,
        return ConstantExpr::getCast(ArgVec[0], getType(TypeID));
      } else if (Opcode == Instruction::GetElementPtr) { // GetElementPtr
        std::vector<Constant*> IdxList(ArgVec.begin()+1, ArgVec.end());
+
+      if (hasRestrictedGEPTypes) {
+        const Type *BaseTy = ArgVec[0]->getType();
+        generic_gep_type_iterator<std::vector<Constant*>::iterator>
+          GTI = gep_type_begin(BaseTy, IdxList.begin(), IdxList.end()),
+          E = gep_type_end(BaseTy, IdxList.begin(), IdxList.end());
+        for (unsigned i = 0; GTI != E; ++GTI, ++i)
+          if (isa<StructType>(*GTI)) {
+            if (IdxList[i]->getType() != Type::UByteTy)
+              throw std::string("Invalid index for getelementptr!");
+            IdxList[i] = ConstantExpr::getCast(IdxList[i], Type::UIntTy);
+          }
+      }
+
        return ConstantExpr::getGetElementPtr(ArgVec[0], IdxList);
      } else if (Opcode == Instruction::Select) {
        assert(ArgVec.size() == 3);
diff --git a/lib/Bytecode/Reader/InstructionReader.cpp b/lib/Bytecode/Reader/InstructionReader.cpp

index 90be8cd6f5a166d31e5ae02488742d48a855e537..d66b12cf0d95a0bdfebb457712b390bc3cf5b6c6 100644 (file)
--- a/lib/Bytecode/Reader/InstructionReader.cpp
+++ b/lib/Bytecode/Reader/InstructionReader.cpp
@@ -308,10 +308,35 @@ void BytecodeParser::ParseInstruction(const unsigned char *&Buf,
      for (unsigned i = 1, e = Args.size(); i != e; ++i) {
        const CompositeType *TopTy = dyn_cast_or_null<CompositeType>(NextTy);
        if (!TopTy) throw std::string("Invalid getelementptr instruction!"); 
-      // FIXME: when PR82 is resolved.
-      unsigned IdxTy = isa<StructType>(TopTy) ? Type::UByteTyID :Type::LongTyID;
-        
-      Idx.push_back(getValue(IdxTy, Args[i]));
+
+      unsigned ValIdx = Args[i];
+      unsigned IdxTy;
+      if (!hasRestrictedGEPTypes) {
+        // Struct indices are always uints, sequential type indices can be any
+        // of the 32 or 64-bit integer types.  The actual choice of type is
+        // encoded in the low two bits of the slot number.
+        if (isa<StructType>(TopTy))
+          IdxTy = Type::UIntTyID;
+        else {
+          switch (ValIdx & 3) {
+          case 0: IdxTy = Type::UIntTyID; break;
+          case 1: IdxTy = Type::IntTyID; break;
+          case 2: IdxTy = Type::ULongTyID; break;
+          case 3: IdxTy = Type::LongTyID; break;
+          }
+          ValIdx >>= 2;
+        }
+      } else {
+        IdxTy = isa<StructType>(TopTy) ? Type::UByteTyID : Type::LongTyID;
+      }
+
+      Idx.push_back(getValue(IdxTy, ValIdx));
+
+      // Convert ubyte struct indices into uint struct indices.
+      if (isa<StructType>(TopTy) && hasRestrictedGEPTypes)
+        if (ConstantUInt *C = dyn_cast<ConstantUInt>(Idx.back()))
+          Idx[Idx.size()-1] = ConstantExpr::getCast(C, Type::UIntTy);
+
        NextTy = GetElementPtrInst::getIndexedType(InstTy, Idx, true);
      }
  
diff --git a/lib/Bytecode/Reader/Reader.cpp b/lib/Bytecode/Reader/Reader.cpp

index 54c91811a9ba746af34a5b1fee447ec717011c2d..2f0879ba3943323c4e83899d7a23b3ca05ff7259 100644 (file)
--- a/lib/Bytecode/Reader/Reader.cpp
+++ b/lib/Bytecode/Reader/Reader.cpp
@@ -647,12 +647,10 @@ void BytecodeParser::ParseVersionInfo(const unsigned char *&Buf,
    // Default values for the current bytecode version
    hasInconsistentModuleGlobalInfo = false;
    hasExplicitPrimitiveZeros = false;
+  hasRestrictedGEPTypes = false;
  
    switch (RevisionNum) {
    case 0:               //  LLVM 1.0, 1.1 release version
-    // Compared to rev #2, we added support for weak linkage, a more dense
-    // encoding, and better varargs support.
-
      // Base LLVM 1.0 bytecode format.
      hasInconsistentModuleGlobalInfo = true;
      hasExplicitPrimitiveZeros = true;
@@ -663,6 +661,13 @@ void BytecodeParser::ParseVersionInfo(const unsigned char *&Buf,
      // Also, it fixed the problem where the size of the ModuleGlobalInfo block
      // included the size for the alignment at the end, where the rest of the
      // blocks did not.
+
+    // LLVM 1.2 and before required that GEP indices be ubyte constants for
+    // structures and longs for sequential types.
+    hasRestrictedGEPTypes = true;
+
+    // FALL THROUGH
+  case 2:               // LLVM 1.3 release version
      break;
  
    default:
diff --git a/lib/Bytecode/Reader/ReaderInternals.h b/lib/Bytecode/Reader/ReaderInternals.h

index 86bf800ca6e721d8596ab3c22266f3857363425a..9e0ffc2c3632c7d9b876a6db4b748f8bfe5e8e6e 100644 (file)
--- a/lib/Bytecode/Reader/ReaderInternals.h
+++ b/lib/Bytecode/Reader/ReaderInternals.h
@@ -108,6 +108,13 @@ private:
    // int/sbyte/etc.
    bool hasExplicitPrimitiveZeros;
  
+  // Flags to control features specific to LLVM 1.2 and before (revision #1)
+
+  // LLVM 1.2 and earlier required that getelementptr structure indices were
+  // ubyte constants and that sequential type indices were longs.
+  bool hasRestrictedGEPTypes;
+
+
    typedef std::vector<ValueList*> ValueTable;
    ValueTable Values;
    ValueTable ModuleValues;
diff --git a/lib/Bytecode/Writer/InstructionWriter.cpp b/lib/Bytecode/Writer/InstructionWriter.cpp

index e86b027135bb22713786561e1e2e58e55f49e5b1..9e063510dfe5c39f33161686a2156ed3eabdcdba 100644 (file)
--- a/lib/Bytecode/Writer/InstructionWriter.cpp
+++ b/lib/Bytecode/Writer/InstructionWriter.cpp
@@ -16,6 +16,7 @@
  #include "llvm/Module.h"
  #include "llvm/DerivedTypes.h"
  #include "llvm/Instructions.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
  #include "Support/Statistic.h"
  #include <algorithm>
  using namespace llvm;
@@ -38,20 +39,48 @@ static void outputInstructionFormat0(const Instruction *I, unsigned Opcode,
    output_vbr(NumArgs + (isa<CastInst>(I) || isa<VANextInst>(I) ||
                          isa<VAArgInst>(I)), Out);
  
-  for (unsigned i = 0; i < NumArgs; ++i) {
-    int Slot = Table.getSlot(I->getOperand(i));
-    assert(Slot >= 0 && "No slot number for value!?!?");      
-    output_vbr((unsigned)Slot, Out);
-  }
+  if (!isa<GetElementPtrInst>(&I)) {
+    for (unsigned i = 0; i < NumArgs; ++i) {
+      int Slot = Table.getSlot(I->getOperand(i));
+      assert(Slot >= 0 && "No slot number for value!?!?");      
+      output_vbr((unsigned)Slot, Out);
+    }
  
-  if (isa<CastInst>(I) || isa<VAArgInst>(I)) {
-    int Slot = Table.getSlot(I->getType());
-    assert(Slot != -1 && "Cast return type unknown?");
-    output_vbr((unsigned)Slot, Out);
-  } else if (const VANextInst *VAI = dyn_cast<VANextInst>(I)) {
-    int Slot = Table.getSlot(VAI->getArgType());
-    assert(Slot != -1 && "VarArg argument type unknown?");
-    output_vbr((unsigned)Slot, Out);
+    if (isa<CastInst>(I) || isa<VAArgInst>(I)) {
+      int Slot = Table.getSlot(I->getType());
+      assert(Slot != -1 && "Cast return type unknown?");
+      output_vbr((unsigned)Slot, Out);
+    } else if (const VANextInst *VAI = dyn_cast<VANextInst>(I)) {
+      int Slot = Table.getSlot(VAI->getArgType());
+      assert(Slot != -1 && "VarArg argument type unknown?");
+      output_vbr((unsigned)Slot, Out);
+    }
+
+  } else {
+    int Slot = Table.getSlot(I->getOperand(0));
+    assert(Slot >= 0 && "No slot number for value!?!?");      
+    output_vbr(unsigned(Slot), Out);
+
+    // We need to encode the type of sequential type indices into their slot #
+    unsigned Idx = 1;
+    for (gep_type_iterator TI = gep_type_begin(I), E = gep_type_end(I);
+         Idx != NumArgs; ++TI, ++Idx) {
+      Slot = Table.getSlot(I->getOperand(Idx));
+      assert(Slot >= 0 && "No slot number for value!?!?");      
+    
+      if (isa<SequentialType>(*TI)) {
+        unsigned IdxId;
+        switch (I->getOperand(Idx)->getType()->getPrimitiveID()) {
+        default: assert(0 && "Unknown index type!");
+        case Type::UIntTyID:  IdxId = 0; break;
+        case Type::IntTyID:   IdxId = 1; break;
+        case Type::ULongTyID: IdxId = 2; break;
+        case Type::LongTyID:  IdxId = 3; break;
+        }
+        Slot = (Slot << 2) | IdxId;
+      }
+      output_vbr(unsigned(Slot), Out);
+    }
    }
  
    align32(Out);    // We must maintain correct alignment!
@@ -119,8 +148,9 @@ static void outputInstrVarArgsCall(const Instruction *I, unsigned Opcode,
  // operand index is >= 2^12.
  //
  static void outputInstructionFormat1(const Instruction *I, unsigned Opcode,
-                                    const SlotCalculator &Table, int *Slots,
-                                    unsigned Type, std::deque<uchar> &Out) {
+                                    const SlotCalculator &Table,
+                                     unsigned *Slots, unsigned Type, 
+                                     std::deque<uchar> &Out) {
    // bits   Instruction format:
    // --------------------------
    // 01-00: Opcode type, fixed to 1.
@@ -138,8 +168,9 @@ static void outputInstructionFormat1(const Instruction *I, unsigned Opcode,
  // operand index is >= 2^8.
  //
  static void outputInstructionFormat2(const Instruction *I, unsigned Opcode,
-                                    const SlotCalculator &Table, int *Slots,
-                                    unsigned Type, std::deque<uchar> &Out) {
+                                    const SlotCalculator &Table,
+                                     unsigned *Slots, unsigned Type, 
+                                     std::deque<uchar> &Out) {
    // bits   Instruction format:
    // --------------------------
    // 01-00: Opcode type, fixed to 2.
@@ -160,8 +191,9 @@ static void outputInstructionFormat2(const Instruction *I, unsigned Opcode,
  // operand index is >= 2^6.
  //
  static void outputInstructionFormat3(const Instruction *I, unsigned Opcode,
-                                    const SlotCalculator &Table, int *Slots,
-                                    unsigned Type, std::deque<uchar> &Out) {
+                                    const SlotCalculator &Table,
+                                     unsigned *Slots, unsigned Type,
+                                     std::deque<uchar> &Out) {
    // bits   Instruction format:
    // --------------------------
    // 01-00: Opcode type, fixed to 3.
@@ -181,6 +213,7 @@ static void outputInstructionFormat3(const Instruction *I, unsigned Opcode,
  void BytecodeWriter::outputInstruction(const Instruction &I) {
    assert(I.getOpcode() < 62 && "Opcode too big???");
    unsigned Opcode = I.getOpcode();
+  unsigned NumOperands = I.getNumOperands();
  
    // Encode 'volatile load' as 62 and 'volatile store' as 63.
    if (isa<LoadInst>(I) && cast<LoadInst>(I).isVolatile())
@@ -188,17 +221,6 @@ void BytecodeWriter::outputInstruction(const Instruction &I) {
    if (isa<StoreInst>(I) && cast<StoreInst>(I).isVolatile())
      Opcode = 63;
  
-  unsigned NumOperands = I.getNumOperands();
-  int MaxOpSlot = 0;
-  int Slots[3]; Slots[0] = (1 << 12)-1;   // Marker to signify 0 operands
-
-  for (unsigned i = 0; i != NumOperands; ++i) {
-    int slot = Table.getSlot(I.getOperand(i));
-    assert(slot != -1 && "Broken bytecode!");
-    if (slot > MaxOpSlot) MaxOpSlot = slot;
-    if (i < 3) Slots[i] = slot;
-  }
-
    // Figure out which type to encode with the instruction.  Typically we want
    // the type of the first parameter, as opposed to the type of the instruction
    // (for example, with setcc, we always know it returns bool, but the type of
@@ -226,71 +248,101 @@ void BytecodeWriter::outputInstruction(const Instruction &I) {
    assert(Slot != -1 && "Type not available!!?!");
    Type = (unsigned)Slot;
  
-  // Make sure that we take the type number into consideration.  We don't want
-  // to overflow the field size for the instruction format we select.
-  //
-  if (Slot > MaxOpSlot) MaxOpSlot = Slot;
-
-  // Handle the special case for cast...
-  if (isa<CastInst>(I) || isa<VAArgInst>(I)) {
-    // Cast has to encode the destination type as the second argument in the
-    // packet, or else we won't know what type to cast to!
-    Slots[1] = Table.getSlot(I.getType());
-    assert(Slots[1] != -1 && "Cast return type unknown?");
-    if (Slots[1] > MaxOpSlot) MaxOpSlot = Slots[1];
-    NumOperands++;
-  } else if (const VANextInst *VANI = dyn_cast<VANextInst>(&I)) {
-    Slots[1] = Table.getSlot(VANI->getArgType());
-    assert(Slots[1] != -1 && "va_next return type unknown?");
-    if (Slots[1] > MaxOpSlot) MaxOpSlot = Slots[1];
-    NumOperands++;
-  } else if (const CallInst *CI = dyn_cast<CallInst>(&I)){// Handle VarArg calls
-    const PointerType *Ty = cast<PointerType>(CI->getCalledValue()->getType());
+  // Varargs calls and invokes are encoded entirely differently from any other
+  // instructions.
+  if (const CallInst *CI = dyn_cast<CallInst>(&I)){
+    const PointerType *Ty =cast<PointerType>(CI->getCalledValue()->getType());
      if (cast<FunctionType>(Ty->getElementType())->isVarArg()) {
        outputInstrVarArgsCall(CI, Opcode, Table, Type, Out);
        return;
      }
-  } else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) {// ...  & Invokes
-    const PointerType *Ty = cast<PointerType>(II->getCalledValue()->getType());
+  } else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
+    const PointerType *Ty =cast<PointerType>(II->getCalledValue()->getType());
      if (cast<FunctionType>(Ty->getElementType())->isVarArg()) {
        outputInstrVarArgsCall(II, Opcode, Table, Type, Out);
        return;
      }
    }
  
-  // Decide which instruction encoding to use.  This is determined primarily by
-  // the number of operands, and secondarily by whether or not the max operand
-  // will fit into the instruction encoding.  More operands == fewer bits per
-  // operand.
-  //
-  switch (NumOperands) {
-  case 0:
-  case 1:
-    if (MaxOpSlot < (1 << 12)-1) { // -1 because we use 4095 to indicate 0 ops
-      outputInstructionFormat1(&I, Opcode, Table, Slots, Type, Out);
-      return;
+  if (NumOperands <= 3) {
+    // Make sure that we take the type number into consideration.  We don't want
+    // to overflow the field size for the instruction format we select.
+    //
+    unsigned MaxOpSlot = Type;
+    unsigned Slots[3]; Slots[0] = (1 << 12)-1;   // Marker to signify 0 operands
+    
+    for (unsigned i = 0; i != NumOperands; ++i) {
+      int slot = Table.getSlot(I.getOperand(i));
+      assert(slot != -1 && "Broken bytecode!");
+      if (unsigned(slot) > MaxOpSlot) MaxOpSlot = unsigned(slot);
+      Slots[i] = unsigned(slot);
      }
-    break;
  
-  case 2:
-    if (MaxOpSlot < (1 << 8)) {
-      outputInstructionFormat2(&I, Opcode, Table, Slots, Type, Out);
-      return;
+    // Handle the special cases for various instructions...
+    if (isa<CastInst>(I) || isa<VAArgInst>(I)) {
+      // Cast has to encode the destination type as the second argument in the
+      // packet, or else we won't know what type to cast to!
+      Slots[1] = Table.getSlot(I.getType());
+      assert(Slots[1] != ~0U && "Cast return type unknown?");
+      if (Slots[1] > MaxOpSlot) MaxOpSlot = Slots[1];
+      NumOperands++;
+    } else if (const VANextInst *VANI = dyn_cast<VANextInst>(&I)) {
+      Slots[1] = Table.getSlot(VANI->getArgType());
+      assert(Slots[1] != ~0U && "va_next return type unknown?");
+      if (Slots[1] > MaxOpSlot) MaxOpSlot = Slots[1];
+      NumOperands++;
+    } else if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
+      // We need to encode the type of sequential type indices into their slot #
+      unsigned Idx = 1;
+      for (gep_type_iterator I = gep_type_begin(GEP), E = gep_type_end(GEP);
+           I != E; ++I, ++Idx)
+        if (isa<SequentialType>(*I)) {
+          unsigned IdxId;
+          switch (GEP->getOperand(Idx)->getType()->getPrimitiveID()) {
+          default: assert(0 && "Unknown index type!");
+          case Type::UIntTyID:  IdxId = 0; break;
+          case Type::IntTyID:   IdxId = 1; break;
+          case Type::ULongTyID: IdxId = 2; break;
+          case Type::LongTyID:  IdxId = 3; break;
+          }
+          Slots[Idx] = (Slots[Idx] << 2) | IdxId;
+          if (Slots[Idx] > MaxOpSlot) MaxOpSlot = Slots[Idx];
+        }
      }
-    break;
  
-  case 3:
-    if (MaxOpSlot < (1 << 6)) {
-      outputInstructionFormat3(&I, Opcode, Table, Slots, Type, Out);
-      return;
+    // Decide which instruction encoding to use.  This is determined primarily
+    // by the number of operands, and secondarily by whether or not the max
+    // operand will fit into the instruction encoding.  More operands == fewer
+    // bits per operand.
+    //
+    switch (NumOperands) {
+    case 0:
+    case 1:
+      if (MaxOpSlot < (1 << 12)-1) { // -1 because we use 4095 to indicate 0 ops
+        outputInstructionFormat1(&I, Opcode, Table, Slots, Type, Out);
+        return;
+      }
+      break;
+
+    case 2:
+      if (MaxOpSlot < (1 << 8)) {
+        outputInstructionFormat2(&I, Opcode, Table, Slots, Type, Out);
+        return;
+      }
+      break;
+
+    case 3:
+      if (MaxOpSlot < (1 << 6)) {
+        outputInstructionFormat3(&I, Opcode, Table, Slots, Type, Out);
+        return;
+      }
+      break;
+    default:
+      break;
      }
-    break;
-  default:
-    break;
    }
  
    // If we weren't handled before here, we either have a large number of
    // operands or a large operand index that we are referring to.
    outputInstructionFormat0(&I, Opcode, Table, Type, Out);
  }
-
diff --git a/lib/Bytecode/Writer/Writer.cpp b/lib/Bytecode/Writer/Writer.cpp

index 432a39fb3308bf1cd9636338d65fca771a81851d..82fe40db18c39a07fb4790f6cde884ef6efc8477 100644 (file)
--- a/lib/Bytecode/Writer/Writer.cpp
+++ b/lib/Bytecode/Writer/Writer.cpp
@@ -54,9 +54,9 @@ BytecodeWriter::BytecodeWriter(std::deque<unsigned char> &o, const Module *M)
    bool hasNoEndianness  = M->getEndianness() == Module::AnyEndianness;
    bool hasNoPointerSize = M->getPointerSize() == Module::AnyPointerSize;
  
-  // Output the version identifier... we are currently on bytecode version #1,
-  // which corresponds to LLVM v1.2.
-  unsigned Version = (1 << 4) | isBigEndian | (hasLongPointers << 1) |
+  // Output the version identifier... we are currently on bytecode version #2,
+  // which corresponds to LLVM v1.3.
+  unsigned Version = (2 << 4) | isBigEndian | (hasLongPointers << 1) |
                       (hasNoEndianness << 2) | (hasNoPointerSize << 3);
    output_vbr(Version, Out);
    align32(Out);
author	Chris Lattner <sabre@nondot.org>
	Mon, 5 Apr 2004 01:27:26 +0000 (01:27 +0000)
committer	Chris Lattner <sabre@nondot.org>
	Mon, 5 Apr 2004 01:27:26 +0000 (01:27 +0000)
lib/Bytecode/Reader/ConstantReader.cpp		patch \| blob \| history
lib/Bytecode/Reader/InstructionReader.cpp		patch \| blob \| history
lib/Bytecode/Reader/Reader.cpp		patch \| blob \| history
lib/Bytecode/Reader/ReaderInternals.h		patch \| blob \| history
lib/Bytecode/Writer/InstructionWriter.cpp		patch \| blob \| history
lib/Bytecode/Writer/Writer.cpp		patch \| blob \| history