From 21cc4460efa104e8591b05a90f20130291614344 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Sat, 4 Apr 2009 07:22:01 +0000 Subject: [PATCH] Add support for embedded metadata to LLVM. This introduces two new types of Constant, MDString and MDNode which can only be used by globals with a name that starts with "llvm." or as arguments to a function with the same naming restriction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@68420 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/LangRef.html | 38 ++++++++++ include/llvm/Bitcode/LLVMBitCodes.h | 4 +- include/llvm/Constants.h | 104 +++++++++++++++++++++++++++ include/llvm/Type.h | 2 +- include/llvm/Value.h | 4 +- lib/AsmParser/LLLexer.cpp | 1 + lib/AsmParser/LLParser.cpp | 43 ++++++++++- lib/AsmParser/LLParser.h | 3 + lib/AsmParser/LLToken.h | 3 + lib/Bitcode/Reader/BitcodeReader.cpp | 29 +++++++- lib/Bitcode/Writer/BitcodeWriter.cpp | 31 ++++++++ lib/VMCore/AsmWriter.cpp | 25 ++++++- lib/VMCore/Constants.cpp | 80 +++++++++++++++++++++ lib/VMCore/Type.cpp | 2 + lib/VMCore/Verifier.cpp | 51 ++++++++++++- test/Feature/embeddedmetadata.ll | 11 +++ unittests/VMCore/MetadataTest.cpp | 96 +++++++++++++++++++++++++ 17 files changed, 518 insertions(+), 9 deletions(-) create mode 100644 test/Feature/embeddedmetadata.ll create mode 100644 unittests/VMCore/MetadataTest.cpp diff --git a/docs/LangRef.html b/docs/LangRef.html index cac8fc79dad..457a56ab0cb 100644 --- a/docs/LangRef.html +++ b/docs/LangRef.html @@ -65,6 +65,7 @@
  • Global Variable and Function Addresses
  • Undefined Values
  • Constant Expressions
  • +
  • Embedded Metadata
  • Other Values @@ -1847,6 +1848,14 @@ constants and smaller complex constants.

    large arrays) and is always exactly equivalent to using explicit zero initializers. + +
    Metadata node
    + +
    A metadata node is a structure-like constant with the type of an empty + struct. For example: "{ } !{ i32 0, { } !"test" }". Unlike other + constants that are meant to be interpreted as part of the instruction stream, + metadata is a place to attach additional information such as debug info. +
    @@ -2015,6 +2024,35 @@ following is the syntax for constant expressions:

    + + + +
    + +

    Embedded metadata provides a way to attach arbitrary data to the +instruction stream without affecting the behaviour of the program. There are +two metadata primitives, strings and nodes. All metadata has the type of an +empty struct and is identified in syntax by a preceding exclamation point +('!'). +

    + +

    A metadata string is a string surrounded by double quotes. It can contain +any character by escaping non-printable characters with "\xx" where "xx" is +the two-digit hex code. For example: "!"test\00"". +

    + +

    Metadata nodes are represented with notation similar to structure constants +(a comma separated list of elements, surrounded by braces and preceded by an +exclamation point). For example: "!{ { } !"test\00", i32 10}". +

    + +

    Optimizations may rely on metadata to provide additional information about +the program that isn't available in the instructions, or that isn't easily +computable. Similarly, the code generator may expect a certain metadata format +to be used to express debugging information.

    +
    + diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h index 7770b39615e..f41747315a9 100644 --- a/include/llvm/Bitcode/LLVMBitCodes.h +++ b/include/llvm/Bitcode/LLVMBitCodes.h @@ -126,7 +126,9 @@ namespace bitc { CST_CODE_CE_SHUFFLEVEC = 16, // CE_SHUFFLEVEC: [opval, opval, opval] CST_CODE_CE_CMP = 17, // CE_CMP: [opty, opval, opval, pred] CST_CODE_INLINEASM = 18, // INLINEASM: [sideeffect,asmstr,conststr] - CST_CODE_CE_SHUFVEC_EX = 19 // SHUFVEC_EX: [opty, opval, opval, opval] + CST_CODE_CE_SHUFVEC_EX = 19, // SHUFVEC_EX: [opty, opval, opval, opval] + CST_CODE_MDSTRING = 20, // MDSTRING: [values] + CST_CODE_MDNODE = 21 // MDNODE: [n x (type num, value num)] }; /// CastOpcodes - These are values used in the bitcode files to encode which diff --git a/include/llvm/Constants.h b/include/llvm/Constants.h index f25d010f4ac..0f4c29a0587 100644 --- a/include/llvm/Constants.h +++ b/include/llvm/Constants.h @@ -26,6 +26,7 @@ #include "llvm/OperandTraits.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/APFloat.h" +#include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/SmallVector.h" namespace llvm { @@ -813,6 +814,109 @@ public: } }; +//===----------------------------------------------------------------------===// +/// MDString - a single uniqued string. +/// These are used to efficiently contain a byte sequence for metadata. +/// +class MDString : public Constant { + MDString(const MDString &); // DO NOT IMPLEMENT + void *operator new(size_t, unsigned); // DO NOT IMPLEMENT + MDString(const char *begin, const char *end); + + const char *StrBegin, *StrEnd; +protected: + // allocate space for exactly zero operands + void *operator new(size_t s) { + return User::operator new(s, 0); + } +public: + /// get() - Static factory methods - Return objects of the specified value. + /// + static MDString *get(const char *StrBegin, const char *StrEnd); + + /// size() - The length of this string. 
+ /// + unsigned size() const { return StrEnd - StrBegin; } + + /// begin() - Pointer to the first byte of the string. + /// + const char *begin() const { return StrBegin; } + + /// end() - Pointer to one byte past the end of the string. + /// + const char *end() const { return StrEnd; } + + /// getType() specialization - Type is always an empty struct. + /// + inline const Type *getType() const { + return Type::EmptyStructTy; + } + + /// isNullValue - Return true if this is the value that would be returned by + /// getNullValue. This always returns false because getNullValue will never + /// produce metadata. + virtual bool isNullValue() const { + return false; + } + + virtual void destroyConstant(); + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const MDString *) { return true; } + static bool classof(const Value *V) { + return V->getValueID() == MDStringVal; + } +}; + +//===----------------------------------------------------------------------===// +/// MDNode - a tuple of other values. +/// These contain a list of the Constants that represent the metadata. +/// +class MDNode : public Constant, public FoldingSetNode { + MDNode(const MDNode &); // DO NOT IMPLEMENT +protected: + explicit MDNode(Constant*const* Vals, unsigned NumVals); +public: + /// get() - Static factory methods - Return objects of the specified value. + /// + static MDNode *get(Constant*const* Vals, unsigned NumVals); + + // Transparently provide more efficient getOperand methods. + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant); + + /// getType() specialization - Type is always an empty struct. + /// + inline const Type *getType() const { + return Type::EmptyStructTy; + } + + /// isNullValue - Return true if this is the value that would be returned by + /// getNullValue. This always returns false because getNullValue will never + /// produce metadata. 
+ virtual bool isNullValue() const { + return false; + } + + /// Profile - calculate a unique identifier for this MDNode to collapse + /// duplicates + void Profile(FoldingSetNodeID &ID); + + virtual void destroyConstant(); + virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U); + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const MDNode *) { return true; } + static bool classof(const Value *V) { + return V->getValueID() == MDNodeVal; + } +}; + +template <> +struct OperandTraits : VariadicOperandTraits<> { +}; + +DEFINE_TRANSPARENT_CASTED_OPERAND_ACCESSORS(MDNode, Constant) + } // End llvm namespace #endif diff --git a/include/llvm/Type.h b/include/llvm/Type.h index 3d2a6f8d8f7..a284d80bb5c 100644 --- a/include/llvm/Type.h +++ b/include/llvm/Type.h @@ -326,7 +326,7 @@ public: //===--------------------------------------------------------------------===// // These are the builtin types that are always available... 
// - static const Type *VoidTy, *LabelTy, *FloatTy, *DoubleTy; + static const Type *VoidTy, *LabelTy, *FloatTy, *DoubleTy, *EmptyStructTy; static const Type *X86_FP80Ty, *FP128Ty, *PPC_FP128Ty; static const IntegerType *Int1Ty, *Int8Ty, *Int16Ty, *Int32Ty, *Int64Ty; diff --git a/include/llvm/Value.h b/include/llvm/Value.h index 85b8a185c8a..a38d8cb8d98 100644 --- a/include/llvm/Value.h +++ b/include/llvm/Value.h @@ -203,13 +203,15 @@ public: ConstantStructVal, // This is an instance of ConstantStruct ConstantVectorVal, // This is an instance of ConstantVector ConstantPointerNullVal, // This is an instance of ConstantPointerNull + MDStringVal, // This is an instance of MDString + MDNodeVal, // This is an instance of MDNode InlineAsmVal, // This is an instance of InlineAsm PseudoSourceValueVal, // This is an instance of PseudoSourceValue InstructionVal, // This is an instance of Instruction // Markers: ConstantFirstVal = FunctionVal, - ConstantLastVal = ConstantPointerNullVal + ConstantLastVal = MDNodeVal }; /// getValueID - Return an ID for the concrete type of this object. This is diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 95e6c90a59c..d4815d83d68 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -265,6 +265,7 @@ lltok::Kind LLLexer::LexToken() { case ';': SkipLineComment(); return LexToken(); + case '!': return lltok::Metadata; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '-': diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 7800b8f7218..177161e3a61 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -1561,6 +1561,29 @@ bool LLParser::ParseValID(ValID &ID) { ID.StrVal = Lex.getStrVal(); ID.Kind = ValID::t_LocalName; break; + case lltok::Metadata: { // !{...} MDNode, !"foo" MDString + ID.Kind = ValID::t_Constant; + Lex.Lex(); + if (Lex.getKind() == lltok::lbrace) { + // MDNode: + // ::= '!' 
'{' TypeAndValue (',' TypeAndValue)* '}' + SmallVector Elts; + if (ParseMDNodeVector(Elts) || + ParseToken(lltok::rbrace, "expected end of metadata node")) + return true; + + ID.ConstantVal = MDNode::get(&Elts[0], Elts.size()); + return false; + } + + // MDString: + // ::= '!' STRINGCONSTANT + std::string Str; + if (ParseStringConstant(Str)) return true; + + ID.ConstantVal = MDString::get(Str.data(), Str.data() + Str.size()); + return false; + } case lltok::APSInt: ID.APSIntVal = Lex.getAPSIntVal(); ID.Kind = ValID::t_APSInt; @@ -1661,7 +1684,7 @@ bool LLParser::ParseValID(ValID &ID) { "array element #" + utostr(i) + " is not of type '" +Elts[0]->getType()->getDescription()); } - + ID.ConstantVal = ConstantArray::get(ATy, &Elts[0], Elts.size()); ID.Kind = ValID::t_Constant; return false; @@ -3221,3 +3244,21 @@ bool LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) { Inst = InsertValueInst::Create(Val0, Val1, Indices.begin(), Indices.end()); return false; } + +//===----------------------------------------------------------------------===// +// Embedded metadata. 
+//===----------------------------------------------------------------------===// + +/// ParseMDNodeVector +/// ::= TypeAndValue (',' TypeAndValue)* +bool LLParser::ParseMDNodeVector(SmallVectorImpl &Elts) { + assert(Lex.getKind() == lltok::lbrace); + Lex.Lex(); + do { + Constant *C; + if (ParseGlobalTypeAndValue(C)) return true; + Elts.push_back(C); + } while (EatIfPresent(lltok::comma)); + + return false; +} diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index 3fc2fd262d0..44f4c2a6524 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -27,6 +27,8 @@ namespace llvm { class Instruction; class Constant; class GlobalValue; + class MDString; + class MDNode; struct ValID; class LLParser { @@ -156,6 +158,7 @@ namespace llvm { bool ParseGlobalValue(const Type *Ty, Constant *&V); bool ParseGlobalTypeAndValue(Constant *&V); bool ParseGlobalValueVector(SmallVectorImpl &Elts); + bool ParseMDNodeVector(SmallVectorImpl &); // Function Semantic Analysis. diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index ec3769bdd09..35cb4dbaa74 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -115,6 +115,9 @@ namespace lltok { LocalVar, // %foo %"foo" StringConstant, // "foo" + // Metadata valued tokens. + Metadata, // !"foo" !{i8 42} + // Type valued tokens (TyVal). Type, diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index dd9db8f4366..66ccdc2f90d 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -286,10 +286,12 @@ void BitcodeReaderValueList::ResolveConstantForwardRefs() { UserCS->getType()->isPacked()); } else if (isa(UserC)) { NewC = ConstantVector::get(&NewOps[0], NewOps.size()); - } else { - // Must be a constant expression. 
+ } else if (isa(UserC)) { NewC = cast(UserC)->getWithOperands(&NewOps[0], NewOps.size()); + } else { + assert(isa(UserC) && "Must be a metadata node."); + NewC = MDNode::get(&NewOps[0], NewOps.size()); } UserC->replaceAllUsesWith(NewC); @@ -999,6 +1001,29 @@ bool BitcodeReader::ParseConstants() { AsmStr, ConstrStr, HasSideEffects); break; } + case bitc::CST_CODE_MDSTRING: { + if (Record.size() < 2) return Error("Invalid MDSTRING record"); + unsigned MDStringLength = Record.size(); + SmallString<8> String; + String.resize(MDStringLength); + for (unsigned i = 0; i != MDStringLength; ++i) + String[i] = Record[i]; + V = MDString::get(String.c_str(), String.c_str() + MDStringLength); + break; + } + case bitc::CST_CODE_MDNODE: { + if (Record.empty() || Record.size() % 2 == 1) + return Error("Invalid CST_MDNODE record"); + + unsigned Size = Record.size(); + SmallVector Elts; + for (unsigned i = 0; i != Size; i += 2) { + const Type *Ty = getTypeByID(Record[i], false); + Elts.push_back(ValueList.getConstantFwdRef(Record[i+1], Ty)); + } + V = MDNode::get(&Elts[0], Elts.size()); + break; + } } ValueList.AssignValue(V, NextCstNo); diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index d4d3443ee05..c836d39d259 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -458,6 +458,8 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, unsigned String8Abbrev = 0; unsigned CString7Abbrev = 0; unsigned CString6Abbrev = 0; + unsigned MDString8Abbrev = 0; + unsigned MDString6Abbrev = 0; // If this is a constant pool for the module, emit module-specific abbrevs. if (isGlobal) { // Abbrev for CST_CODE_AGGREGATE. @@ -485,6 +487,19 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); CString6Abbrev = Stream.EmitAbbrev(Abbv); + + // Abbrev for CST_CODE_MDSTRING. 
+ Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_MDSTRING)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); + MDString8Abbrev = Stream.EmitAbbrev(Abbv); + // Abbrev for CST_CODE_MDSTRING. + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_MDSTRING)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); + MDString6Abbrev = Stream.EmitAbbrev(Abbv); } SmallVector Record; @@ -678,6 +693,22 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, Record.push_back(CE->getPredicate()); break; } + } else if (const MDString *S = dyn_cast(C)) { + Code = bitc::CST_CODE_MDSTRING; + AbbrevToUse = MDString6Abbrev; + for (unsigned i = 0, e = S->size(); i != e; ++i) { + char V = S->begin()[i]; + Record.push_back(V); + + if (!BitCodeAbbrevOp::isChar6(V)) + AbbrevToUse = MDString8Abbrev; + } + } else if (const MDNode *N = dyn_cast(C)) { + Code = bitc::CST_CODE_MDNODE; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + Record.push_back(VE.getTypeID(N->getOperand(i)->getType())); + Record.push_back(VE.getValueID(N->getOperand(i))); + } } else { assert(0 && "Unknown constant!"); } diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index 3f8be4781f3..83fc35bbedf 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -23,6 +23,7 @@ #include "llvm/InlineAsm.h" #include "llvm/Instruction.h" #include "llvm/Instructions.h" +#include "llvm/Metadata.h" #include "llvm/Module.h" #include "llvm/ValueSymbolTable.h" #include "llvm/TypeSymbolTable.h" @@ -361,8 +362,8 @@ namespace { return; // If this is a structure or opaque type, add a name for the type. 
- if ((isa(Ty) || isa(Ty)) - && !TP.hasTypeName(Ty)) { + if (((isa(Ty) && cast(Ty)->getNumElements()) + || isa(Ty)) && !TP.hasTypeName(Ty)) { TP.addTypeName(Ty, "%"+utostr(unsigned(NumberedTypes.size()))); NumberedTypes.push_back(Ty); } @@ -935,7 +936,27 @@ static void WriteConstantInt(raw_ostream &Out, const Constant *CV, Out << "undef"; return; } + + if (const MDString *S = dyn_cast(CV)) { + Out << "!\""; + PrintEscapedString(S->begin(), S->size(), Out); + Out << '"'; + return; + } + if (const MDNode *N = dyn_cast(CV)) { + Out << "!{"; + for (MDNode::const_op_iterator I = N->op_begin(), E = N->op_end(); I != E;){ + TypePrinter.print((*I)->getType(), Out); + Out << ' '; + WriteAsOperandInternal(Out, *I, TypePrinter, Machine); + if (++I != E) + Out << ", "; + } + Out << "}"; + return; + } + if (const ConstantExpr *CE = dyn_cast(CV)) { Out << CE->getOpcodeName(); if (CE->isCompare()) diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp index 7cd3599a61a..2afaa6c7b37 100644 --- a/lib/VMCore/Constants.cpp +++ b/lib/VMCore/Constants.cpp @@ -17,7 +17,9 @@ #include "llvm/GlobalValue.h" #include "llvm/Instructions.h" #include "llvm/Module.h" +#include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringMap.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ManagedStatic.h" @@ -1657,6 +1659,63 @@ void UndefValue::destroyConstant() { destroyConstantImpl(); } +//---- MDString::get() implementation +// + +MDString::MDString(const char *begin, const char *end) + : Constant(Type::EmptyStructTy, MDStringVal, 0, 0), + StrBegin(begin), StrEnd(end) {} + +static ManagedStatic > MDStringCache; + +MDString *MDString::get(const char *StrBegin, const char *StrEnd) { + StringMapEntry &Entry = MDStringCache->GetOrCreateValue(StrBegin, + StrEnd); + MDString *&S = Entry.getValue(); + if (!S) S = new MDString(Entry.getKeyData(), + Entry.getKeyData() + Entry.getKeyLength()); + return S; +} + +void 
MDString::destroyConstant() { + MDStringCache->erase(MDStringCache->find(StrBegin, StrEnd)); + destroyConstantImpl(); +} + +//---- MDNode::get() implementation +// + +static ManagedStatic > MDNodeSet; + +MDNode::MDNode(Constant*const* Vals, unsigned NumVals) + : Constant(Type::EmptyStructTy, MDNodeVal, + OperandTraits::op_end(this) - NumVals, NumVals) { + std::copy(Vals, Vals + NumVals, OperandList); +} + +void MDNode::Profile(FoldingSetNodeID &ID) { + for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) + ID.AddPointer(*I); +} + +MDNode *MDNode::get(Constant*const* Vals, unsigned NumVals) { + FoldingSetNodeID ID; + for (unsigned i = 0; i != NumVals; ++i) + ID.AddPointer(Vals[i]); + + void *InsertPoint; + if (MDNode *N = MDNodeSet->FindNodeOrInsertPos(ID, InsertPoint)) + return N; + + // InsertPoint will have been set by the FindNodeOrInsertPos call. + MDNode *N = new(NumVals) MDNode(Vals, NumVals); + MDNodeSet->InsertNode(N, InsertPoint); + return N; +} + +void MDNode::destroyConstant() { + destroyConstantImpl(); +} //---- ConstantExpr::get() implementations... // @@ -2741,3 +2800,24 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV, // Delete the old constant! destroyConstant(); } + +void MDNode::replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) { + assert(isa(To) && "Cannot make Constant refer to non-constant!"); + + SmallVector Values; + Values.reserve(getNumOperands()); // Build replacement array... + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + Constant *Val = getOperand(i); + if (Val == From) Val = cast(To); + Values.push_back(Val); + } + + Constant *Replacement = MDNode::get(&Values[0], Values.size()); + assert(Replacement != this && "I didn't contain From!"); + + // Everyone using this now uses the replacement. + uncheckedReplaceAllUsesWith(Replacement); + + // Delete the old constant! 
+ destroyConstant(); +} diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp index 237ec05fcad..c14d5119e5d 100644 --- a/lib/VMCore/Type.cpp +++ b/lib/VMCore/Type.cpp @@ -288,6 +288,8 @@ const IntegerType *Type::Int16Ty = new BuiltinIntegerType(16); const IntegerType *Type::Int32Ty = new BuiltinIntegerType(32); const IntegerType *Type::Int64Ty = new BuiltinIntegerType(64); +const Type *Type::EmptyStructTy = StructType::get(NULL, NULL); + //===----------------------------------------------------------------------===// // Derived Type Constructors diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp index 5af66df7c9a..df9aceec024 100644 --- a/lib/VMCore/Verifier.cpp +++ b/lib/VMCore/Verifier.cpp @@ -338,6 +338,36 @@ static RegisterPass X("verify", "Module Verifier"); #define Assert4(C, M, V1, V2, V3, V4) \ do { if (!(C)) { CheckFailed(M, V1, V2, V3, V4); return; } } while (0) +/// Check whether or not a Value is metadata or made up of a constant +/// expression involving metadata. +static bool isMetadata(Value *X) { + SmallPtrSet Visited; + SmallVector Queue; + Queue.push_back(X); + + while (!Queue.empty()) { + Value *V = Queue.back(); + Queue.pop_back(); + if (!Visited.insert(V)) + continue; + + if (isa(V) || isa(V)) + return true; + if (!isa(V)) + continue; + ConstantExpr *CE = cast(V); + + if (CE->getType() != Type::EmptyStructTy) + continue; + + // The only constant expression that works on metadata type is select. 
+ if (CE->getOpcode() != Instruction::Select) return false; + + Queue.push_back(CE->getOperand(1)); + Queue.push_back(CE->getOperand(2)); + } + return false; +} void Verifier::visit(Instruction &I) { for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) @@ -649,6 +679,7 @@ void Verifier::visitReturnInst(ReturnInst &RI) { "Found return instr that returns non-void in Function of void " "return type!", &RI, F->getReturnType()); else if (N == 1 && F->getReturnType() == RI.getOperand(0)->getType()) { + Assert1(!isMetadata(RI.getOperand(0)), "Invalid use of metadata!", &RI); // Exactly one return value and it matches the return type. Good. } else if (const StructType *STy = dyn_cast(F->getReturnType())) { // The return type is a struct; check for multiple return values. @@ -696,6 +727,8 @@ void Verifier::visitSelectInst(SelectInst &SI) { Assert1(SI.getTrueValue()->getType() == SI.getType(), "Select values must have same type as select instruction!", &SI); + Assert1(!isMetadata(SI.getOperand(1)) && !isMetadata(SI.getOperand(2)), + "Invalid use of metadata!", &SI); visitInstruction(SI); } @@ -951,6 +984,13 @@ void Verifier::visitPHINode(PHINode &PN) { Assert1(PN.getType() == PN.getIncomingValue(i)->getType(), "PHI node operands are not the same type as the result!", &PN); + // Check that it's not a PHI of metadata. + if (PN.getType() == Type::EmptyStructTy) { + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) + Assert1(!isMetadata(PN.getIncomingValue(i)), + "Invalid use of metadata!", &PN); + } + // All other PHI node constraints are checked in the visitBasicBlock method. visitInstruction(PN); @@ -981,6 +1021,14 @@ void Verifier::VerifyCallSite(CallSite CS) { "Call parameter type does not match function signature!", CS.getArgument(i), FTy->getParamType(i), I); + if (CS.getCalledValue()->getNameLen() < 5 || + strncmp(CS.getCalledValue()->getNameStart(), "llvm.", 5) != 0) { + // Verify that none of the arguments are metadata... 
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) + Assert2(!isMetadata(CS.getArgument(i)), "Invalid use of metadata!", + CS.getArgument(i), I); + } + const AttrListPtr &Attrs = CS.getAttributes(); Assert1(VerifyAttributeCount(Attrs, CS.arg_size()), @@ -1152,6 +1200,7 @@ void Verifier::visitStoreInst(StoreInst &SI) { cast(SI.getOperand(1)->getType())->getElementType(); Assert2(ElTy == SI.getOperand(0)->getType(), "Stored value type does not match pointer operand type!", &SI, ElTy); + Assert1(!isMetadata(SI.getOperand(0)), "Invalid use of metadata!", &SI); visitInstruction(SI); } @@ -1481,7 +1530,7 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty, if (EltTy != Ty) Suffix += "v" + utostr(NumElts); - Suffix += "i" + utostr(GotBits);; + Suffix += "i" + utostr(GotBits); // Check some constraints on various intrinsics. switch (ID) { diff --git a/test/Feature/embeddedmetadata.ll b/test/Feature/embeddedmetadata.ll new file mode 100644 index 00000000000..6f16e6aeda0 --- /dev/null +++ b/test/Feature/embeddedmetadata.ll @@ -0,0 +1,11 @@ +; RUN: llvm-as < %s | llvm-dis | not grep undef + +declare i8 @llvm.something({ } %a) + +@llvm.foo = internal constant { } !{i17 123, { } !"foobar"} + +define void @foo() { + %x = call i8 @llvm.something({ } !{{ } !"f\00oa", i42 123}) + ret void +} + diff --git a/unittests/VMCore/MetadataTest.cpp b/unittests/VMCore/MetadataTest.cpp new file mode 100644 index 00000000000..cd258bb1708 --- /dev/null +++ b/unittests/VMCore/MetadataTest.cpp @@ -0,0 +1,96 @@ +//===- llvm/unittest/VMCore/Metadata.cpp - Metadata unit tests ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "gtest/gtest.h" +#include "llvm/Constants.h" +#include + +using namespace llvm; + +namespace { + +// Test that construction of MDString with different value produces different +// MDString objects, even with the same string pointer and nulls in the string. +TEST(MDStringTest, CreateDifferent) { + char x[3] = { 'f', 0, 'A' }; + MDString *s1 = MDString::get(&x[0], &x[3]); + x[2] = 'B'; + MDString *s2 = MDString::get(&x[0], &x[3]); + EXPECT_NE(s1, s2); +} + +// Test that creation of MDStrings with the same string contents produces the +// same MDString object, even with different pointers. +TEST(MDStringTest, CreateSame) { + char x[4] = { 'a', 'b', 'c', 'X' }; + char y[4] = { 'a', 'b', 'c', 'Y' }; + + MDString *s1 = MDString::get(&x[0], &x[3]); + MDString *s2 = MDString::get(&y[0], &y[3]); + EXPECT_EQ(s1, s2); +} + +// Test that MDString prints out the string we fed it. +TEST(MDStringTest, PrintingSimple) { + char *str = new char[13]; + strncpy(str, "testing 1 2 3", 13); + MDString *s = MDString::get(str, str+13); + strncpy(str, "aaaaaaaaaaaaa", 13); + delete[] str; + + std::ostringstream oss; + s->print(oss); + EXPECT_STREQ("{ } !\"testing 1 2 3\"", oss.str().c_str()); +} + +// Test printing of MDString with non-printable characters. +TEST(MDStringTest, PrintingComplex) { + char str[5] = {0, '\n', '"', '\\', -1}; + MDString *s = MDString::get(str+0, str+5); + std::ostringstream oss; + s->print(oss); + EXPECT_STREQ("{ } !\"\\00\\0A\\22\\5C\\FF\"", oss.str().c_str()); +} + +// Test the two constructors, and containing other Constants. 
+TEST(MDNodeTest, Everything) { + char x[3] = { 'a', 'b', 'c' }; + char y[3] = { '1', '2', '3' }; + + MDString *s1 = MDString::get(&x[0], &x[3]); + MDString *s2 = MDString::get(&y[0], &y[3]); + ConstantInt *CI = ConstantInt::get(APInt(8, 0)); + + std::vector V; + V.push_back(s1); + V.push_back(CI); + V.push_back(s2); + + MDNode *n1 = MDNode::get(&V[0], 3); + MDNode *n2 = MDNode::get((Constant**)&n1, 1); + MDNode *n3 = MDNode::get(&V[0], 3); + EXPECT_NE(n1, n2); + EXPECT_EQ(n1, n3); + + EXPECT_EQ(3u, n1->getNumOperands()); + EXPECT_EQ(s1, n1->getOperand(0)); + EXPECT_EQ(CI, n1->getOperand(1)); + EXPECT_EQ(s2, n1->getOperand(2)); + + EXPECT_EQ(1u, n2->getNumOperands()); + EXPECT_EQ(n1, n2->getOperand(0)); + + std::ostringstream oss1, oss2; + n1->print(oss1); + n2->print(oss2); + EXPECT_STREQ("{ } !{{ } !\"abc\", i8 0, { } !\"123\"}", oss1.str().c_str()); + EXPECT_STREQ("{ } !{{ } !{{ } !\"abc\", i8 0, { } !\"123\"}}", + oss2.str().c_str()); +} +} -- 2.34.1