//===----------------------------------------------------------------------===//
#include "llvm/Support/YAMLParser.h"
-
-#include "llvm/ADT/ilist.h"
-#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace yaml;
enum UnicodeEncodingForm {
- UEF_UTF32_LE, //< UTF-32 Little Endian
- UEF_UTF32_BE, //< UTF-32 Big Endian
- UEF_UTF16_LE, //< UTF-16 Little Endian
- UEF_UTF16_BE, //< UTF-16 Big Endian
- UEF_UTF8, //< UTF-8 or ascii.
- UEF_Unknown //< Not a valid Unicode encoding.
+ UEF_UTF32_LE, ///< UTF-32 Little Endian
+ UEF_UTF32_BE, ///< UTF-32 Big Endian
+ UEF_UTF16_LE, ///< UTF-16 Little Endian
+ UEF_UTF16_BE, ///< UTF-16 Big Endian
+ UEF_UTF8, ///< UTF-8 or ascii.
+ UEF_Unknown ///< Not a valid Unicode encoding.
};
/// EncodingInfo - Holds the encoding type and length of the byte order mark if
namespace llvm {
namespace yaml {
+/// Pin the vtables to this file.
+void Node::anchor() {}
+void NullNode::anchor() {}
+void ScalarNode::anchor() {}
+void KeyValueNode::anchor() {}
+void MappingNode::anchor() {}
+void SequenceNode::anchor() {}
+void AliasNode::anchor() {}
+
/// Token - A single YAML token.
struct Token : ilist_node<Token> {
enum TokenKind {
/// @brief Scans YAML tokens from a MemoryBuffer.
class Scanner {
public:
- Scanner(const StringRef Input, SourceMgr &SM);
+ Scanner(StringRef Input, SourceMgr &SM);
+ Scanner(MemoryBufferRef Buffer, SourceMgr &SM_);
/// @brief Parse the next token and return it without popping it.
Token &peekNext();
Token getNext();
void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message,
- ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) {
+ ArrayRef<SMRange> Ranges = None) {
SM.PrintMessage(Loc, Kind, Message, Ranges);
}
}
private:
+ void init(MemoryBufferRef Buffer);
+
StringRef currentInput() {
return StringRef(Current, End - Current);
}
/// sequence of ns-uri-char.
StringRef scan_ns_uri_char();
- /// @brief Scan ns-plain-one-line[133] starting at \a Cur.
- StringRef scan_ns_plain_one_line();
-
/// @brief Consume a minimal well-formed code unit subsequence starting at
/// \a Cur. Return false if it is not the same Unicode scalar value as
/// \a Expected. This updates \a Column.
SourceMgr &SM;
/// @brief The original input.
- MemoryBuffer *InputBuffer;
+ MemoryBufferRef InputBuffer;
/// @brief The current position of the scanner.
StringRef::iterator Current;
/// @brief Can the next token be the start of a simple key?
bool IsSimpleKeyAllowed;
- /// @brief Is the next token required to start a simple key?
- bool IsSimpleKeyRequired;
-
/// @brief True if an error has occurred.
bool Failed;
EscapedInput += "\\r";
else if (*i == 0x1B)
EscapedInput += "\\e";
- else if (*i >= 0 && *i < 0x20) { // Control characters not handled above.
+ else if ((unsigned char)*i < 0x20) { // Control characters not handled above.
std::string HexStr = utohexstr(*i);
EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
} else if (*i & 0x80) { // UTF-8 multiple code unit subsequence.
return EscapedInput;
}
-Scanner::Scanner(StringRef Input, SourceMgr &sm)
- : SM(sm)
- , Indent(-1)
- , Column(0)
- , Line(0)
- , FlowLevel(0)
- , IsStartOfStream(true)
- , IsSimpleKeyAllowed(true)
- , IsSimpleKeyRequired(false)
- , Failed(false) {
- InputBuffer = MemoryBuffer::getMemBuffer(Input, "YAML");
- SM.AddNewSourceBuffer(InputBuffer, SMLoc());
- Current = InputBuffer->getBufferStart();
- End = InputBuffer->getBufferEnd();
+Scanner::Scanner(StringRef Input, SourceMgr &sm) : SM(sm) {
+ init(MemoryBufferRef(Input, "YAML"));
+}
+
+Scanner::Scanner(MemoryBufferRef Buffer, SourceMgr &SM_) : SM(SM_) {
+ init(Buffer);
+}
+
+void Scanner::init(MemoryBufferRef Buffer) {
+ InputBuffer = Buffer;
+ Current = InputBuffer.getBufferStart();
+ End = InputBuffer.getBufferEnd();
+ Indent = -1;
+ Column = 0;
+ Line = 0;
+ FlowLevel = 0;
+ IsStartOfStream = true;
+ IsSimpleKeyAllowed = true;
+ Failed = false;
+ std::unique_ptr<MemoryBuffer> InputBufferOwner =
+ MemoryBuffer::getMemBuffer(Buffer);
+ SM.AddNewSourceBuffer(std::move(InputBufferOwner), SMLoc());
}
Token &Scanner::peekNext() {
}
StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) {
+ if (Position == End)
+ return Position;
// Check 7 bit c-printable - b-char.
if ( *Position == 0x09
|| (*Position >= 0x20 && *Position <= 0x7E))
}
StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) {
+ if (Position == End)
+ return Position;
if (*Position == 0x0D) {
if (Position + 1 != End && *(Position + 1) == 0x0A)
return Position + 2;
return StringRef(Start, Current - Start);
}
-StringRef Scanner::scan_ns_plain_one_line() {
- StringRef::iterator start = Current;
- // The first character must already be verified.
- ++Current;
- while (true) {
- if (Current == End) {
- break;
- } else if (*Current == ':') {
- // Check if the next character is a ns-char.
- if (Current + 1 == End)
- break;
- StringRef::iterator i = skip_ns_char(Current + 1);
- if (Current + 1 != i) {
- Current = i;
- Column += 2; // Consume both the ':' and ns-char.
- } else
- break;
- } else if (*Current == '#') {
- // Check if the previous character was a ns-char.
- // The & 0x80 check is to check for the trailing byte of a utf-8
- if (*(Current - 1) & 0x80 || skip_ns_char(Current - 1) == Current) {
- ++Current;
- ++Column;
- } else
- break;
- } else {
- StringRef::iterator i = skip_nb_char(Current);
- if (i == Current)
- break;
- Current = i;
- ++Column;
- }
- }
- return StringRef(start, Current - start);
-}
-
bool Scanner::consume(uint32_t Expected) {
if (Expected >= 0x80)
report_fatal_error("Not dealing with this yet");
void Scanner::skip(uint32_t Distance) {
Current += Distance;
Column += Distance;
+ assert(Current <= End && "Skipped past the end");
}
bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
Current = skip_while(&Scanner::skip_ns_char, Current);
StringRef Name(NameStart, Current - NameStart);
Current = skip_while(&Scanner::skip_s_white, Current);
-
+
+ Token T;
if (Name == "YAML") {
Current = skip_while(&Scanner::skip_ns_char, Current);
- Token T;
T.Kind = Token::TK_VersionDirective;
T.Range = StringRef(Start, Current - Start);
TokenQueue.push_back(T);
return true;
+ } else if(Name == "TAG") {
+ Current = skip_while(&Scanner::skip_ns_char, Current);
+ Current = skip_while(&Scanner::skip_s_white, Current);
+ Current = skip_while(&Scanner::skip_ns_char, Current);
+ T.Kind = Token::TK_TagDirective;
+ T.Range = StringRef(Start, Current - Start);
+ TokenQueue.push_back(T);
+ return true;
}
return false;
}
++Current;
// Repeat until the previous character was not a '\' or was an escaped
// backslash.
- } while (*(Current - 1) == '\\' && wasEscaped(Start + 1, Current));
+ } while ( Current != End
+ && *(Current - 1) == '\\'
+ && wasEscaped(Start + 1, Current));
} else {
skip(1);
while (true) {
}
}
}
+
+ if (Current == End) {
+ setError("Expected quote at end of scalar", Current);
+ return false;
+ }
+
skip(1); // Skip ending quote.
Token T;
T.Kind = Token::TK_Scalar;
}
Stream::Stream(StringRef Input, SourceMgr &SM)
- : scanner(new Scanner(Input, SM))
- , CurrentDoc(0) {}
+ : scanner(new Scanner(Input, SM)), CurrentDoc() {}
+
+Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM)
+ : scanner(new Scanner(InputBuffer, SM)), CurrentDoc() {}
Stream::~Stream() {}
, Ranges);
}
-void Stream::handleYAMLDirective(const Token &t) {
- // TODO: Ensure version is 1.x.
-}
-
document_iterator Stream::begin() {
if (CurrentDoc)
report_fatal_error("Can only iterate over the stream once");
i->skip();
}
-Node::Node(unsigned int Type, OwningPtr<Document> &D, StringRef A)
- : Doc(D)
- , TypeID(Type)
- , Anchor(A) {
+Node::Node(unsigned int Type, std::unique_ptr<Document> &D, StringRef A,
+ StringRef T)
+ : Doc(D), TypeID(Type), Anchor(A), Tag(T) {
SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin());
SourceRange = SMRange(Start, Start);
}
+std::string Node::getVerbatimTag() const {
+ StringRef Raw = getRawTag();
+ if (!Raw.empty() && Raw != "!") {
+ std::string Ret;
+ if (Raw.find_last_of('!') == 0) {
+ Ret = Doc->getTagMap().find("!")->second;
+ Ret += Raw.substr(1);
+ return Ret;
+ } else if (Raw.startswith("!!")) {
+ Ret = Doc->getTagMap().find("!!")->second;
+ Ret += Raw.substr(2);
+ return Ret;
+ } else {
+ StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1);
+ std::map<StringRef, StringRef>::const_iterator It =
+ Doc->getTagMap().find(TagHandle);
+ if (It != Doc->getTagMap().end())
+ Ret = It->second;
+ else {
+ Token T;
+ T.Kind = Token::TK_Tag;
+ T.Range = TagHandle;
+ setError(Twine("Unknown tag handle ") + TagHandle, T);
+ }
+ Ret += Raw.substr(Raw.find_last_of('!') + 1);
+ return Ret;
+ }
+ }
+
+ switch (getType()) {
+ case NK_Null:
+ return "tag:yaml.org,2002:null";
+ case NK_Scalar:
+ // TODO: Tag resolution.
+ return "tag:yaml.org,2002:str";
+ case NK_Mapping:
+ return "tag:yaml.org,2002:map";
+ case NK_Sequence:
+ return "tag:yaml.org,2002:seq";
+ }
+
+ return "";
+}
+
Token &Node::peekNext() {
return Doc->peekNext();
}
return UnquotedValue;
}
// Plain or block.
- size_t trimtrail = Value.rfind(' ');
- return Value.drop_back(
- trimtrail == StringRef::npos ? 0 : Value.size() - trimtrail);
+ return Value.rtrim(" ");
}
StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue
// TODO: Report error.
break;
unsigned int UnicodeScalarValue;
- UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue);
+ if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue))
+ // TODO: Report error.
+ UnicodeScalarValue = 0xFFFD;
encodeUTF8(UnicodeScalarValue, Storage);
UnquotedValue = UnquotedValue.substr(2);
break;
// TODO: Report error.
break;
unsigned int UnicodeScalarValue;
- UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue);
+ if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue))
+ // TODO: Report error.
+ UnicodeScalarValue = 0xFFFD;
encodeUTF8(UnicodeScalarValue, Storage);
UnquotedValue = UnquotedValue.substr(4);
break;
// TODO: Report error.
break;
unsigned int UnicodeScalarValue;
- UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue);
+ if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue))
+ // TODO: Report error.
+ UnicodeScalarValue = 0xFFFD;
encodeUTF8(UnicodeScalarValue, Storage);
UnquotedValue = UnquotedValue.substr(8);
break;
void MappingNode::increment() {
if (failed()) {
IsAtEnd = true;
- CurrentEntry = 0;
+ CurrentEntry = nullptr;
return;
}
if (CurrentEntry) {
CurrentEntry->skip();
if (Type == MT_Inline) {
IsAtEnd = true;
- CurrentEntry = 0;
+ CurrentEntry = nullptr;
return;
}
}
case Token::TK_BlockEnd:
getNext();
IsAtEnd = true;
- CurrentEntry = 0;
+ CurrentEntry = nullptr;
break;
default:
setError("Unexpected token. Expected Key or Block End", T);
case Token::TK_Error:
IsAtEnd = true;
- CurrentEntry = 0;
+ CurrentEntry = nullptr;
}
} else {
switch (T.Kind) {
case Token::TK_Error:
// Set this to end iterator.
IsAtEnd = true;
- CurrentEntry = 0;
+ CurrentEntry = nullptr;
break;
default:
setError( "Unexpected token. Expected Key, Flow Entry, or Flow "
"Mapping End."
, T);
IsAtEnd = true;
- CurrentEntry = 0;
+ CurrentEntry = nullptr;
}
}
}
void SequenceNode::increment() {
if (failed()) {
IsAtEnd = true;
- CurrentEntry = 0;
+ CurrentEntry = nullptr;
return;
}
if (CurrentEntry)
case Token::TK_BlockEntry:
getNext();
CurrentEntry = parseBlockNode();
- if (CurrentEntry == 0) { // An error occurred.
+ if (!CurrentEntry) { // An error occurred.
IsAtEnd = true;
- CurrentEntry = 0;
+ CurrentEntry = nullptr;
}
break;
case Token::TK_BlockEnd:
getNext();
IsAtEnd = true;
- CurrentEntry = 0;
+ CurrentEntry = nullptr;
break;
default:
setError( "Unexpected token. Expected Block Entry or Block End."
, T);
case Token::TK_Error:
IsAtEnd = true;
- CurrentEntry = 0;
+ CurrentEntry = nullptr;
}
} else if (SeqType == ST_Indentless) {
switch (T.Kind) {
case Token::TK_BlockEntry:
getNext();
CurrentEntry = parseBlockNode();
- if (CurrentEntry == 0) { // An error occurred.
+ if (!CurrentEntry) { // An error occurred.
IsAtEnd = true;
- CurrentEntry = 0;
+ CurrentEntry = nullptr;
}
break;
default:
case Token::TK_Error:
IsAtEnd = true;
- CurrentEntry = 0;
+ CurrentEntry = nullptr;
}
} else if (SeqType == ST_Flow) {
switch (T.Kind) {
case Token::TK_Error:
// Set this to end iterator.
IsAtEnd = true;
- CurrentEntry = 0;
+ CurrentEntry = nullptr;
break;
case Token::TK_StreamEnd:
case Token::TK_DocumentEnd:
setError("Could not find closing ]!", T);
// Set this to end iterator.
IsAtEnd = true;
- CurrentEntry = 0;
+ CurrentEntry = nullptr;
break;
default:
if (!WasPreviousTokenFlowEntry) {
setError("Expected , between entries!", T);
IsAtEnd = true;
- CurrentEntry = 0;
+ CurrentEntry = nullptr;
break;
}
// Otherwise it must be a flow entry.
}
}
-Document::Document(Stream &S) : stream(S), Root(0) {
+Document::Document(Stream &S) : stream(S), Root(nullptr) {
+ // Tag maps starts with two default mappings.
+ TagMap["!"] = "!";
+ TagMap["!!"] = "tag:yaml.org,2002:";
+
if (parseDirectives())
expectToken(Token::TK_DocumentStart);
Token &T = peekNext();
Token T = peekNext();
// Handle properties.
Token AnchorInfo;
+ Token TagInfo;
parse_property:
switch (T.Kind) {
case Token::TK_Alias:
case Token::TK_Anchor:
if (AnchorInfo.Kind == Token::TK_Anchor) {
setError("Already encountered an anchor for this node!", T);
- return 0;
+ return nullptr;
}
AnchorInfo = getNext(); // Consume TK_Anchor.
T = peekNext();
goto parse_property;
case Token::TK_Tag:
- getNext(); // Skip TK_Tag.
+ if (TagInfo.Kind == Token::TK_Tag) {
+ setError("Already encountered a tag for this node!", T);
+ return nullptr;
+ }
+ TagInfo = getNext(); // Consume TK_Tag.
T = peekNext();
goto parse_property;
default:
// Don't eat the TK_BlockEntry, SequenceNode needs it.
return new (NodeAllocator) SequenceNode( stream.CurrentDoc
, AnchorInfo.Range.substr(1)
+ , TagInfo.Range
, SequenceNode::ST_Indentless);
case Token::TK_BlockSequenceStart:
getNext();
return new (NodeAllocator)
SequenceNode( stream.CurrentDoc
, AnchorInfo.Range.substr(1)
+ , TagInfo.Range
, SequenceNode::ST_Block);
case Token::TK_BlockMappingStart:
getNext();
return new (NodeAllocator)
MappingNode( stream.CurrentDoc
, AnchorInfo.Range.substr(1)
+ , TagInfo.Range
, MappingNode::MT_Block);
case Token::TK_FlowSequenceStart:
getNext();
return new (NodeAllocator)
SequenceNode( stream.CurrentDoc
, AnchorInfo.Range.substr(1)
+ , TagInfo.Range
, SequenceNode::ST_Flow);
case Token::TK_FlowMappingStart:
getNext();
return new (NodeAllocator)
MappingNode( stream.CurrentDoc
, AnchorInfo.Range.substr(1)
+ , TagInfo.Range
, MappingNode::MT_Flow);
case Token::TK_Scalar:
getNext();
return new (NodeAllocator)
ScalarNode( stream.CurrentDoc
, AnchorInfo.Range.substr(1)
+ , TagInfo.Range
, T.Range);
case Token::TK_Key:
// Don't eat the TK_Key, KeyValueNode expects it.
return new (NodeAllocator)
MappingNode( stream.CurrentDoc
, AnchorInfo.Range.substr(1)
+ , TagInfo.Range
, MappingNode::MT_Inline);
case Token::TK_DocumentStart:
case Token::TK_DocumentEnd:
// !!null null.
return new (NodeAllocator) NullNode(stream.CurrentDoc);
case Token::TK_Error:
- return 0;
+ return nullptr;
}
llvm_unreachable("Control flow shouldn't reach here.");
- return 0;
+ return nullptr;
}
bool Document::parseDirectives() {
while (true) {
Token T = peekNext();
if (T.Kind == Token::TK_TagDirective) {
- handleTagDirective(getNext());
+ parseTAGDirective();
isDirective = true;
} else if (T.Kind == Token::TK_VersionDirective) {
- stream.handleYAMLDirective(getNext());
+ parseYAMLDirective();
isDirective = true;
} else
break;
return isDirective;
}
+void Document::parseYAMLDirective() {
+ getNext(); // Eat %YAML <version>
+}
+
+void Document::parseTAGDirective() {
+ Token Tag = getNext(); // %TAG <handle> <prefix>
+ StringRef T = Tag.Range;
+ // Strip %TAG
+ T = T.substr(T.find_first_of(" \t")).ltrim(" \t");
+ std::size_t HandleEnd = T.find_first_of(" \t");
+ StringRef TagHandle = T.substr(0, HandleEnd);
+ StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t");
+ TagMap[TagHandle] = TagPrefix;
+}
+
bool Document::expectToken(int TK) {
Token T = getNext();
if (T.Kind != TK) {
}
return true;
}
-
-OwningPtr<Document> document_iterator::NullDoc;