//===----------------------------------------------------------------------===//
#include "llvm/Support/YAMLParser.h"
-
-#include "llvm/ADT/ilist.h"
-#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace yaml;
+/// The Unicode encoding forms told apart here, plus a marker (UEF_Unknown)
+/// for input that is not a valid Unicode encoding.
enum UnicodeEncodingForm {
- UEF_UTF32_LE, //< UTF-32 Little Endian
- UEF_UTF32_BE, //< UTF-32 Big Endian
- UEF_UTF16_LE, //< UTF-16 Little Endian
- UEF_UTF16_BE, //< UTF-16 Big Endian
- UEF_UTF8, //< UTF-8 or ascii.
- UEF_Unknown //< Not a valid Unicode encoding.
+ UEF_UTF32_LE, ///< UTF-32 Little Endian
+ UEF_UTF32_BE, ///< UTF-32 Big Endian
+ UEF_UTF16_LE, ///< UTF-16 Little Endian
+ UEF_UTF16_BE, ///< UTF-16 Big Endian
+ UEF_UTF8, ///< UTF-8 or ascii.
+ UEF_Unknown ///< Not a valid Unicode encoding.
};
/// EncodingInfo - Holds the encoding type and length of the byte order mark if
class Scanner {
public:
Scanner(const StringRef Input, SourceMgr &SM);
+ Scanner(MemoryBuffer *Buffer, SourceMgr &SM_);
/// @brief Parse the next token and return it without popping it.
Token &peekNext();
Token getNext();
void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message,
- ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) {
+ ArrayRef<SMRange> Ranges = None) {
SM.PrintMessage(Loc, Kind, Message, Ranges);
}
/// @brief Can the next token be the start of a simple key?
bool IsSimpleKeyAllowed;
- /// @brief Is the next token required to start a simple key?
- bool IsSimpleKeyRequired;
-
/// @brief True if an error has occurred.
bool Failed;
EscapedInput += "\\r";
else if (*i == 0x1B)
EscapedInput += "\\e";
- else if (*i >= 0 && *i < 0x20) { // Control characters not handled above.
+ else if ((unsigned char)*i < 0x20) { // Control characters not handled above.
std::string HexStr = utohexstr(*i);
EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
} else if (*i & 0x80) { // UTF-8 multiple code unit subsequence.
, FlowLevel(0)
, IsStartOfStream(true)
, IsSimpleKeyAllowed(true)
- , IsSimpleKeyRequired(false)
, Failed(false) {
InputBuffer = MemoryBuffer::getMemBuffer(Input, "YAML");
SM.AddNewSourceBuffer(InputBuffer, SMLoc());
End = InputBuffer->getBufferEnd();
}
+/// @brief Construct a scanner that reads directly from \p Buffer.
+///
+/// The cursor starts at the beginning of \p Buffer and the end marker at its
+/// end; Indent starts at -1, Column, Line and FlowLevel at 0, and a simple key
+/// is allowed at the start of the stream. \p Buffer is then registered with
+/// \p SM_ through AddNewSourceBuffer.
+///
+/// Current and End are initialized from the \p Buffer argument rather than
+/// from the InputBuffer member, so they do not rely on InputBuffer being
+/// declared (and therefore initialized) before them.
+///
+/// @param Buffer The input to scan; must not be null, it is dereferenced here.
+/// @param SM_    Source manager used for diagnostics.
+Scanner::Scanner(MemoryBuffer *Buffer, SourceMgr &SM_)
+  : SM(SM_)
+  , InputBuffer(Buffer)
+  , Current(Buffer->getBufferStart())
+  , End(Buffer->getBufferEnd())
+  , Indent(-1)
+  , Column(0)
+  , Line(0)
+  , FlowLevel(0)
+  , IsStartOfStream(true)
+  , IsSimpleKeyAllowed(true)
+  , Failed(false) {
+  SM.AddNewSourceBuffer(InputBuffer, SMLoc());
+}
+
Token &Scanner::peekNext() {
// If the current token is a possible simple key, keep parsing until we
// can confirm.
}
StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) {
+ if (Position == End)
+ return Position;
// Check 7 bit c-printable - b-char.
if ( *Position == 0x09
|| (*Position >= 0x20 && *Position <= 0x7E))
}
StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) {
+ if (Position == End)
+ return Position;
if (*Position == 0x0D) {
if (Position + 1 != End && *(Position + 1) == 0x0A)
return Position + 2;
+/// @brief Advance the scan position by \p Distance.
+///
+/// Both Current and Column move forward by \p Distance. The distance is
+/// checked against the remaining input before the cursor moves, so a pointer
+/// beyond End is never formed, not even transiently.
void Scanner::skip(uint32_t Distance) {
+  assert(End - Current >= static_cast<int64_t>(Distance) &&
+         "Skipped past the end");
Current += Distance;
Column += Distance;
}
bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
Current = skip_while(&Scanner::skip_ns_char, Current);
StringRef Name(NameStart, Current - NameStart);
Current = skip_while(&Scanner::skip_s_white, Current);
-
+
+ Token T;
if (Name == "YAML") {
Current = skip_while(&Scanner::skip_ns_char, Current);
- Token T;
T.Kind = Token::TK_VersionDirective;
T.Range = StringRef(Start, Current - Start);
TokenQueue.push_back(T);
return true;
+ } else if(Name == "TAG") {
+ Current = skip_while(&Scanner::skip_ns_char, Current);
+ Current = skip_while(&Scanner::skip_s_white, Current);
+ Current = skip_while(&Scanner::skip_ns_char, Current);
+ T.Kind = Token::TK_TagDirective;
+ T.Range = StringRef(Start, Current - Start);
+ TokenQueue.push_back(T);
+ return true;
}
return false;
}
++Current;
// Repeat until the previous character was not a '\' or was an escaped
// backslash.
- } while (*(Current - 1) == '\\' && wasEscaped(Start + 1, Current));
+ } while ( Current != End
+ && *(Current - 1) == '\\'
+ && wasEscaped(Start + 1, Current));
} else {
skip(1);
while (true) {
}
}
}
+
+ if (Current == End) {
+ setError("Expected quote at end of scalar", Current);
+ return false;
+ }
+
skip(1); // Skip ending quote.
Token T;
T.Kind = Token::TK_Scalar;
: scanner(new Scanner(Input, SM))
, CurrentDoc(0) {}
+/// @brief Construct a stream over the contents of \p InputBuffer.
+///
+/// A new Scanner is created from \p InputBuffer and \p SM. No document is
+/// current yet: CurrentDoc starts out as 0.
+Stream::Stream(MemoryBuffer *InputBuffer, SourceMgr &SM)
+ : scanner(new Scanner(InputBuffer, SM))
+ , CurrentDoc(0) {}
+
+// NOTE(review): presumably defined out of line so that the member owning the
+// Scanner is destroyed in a place where Scanner is a complete type - confirm.
+Stream::~Stream() {}
+
/// @brief Report whether the scanner behind this stream has failed.
bool Stream::failed() {
  return scanner->failed();
}
void Stream::printError(Node *N, const Twine &Msg) {
, Ranges);
}
-void Stream::handleYAMLDirective(const Token &t) {
- // TODO: Ensure version is 1.x.
-}
-
document_iterator Stream::begin() {
if (CurrentDoc)
report_fatal_error("Can only iterate over the stream once");
i->skip();
}
+/// @brief Construct a node of kind \p Type that belongs to document \p D.
+///
+/// \p A is stored as the node's anchor and \p T as its tag. The source range
+/// starts out empty, positioned at the beginning of the next token.
-Node::Node(unsigned int Type, OwningPtr<Document> &D, StringRef A)
+Node::Node(unsigned int Type, OwningPtr<Document> &D, StringRef A, StringRef T)
: Doc(D)
, TypeID(Type)
- , Anchor(A) {
+ , Anchor(A)
+ , Tag(T) {
+ // Begin and end of the range are the same location: the start of the
+ // token that has not been consumed yet.
SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin());
SourceRange = SMRange(Start, Start);
}
+/// @brief Get the verbatim (handle-expanded) tag of this node.
+///
+/// If the node has a raw tag other than the bare "!", its tag handle is
+/// replaced by the prefix registered in the owning document's tag map:
+///   - "!suffix"      uses the mapping for the handle "!";
+///   - "!!suffix"     uses the mapping for the handle "!!";
+///   - "!name!suffix" uses the mapping for the handle "!name!". If that
+///     handle is unknown, an error is reported through setError and only the
+///     suffix is returned.
+/// Without such a raw tag, a default tag is chosen from the node kind.
+///
+/// @return The expanded tag, or an empty string for an unrecognized node kind.
+std::string Node::getVerbatimTag() const {
+  StringRef Raw = getRawTag();
+  if (!Raw.empty() && Raw != "!") {
+    // Index of the '!' that ends the tag handle; computed once and shared by
+    // every branch below.
+    const size_t LastBang = Raw.find_last_of('!');
+    std::string Ret;
+    if (LastBang == 0) {
+      // The only '!' is the leading one: primary handle.
+      Ret = Doc->getTagMap().find("!")->second;
+      Ret += Raw.substr(1);
+    } else if (Raw.startswith("!!")) {
+      // Secondary handle.
+      Ret = Doc->getTagMap().find("!!")->second;
+      Ret += Raw.substr(2);
+    } else {
+      // Named handle: everything up to and including the last '!'.
+      StringRef TagHandle = Raw.substr(0, LastBang + 1);
+      std::map<StringRef, StringRef>::const_iterator It =
+          Doc->getTagMap().find(TagHandle);
+      if (It != Doc->getTagMap().end())
+        Ret = It->second;
+      else {
+        // Report the unknown handle; the result then carries no prefix.
+        Token T;
+        T.Kind = Token::TK_Tag;
+        T.Range = TagHandle;
+        setError(Twine("Unknown tag handle ") + TagHandle, T);
+      }
+      Ret += Raw.substr(LastBang + 1);
+    }
+    // Returned by name so the compiler can elide or move the local.
+    return Ret;
+  }
+
+  // No explicit tag: fall back to a default derived from the node kind.
+  switch (getType()) {
+  case NK_Null:
+    return "tag:yaml.org,2002:null";
+  case NK_Scalar:
+    // TODO: Tag resolution.
+    return "tag:yaml.org,2002:str";
+  case NK_Mapping:
+    return "tag:yaml.org,2002:map";
+  case NK_Sequence:
+    return "tag:yaml.org,2002:seq";
+  }
+
+  return "";
+}
+
/// @brief Peek at the next token via the document that owns this node.
Token &Node::peekNext() {
  Token &Upcoming = Doc->peekNext();
  return Upcoming;
}
return UnquotedValue;
}
// Plain or block.
- size_t trimtrail = Value.rfind(' ');
- return Value.drop_back(
- trimtrail == StringRef::npos ? 0 : Value.size() - trimtrail);
+ return Value.rtrim(" ");
}
StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue
// TODO: Report error.
break;
unsigned int UnicodeScalarValue;
- UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue);
+ if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue))
+ // TODO: Report error.
+ UnicodeScalarValue = 0xFFFD;
encodeUTF8(UnicodeScalarValue, Storage);
UnquotedValue = UnquotedValue.substr(2);
break;
// TODO: Report error.
break;
unsigned int UnicodeScalarValue;
- UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue);
+ if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue))
+ // TODO: Report error.
+ UnicodeScalarValue = 0xFFFD;
encodeUTF8(UnicodeScalarValue, Storage);
UnquotedValue = UnquotedValue.substr(4);
break;
// TODO: Report error.
break;
unsigned int UnicodeScalarValue;
- UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue);
+ if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue))
+ // TODO: Report error.
+ UnicodeScalarValue = 0xFFFD;
encodeUTF8(UnicodeScalarValue, Storage);
UnquotedValue = UnquotedValue.substr(8);
break;
}
Document::Document(Stream &S) : stream(S), Root(0) {
+ // The tag map starts with two default mappings.
+ TagMap["!"] = "!";
+ TagMap["!!"] = "tag:yaml.org,2002:";
+
if (parseDirectives())
expectToken(Token::TK_DocumentStart);
Token &T = peekNext();
Token T = peekNext();
// Handle properties.
Token AnchorInfo;
+ Token TagInfo;
parse_property:
switch (T.Kind) {
case Token::TK_Alias:
T = peekNext();
goto parse_property;
case Token::TK_Tag:
- getNext(); // Skip TK_Tag.
+ if (TagInfo.Kind == Token::TK_Tag) {
+ setError("Already encountered a tag for this node!", T);
+ return 0;
+ }
+ TagInfo = getNext(); // Consume TK_Tag.
T = peekNext();
goto parse_property;
default:
// Don't eat the TK_BlockEntry, SequenceNode needs it.
return new (NodeAllocator) SequenceNode( stream.CurrentDoc
, AnchorInfo.Range.substr(1)
+ , TagInfo.Range
, SequenceNode::ST_Indentless);
case Token::TK_BlockSequenceStart:
getNext();
return new (NodeAllocator)
SequenceNode( stream.CurrentDoc
, AnchorInfo.Range.substr(1)
+ , TagInfo.Range
, SequenceNode::ST_Block);
case Token::TK_BlockMappingStart:
getNext();
return new (NodeAllocator)
MappingNode( stream.CurrentDoc
, AnchorInfo.Range.substr(1)
+ , TagInfo.Range
, MappingNode::MT_Block);
case Token::TK_FlowSequenceStart:
getNext();
return new (NodeAllocator)
SequenceNode( stream.CurrentDoc
, AnchorInfo.Range.substr(1)
+ , TagInfo.Range
, SequenceNode::ST_Flow);
case Token::TK_FlowMappingStart:
getNext();
return new (NodeAllocator)
MappingNode( stream.CurrentDoc
, AnchorInfo.Range.substr(1)
+ , TagInfo.Range
, MappingNode::MT_Flow);
case Token::TK_Scalar:
getNext();
return new (NodeAllocator)
ScalarNode( stream.CurrentDoc
, AnchorInfo.Range.substr(1)
+ , TagInfo.Range
, T.Range);
case Token::TK_Key:
// Don't eat the TK_Key, KeyValueNode expects it.
return new (NodeAllocator)
MappingNode( stream.CurrentDoc
, AnchorInfo.Range.substr(1)
+ , TagInfo.Range
, MappingNode::MT_Inline);
case Token::TK_DocumentStart:
case Token::TK_DocumentEnd:
while (true) {
Token T = peekNext();
if (T.Kind == Token::TK_TagDirective) {
- handleTagDirective(getNext());
+ parseTAGDirective();
isDirective = true;
} else if (T.Kind == Token::TK_VersionDirective) {
- stream.handleYAMLDirective(getNext());
+ parseYAMLDirective();
isDirective = true;
} else
break;
return isDirective;
}
+/// @brief Consume a %YAML directive token.
+///
+/// The token is taken off the stream and discarded; the version it carries is
+/// not inspected.
+void Document::parseYAMLDirective() {
+ getNext(); // Eat %YAML <version>
+}
+
+/// @brief Consume a %TAG directive token and record its handle-to-prefix
+/// mapping in TagMap.
+///
+/// The directive text has the form "%TAG <handle> <prefix>", with the parts
+/// separated by spaces or tabs. An existing entry for the same handle is
+/// overwritten.
+void Document::parseTAGDirective() {
+  const Token Directive = getNext(); // %TAG <handle> <prefix>
+  const char *const Blanks = " \t";
+
+  // Drop the leading "%TAG" word together with the blanks that follow it.
+  StringRef Rest = Directive.Range;
+  Rest = Rest.substr(Rest.find_first_of(Blanks));
+  Rest = Rest.ltrim(Blanks);
+
+  // The handle runs up to the next blank; the prefix is whatever remains
+  // once the separating blanks are skipped.
+  const std::size_t HandleLen = Rest.find_first_of(Blanks);
+  const StringRef Handle = Rest.substr(0, HandleLen);
+  const StringRef Prefix = Rest.substr(HandleLen).ltrim(Blanks);
+  TagMap[Handle] = Prefix;
+}
+
bool Document::expectToken(int TK) {
Token T = getNext();
if (T.Kind != TK) {
}
return true;
}
-
-OwningPtr<Document> document_iterator::NullDoc;