From: Michael J. Spencer Date: Tue, 3 Apr 2012 23:09:22 +0000 (+0000) Subject: Add YAML parser to Support. X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=93210e847a1496b24cef881723e57c489082dcfe Add YAML parser to Support. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@153977 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 33dd12314d4..8336bc975e3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -397,6 +397,7 @@ add_subdirectory(utils/count) add_subdirectory(utils/not) add_subdirectory(utils/llvm-lit) add_subdirectory(utils/json-bench) +add_subdirectory(utils/yaml-bench) add_subdirectory(projects) diff --git a/LICENSE.TXT b/LICENSE.TXT index 837688e76ab..00cf6011694 100644 --- a/LICENSE.TXT +++ b/LICENSE.TXT @@ -67,3 +67,4 @@ Autoconf llvm/autoconf CellSPU backend llvm/lib/Target/CellSPU/README.txt Google Test llvm/utils/unittest/googletest OpenBSD regex llvm/lib/Support/{reg*, COPYRIGHT.regex} +pyyaml tests llvm/test/YAMLParser/{*.data, LICENSE.TXT} diff --git a/include/llvm/Support/YAMLParser.h b/include/llvm/Support/YAMLParser.h new file mode 100644 index 00000000000..27d039164c1 --- /dev/null +++ b/include/llvm/Support/YAMLParser.h @@ -0,0 +1,564 @@ +//===--- YAMLParser.h - Simple YAML parser --------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a YAML 1.2 parser. +// +// See http://www.yaml.org/spec/1.2/spec.html for the full standard. +// +// This currently does not implement the following: +// * Multi-line literal folding. +// * Tag resolution. +// * UTF-16. +// * BOMs anywhere other than the first Unicode scalar value in the file. +// +// The most important class here is Stream. 
This represents a YAML stream with +// 0, 1, or many documents. +// +// SourceMgr sm; +// StringRef input = getInput(); +// yaml::Stream stream(input, sm); +// +// for (yaml::document_iterator di = stream.begin(), de = stream.end(); +// di != de; ++di) { +// yaml::Node *n = di->getRoot(); +// if (n) { +// // Do something with n... +// } else +// break; +// } +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_YAML_PARSER_H +#define LLVM_SUPPORT_YAML_PARSER_H + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/SMLoc.h" + +#include +#include + +namespace llvm { +class MemoryBuffer; +class SourceMgr; +class raw_ostream; +class Twine; + +namespace yaml { + +class document_iterator; +class Document; +class Node; +class Scanner; +struct Token; + +/// @brief Dump all the tokens in this stream to OS. +/// @returns true if there was an error, false otherwise. +bool dumpTokens(StringRef Input, raw_ostream &); + +/// @brief Scans all tokens in input without outputting anything. This is used +/// for benchmarking the tokenizer. +/// @returns true if there was an error, false otherwise. +bool scanTokens(StringRef Input); + +/// @brief Escape \a Input for a double quoted scalar. +std::string escape(StringRef Input); + +/// @brief This class represents a YAML stream potentially containing multiple +/// documents. +class Stream { +public: + Stream(StringRef Input, SourceMgr &); + + document_iterator begin(); + document_iterator end(); + void skip(); + bool failed(); + bool validate() { + skip(); + return !failed(); + } + + void printError(Node *N, const Twine &Msg); + +private: + OwningPtr scanner; + OwningPtr CurrentDoc; + + friend class Document; + + /// @brief Validate a %YAML x.x directive. + void handleYAMLDirective(const Token &); +}; + +/// @brief Abstract base class for all Nodes. 
+class Node { +public: + enum NodeKind { + NK_Null, + NK_Scalar, + NK_KeyValue, + NK_Mapping, + NK_Sequence, + NK_Alias + }; + + Node(unsigned int Type, OwningPtr&, StringRef Anchor); + virtual ~Node(); + + /// @brief Get the value of the anchor attached to this node. If it does not + /// have one, getAnchor().size() will be 0. + StringRef getAnchor() const { return Anchor; } + + SMRange getSourceRange() const { return SourceRange; } + void setSourceRange(SMRange SR) { SourceRange = SR; } + + // These functions forward to Document and Scanner. + Token &peekNext(); + Token getNext(); + Node *parseBlockNode(); + BumpPtrAllocator &getAllocator(); + void setError(const Twine &Message, Token &Location) const; + bool failed() const; + + virtual void skip() {}; + + unsigned int getType() const { return TypeID; } + static inline bool classof(const Node *) { return true; } + + void *operator new ( size_t Size + , BumpPtrAllocator &Alloc + , size_t Alignment = 16) throw() { + return Alloc.Allocate(Size, Alignment); + } + + void operator delete(void *Ptr, BumpPtrAllocator &Alloc, size_t) throw() { + Alloc.Deallocate(Ptr); + } + +protected: + OwningPtr &Doc; + SMRange SourceRange; + +private: + unsigned int TypeID; + StringRef Anchor; +}; + +/// @brief A null value. +/// +/// Example: +/// !!null null +class NullNode : public Node { +public: + NullNode(OwningPtr &D) : Node(NK_Null, D, StringRef()) {} + + static inline bool classof(const NullNode *) { return true; } + static inline bool classof(const Node *N) { + return N->getType() == NK_Null; + } +}; + +/// @brief A scalar node is an opaque datum that can be presented as a +/// series of zero or more Unicode scalar values. 
+/// +/// Example: +/// Adena +class ScalarNode : public Node { +public: + ScalarNode(OwningPtr &D, StringRef Anchor, StringRef Val) + : Node(NK_Scalar, D, Anchor) + , Value(Val) { + SMLoc Start = SMLoc::getFromPointer(Val.begin()); + SMLoc End = SMLoc::getFromPointer(Val.end() - 1); + SourceRange = SMRange(Start, End); + } + + // Return Value without any escaping or folding or other fun YAML stuff. This + // is the exact bytes that are contained in the file (after conversion to + // utf8). + StringRef getRawValue() const { return Value; } + + /// @brief Gets the value of this node as a StringRef. + /// + /// @param Storage is used to store the content of the returned StringRef iff + /// it requires any modification from how it appeared in the source. + /// This happens with escaped characters and multi-line literals. + StringRef getValue(SmallVectorImpl &Storage) const; + + static inline bool classof(const ScalarNode *) { return true; } + static inline bool classof(const Node *N) { + return N->getType() == NK_Scalar; + } + +private: + StringRef Value; + + StringRef unescapeDoubleQuoted( StringRef UnquotedValue + , StringRef::size_type Start + , SmallVectorImpl &Storage) const; +}; + +static bool getAs(const ScalarNode *SN, bool &Result) { + SmallString<4> Storage; + StringRef Value = SN->getValue(Storage); + if (Value == "true") + Result = true; + else if (Value == "false") + Result = false; + else + return false; + return true; +} + +template +typename enable_if_c::is_integer, bool>::type +getAs(const ScalarNode *SN, T &Result) { + SmallString<4> Storage; + return !SN->getValue(Storage).getAsInteger(0, Result); +} + +/// @brief A key and value pair. While not technically a Node under the YAML +/// representation graph, it is easier to treat them this way. +/// +/// TODO: Consider making this not a child of Node. 
+/// +/// Example: +/// Section: .text +class KeyValueNode : public Node { +public: + KeyValueNode(OwningPtr &D) + : Node(NK_KeyValue, D, StringRef()) + , Key(0) + , Value(0) + {} + + /// @brief Parse and return the key. + /// + /// This may be called multiple times. + /// + /// @returns The key, or nullptr if failed() == true. + Node *getKey(); + + /// @brief Parse and return the value. + /// + /// This may be called multiple times. + /// + /// @returns The value, or nullptr if failed() == true. + Node *getValue(); + + /// @brief Skip the key and then the value. + /// + /// getKey() and getValue() return nullptr once parsing has failed, so each + /// result is checked before use. The value is only requested when a key was + /// obtained. + virtual void skip() { + if (Node *K = getKey()) { + K->skip(); + if (Node *V = getValue()) + V->skip(); + } + } + + static inline bool classof(const KeyValueNode *) { return true; } + static inline bool classof(const Node *N) { + return N->getType() == NK_KeyValue; + } + +private: + Node *Key; + Node *Value; +}; + +/// @brief This is an iterator abstraction over YAML collections shared by both +/// sequences and maps. +/// +/// BaseT must have a ValueT* member named CurrentEntry and a member function +/// increment() which must set CurrentEntry to 0 to create an end iterator. 
+template +class basic_collection_iterator + : public std::iterator { +public: + basic_collection_iterator() : Base(0) {} + basic_collection_iterator(BaseT *B) : Base(B) {} + + ValueT *operator ->() const { + assert(Base && Base->CurrentEntry && "Attempted to access end iterator!"); + return Base->CurrentEntry; + } + + ValueT &operator *() const { + assert(Base && Base->CurrentEntry && + "Attempted to dereference end iterator!"); + return *Base->CurrentEntry; + } + + operator ValueT*() const { + assert(Base && Base->CurrentEntry && "Attempted to access end iterator!"); + return Base->CurrentEntry; + } + + bool operator !=(const basic_collection_iterator &Other) const { + if(Base != Other.Base) + return true; + return (Base && Other.Base) && Base->CurrentEntry + != Other.Base->CurrentEntry; + } + + basic_collection_iterator &operator++() { + assert(Base && "Attempted to advance iterator past end!"); + Base->increment(); + // Create an end iterator. + if (Base->CurrentEntry == 0) + Base = 0; + return *this; + } + +private: + BaseT *Base; +}; + +// The following two templates are used for both MappingNode and SequenceNode. +template +typename CollectionType::iterator begin(CollectionType &C) { + assert(C.IsAtBeginning && "You may only iterate over a collection once!"); + C.IsAtBeginning = false; + typename CollectionType::iterator ret(&C); + ++ret; + return ret; +} + +template +void skip(CollectionType &C) { + // TODO: support skipping from the middle of a parsed collection ;/ + assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!"); + if (C.IsAtBeginning) + for (typename CollectionType::iterator i = begin(C), e = C.end(); + i != e; ++i) + i->skip(); +} + +/// @brief Represents a YAML map created from either a block map or a flow map. +/// +/// This parses the YAML stream as increment() is called. 
+/// +/// Example: +/// Name: _main +/// Scope: Global +class MappingNode : public Node { +public: + enum MappingType { + MT_Block, + MT_Flow, + MT_Inline ///< An inline mapping node is used for "[key: value]". + }; + + MappingNode(OwningPtr &D, StringRef Anchor, MappingType MT) + : Node(NK_Mapping, D, Anchor) + , Type(MT) + , IsAtBeginning(true) + , IsAtEnd(false) + , CurrentEntry(0) + {} + + friend class basic_collection_iterator; + typedef basic_collection_iterator iterator; + template friend typename T::iterator yaml::begin(T &); + template friend void yaml::skip(T &); + + iterator begin() { + return yaml::begin(*this); + } + + iterator end() { return iterator(); } + + virtual void skip() { + yaml::skip(*this); + } + + static inline bool classof(const MappingNode *) { return true; } + static inline bool classof(const Node *N) { + return N->getType() == NK_Mapping; + } + +private: + MappingType Type; + bool IsAtBeginning; + bool IsAtEnd; + KeyValueNode *CurrentEntry; + + void increment(); +}; + +/// @brief Represents a YAML sequence created from either a block sequence or a +/// flow sequence. +/// +/// This parses the YAML stream as increment() is called. +/// +/// Example: +/// - Hello +/// - World +class SequenceNode : public Node { +public: + enum SequenceType { + ST_Block, + ST_Flow, + // Used for: + // + // key: + // - val1 + // - val2 + // + // As a BlockMappingEntry and BlockEnd are not created in this case. + ST_Indentless + }; + + SequenceNode(OwningPtr &D, StringRef Anchor, SequenceType ST) + : Node(NK_Sequence, D, Anchor) + , SeqType(ST) + , IsAtBeginning(true) + , IsAtEnd(false) + , WasPreviousTokenFlowEntry(true) // Start with an imaginary ','. 
+ , CurrentEntry(0) + {} + + friend class basic_collection_iterator; + typedef basic_collection_iterator iterator; + template friend typename T::iterator yaml::begin(T &); + template friend void yaml::skip(T &); + + void increment(); + + iterator begin() { + return yaml::begin(*this); + } + + iterator end() { return iterator(); } + + virtual void skip() { + yaml::skip(*this); + } + + static inline bool classof(const SequenceNode *) { return true; } + static inline bool classof(const Node *N) { + return N->getType() == NK_Sequence; + } + +private: + SequenceType SeqType; + bool IsAtBeginning; + bool IsAtEnd; + bool WasPreviousTokenFlowEntry; + Node *CurrentEntry; +}; + +/// @brief Represents an alias to a Node with an anchor. +/// +/// Example: +/// *AnchorName +class AliasNode : public Node { +public: + AliasNode(OwningPtr &D, StringRef Val) + : Node(NK_Alias, D, StringRef()), Name(Val) {} + + StringRef getName() const { return Name; } + Node *getTarget(); + + // The self-type overload must name AliasNode itself. Naming ScalarNode here + // would make an AliasNode type test on a ScalarNode succeed unconditionally. + static inline bool classof(const AliasNode *) { return true; } + static inline bool classof(const Node *N) { + return N->getType() == NK_Alias; + } + +private: + StringRef Name; +}; + +/// @brief A YAML Stream is a sequence of Documents. A document contains a root +/// node. +class Document { +public: + /// @brief Root for parsing a node. Returns a single node. + Node *parseBlockNode(); + + Document(Stream &ParentStream); + + /// @brief Finish parsing the current document and return true if there are + /// more. Return false otherwise. + bool skip(); + + /// @brief Parse and return the root level node. + Node *getRoot() { + if (Root) + return Root; + return Root = parseBlockNode(); + } + +private: + friend class Node; + friend class document_iterator; + + /// @brief Stream to read tokens from. + Stream &stream; + + /// @brief Used to allocate nodes to. All are destroyed without calling their + /// destructor when the document is destroyed. + BumpPtrAllocator NodeAllocator; + + /// @brief The root node. 
Used to support skipping a partially parsed + /// document. + Node *Root; + + Token &peekNext(); + Token getNext(); + void setError(const Twine &Message, Token &Location) const; + bool failed() const; + + void handleTagDirective(const Token &Tag) { + // TODO: Track tags. + } + + /// @brief Parse %BLAH directives and return true if any were encountered. + bool parseDirectives(); + + /// @brief Consume the next token and error if it is not \a TK. + bool expectToken(int TK); +}; + +/// @brief Iterator abstraction for Documents over a Stream. +class document_iterator { +public: + document_iterator() : Doc(NullDoc) {} + document_iterator(OwningPtr &D) : Doc(D) {} + + bool operator !=(const document_iterator &Other) { + return Doc != Other.Doc; + } + + document_iterator operator ++() { + if (!Doc->skip()) { + Doc.reset(0); + } else { + Stream &S = Doc->stream; + Doc.reset(new Document(S)); + } + return *this; + } + + Document &operator *() { + return *Doc; + } + + OwningPtr &operator ->() { + return Doc; + } + +private: + static OwningPtr NullDoc; + OwningPtr &Doc; +}; + +} +} + +#endif diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index 0b69238274e..9b3b6c801dd 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -54,6 +54,7 @@ add_llvm_library(LLVMSupport ToolOutputFile.cpp Triple.cpp Twine.cpp + YAMLParser.cpp raw_os_ostream.cpp raw_ostream.cpp regcomp.c diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp new file mode 100644 index 00000000000..3e302d0eb1b --- /dev/null +++ b/lib/Support/YAMLParser.cpp @@ -0,0 +1,2115 @@ +//===--- YAMLParser.cpp - Simple YAML parser ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a YAML parser. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/YAMLParser.h" + +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/ilist_node.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/SourceMgr.h" + +using namespace llvm; +using namespace yaml; + +enum UnicodeEncodingForm { + UEF_UTF32_LE, //< UTF-32 Little Endian + UEF_UTF32_BE, //< UTF-32 Big Endian + UEF_UTF16_LE, //< UTF-16 Little Endian + UEF_UTF16_BE, //< UTF-16 Big Endian + UEF_UTF8, //< UTF-8 or ascii. + UEF_Unknown //< Not a valid Unicode encoding. +}; + +/// EncodingInfo - Holds the encoding type and length of the byte order mark if +/// it exists. Length is in {0, 2, 3, 4}. +typedef std::pair EncodingInfo; + +/// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode +/// encoding form of \a Input. +/// +/// @param Input A string of length 0 or more. +/// @returns An EncodingInfo indicating the Unicode encoding form of the input +/// and how long the byte order mark is if one exists. 
+static EncodingInfo getUnicodeEncoding(StringRef Input) { + if (Input.size() == 0) + return std::make_pair(UEF_Unknown, 0); + + switch (uint8_t(Input[0])) { + case 0x00: + if (Input.size() >= 4) { + if ( Input[1] == 0 + && uint8_t(Input[2]) == 0xFE + && uint8_t(Input[3]) == 0xFF) + return std::make_pair(UEF_UTF32_BE, 4); + if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0) + return std::make_pair(UEF_UTF32_BE, 0); + } + + if (Input.size() >= 2 && Input[1] != 0) + return std::make_pair(UEF_UTF16_BE, 0); + return std::make_pair(UEF_Unknown, 0); + case 0xFF: + if ( Input.size() >= 4 + && uint8_t(Input[1]) == 0xFE + && Input[2] == 0 + && Input[3] == 0) + return std::make_pair(UEF_UTF32_LE, 4); + + if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFE) + return std::make_pair(UEF_UTF16_LE, 2); + return std::make_pair(UEF_Unknown, 0); + case 0xFE: + if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFF) + return std::make_pair(UEF_UTF16_BE, 2); + return std::make_pair(UEF_Unknown, 0); + case 0xEF: + if ( Input.size() >= 3 + && uint8_t(Input[1]) == 0xBB + && uint8_t(Input[2]) == 0xBF) + return std::make_pair(UEF_UTF8, 3); + return std::make_pair(UEF_Unknown, 0); + } + + // It could still be utf-32 or utf-16. + if (Input.size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0) + return std::make_pair(UEF_UTF32_LE, 0); + + if (Input.size() >= 2 && Input[1] == 0) + return std::make_pair(UEF_UTF16_LE, 0); + + return std::make_pair(UEF_UTF8, 0); +} + +namespace llvm { +namespace yaml { +/// Token - A single YAML token. +struct Token : ilist_node { + enum TokenKind { + TK_Error, // Uninitialized token. 
+ TK_StreamStart, + TK_StreamEnd, + TK_VersionDirective, + TK_TagDirective, + TK_DocumentStart, + TK_DocumentEnd, + TK_BlockEntry, + TK_BlockEnd, + TK_BlockSequenceStart, + TK_BlockMappingStart, + TK_FlowEntry, + TK_FlowSequenceStart, + TK_FlowSequenceEnd, + TK_FlowMappingStart, + TK_FlowMappingEnd, + TK_Key, + TK_Value, + TK_Scalar, + TK_Alias, + TK_Anchor, + TK_Tag + } Kind; + + /// A string of length 0 or more whose begin() points to the logical location + /// of the token in the input. + StringRef Range; + + Token() : Kind(TK_Error) {} +}; +} +} + +template<> +struct ilist_sentinel_traits { + Token *createSentinel() const { + return &Sentinel; + } + static void destroySentinel(Token*) {} + + Token *provideInitialHead() const { return createSentinel(); } + Token *ensureHead(Token*) const { return createSentinel(); } + static void noteHead(Token*, Token*) {} + +private: + mutable Token Sentinel; +}; + +template<> +struct ilist_node_traits { + Token *createNode(const Token &V) { + return new (Alloc.Allocate()) Token(V); + } + static void deleteNode(Token *V) {} + + void addNodeToList(Token *) {} + void removeNodeFromList(Token *) {} + void transferNodesFromList(ilist_node_traits & /*SrcTraits*/, + ilist_iterator /*first*/, + ilist_iterator /*last*/) {} + + BumpPtrAllocator Alloc; +}; + +typedef ilist TokenQueueT; + +namespace { +/// @brief This struct is used to track simple keys. +/// +/// Simple keys are handled by creating an entry in SimpleKeys for each Token +/// which could legally be the start of a simple key. When peekNext is called, +/// if the Token To be returned is referenced by a SimpleKey, we continue +/// tokenizing until that potential simple key has either been found to not be +/// a simple key (we moved on to the next line or went further than 1024 chars). +/// Or when we run into a Value, and then insert a Key token (and possibly +/// others) before the SimpleKey's Tok. 
+struct SimpleKey { + TokenQueueT::iterator Tok; + unsigned Column; + unsigned Line; + unsigned FlowLevel; + bool IsRequired; + + bool operator ==(const SimpleKey &Other) { + return Tok == Other.Tok; + } +}; +} + +/// @brief The Unicode scalar value of a UTF-8 minimal well-formed code unit +/// subsequence and the subsequence's length in code units (uint8_t). +/// A length of 0 represents an error. +typedef std::pair UTF8Decoded; + +/// @brief Decode the UTF-8 code unit subsequence at the start of \a Range. +/// +/// @returns The decoded scalar value and its length in code units, or (0, 0) +/// if \a Range is empty, truncated, or not a minimal well-formed +/// subsequence. +static UTF8Decoded decodeUTF8(StringRef Range) { + StringRef::iterator Position= Range.begin(); + StringRef::iterator End = Range.end(); + // Every length check below compares against the number of code units that + // are actually left, so a truncated sequence is never read past End. + // 1 byte: [0x00, 0x7f] + // Bit pattern: 0xxxxxxx + if (Position != End && (*Position & 0x80) == 0) { + return std::make_pair(*Position, 1); + } + // 2 bytes: [0x80, 0x7ff] + // Bit pattern: 110xxxxx 10xxxxxx + if (End - Position >= 2 && + ((*Position & 0xE0) == 0xC0) && + ((*(Position + 1) & 0xC0) == 0x80)) { + uint32_t codepoint = ((*Position & 0x1F) << 6) | + (*(Position + 1) & 0x3F); + if (codepoint >= 0x80) + return std::make_pair(codepoint, 2); + } + // 3 bytes: [0x800, 0xffff] + // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx + if (End - Position >= 3 && + ((*Position & 0xF0) == 0xE0) && + ((*(Position + 1) & 0xC0) == 0x80) && + ((*(Position + 2) & 0xC0) == 0x80)) { + uint32_t codepoint = ((*Position & 0x0F) << 12) | + ((*(Position + 1) & 0x3F) << 6) | + (*(Position + 2) & 0x3F); + // Codepoints between 0xD800 and 0xDFFF are invalid, as + // they are high / low surrogate halves used by UTF-16. 
+ if (codepoint >= 0x800 && + (codepoint < 0xD800 || codepoint > 0xDFFF)) + return std::make_pair(codepoint, 3); + } + // 4 bytes: [0x10000, 0x10FFFF] + // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + if (End - Position >= 4 && + ((*Position & 0xF8) == 0xF0) && + ((*(Position + 1) & 0xC0) == 0x80) && + ((*(Position + 2) & 0xC0) == 0x80) && + ((*(Position + 3) & 0xC0) == 0x80)) { + uint32_t codepoint = ((*Position & 0x07) << 18) | + ((*(Position + 1) & 0x3F) << 12) | + ((*(Position + 2) & 0x3F) << 6) | + (*(Position + 3) & 0x3F); + if (codepoint >= 0x10000 && codepoint <= 0x10FFFF) + return std::make_pair(codepoint, 4); + } + return std::make_pair(0, 0); +} + +namespace llvm { +namespace yaml { +/// @brief Scans YAML tokens from a MemoryBuffer. +class Scanner { +public: + Scanner(const StringRef Input, SourceMgr &SM); + + /// @brief Parse the next token and return it without popping it. + Token &peekNext(); + + /// @brief Parse the next token and pop it from the queue. + Token getNext(); + + void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, + ArrayRef Ranges = ArrayRef()) { + SM.PrintMessage(Loc, Kind, Message, Ranges); + } + + void setError(const Twine &Message, StringRef::iterator Position) { + if (Current >= End) + Current = End - 1; + + // Don't print out more errors after the first one we encounter. The rest + // are just the result of the first, and have no meaning. + if (!Failed) + printError(SMLoc::getFromPointer(Current), SourceMgr::DK_Error, Message); + Failed = true; + } + + void setError(const Twine &Message) { + setError(Message, Current); + } + + /// @brief Returns true if an error occurred while parsing. + bool failed() { + return Failed; + } + +private: + StringRef currentInput() { + return StringRef(Current, End - Current); + } + + /// @brief Decode a UTF-8 minimal well-formed code unit subsequence starting + /// at \a Position. 
+ /// + /// If the UTF-8 code units starting at Position do not form a well-formed + /// code unit subsequence, then the Unicode scalar value is 0, and the length + /// is 0. + UTF8Decoded decodeUTF8(StringRef::iterator Position) { + return ::decodeUTF8(StringRef(Position, End - Position)); + } + + // The following functions are based on the grammar rules in the YAML spec. The + // style of the function names is meant to closely match how they are written + // in the spec. The number within the [] is the number of the grammar rule in + // the spec. + // + // See 4.2 [Production Naming Conventions] for the meaning of the prefixes. + // + // c- + // A production starting and ending with a special character. + // b- + // A production matching a single line break. + // nb- + // A production starting and ending with a non-break character. + // s- + // A production starting and ending with a white space character. + // ns- + // A production starting and ending with a non-space character. + // l- + // A production matching complete line(s). + + /// @brief Skip a single nb-char[27] starting at Position. + /// + /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE] + /// | [0xFF00-0xFFFD] | [0x10000-0x10FFFF] + /// + /// @returns The code unit after the nb-char, or Position if it's not an + /// nb-char. + StringRef::iterator skip_nb_char(StringRef::iterator Position); + + /// @brief Skip a single b-break[28] starting at Position. + /// + /// A b-break is 0xD 0xA | 0xD | 0xA + /// + /// @returns The code unit after the b-break, or Position if it's not a + /// b-break. + StringRef::iterator skip_b_break(StringRef::iterator Position); + + /// @brief Skip a single s-white[33] starting at Position. + /// + /// A s-white is 0x20 | 0x9 + /// + /// @returns The code unit after the s-white, or Position if it's not a + /// s-white. + StringRef::iterator skip_s_white(StringRef::iterator Position); + + /// @brief Skip a single ns-char[34] starting at Position. 
+ /// + /// A ns-char is nb-char - s-white + /// + /// @returns The code unit after the ns-char, or Position if it's not a + /// ns-char. + StringRef::iterator skip_ns_char(StringRef::iterator Position); + + typedef StringRef::iterator (Scanner::*SkipWhileFunc)(StringRef::iterator); + /// @brief Skip minimal well-formed code unit subsequences until Func + /// returns its input. + /// + /// @returns The code unit after the last minimal well-formed code unit + /// subsequence that Func accepted. + StringRef::iterator skip_while( SkipWhileFunc Func + , StringRef::iterator Position); + + /// @brief Scan ns-uri-char[39]s starting at Cur. + /// + /// This updates Cur and Column while scanning. + /// + /// @returns A StringRef starting at Cur which covers the longest contiguous + /// sequence of ns-uri-char. + StringRef scan_ns_uri_char(); + + /// @brief Scan ns-plain-one-line[133] starting at \a Cur. + StringRef scan_ns_plain_one_line(); + + /// @brief Consume a minimal well-formed code unit subsequence starting at + /// \a Cur. Return false if it is not the same Unicode scalar value as + /// \a Expected. This updates \a Column. + bool consume(uint32_t Expected); + + /// @brief Skip \a Distance UTF-8 code units. Updates \a Cur and \a Column. + void skip(uint32_t Distance); + + /// @brief Return true if the minimal well-formed code unit subsequence at + /// Pos is whitespace or a new line + bool isBlankOrBreak(StringRef::iterator Position); + + /// @brief If IsSimpleKeyAllowed, create and push_back a new SimpleKey. + void saveSimpleKeyCandidate( TokenQueueT::iterator Tok + , unsigned AtColumn + , bool IsRequired); + + /// @brief Remove simple keys that can no longer be valid simple keys. + /// + /// Invalid simple keys are not on the current line or are further than 1024 + /// columns back. + void removeStaleSimpleKeyCandidates(); + + /// @brief Remove all simple keys on FlowLevel \a Level. 
+ void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level); + + /// @brief Unroll indentation in \a Indents back to \a Col. Creates BlockEnd + /// tokens if needed. + bool unrollIndent(int ToColumn); + + /// @brief Increase indent to \a Col. Creates \a Kind token at \a InsertPoint + /// if needed. + bool rollIndent( int ToColumn + , Token::TokenKind Kind + , TokenQueueT::iterator InsertPoint); + + /// @brief Skip whitespace and comments until the start of the next token. + void scanToNextToken(); + + /// @brief Must be the first token generated. + bool scanStreamStart(); + + /// @brief Generate tokens needed to close out the stream. + bool scanStreamEnd(); + + /// @brief Scan a %BLAH directive. + bool scanDirective(); + + /// @brief Scan a ... or ---. + bool scanDocumentIndicator(bool IsStart); + + /// @brief Scan a [ or { and generate the proper flow collection start token. + bool scanFlowCollectionStart(bool IsSequence); + + /// @brief Scan a ] or } and generate the proper flow collection end token. + bool scanFlowCollectionEnd(bool IsSequence); + + /// @brief Scan the , that separates entries in a flow collection. + bool scanFlowEntry(); + + /// @brief Scan the - that starts block sequence entries. + bool scanBlockEntry(); + + /// @brief Scan an explicit ? indicating a key. + bool scanKey(); + + /// @brief Scan an explicit : indicating a value. + bool scanValue(); + + /// @brief Scan a quoted scalar. + bool scanFlowScalar(bool IsDoubleQuoted); + + /// @brief Scan an unquoted scalar. + bool scanPlainScalar(); + + /// @brief Scan an Alias or Anchor starting with * or &. + bool scanAliasOrAnchor(bool IsAlias); + + /// @brief Scan a block scalar starting with | or >. + bool scanBlockScalar(bool IsLiteral); + + /// @brief Scan a tag of the form !stuff. + bool scanTag(); + + /// @brief Dispatch to the next scanning function based on \a *Cur. + bool fetchMoreTokens(); + + /// @brief The SourceMgr used for diagnostics and buffer management. 
+ SourceMgr &SM; + + /// @brief The original input. + MemoryBuffer *InputBuffer; + + /// @brief The current position of the scanner. + StringRef::iterator Current; + + /// @brief The end of the input (one past the last character). + StringRef::iterator End; + + /// @brief Current YAML indentation level in spaces. + int Indent; + + /// @brief Current column number in Unicode code points. + unsigned Column; + + /// @brief Current line number. + unsigned Line; + + /// @brief How deep we are in flow style containers. 0 Means at block level. + unsigned FlowLevel; + + /// @brief Are we at the start of the stream? + bool IsStartOfStream; + + /// @brief Can the next token be the start of a simple key? + bool IsSimpleKeyAllowed; + + /// @brief Is the next token required to start a simple key? + bool IsSimpleKeyRequired; + + /// @brief True if an error has occurred. + bool Failed; + + /// @brief Queue of tokens. This is required to queue up tokens while looking + /// for the end of a simple key. And for cases where a single character + /// can produce multiple tokens (e.g. BlockEnd). + TokenQueueT TokenQueue; + + /// @brief Indentation levels. + SmallVector Indents; + + /// @brief Potential simple keys. + SmallVector SimpleKeys; +}; + +} // end namespace yaml +} // end namespace llvm + +/// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result. 
+static void encodeUTF8( uint32_t UnicodeScalarValue + , SmallVectorImpl &Result) { + if (UnicodeScalarValue <= 0x7F) { + Result.push_back(UnicodeScalarValue & 0x7F); + } else if (UnicodeScalarValue <= 0x7FF) { + uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6); + uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F); + Result.push_back(FirstByte); + Result.push_back(SecondByte); + } else if (UnicodeScalarValue <= 0xFFFF) { + uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12); + uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6); + uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F); + Result.push_back(FirstByte); + Result.push_back(SecondByte); + Result.push_back(ThirdByte); + } else if (UnicodeScalarValue <= 0x10FFFF) { + uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18); + uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12); + uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6); + uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F); + Result.push_back(FirstByte); + Result.push_back(SecondByte); + Result.push_back(ThirdByte); + Result.push_back(FourthByte); + } +} + +bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) { + SourceMgr SM; + Scanner scanner(Input, SM); + while (true) { + Token T = scanner.getNext(); + switch (T.Kind) { + case Token::TK_StreamStart: + OS << "Stream-Start: "; + break; + case Token::TK_StreamEnd: + OS << "Stream-End: "; + break; + case Token::TK_VersionDirective: + OS << "Version-Directive: "; + break; + case Token::TK_TagDirective: + OS << "Tag-Directive: "; + break; + case Token::TK_DocumentStart: + OS << "Document-Start: "; + break; + case Token::TK_DocumentEnd: + OS << "Document-End: "; + break; + case Token::TK_BlockEntry: + OS << "Block-Entry: "; + break; + case Token::TK_BlockEnd: + OS << "Block-End: "; + break; + case Token::TK_BlockSequenceStart: + OS << "Block-Sequence-Start: "; + break; + case Token::TK_BlockMappingStart: + OS << 
"Block-Mapping-Start: "; + break; + case Token::TK_FlowEntry: + OS << "Flow-Entry: "; + break; + case Token::TK_FlowSequenceStart: + OS << "Flow-Sequence-Start: "; + break; + case Token::TK_FlowSequenceEnd: + OS << "Flow-Sequence-End: "; + break; + case Token::TK_FlowMappingStart: + OS << "Flow-Mapping-Start: "; + break; + case Token::TK_FlowMappingEnd: + OS << "Flow-Mapping-End: "; + break; + case Token::TK_Key: + OS << "Key: "; + break; + case Token::TK_Value: + OS << "Value: "; + break; + case Token::TK_Scalar: + OS << "Scalar: "; + break; + case Token::TK_Alias: + OS << "Alias: "; + break; + case Token::TK_Anchor: + OS << "Anchor: "; + break; + case Token::TK_Tag: + OS << "Tag: "; + break; + case Token::TK_Error: + break; + } + OS << T.Range << "\n"; + if (T.Kind == Token::TK_StreamEnd) + break; + else if (T.Kind == Token::TK_Error) + return false; + } + return true; +} + +bool yaml::scanTokens(StringRef Input) { + llvm::SourceMgr SM; + llvm::yaml::Scanner scanner(Input, SM); + for (;;) { + llvm::yaml::Token T = scanner.getNext(); + if (T.Kind == Token::TK_StreamEnd) + break; + else if (T.Kind == Token::TK_Error) + return false; + } + return true; +} + +std::string yaml::escape(StringRef Input) { + std::string EscapedInput; + for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; ++i) { + if (*i == '\\') + EscapedInput += "\\\\"; + else if (*i == '"') + EscapedInput += "\\\""; + else if (*i == 0) + EscapedInput += "\\0"; + else if (*i == 0x07) + EscapedInput += "\\a"; + else if (*i == 0x08) + EscapedInput += "\\b"; + else if (*i == 0x09) + EscapedInput += "\\t"; + else if (*i == 0x0A) + EscapedInput += "\\n"; + else if (*i == 0x0B) + EscapedInput += "\\v"; + else if (*i == 0x0C) + EscapedInput += "\\f"; + else if (*i == 0x0D) + EscapedInput += "\\r"; + else if (*i == 0x1B) + EscapedInput += "\\e"; + else if (*i >= 0 && *i < 0x20) { // Control characters not handled above. 
+ std::string HexStr = utohexstr(*i); + EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; + } else if (*i & 0x80) { // UTF-8 multiple code unit subsequence. + UTF8Decoded UnicodeScalarValue + = decodeUTF8(StringRef(i, Input.end() - i)); + if (UnicodeScalarValue.second == 0) { + // Found invalid char. + SmallString<4> Val; + encodeUTF8(0xFFFD, Val); + EscapedInput.insert(EscapedInput.end(), Val.begin(), Val.end()); + // FIXME: Error reporting. + return EscapedInput; + } + if (UnicodeScalarValue.first == 0x85) + EscapedInput += "\\N"; + else if (UnicodeScalarValue.first == 0xA0) + EscapedInput += "\\_"; + else if (UnicodeScalarValue.first == 0x2028) + EscapedInput += "\\L"; + else if (UnicodeScalarValue.first == 0x2029) + EscapedInput += "\\P"; + else { + std::string HexStr = utohexstr(UnicodeScalarValue.first); + if (HexStr.size() <= 2) + EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; + else if (HexStr.size() <= 4) + EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr; + else if (HexStr.size() <= 8) + EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr; + } + i += UnicodeScalarValue.second - 1; + } else + EscapedInput.push_back(*i); + } + return EscapedInput; +} + +Scanner::Scanner(StringRef Input, SourceMgr &sm) + : SM(sm) + , Indent(-1) + , Column(0) + , Line(0) + , FlowLevel(0) + , IsStartOfStream(true) + , IsSimpleKeyAllowed(true) + , IsSimpleKeyRequired(false) + , Failed(false) { + InputBuffer = MemoryBuffer::getMemBuffer(Input, "YAML"); + SM.AddNewSourceBuffer(InputBuffer, SMLoc()); + Current = InputBuffer->getBufferStart(); + End = InputBuffer->getBufferEnd(); +} + +Token &Scanner::peekNext() { + // If the current token is a possible simple key, keep parsing until we + // can confirm. 
+ bool NeedMore = false; + while (true) { + if (TokenQueue.empty() || NeedMore) { + if (!fetchMoreTokens()) { + TokenQueue.clear(); + TokenQueue.push_back(Token()); + return TokenQueue.front(); + } + } + assert(!TokenQueue.empty() && + "fetchMoreTokens lied about getting tokens!"); + + removeStaleSimpleKeyCandidates(); + SimpleKey SK; + SK.Tok = TokenQueue.front(); + if (std::find(SimpleKeys.begin(), SimpleKeys.end(), SK) + == SimpleKeys.end()) + break; + else + NeedMore = true; + } + return TokenQueue.front(); +} + +Token Scanner::getNext() { + Token Ret = peekNext(); + // TokenQueue can be empty if there was an error getting the next token. + if (!TokenQueue.empty()) + TokenQueue.pop_front(); + + // There cannot be any referenced Token's if the TokenQueue is empty. So do a + // quick deallocation of them all. + if (TokenQueue.empty()) { + TokenQueue.Alloc.Reset(); + } + + return Ret; +} + +StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) { + // Check 7 bit c-printable - b-char. + if ( *Position == 0x09 + || (*Position >= 0x20 && *Position <= 0x7E)) + return Position + 1; + + // Check for valid UTF-8. 
+ if (uint8_t(*Position) & 0x80) { + UTF8Decoded u8d = decodeUTF8(Position); + if ( u8d.second != 0 + && u8d.first != 0xFEFF + && ( u8d.first == 0x85 + || ( u8d.first >= 0xA0 + && u8d.first <= 0xD7FF) + || ( u8d.first >= 0xE000 + && u8d.first <= 0xFFFD) + || ( u8d.first >= 0x10000 + && u8d.first <= 0x10FFFF))) + return Position + u8d.second; + } + return Position; +} + +StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) { + if (*Position == 0x0D) { + if (Position + 1 != End && *(Position + 1) == 0x0A) + return Position + 2; + return Position + 1; + } + + if (*Position == 0x0A) + return Position + 1; + return Position; +} + + +StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) { + if (Position == End) + return Position; + if (*Position == ' ' || *Position == '\t') + return Position + 1; + return Position; +} + +StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) { + if (Position == End) + return Position; + if (*Position == ' ' || *Position == '\t') + return Position; + return skip_nb_char(Position); +} + +StringRef::iterator Scanner::skip_while( SkipWhileFunc Func + , StringRef::iterator Position) { + while (true) { + StringRef::iterator i = (this->*Func)(Position); + if (i == Position) + break; + Position = i; + } + return Position; +} + +static bool is_ns_hex_digit(const char C) { + return (C >= '0' && C <= '9') + || (C >= 'a' && C <= 'z') + || (C >= 'A' && C <= 'Z'); +} + +static bool is_ns_word_char(const char C) { + return C == '-' + || (C >= 'a' && C <= 'z') + || (C >= 'A' && C <= 'Z'); +} + +StringRef Scanner::scan_ns_uri_char() { + StringRef::iterator Start = Current; + while (true) { + if (Current == End) + break; + if (( *Current == '%' + && Current + 2 < End + && is_ns_hex_digit(*(Current + 1)) + && is_ns_hex_digit(*(Current + 2))) + || is_ns_word_char(*Current) + || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]") + != StringRef::npos) { + ++Current; + ++Column; + } else + break; 
+ } + return StringRef(Start, Current - Start); +} + +StringRef Scanner::scan_ns_plain_one_line() { + StringRef::iterator start = Current; + // The first character must already be verified. + ++Current; + while (true) { + if (Current == End) { + break; + } else if (*Current == ':') { + // Check if the next character is a ns-char. + if (Current + 1 == End) + break; + StringRef::iterator i = skip_ns_char(Current + 1); + if (Current + 1 != i) { + Current = i; + Column += 2; // Consume both the ':' and ns-char. + } else + break; + } else if (*Current == '#') { + // Check if the previous character was a ns-char. + // The & 0x80 check is to check for the trailing byte of a utf-8 + if (*(Current - 1) & 0x80 || skip_ns_char(Current - 1) == Current) { + ++Current; + ++Column; + } else + break; + } else { + StringRef::iterator i = skip_nb_char(Current); + if (i == Current) + break; + Current = i; + ++Column; + } + } + return StringRef(start, Current - start); +} + +bool Scanner::consume(uint32_t Expected) { + if (Expected >= 0x80) + report_fatal_error("Not dealing with this yet"); + if (Current == End) + return false; + if (uint8_t(*Current) >= 0x80) + report_fatal_error("Not dealing with this yet"); + if (uint8_t(*Current) == Expected) { + ++Current; + ++Column; + return true; + } + return false; +} + +void Scanner::skip(uint32_t Distance) { + Current += Distance; + Column += Distance; +} + +bool Scanner::isBlankOrBreak(StringRef::iterator Position) { + if (Position == End) + return false; + if ( *Position == ' ' || *Position == '\t' + || *Position == '\r' || *Position == '\n') + return true; + return false; +} + +void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok + , unsigned AtColumn + , bool IsRequired) { + if (IsSimpleKeyAllowed) { + SimpleKey SK; + SK.Tok = Tok; + SK.Line = Line; + SK.Column = AtColumn; + SK.IsRequired = IsRequired; + SK.FlowLevel = FlowLevel; + SimpleKeys.push_back(SK); + } +} + +void Scanner::removeStaleSimpleKeyCandidates() { + for 
(SmallVectorImpl::iterator i = SimpleKeys.begin(); + i != SimpleKeys.end();) { + if (i->Line != Line || i->Column + 1024 < Column) { + if (i->IsRequired) + setError( "Could not find expected : for simple key" + , i->Tok->Range.begin()); + i = SimpleKeys.erase(i); + } else + ++i; + } +} + +void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) { + if (!SimpleKeys.empty() && (SimpleKeys.end() - 1)->FlowLevel == Level) + SimpleKeys.pop_back(); +} + +bool Scanner::unrollIndent(int ToColumn) { + Token T; + // Indentation is ignored in flow. + if (FlowLevel != 0) + return true; + + while (Indent > ToColumn) { + T.Kind = Token::TK_BlockEnd; + T.Range = StringRef(Current, 1); + TokenQueue.push_back(T); + Indent = Indents.pop_back_val(); + } + + return true; +} + +bool Scanner::rollIndent( int ToColumn + , Token::TokenKind Kind + , TokenQueueT::iterator InsertPoint) { + if (FlowLevel) + return true; + if (Indent < ToColumn) { + Indents.push_back(Indent); + Indent = ToColumn; + + Token T; + T.Kind = Kind; + T.Range = StringRef(Current, 0); + TokenQueue.insert(InsertPoint, T); + } + return true; +} + +void Scanner::scanToNextToken() { + while (true) { + while (*Current == ' ' || *Current == '\t') { + skip(1); + } + + // Skip comment. + if (*Current == '#') { + while (true) { + // This may skip more than one byte, thus Column is only incremented + // for code points. + StringRef::iterator i = skip_nb_char(Current); + if (i == Current) + break; + Current = i; + ++Column; + } + } + + // Skip EOL. + StringRef::iterator i = skip_b_break(Current); + if (i == Current) + break; + Current = i; + ++Line; + Column = 0; + // New lines may start a simple key. 
+ if (!FlowLevel) + IsSimpleKeyAllowed = true; + } +} + +bool Scanner::scanStreamStart() { + IsStartOfStream = false; + + EncodingInfo EI = getUnicodeEncoding(currentInput()); + + Token T; + T.Kind = Token::TK_StreamStart; + T.Range = StringRef(Current, EI.second); + TokenQueue.push_back(T); + Current += EI.second; + return true; +} + +bool Scanner::scanStreamEnd() { + // Force an ending new line if one isn't present. + if (Column != 0) { + Column = 0; + ++Line; + } + + unrollIndent(-1); + SimpleKeys.clear(); + IsSimpleKeyAllowed = false; + + Token T; + T.Kind = Token::TK_StreamEnd; + T.Range = StringRef(Current, 0); + TokenQueue.push_back(T); + return true; +} + +bool Scanner::scanDirective() { + // Reset the indentation level. + unrollIndent(-1); + SimpleKeys.clear(); + IsSimpleKeyAllowed = false; + + StringRef::iterator Start = Current; + consume('%'); + StringRef::iterator NameStart = Current; + Current = skip_while(&Scanner::skip_ns_char, Current); + StringRef Name(NameStart, Current - NameStart); + Current = skip_while(&Scanner::skip_s_white, Current); + + if (Name == "YAML") { + Current = skip_while(&Scanner::skip_ns_char, Current); + Token T; + T.Kind = Token::TK_VersionDirective; + T.Range = StringRef(Start, Current - Start); + TokenQueue.push_back(T); + return true; + } + return false; +} + +bool Scanner::scanDocumentIndicator(bool IsStart) { + unrollIndent(-1); + SimpleKeys.clear(); + IsSimpleKeyAllowed = false; + + Token T; + T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd; + T.Range = StringRef(Current, 3); + skip(3); + TokenQueue.push_back(T); + return true; +} + +bool Scanner::scanFlowCollectionStart(bool IsSequence) { + Token T; + T.Kind = IsSequence ? Token::TK_FlowSequenceStart + : Token::TK_FlowMappingStart; + T.Range = StringRef(Current, 1); + skip(1); + TokenQueue.push_back(T); + + // [ and { may begin a simple key. 
+ saveSimpleKeyCandidate(TokenQueue.back(), Column - 1, false); + + // And may also be followed by a simple key. + IsSimpleKeyAllowed = true; + ++FlowLevel; + return true; +} + +bool Scanner::scanFlowCollectionEnd(bool IsSequence) { + removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); + IsSimpleKeyAllowed = false; + Token T; + T.Kind = IsSequence ? Token::TK_FlowSequenceEnd + : Token::TK_FlowMappingEnd; + T.Range = StringRef(Current, 1); + skip(1); + TokenQueue.push_back(T); + if (FlowLevel) + --FlowLevel; + return true; +} + +bool Scanner::scanFlowEntry() { + removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); + IsSimpleKeyAllowed = true; + Token T; + T.Kind = Token::TK_FlowEntry; + T.Range = StringRef(Current, 1); + skip(1); + TokenQueue.push_back(T); + return true; +} + +bool Scanner::scanBlockEntry() { + rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end()); + removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); + IsSimpleKeyAllowed = true; + Token T; + T.Kind = Token::TK_BlockEntry; + T.Range = StringRef(Current, 1); + skip(1); + TokenQueue.push_back(T); + return true; +} + +bool Scanner::scanKey() { + if (!FlowLevel) + rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); + + removeSimpleKeyCandidatesOnFlowLevel(FlowLevel); + IsSimpleKeyAllowed = !FlowLevel; + + Token T; + T.Kind = Token::TK_Key; + T.Range = StringRef(Current, 1); + skip(1); + TokenQueue.push_back(T); + return true; +} + +bool Scanner::scanValue() { + // If the previous token could have been a simple key, insert the key token + // into the token queue. + if (!SimpleKeys.empty()) { + SimpleKey SK = SimpleKeys.pop_back_val(); + Token T; + T.Kind = Token::TK_Key; + T.Range = SK.Tok->Range; + TokenQueueT::iterator i, e; + for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) { + if (i == SK.Tok) + break; + } + assert(i != e && "SimpleKey not in token queue!"); + i = TokenQueue.insert(i, T); + + // We may also need to add a Block-Mapping-Start token. 
+ rollIndent(SK.Column, Token::TK_BlockMappingStart, i); + + IsSimpleKeyAllowed = false; + } else { + if (!FlowLevel) + rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end()); + IsSimpleKeyAllowed = !FlowLevel; + } + + Token T; + T.Kind = Token::TK_Value; + T.Range = StringRef(Current, 1); + skip(1); + TokenQueue.push_back(T); + return true; +} + +// Forbidding inlining improves performance by roughly 20%. +// FIXME: Remove once llvm optimizes this to the faster version without hints. +LLVM_ATTRIBUTE_NOINLINE static bool +wasEscaped(StringRef::iterator First, StringRef::iterator Position); + +// Returns whether a character at 'Position' was escaped with a leading '\'. +// 'First' specifies the position of the first character in the string. +static bool wasEscaped(StringRef::iterator First, + StringRef::iterator Position) { + assert(Position - 1 >= First); + StringRef::iterator I = Position - 1; + // We calculate the number of consecutive '\'s before the current position + // by iterating backwards through our string. + while (I >= First && *I == '\\') --I; + // (Position - 1 - I) now contains the number of '\'s before the current + // position. If it is odd, the character at 'Position' was escaped. + return (Position - 1 - I) % 2 == 1; +} + +bool Scanner::scanFlowScalar(bool IsDoubleQuoted) { + StringRef::iterator Start = Current; + unsigned ColStart = Column; + if (IsDoubleQuoted) { + do { + ++Current; + while (Current != End && *Current != '"') + ++Current; + // Repeat until the previous character was not a '\' or was an escaped + // backslash. + } while (*(Current - 1) == '\\' && wasEscaped(Start + 1, Current)); + } else { + skip(1); + while (true) { + // Skip a ' followed by another '. 
+ if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') { + skip(2); + continue; + } else if (*Current == '\'') + break; + StringRef::iterator i = skip_nb_char(Current); + if (i == Current) { + i = skip_b_break(Current); + if (i == Current) + break; + Current = i; + Column = 0; + ++Line; + } else { + if (i == End) + break; + Current = i; + ++Column; + } + } + } + skip(1); // Skip ending quote. + Token T; + T.Kind = Token::TK_Scalar; + T.Range = StringRef(Start, Current - Start); + TokenQueue.push_back(T); + + saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); + + IsSimpleKeyAllowed = false; + + return true; +} + +bool Scanner::scanPlainScalar() { + StringRef::iterator Start = Current; + unsigned ColStart = Column; + unsigned LeadingBlanks = 0; + assert(Indent >= -1 && "Indent must be >= -1 !"); + unsigned indent = static_cast(Indent + 1); + while (true) { + if (*Current == '#') + break; + + while (!isBlankOrBreak(Current)) { + if ( FlowLevel && *Current == ':' + && !(isBlankOrBreak(Current + 1) || *(Current + 1) == ',')) { + setError("Found unexpected ':' while scanning a plain scalar", Current); + return false; + } + + // Check for the end of the plain scalar. + if ( (*Current == ':' && isBlankOrBreak(Current + 1)) + || ( FlowLevel + && (StringRef(Current, 1).find_first_of(",:?[]{}") + != StringRef::npos))) + break; + + StringRef::iterator i = skip_nb_char(Current); + if (i == Current) + break; + Current = i; + ++Column; + } + + // Are we at the end? + if (!isBlankOrBreak(Current)) + break; + + // Eat blanks. 
+ StringRef::iterator Tmp = Current; + while (isBlankOrBreak(Tmp)) { + StringRef::iterator i = skip_s_white(Tmp); + if (i != Tmp) { + if (LeadingBlanks && (Column < indent) && *Tmp == '\t') { + setError("Found invalid tab character in indentation", Tmp); + return false; + } + Tmp = i; + ++Column; + } else { + i = skip_b_break(Tmp); + if (!LeadingBlanks) + LeadingBlanks = 1; + Tmp = i; + Column = 0; + ++Line; + } + } + + if (!FlowLevel && Column < indent) + break; + + Current = Tmp; + } + if (Start == Current) { + setError("Got empty plain scalar", Start); + return false; + } + Token T; + T.Kind = Token::TK_Scalar; + T.Range = StringRef(Start, Current - Start); + TokenQueue.push_back(T); + + // Plain scalars can be simple keys. + saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); + + IsSimpleKeyAllowed = false; + + return true; +} + +bool Scanner::scanAliasOrAnchor(bool IsAlias) { + StringRef::iterator Start = Current; + unsigned ColStart = Column; + skip(1); + while(true) { + if ( *Current == '[' || *Current == ']' + || *Current == '{' || *Current == '}' + || *Current == ',' + || *Current == ':') + break; + StringRef::iterator i = skip_ns_char(Current); + if (i == Current) + break; + Current = i; + ++Column; + } + + if (Start == Current) { + setError("Got empty alias or anchor", Start); + return false; + } + + Token T; + T.Kind = IsAlias ? Token::TK_Alias : Token::TK_Anchor; + T.Range = StringRef(Start, Current - Start); + TokenQueue.push_back(T); + + // Alias and anchors can be simple keys. + saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); + + IsSimpleKeyAllowed = false; + + return true; +} + +bool Scanner::scanBlockScalar(bool IsLiteral) { + StringRef::iterator Start = Current; + skip(1); // Eat | or > + while(true) { + StringRef::iterator i = skip_nb_char(Current); + if (i == Current) { + if (Column == 0) + break; + i = skip_b_break(Current); + if (i != Current) { + // We got a line break. 
+ Column = 0; + ++Line; + Current = i; + continue; + } else { + // There was an error, which should already have been printed out. + return false; + } + } + Current = i; + ++Column; + } + + if (Start == Current) { + setError("Got empty block scalar", Start); + return false; + } + + Token T; + T.Kind = Token::TK_Scalar; + T.Range = StringRef(Start, Current - Start); + TokenQueue.push_back(T); + return true; +} + +bool Scanner::scanTag() { + StringRef::iterator Start = Current; + unsigned ColStart = Column; + skip(1); // Eat !. + if (Current == End || isBlankOrBreak(Current)); // An empty tag. + else if (*Current == '<') { + skip(1); + scan_ns_uri_char(); + if (!consume('>')) + return false; + } else { + // FIXME: Actually parse the c-ns-shorthand-tag rule. + Current = skip_while(&Scanner::skip_ns_char, Current); + } + + Token T; + T.Kind = Token::TK_Tag; + T.Range = StringRef(Start, Current - Start); + TokenQueue.push_back(T); + + // Tags can be simple keys. + saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); + + IsSimpleKeyAllowed = false; + + return true; +} + +bool Scanner::fetchMoreTokens() { + if (IsStartOfStream) + return scanStreamStart(); + + scanToNextToken(); + + if (Current == End) + return scanStreamEnd(); + + removeStaleSimpleKeyCandidates(); + + unrollIndent(Column); + + if (Column == 0 && *Current == '%') + return scanDirective(); + + if (Column == 0 && Current + 4 <= End + && *Current == '-' + && *(Current + 1) == '-' + && *(Current + 2) == '-' + && (Current + 3 == End || isBlankOrBreak(Current + 3))) + return scanDocumentIndicator(true); + + if (Column == 0 && Current + 4 <= End + && *Current == '.' + && *(Current + 1) == '.' + && *(Current + 2) == '.' 
+ && (Current + 3 == End || isBlankOrBreak(Current + 3))) + return scanDocumentIndicator(false); + + if (*Current == '[') + return scanFlowCollectionStart(true); + + if (*Current == '{') + return scanFlowCollectionStart(false); + + if (*Current == ']') + return scanFlowCollectionEnd(true); + + if (*Current == '}') + return scanFlowCollectionEnd(false); + + if (*Current == ',') + return scanFlowEntry(); + + if (*Current == '-' && isBlankOrBreak(Current + 1)) + return scanBlockEntry(); + + if (*Current == '?' && (FlowLevel || isBlankOrBreak(Current + 1))) + return scanKey(); + + if (*Current == ':' && (FlowLevel || isBlankOrBreak(Current + 1))) + return scanValue(); + + if (*Current == '*') + return scanAliasOrAnchor(true); + + if (*Current == '&') + return scanAliasOrAnchor(false); + + if (*Current == '!') + return scanTag(); + + if (*Current == '|' && !FlowLevel) + return scanBlockScalar(true); + + if (*Current == '>' && !FlowLevel) + return scanBlockScalar(false); + + if (*Current == '\'') + return scanFlowScalar(false); + + if (*Current == '"') + return scanFlowScalar(true); + + // Get a plain scalar. + StringRef FirstChar(Current, 1); + if (!(isBlankOrBreak(Current) + || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos) + || (*Current == '-' && !isBlankOrBreak(Current + 1)) + || (!FlowLevel && (*Current == '?' 
|| *Current == ':') + && isBlankOrBreak(Current + 1)) + || (!FlowLevel && *Current == ':' + && Current + 2 < End + && *(Current + 1) == ':' + && !isBlankOrBreak(Current + 2))) + return scanPlainScalar(); + + setError("Unrecognized character while tokenizing."); + return false; +} + +Stream::Stream(StringRef Input, SourceMgr &SM) + : scanner(new Scanner(Input, SM)) + , CurrentDoc(0) {} + +bool Stream::failed() { return scanner->failed(); } + +void Stream::printError(Node *N, const Twine &Msg) { + SmallVector Ranges; + Ranges.push_back(N->getSourceRange()); + scanner->printError( N->getSourceRange().Start + , SourceMgr::DK_Error + , Msg + , Ranges); +} + +void Stream::handleYAMLDirective(const Token &t) { + // TODO: Ensure version is 1.x. +} + +document_iterator Stream::begin() { + if (CurrentDoc) + report_fatal_error("Can only iterate over the stream once"); + + // Skip Stream-Start. + scanner->getNext(); + + CurrentDoc.reset(new Document(*this)); + return document_iterator(CurrentDoc); +} + +document_iterator Stream::end() { + return document_iterator(); +} + +void Stream::skip() { + for (document_iterator i = begin(), e = end(); i != e; ++i) + i->skip(); +} + +Node::Node(unsigned int Type, OwningPtr &D, StringRef A) + : Doc(D) + , TypeID(Type) + , Anchor(A) { + SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin()); + SourceRange = SMRange(Start, Start); +} + +Node::~Node() {} + +Token &Node::peekNext() { + return Doc->peekNext(); +} + +Token Node::getNext() { + return Doc->getNext(); +} + +Node *Node::parseBlockNode() { + return Doc->parseBlockNode(); +} + +BumpPtrAllocator &Node::getAllocator() { + return Doc->NodeAllocator; +} + +void Node::setError(const Twine &Msg, Token &Tok) const { + Doc->setError(Msg, Tok); +} + +bool Node::failed() const { + return Doc->failed(); +} + + + +StringRef ScalarNode::getValue(SmallVectorImpl &Storage) const { + // TODO: Handle newlines properly. We need to remove leading whitespace. 
+ if (Value[0] == '"') { // Double quoted. + // Pull off the leading and trailing "s. + StringRef UnquotedValue = Value.substr(1, Value.size() - 2); + // Search for characters that would require unescaping the value. + StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n"); + if (i != StringRef::npos) + return unescapeDoubleQuoted(UnquotedValue, i, Storage); + return UnquotedValue; + } else if (Value[0] == '\'') { // Single quoted. + // Pull off the leading and trailing 's. + StringRef UnquotedValue = Value.substr(1, Value.size() - 2); + StringRef::size_type i = UnquotedValue.find('\''); + if (i != StringRef::npos) { + // We're going to need Storage. + Storage.clear(); + Storage.reserve(UnquotedValue.size()); + for (; i != StringRef::npos; i = UnquotedValue.find('\'')) { + StringRef Valid(UnquotedValue.begin(), i); + Storage.insert(Storage.end(), Valid.begin(), Valid.end()); + Storage.push_back('\''); + UnquotedValue = UnquotedValue.substr(i + 2); + } + Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end()); + return StringRef(Storage.begin(), Storage.size()); + } + return UnquotedValue; + } + // Plain or block. + size_t trimtrail = Value.rfind(' '); + return Value.drop_back( + trimtrail == StringRef::npos ? 0 : Value.size() - trimtrail); +} + +StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue + , StringRef::size_type i + , SmallVectorImpl &Storage) + const { + // Use Storage to build proper value. + Storage.clear(); + Storage.reserve(UnquotedValue.size()); + for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) { + // Insert all previous chars into Storage. + StringRef Valid(UnquotedValue.begin(), i); + Storage.insert(Storage.end(), Valid.begin(), Valid.end()); + // Chop off inserted chars. + UnquotedValue = UnquotedValue.substr(i); + + assert(!UnquotedValue.empty() && "Can't be empty!"); + + // Parse escape or line break. 
+ switch (UnquotedValue[0]) { + case '\r': + case '\n': + Storage.push_back('\n'); + if ( UnquotedValue.size() > 1 + && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n')) + UnquotedValue = UnquotedValue.substr(1); + UnquotedValue = UnquotedValue.substr(1); + break; + default: + if (UnquotedValue.size() == 1) + // TODO: Report error. + break; + UnquotedValue = UnquotedValue.substr(1); + switch (UnquotedValue[0]) { + default: { + Token T; + T.Range = StringRef(UnquotedValue.begin(), 1); + setError("Unrecognized escape code!", T); + return ""; + } + case '\r': + case '\n': + // Remove the new line. + if ( UnquotedValue.size() > 1 + && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n')) + UnquotedValue = UnquotedValue.substr(1); + // If this was just a single byte newline, it will get skipped + // below. + break; + case '0': + Storage.push_back(0x00); + break; + case 'a': + Storage.push_back(0x07); + break; + case 'b': + Storage.push_back(0x08); + break; + case 't': + case 0x09: + Storage.push_back(0x09); + break; + case 'n': + Storage.push_back(0x0A); + break; + case 'v': + Storage.push_back(0x0B); + break; + case 'f': + Storage.push_back(0x0C); + break; + case 'r': + Storage.push_back(0x0D); + break; + case 'e': + Storage.push_back(0x1B); + break; + case ' ': + Storage.push_back(0x20); + break; + case '"': + Storage.push_back(0x22); + break; + case '/': + Storage.push_back(0x2F); + break; + case '\\': + Storage.push_back(0x5C); + break; + case 'N': + encodeUTF8(0x85, Storage); + break; + case '_': + encodeUTF8(0xA0, Storage); + break; + case 'L': + encodeUTF8(0x2028, Storage); + break; + case 'P': + encodeUTF8(0x2029, Storage); + break; + case 'x': { + if (UnquotedValue.size() < 3) + // TODO: Report error. 
+ break; + unsigned int UnicodeScalarValue; + UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue); + encodeUTF8(UnicodeScalarValue, Storage); + UnquotedValue = UnquotedValue.substr(2); + break; + } + case 'u': { + if (UnquotedValue.size() < 5) + // TODO: Report error. + break; + unsigned int UnicodeScalarValue; + UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue); + encodeUTF8(UnicodeScalarValue, Storage); + UnquotedValue = UnquotedValue.substr(4); + break; + } + case 'U': { + if (UnquotedValue.size() < 9) + // TODO: Report error. + break; + unsigned int UnicodeScalarValue; + UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue); + encodeUTF8(UnicodeScalarValue, Storage); + UnquotedValue = UnquotedValue.substr(8); + break; + } + } + UnquotedValue = UnquotedValue.substr(1); + } + } + Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end()); + return StringRef(Storage.begin(), Storage.size()); +} + +Node *KeyValueNode::getKey() { + if (Key) + return Key; + // Handle implicit null keys. + { + Token &t = peekNext(); + if ( t.Kind == Token::TK_BlockEnd + || t.Kind == Token::TK_Value + || t.Kind == Token::TK_Error) { + return Key = new (getAllocator()) NullNode(Doc); + } + if (t.Kind == Token::TK_Key) + getNext(); // skip TK_Key. + } + + // Handle explicit null keys. + Token &t = peekNext(); + if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Value) { + return Key = new (getAllocator()) NullNode(Doc); + } + + // We've got a normal key. + return Key = parseBlockNode(); +} + +Node *KeyValueNode::getValue() { + if (Value) + return Value; + getKey()->skip(); + if (failed()) + return Value = new (getAllocator()) NullNode(Doc); + + // Handle implicit null values. 
+ { + Token &t = peekNext(); + if ( t.Kind == Token::TK_BlockEnd + || t.Kind == Token::TK_FlowMappingEnd + || t.Kind == Token::TK_Key + || t.Kind == Token::TK_FlowEntry + || t.Kind == Token::TK_Error) { + return Value = new (getAllocator()) NullNode(Doc); + } + + if (t.Kind != Token::TK_Value) { + setError("Unexpected token in Key Value.", t); + return Value = new (getAllocator()) NullNode(Doc); + } + getNext(); // skip TK_Value. + } + + // Handle explicit null values. + Token &t = peekNext(); + if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Key) { + return Value = new (getAllocator()) NullNode(Doc); + } + + // We got a normal value. + return Value = parseBlockNode(); +} + +void MappingNode::increment() { + if (failed()) { + IsAtEnd = true; + CurrentEntry = 0; + return; + } + if (CurrentEntry) { + CurrentEntry->skip(); + if (Type == MT_Inline) { + IsAtEnd = true; + CurrentEntry = 0; + return; + } + } + Token T = peekNext(); + if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) { + // KeyValueNode eats the TK_Key. That way it can detect null keys. + CurrentEntry = new (getAllocator()) KeyValueNode(Doc); + } else if (Type == MT_Block) { + switch (T.Kind) { + case Token::TK_BlockEnd: + getNext(); + IsAtEnd = true; + CurrentEntry = 0; + break; + default: + setError("Unexpected token. Expected Key or Block End", T); + case Token::TK_Error: + IsAtEnd = true; + CurrentEntry = 0; + } + } else { + switch (T.Kind) { + case Token::TK_FlowEntry: + // Eat the flow entry and recurse. + getNext(); + return increment(); + case Token::TK_FlowMappingEnd: + getNext(); + case Token::TK_Error: + // Set this to end iterator. + IsAtEnd = true; + CurrentEntry = 0; + break; + default: + setError( "Unexpected token. Expected Key, Flow Entry, or Flow " + "Mapping End." 
+ , T); + IsAtEnd = true; + CurrentEntry = 0; + } + } +} + +void SequenceNode::increment() { + if (failed()) { + IsAtEnd = true; + CurrentEntry = 0; + return; + } + if (CurrentEntry) + CurrentEntry->skip(); + Token T = peekNext(); + if (SeqType == ST_Block) { + switch (T.Kind) { + case Token::TK_BlockEntry: + getNext(); + CurrentEntry = parseBlockNode(); + if (CurrentEntry == 0) { // An error occurred. + IsAtEnd = true; + CurrentEntry = 0; + } + break; + case Token::TK_BlockEnd: + getNext(); + IsAtEnd = true; + CurrentEntry = 0; + break; + default: + setError( "Unexpected token. Expected Block Entry or Block End." + , T); + case Token::TK_Error: + IsAtEnd = true; + CurrentEntry = 0; + } + } else if (SeqType == ST_Indentless) { + switch (T.Kind) { + case Token::TK_BlockEntry: + getNext(); + CurrentEntry = parseBlockNode(); + if (CurrentEntry == 0) { // An error occurred. + IsAtEnd = true; + CurrentEntry = 0; + } + break; + default: + case Token::TK_Error: + IsAtEnd = true; + CurrentEntry = 0; + } + } else if (SeqType == ST_Flow) { + switch (T.Kind) { + case Token::TK_FlowEntry: + // Eat the flow entry and recurse. + getNext(); + WasPreviousTokenFlowEntry = true; + return increment(); + case Token::TK_FlowSequenceEnd: + getNext(); + case Token::TK_Error: + // Set this to end iterator. + IsAtEnd = true; + CurrentEntry = 0; + break; + case Token::TK_StreamEnd: + case Token::TK_DocumentEnd: + case Token::TK_DocumentStart: + setError("Could not find closing ]!", T); + // Set this to end iterator. + IsAtEnd = true; + CurrentEntry = 0; + break; + default: + if (!WasPreviousTokenFlowEntry) { + setError("Expected , between entries!", T); + IsAtEnd = true; + CurrentEntry = 0; + break; + } + // Otherwise it must be a flow entry. 
+ CurrentEntry = parseBlockNode(); + if (!CurrentEntry) { + IsAtEnd = true; + } + WasPreviousTokenFlowEntry = false; + break; + } + } +} + +Document::Document(Stream &S) : stream(S), Root(0) { + if (parseDirectives()) + expectToken(Token::TK_DocumentStart); + Token &T = peekNext(); + if (T.Kind == Token::TK_DocumentStart) + getNext(); +} + +bool Document::skip() { + if (stream.scanner->failed()) + return false; + if (!Root) + getRoot(); + Root->skip(); + Token &T = peekNext(); + if (T.Kind == Token::TK_StreamEnd) + return false; + if (T.Kind == Token::TK_DocumentEnd) { + getNext(); + return skip(); + } + return true; +} + +Token &Document::peekNext() { + return stream.scanner->peekNext(); +} + +Token Document::getNext() { + return stream.scanner->getNext(); +} + +void Document::setError(const Twine &Message, Token &Location) const { + stream.scanner->setError(Message, Location.Range.begin()); +} + +bool Document::failed() const { + return stream.scanner->failed(); +} + +Node *Document::parseBlockNode() { + Token T = peekNext(); + // Handle properties. + Token AnchorInfo; +parse_property: + switch (T.Kind) { + case Token::TK_Alias: + getNext(); + return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1)); + case Token::TK_Anchor: + if (AnchorInfo.Kind == Token::TK_Anchor) { + setError("Already encountered an anchor for this node!", T); + return 0; + } + AnchorInfo = getNext(); // Consume TK_Anchor. + T = peekNext(); + goto parse_property; + case Token::TK_Tag: + getNext(); // Skip TK_Tag. + T = peekNext(); + goto parse_property; + default: + break; + } + + switch (T.Kind) { + case Token::TK_BlockEntry: + // We got an unindented BlockEntry sequence. This is not terminated with + // a BlockEnd. + // Don't eat the TK_BlockEntry, SequenceNode needs it. 
+ return new (NodeAllocator) SequenceNode( stream.CurrentDoc + , AnchorInfo.Range.substr(1) + , SequenceNode::ST_Indentless); + case Token::TK_BlockSequenceStart: + getNext(); + return new (NodeAllocator) + SequenceNode( stream.CurrentDoc + , AnchorInfo.Range.substr(1) + , SequenceNode::ST_Block); + case Token::TK_BlockMappingStart: + getNext(); + return new (NodeAllocator) + MappingNode( stream.CurrentDoc + , AnchorInfo.Range.substr(1) + , MappingNode::MT_Block); + case Token::TK_FlowSequenceStart: + getNext(); + return new (NodeAllocator) + SequenceNode( stream.CurrentDoc + , AnchorInfo.Range.substr(1) + , SequenceNode::ST_Flow); + case Token::TK_FlowMappingStart: + getNext(); + return new (NodeAllocator) + MappingNode( stream.CurrentDoc + , AnchorInfo.Range.substr(1) + , MappingNode::MT_Flow); + case Token::TK_Scalar: + getNext(); + return new (NodeAllocator) + ScalarNode( stream.CurrentDoc + , AnchorInfo.Range.substr(1) + , T.Range); + case Token::TK_Key: + // Don't eat the TK_Key, KeyValueNode expects it. + return new (NodeAllocator) + MappingNode( stream.CurrentDoc + , AnchorInfo.Range.substr(1) + , MappingNode::MT_Inline); + case Token::TK_DocumentStart: + case Token::TK_DocumentEnd: + case Token::TK_StreamEnd: + default: + // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not + // !!null null. 
+ return new (NodeAllocator) NullNode(stream.CurrentDoc); + case Token::TK_Error: + return 0; + } + llvm_unreachable("Control flow shouldn't reach here."); + return 0; +} + +bool Document::parseDirectives() { + bool isDirective = false; + while (true) { + Token T = peekNext(); + if (T.Kind == Token::TK_TagDirective) { + handleTagDirective(getNext()); + isDirective = true; + } else if (T.Kind == Token::TK_VersionDirective) { + stream.handleYAMLDirective(getNext()); + isDirective = true; + } else + break; + } + return isDirective; +} + +bool Document::expectToken(int TK) { + Token T = getNext(); + if (T.Kind != TK) { + setError("Unexpected token", T); + return false; + } + return true; +} + +OwningPtr document_iterator::NullDoc; diff --git a/test/YAMLParser/LICENSE.txt b/test/YAMLParser/LICENSE.txt new file mode 100644 index 00000000000..050ced23f68 --- /dev/null +++ b/test/YAMLParser/LICENSE.txt @@ -0,0 +1,19 @@ +Copyright (c) 2006 Kirill Simonov + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/test/YAMLParser/bool.data b/test/YAMLParser/bool.data new file mode 100644 index 00000000000..e987a0ec1e3 --- /dev/null +++ b/test/YAMLParser/bool.data @@ -0,0 +1,6 @@ +# RUN: yaml-bench -canonical %s + +- yes +- NO +- True +- on diff --git a/test/YAMLParser/construct-bool.data b/test/YAMLParser/construct-bool.data new file mode 100644 index 00000000000..035ec0c8580 --- /dev/null +++ b/test/YAMLParser/construct-bool.data @@ -0,0 +1,11 @@ +# RUN: yaml-bench -canonical %s + +canonical: yes +answer: NO +logical: True +option: on + + +but: + y: is a string + n: is a string diff --git a/test/YAMLParser/construct-custom.data b/test/YAMLParser/construct-custom.data new file mode 100644 index 00000000000..cac95e0a5fb --- /dev/null +++ b/test/YAMLParser/construct-custom.data @@ -0,0 +1,28 @@ +# RUN: yaml-bench -canonical %s + +--- +- !tag1 + x: 1 +- !tag1 + x: 1 + 'y': 2 + z: 3 +- !tag2 + 10 +- !tag2 + =: 10 + 'y': 20 + z: 30 +- !tag3 + x: 1 +- !tag3 + x: 1 + 'y': 2 + z: 3 +- !tag3 + =: 1 + 'y': 2 + z: 3 +- !foo + my-parameter: foo + my-another-parameter: [1,2,3] diff --git a/test/YAMLParser/construct-float.data b/test/YAMLParser/construct-float.data new file mode 100644 index 00000000000..07c51bdd833 --- /dev/null +++ b/test/YAMLParser/construct-float.data @@ -0,0 +1,8 @@ +# RUN: yaml-bench -canonical %s + +canonical: 6.8523015e+5 +exponential: 685.230_15e+03 +fixed: 685_230.15 +sexagesimal: 190:20:30.15 +negative infinity: -.inf +not a number: .NaN diff --git a/test/YAMLParser/construct-int.data b/test/YAMLParser/construct-int.data new file mode 100644 index 00000000000..b14c37f7880 --- /dev/null +++ b/test/YAMLParser/construct-int.data @@ -0,0 +1,8 @@ +# RUN: yaml-bench -canonical %s + +canonical: 
685230 +decimal: +685_230 +octal: 02472256 +hexadecimal: 0x_0A_74_AE +binary: 0b1010_0111_0100_1010_1110 +sexagesimal: 190:20:30 diff --git a/test/YAMLParser/construct-map.data b/test/YAMLParser/construct-map.data new file mode 100644 index 00000000000..1b681206d17 --- /dev/null +++ b/test/YAMLParser/construct-map.data @@ -0,0 +1,8 @@ +# RUN: yaml-bench -canonical %s + +# Unordered set of key: value pairs. +Block style: !!map + Clark : Evans + Brian : Ingerson + Oren : Ben-Kiki +Flow style: !!map { Clark: Evans, Brian: Ingerson, Oren: Ben-Kiki } diff --git a/test/YAMLParser/construct-merge.data b/test/YAMLParser/construct-merge.data new file mode 100644 index 00000000000..0ebc9f612d5 --- /dev/null +++ b/test/YAMLParser/construct-merge.data @@ -0,0 +1,29 @@ +# RUN: yaml-bench -canonical %s + +--- +- &CENTER { x: 1, 'y': 2 } +- &LEFT { x: 0, 'y': 2 } +- &BIG { r: 10 } +- &SMALL { r: 1 } + +# All the following maps are equal: + +- # Explicit keys + x: 1 + 'y': 2 + r: 10 + label: center/big + +- # Merge one map + << : *CENTER + r: 10 + label: center/big + +- # Merge multiple maps + << : [ *CENTER, *BIG ] + label: center/big + +- # Override + << : [ *BIG, *LEFT, *SMALL ] + x: 1 + label: center/big diff --git a/test/YAMLParser/construct-null.data b/test/YAMLParser/construct-null.data new file mode 100644 index 00000000000..51f8b61e24e --- /dev/null +++ b/test/YAMLParser/construct-null.data @@ -0,0 +1,20 @@ +# RUN: yaml-bench -canonical %s + +# A document may be null. +--- +--- +# This mapping has four keys, +# one has a value. +empty: +canonical: ~ +english: null +~: null key +--- +# This sequence has five +# entries, two have values. 
+sparse: + - ~ + - 2nd entry + - + - 4th entry + - Null diff --git a/test/YAMLParser/construct-omap.data b/test/YAMLParser/construct-omap.data new file mode 100644 index 00000000000..b96d6799c7d --- /dev/null +++ b/test/YAMLParser/construct-omap.data @@ -0,0 +1,10 @@ +# RUN: yaml-bench -canonical %s + +# Explicitly typed ordered map (dictionary). +Bestiary: !!omap + - aardvark: African pig-like ant eater. Ugly. + - anteater: South-American ant eater. Two species. + - anaconda: South-American constrictor snake. Scaly. + # Etc. +# Flow style +Numbers: !!omap [ one: 1, two: 2, three : 3 ] diff --git a/test/YAMLParser/construct-pairs.data b/test/YAMLParser/construct-pairs.data new file mode 100644 index 00000000000..40f288d1d74 --- /dev/null +++ b/test/YAMLParser/construct-pairs.data @@ -0,0 +1,9 @@ +# RUN: yaml-bench -canonical %s + +# Explicitly typed pairs. +Block tasks: !!pairs + - meeting: with team. + - meeting: with boss. + - break: lunch. + - meeting: with client. +Flow tasks: !!pairs [ meeting: with team, meeting: with boss ] diff --git a/test/YAMLParser/construct-seq.data b/test/YAMLParser/construct-seq.data new file mode 100644 index 00000000000..f43fd39f8ed --- /dev/null +++ b/test/YAMLParser/construct-seq.data @@ -0,0 +1,17 @@ +# RUN: yaml-bench -canonical %s + +# Ordered sequence of nodes +Block style: !!seq +- Mercury # Rotates - no light/dark sides. +- Venus # Deadliest. Aptly named. +- Earth # Mostly dirt. +- Mars # Seems empty. +- Jupiter # The king. +- Saturn # Pretty. +- Uranus # Where the sun hardly shines. +- Neptune # Boring. No rings. +- Pluto # You call this a planet? 
+Flow style: !!seq [ Mercury, Venus, Earth, Mars, # Rocks + Jupiter, Saturn, Uranus, Neptune, # Gas + Pluto ] # Overrated + diff --git a/test/YAMLParser/construct-set.data b/test/YAMLParser/construct-set.data new file mode 100644 index 00000000000..3e9d095e714 --- /dev/null +++ b/test/YAMLParser/construct-set.data @@ -0,0 +1,9 @@ +# RUN: yaml-bench -canonical %s + +# Explicitly typed set. +baseball players: !!set + ? Mark McGwire + ? Sammy Sosa + ? Ken Griffey +# Flow style +baseball teams: !!set { Boston Red Sox, Detroit Tigers, New York Yankees } diff --git a/test/YAMLParser/construct-str-ascii.data b/test/YAMLParser/construct-str-ascii.data new file mode 100644 index 00000000000..24290ae8a99 --- /dev/null +++ b/test/YAMLParser/construct-str-ascii.data @@ -0,0 +1,3 @@ +# RUN: yaml-bench -canonical %s + +--- !!str "ascii string" diff --git a/test/YAMLParser/construct-str.data b/test/YAMLParser/construct-str.data new file mode 100644 index 00000000000..dc1ce825cdc --- /dev/null +++ b/test/YAMLParser/construct-str.data @@ -0,0 +1,3 @@ +# RUN: yaml-bench -canonical %s + +string: abcd diff --git a/test/YAMLParser/construct-timestamp.data b/test/YAMLParser/construct-timestamp.data new file mode 100644 index 00000000000..f262c2d02c0 --- /dev/null +++ b/test/YAMLParser/construct-timestamp.data @@ -0,0 +1,7 @@ +# RUN: yaml-bench -canonical %s + +canonical: 2001-12-15T02:59:43.1Z +valid iso8601: 2001-12-14t21:59:43.10-05:00 +space separated: 2001-12-14 21:59:43.10 -5 +no time zone (Z): 2001-12-15 2:59:43.10 +date (00:00:00Z): 2002-12-14 diff --git a/test/YAMLParser/construct-value.data b/test/YAMLParser/construct-value.data new file mode 100644 index 00000000000..fe01a0dc904 --- /dev/null +++ b/test/YAMLParser/construct-value.data @@ -0,0 +1,12 @@ +# RUN: yaml-bench -canonical %s + +--- # Old schema +link with: + - library1.dll + - library2.dll +--- # New schema +link with: + - = : library1.dll + version: 1.2 + - = : library2.dll + version: 2.3 diff --git 
a/test/YAMLParser/duplicate-key.former-loader-error.data b/test/YAMLParser/duplicate-key.former-loader-error.data new file mode 100644 index 00000000000..9272103fe65 --- /dev/null +++ b/test/YAMLParser/duplicate-key.former-loader-error.data @@ -0,0 +1,5 @@ +# RUN: yaml-bench -canonical %s + +--- +foo: bar +foo: baz diff --git a/test/YAMLParser/duplicate-mapping-key.former-loader-error.data b/test/YAMLParser/duplicate-mapping-key.former-loader-error.data new file mode 100644 index 00000000000..96d175d2ac1 --- /dev/null +++ b/test/YAMLParser/duplicate-mapping-key.former-loader-error.data @@ -0,0 +1,8 @@ +# RUN: yaml-bench -canonical %s + +--- +&anchor foo: + foo: bar + *anchor: duplicate key + baz: bat + *anchor: duplicate key diff --git a/test/YAMLParser/duplicate-merge-key.former-loader-error.data b/test/YAMLParser/duplicate-merge-key.former-loader-error.data new file mode 100644 index 00000000000..6b1276436ab --- /dev/null +++ b/test/YAMLParser/duplicate-merge-key.former-loader-error.data @@ -0,0 +1,6 @@ +# RUN: yaml-bench -canonical %s + +--- +<<: {x: 1, y: 2} +foo: bar +<<: {z: 3, t: 4} diff --git a/test/YAMLParser/duplicate-value-key.former-loader-error.data b/test/YAMLParser/duplicate-value-key.former-loader-error.data new file mode 100644 index 00000000000..dc20e0b275c --- /dev/null +++ b/test/YAMLParser/duplicate-value-key.former-loader-error.data @@ -0,0 +1,6 @@ +# RUN: yaml-bench -canonical %s + +--- +=: 1 +foo: bar +=: 2 diff --git a/test/YAMLParser/emit-block-scalar-in-simple-key-context-bug.data b/test/YAMLParser/emit-block-scalar-in-simple-key-context-bug.data new file mode 100644 index 00000000000..f5adedb1350 --- /dev/null +++ b/test/YAMLParser/emit-block-scalar-in-simple-key-context-bug.data @@ -0,0 +1,6 @@ +# RUN: yaml-bench -canonical %s + +? 
|- + foo +: |- + bar diff --git a/test/YAMLParser/empty-document-bug.data b/test/YAMLParser/empty-document-bug.data new file mode 100644 index 00000000000..fa131fe78ef --- /dev/null +++ b/test/YAMLParser/empty-document-bug.data @@ -0,0 +1,2 @@ +# RUN: yaml-bench -canonical %s + diff --git a/test/YAMLParser/float.data b/test/YAMLParser/float.data new file mode 100644 index 00000000000..c4de97037c8 --- /dev/null +++ b/test/YAMLParser/float.data @@ -0,0 +1,8 @@ +# RUN: yaml-bench -canonical %s + +- 6.8523015e+5 +- 685.230_15e+03 +- 685_230.15 +- 190:20:30.15 +- -.inf +- .NaN diff --git a/test/YAMLParser/int.data b/test/YAMLParser/int.data new file mode 100644 index 00000000000..2651d096ff6 --- /dev/null +++ b/test/YAMLParser/int.data @@ -0,0 +1,8 @@ +# RUN: yaml-bench -canonical %s + +- 685230 +- +685_230 +- 02472256 +- 0x_0A_74_AE +- 0b1010_0111_0100_1010_1110 +- 190:20:30 diff --git a/test/YAMLParser/invalid-single-quote-bug.data b/test/YAMLParser/invalid-single-quote-bug.data new file mode 100644 index 00000000000..3722a003dff --- /dev/null +++ b/test/YAMLParser/invalid-single-quote-bug.data @@ -0,0 +1,4 @@ +# RUN: yaml-bench -canonical %s + +- "foo 'bar'" +- "foo\n'bar'" diff --git a/test/YAMLParser/merge.data b/test/YAMLParser/merge.data new file mode 100644 index 00000000000..86313596e19 --- /dev/null +++ b/test/YAMLParser/merge.data @@ -0,0 +1,3 @@ +# RUN: yaml-bench -canonical %s + +- << diff --git a/test/YAMLParser/more-floats.data b/test/YAMLParser/more-floats.data new file mode 100644 index 00000000000..668b31cd13a --- /dev/null +++ b/test/YAMLParser/more-floats.data @@ -0,0 +1,3 @@ +# RUN: yaml-bench -canonical %s + +[0.0, +1.0, -1.0, +.inf, -.inf, .nan, .nan] diff --git a/test/YAMLParser/negative-float-bug.data b/test/YAMLParser/negative-float-bug.data new file mode 100644 index 00000000000..0ba0ffee301 --- /dev/null +++ b/test/YAMLParser/negative-float-bug.data @@ -0,0 +1,3 @@ +# RUN: yaml-bench -canonical %s + +-1.0 diff --git 
a/test/YAMLParser/null.data b/test/YAMLParser/null.data new file mode 100644 index 00000000000..a38d7fa6c52 --- /dev/null +++ b/test/YAMLParser/null.data @@ -0,0 +1,5 @@ +# RUN: yaml-bench -canonical %s + +- +- ~ +- null diff --git a/test/YAMLParser/resolver.data b/test/YAMLParser/resolver.data new file mode 100644 index 00000000000..8cbba6328b6 --- /dev/null +++ b/test/YAMLParser/resolver.data @@ -0,0 +1,32 @@ +# RUN: yaml-bench -canonical %s + +--- +"this scalar should be selected" +--- +key11: !foo + key12: + is: [selected] + key22: + key13: [not, selected] + key23: [not, selected] + key32: + key31: [not, selected] + key32: [not, selected] + key33: {not: selected} +key21: !bar + - not selected + - selected + - not selected +key31: !baz + key12: + key13: + key14: {selected} + key23: + key14: [not, selected] + key33: + key14: {selected} + key24: {not: selected} + key22: + - key14: {selected} + key24: {not: selected} + - key14: {selected} diff --git a/test/YAMLParser/run-parser-crash-bug.data b/test/YAMLParser/run-parser-crash-bug.data new file mode 100644 index 00000000000..3ec910ce047 --- /dev/null +++ b/test/YAMLParser/run-parser-crash-bug.data @@ -0,0 +1,10 @@ +# RUN: yaml-bench -canonical %s + +--- +- Harry Potter and the Prisoner of Azkaban +- Harry Potter and the Goblet of Fire +- Harry Potter and the Order of the Phoenix +--- +- Memoirs Found in a Bathtub +- Snow Crash +- Ghost World diff --git a/test/YAMLParser/scan-document-end-bug.data b/test/YAMLParser/scan-document-end-bug.data new file mode 100644 index 00000000000..7354caf8cd8 --- /dev/null +++ b/test/YAMLParser/scan-document-end-bug.data @@ -0,0 +1,5 @@ +# RUN: yaml-bench -canonical %s + +# Ticket #4 +--- +... 
diff --git a/test/YAMLParser/scan-line-break-bug.data b/test/YAMLParser/scan-line-break-bug.data new file mode 100644 index 00000000000..792973d3f56 --- /dev/null +++ b/test/YAMLParser/scan-line-break-bug.data @@ -0,0 +1,5 @@ +# RUN: yaml-bench -canonical %s + +foo: + bar + baz diff --git a/test/YAMLParser/single-dot-is-not-float-bug.data b/test/YAMLParser/single-dot-is-not-float-bug.data new file mode 100644 index 00000000000..810a5936a89 --- /dev/null +++ b/test/YAMLParser/single-dot-is-not-float-bug.data @@ -0,0 +1,3 @@ +# RUN: yaml-bench -canonical %s + +. diff --git a/test/YAMLParser/sloppy-indentation.data b/test/YAMLParser/sloppy-indentation.data new file mode 100644 index 00000000000..2b2b62b14af --- /dev/null +++ b/test/YAMLParser/sloppy-indentation.data @@ -0,0 +1,19 @@ +# RUN: yaml-bench -canonical %s + +--- +in the block context: + indentation should be kept: { + but in the flow context: [ +it may be violated] +} +--- +the parser does not require scalars +to be indented with at least one space +... 
+--- +"the parser does not require scalars +to be indented with at least one space" +--- +foo: + bar: 'quoted scalars +may not adhere indentation' diff --git a/test/YAMLParser/spec-02-01.data b/test/YAMLParser/spec-02-01.data new file mode 100644 index 00000000000..dd15b2bc26a --- /dev/null +++ b/test/YAMLParser/spec-02-01.data @@ -0,0 +1,5 @@ +# RUN: yaml-bench -canonical %s + +- Mark McGwire +- Sammy Sosa +- Ken Griffey diff --git a/test/YAMLParser/spec-02-02.data b/test/YAMLParser/spec-02-02.data new file mode 100644 index 00000000000..a5695d5c27a --- /dev/null +++ b/test/YAMLParser/spec-02-02.data @@ -0,0 +1,5 @@ +# RUN: yaml-bench -canonical %s + +hr: 65 # Home runs +avg: 0.278 # Batting average +rbi: 147 # Runs Batted In diff --git a/test/YAMLParser/spec-02-03.data b/test/YAMLParser/spec-02-03.data new file mode 100644 index 00000000000..81f8d991f74 --- /dev/null +++ b/test/YAMLParser/spec-02-03.data @@ -0,0 +1,10 @@ +# RUN: yaml-bench -canonical %s + +american: + - Boston Red Sox + - Detroit Tigers + - New York Yankees +national: + - New York Mets + - Chicago Cubs + - Atlanta Braves diff --git a/test/YAMLParser/spec-02-04.data b/test/YAMLParser/spec-02-04.data new file mode 100644 index 00000000000..44a218d5926 --- /dev/null +++ b/test/YAMLParser/spec-02-04.data @@ -0,0 +1,10 @@ +# RUN: yaml-bench -canonical %s + +- + name: Mark McGwire + hr: 65 + avg: 0.278 +- + name: Sammy Sosa + hr: 63 + avg: 0.288 diff --git a/test/YAMLParser/spec-02-05.data b/test/YAMLParser/spec-02-05.data new file mode 100644 index 00000000000..c9a4a7572f7 --- /dev/null +++ b/test/YAMLParser/spec-02-05.data @@ -0,0 +1,5 @@ +# RUN: yaml-bench -canonical %s + +- [name , hr, avg ] +- [Mark McGwire, 65, 0.278] +- [Sammy Sosa , 63, 0.288] diff --git a/test/YAMLParser/spec-02-06.data b/test/YAMLParser/spec-02-06.data new file mode 100644 index 00000000000..85c1e2bab8c --- /dev/null +++ b/test/YAMLParser/spec-02-06.data @@ -0,0 +1,7 @@ +# RUN: yaml-bench -canonical %s + +Mark McGwire: {hr: 
65, avg: 0.278} +Sammy Sosa: { + hr: 63, + avg: 0.288 + } diff --git a/test/YAMLParser/spec-02-07.data b/test/YAMLParser/spec-02-07.data new file mode 100644 index 00000000000..c349662a98a --- /dev/null +++ b/test/YAMLParser/spec-02-07.data @@ -0,0 +1,12 @@ +# RUN: yaml-bench -canonical %s + +# Ranking of 1998 home runs +--- +- Mark McGwire +- Sammy Sosa +- Ken Griffey + +# Team ranking +--- +- Chicago Cubs +- St Louis Cardinals diff --git a/test/YAMLParser/spec-02-08.data b/test/YAMLParser/spec-02-08.data new file mode 100644 index 00000000000..9746a43788a --- /dev/null +++ b/test/YAMLParser/spec-02-08.data @@ -0,0 +1,12 @@ +# RUN: yaml-bench -canonical %s + +--- +time: 20:03:20 +player: Sammy Sosa +action: strike (miss) +... +--- +time: 20:03:47 +player: Sammy Sosa +action: grand slam +... diff --git a/test/YAMLParser/spec-02-09.data b/test/YAMLParser/spec-02-09.data new file mode 100644 index 00000000000..6aef9333799 --- /dev/null +++ b/test/YAMLParser/spec-02-09.data @@ -0,0 +1,10 @@ +# RUN: yaml-bench -canonical %s + +--- +hr: # 1998 hr ranking + - Mark McGwire + - Sammy Sosa +rbi: + # 1998 rbi ranking + - Sammy Sosa + - Ken Griffey diff --git a/test/YAMLParser/spec-02-10.data b/test/YAMLParser/spec-02-10.data new file mode 100644 index 00000000000..0302fa75009 --- /dev/null +++ b/test/YAMLParser/spec-02-10.data @@ -0,0 +1,10 @@ +# RUN: yaml-bench -canonical %s + +--- +hr: + - Mark McGwire + # Following node labeled SS + - &SS Sammy Sosa +rbi: + - *SS # Subsequent occurrence + - Ken Griffey diff --git a/test/YAMLParser/spec-02-11.data b/test/YAMLParser/spec-02-11.data new file mode 100644 index 00000000000..d8cf863b2c1 --- /dev/null +++ b/test/YAMLParser/spec-02-11.data @@ -0,0 +1,11 @@ +# RUN: yaml-bench -canonical %s + +? - Detroit Tigers + - Chicago cubs +: + - 2001-07-23 + +? 
[ New York Yankees, + Atlanta Braves ] +: [ 2001-07-02, 2001-08-12, + 2001-08-14 ] diff --git a/test/YAMLParser/spec-02-12.data b/test/YAMLParser/spec-02-12.data new file mode 100644 index 00000000000..3b4d5370a93 --- /dev/null +++ b/test/YAMLParser/spec-02-12.data @@ -0,0 +1,10 @@ +# RUN: yaml-bench -canonical %s + +--- +# products purchased +- item : Super Hoop + quantity: 1 +- item : Basketball + quantity: 4 +- item : Big Shoes + quantity: 1 diff --git a/test/YAMLParser/spec-02-13.data b/test/YAMLParser/spec-02-13.data new file mode 100644 index 00000000000..2bbccbf5d7d --- /dev/null +++ b/test/YAMLParser/spec-02-13.data @@ -0,0 +1,6 @@ +# RUN: yaml-bench -canonical %s + +# ASCII Art +--- | + \//||\/|| + // || ||__ diff --git a/test/YAMLParser/spec-02-14.data b/test/YAMLParser/spec-02-14.data new file mode 100644 index 00000000000..5a18ea213e6 --- /dev/null +++ b/test/YAMLParser/spec-02-14.data @@ -0,0 +1,6 @@ +# RUN: yaml-bench -canonical %s + +--- + Mark McGwire's + year was crippled + by a knee injury. diff --git a/test/YAMLParser/spec-02-15.data b/test/YAMLParser/spec-02-15.data new file mode 100644 index 00000000000..2a7fbe96ad4 --- /dev/null +++ b/test/YAMLParser/spec-02-15.data @@ -0,0 +1,10 @@ +# RUN: yaml-bench -canonical %s + +> + Sammy Sosa completed another + fine season with great stats. + + 63 Home Runs + 0.288 Batting Average + + What a year! diff --git a/test/YAMLParser/spec-02-16.data b/test/YAMLParser/spec-02-16.data new file mode 100644 index 00000000000..3a5792c7632 --- /dev/null +++ b/test/YAMLParser/spec-02-16.data @@ -0,0 +1,9 @@ +# RUN: yaml-bench -canonical %s + +name: Mark McGwire +accomplishment: > + Mark set a major league + home run record in 1998. 
+stats: | + 65 Home Runs + 0.278 Batting Average diff --git a/test/YAMLParser/spec-02-17.data b/test/YAMLParser/spec-02-17.data new file mode 100644 index 00000000000..2bcb60c8d93 --- /dev/null +++ b/test/YAMLParser/spec-02-17.data @@ -0,0 +1,16 @@ +# RUN: yaml-bench -canonical %s + +unicode: "Sosa did fine.\u263A" +control: "\b1998\t1999\t2000\n" +hexesc: "\x13\x10 is \r\n" + +single: '"Howdy!" he cried.' +quoted: ' # not a ''comment''.' +tie-fighter: '|\-*-/|' + +# CHECK: !!str "Sosa did fine.\u263A" +# CHECK: !!str "\b1998\t1999\t2000\n" +# CHECK: !!str "\x13\x10 is \r\n" +# CHECK: !!str "\"Howdy!\" he cried." +# CHECK: !!str " # not a 'comment'." +# CHECK: !!str "|\\-*-/|" diff --git a/test/YAMLParser/spec-02-18.data b/test/YAMLParser/spec-02-18.data new file mode 100644 index 00000000000..625a4962e99 --- /dev/null +++ b/test/YAMLParser/spec-02-18.data @@ -0,0 +1,8 @@ +# RUN: yaml-bench -canonical %s + +plain: + This unquoted scalar + spans many lines. + +quoted: "So does this + quoted scalar.\n" diff --git a/test/YAMLParser/spec-02-19.data b/test/YAMLParser/spec-02-19.data new file mode 100644 index 00000000000..cb9df6dd1f4 --- /dev/null +++ b/test/YAMLParser/spec-02-19.data @@ -0,0 +1,7 @@ +# RUN: yaml-bench -canonical %s + +canonical: 12345 +decimal: +12,345 +sexagesimal: 3:25:45 +octal: 014 +hexadecimal: 0xC diff --git a/test/YAMLParser/spec-02-20.data b/test/YAMLParser/spec-02-20.data new file mode 100644 index 00000000000..ed147986119 --- /dev/null +++ b/test/YAMLParser/spec-02-20.data @@ -0,0 +1,8 @@ +# RUN: yaml-bench -canonical %s + +canonical: 1.23015e+3 +exponential: 12.3015e+02 +sexagesimal: 20:30.15 +fixed: 1,230.15 +negative infinity: -.inf +not a number: .NaN diff --git a/test/YAMLParser/spec-02-21.data b/test/YAMLParser/spec-02-21.data new file mode 100644 index 00000000000..ea979db065b --- /dev/null +++ b/test/YAMLParser/spec-02-21.data @@ -0,0 +1,6 @@ +# RUN: yaml-bench -canonical %s + +null: ~ +true: y +false: n +string: '12345' diff --git 
a/test/YAMLParser/spec-02-22.data b/test/YAMLParser/spec-02-22.data new file mode 100644 index 00000000000..77724f71066 --- /dev/null +++ b/test/YAMLParser/spec-02-22.data @@ -0,0 +1,6 @@ +# RUN: yaml-bench -canonical %s + +canonical: 2001-12-15T02:59:43.1Z +iso8601: 2001-12-14t21:59:43.10-05:00 +spaced: 2001-12-14 21:59:43.10 -5 +date: 2002-12-14 diff --git a/test/YAMLParser/spec-02-23.data b/test/YAMLParser/spec-02-23.data new file mode 100644 index 00000000000..d08dfa755c6 --- /dev/null +++ b/test/YAMLParser/spec-02-23.data @@ -0,0 +1,15 @@ +# RUN: yaml-bench -canonical %s + +--- +not-date: !!str 2002-04-28 + +picture: !!binary | + R0lGODlhDAAMAIQAAP//9/X + 17unp5WZmZgAAAOfn515eXv + Pz7Y6OjuDg4J+fn5OTk6enp + 56enmleECcgggoBADs= + +application specific tag: !something | + The semantics of the tag + above may be different for + different documents. diff --git a/test/YAMLParser/spec-02-24.data b/test/YAMLParser/spec-02-24.data new file mode 100644 index 00000000000..01ca7f5d122 --- /dev/null +++ b/test/YAMLParser/spec-02-24.data @@ -0,0 +1,16 @@ +# RUN: yaml-bench -canonical %s + +%TAG ! tag:clarkevans.com,2002: +--- !shape + # Use the ! handle for presenting + # tag:clarkevans.com,2002:circle +- !circle + center: &ORIGIN {x: 73, y: 129} + radius: 7 +- !line + start: *ORIGIN + finish: { x: 89, y: 102 } +- !label + start: *ORIGIN + color: 0xFFEEBB + text: Pretty vector drawing. diff --git a/test/YAMLParser/spec-02-25.data b/test/YAMLParser/spec-02-25.data new file mode 100644 index 00000000000..fbadfda97e3 --- /dev/null +++ b/test/YAMLParser/spec-02-25.data @@ -0,0 +1,9 @@ +# RUN: yaml-bench -canonical %s + +# sets are represented as a +# mapping where each key is +# associated with the empty string +--- !!set +? Mark McGwire +? Sammy Sosa +? 
Ken Griff diff --git a/test/YAMLParser/spec-02-26.data b/test/YAMLParser/spec-02-26.data new file mode 100644 index 00000000000..257108e7e04 --- /dev/null +++ b/test/YAMLParser/spec-02-26.data @@ -0,0 +1,9 @@ +# RUN: yaml-bench -canonical %s + +# ordered maps are represented as +# a sequence of mappings, with +# each mapping having one key +--- !!omap +- Mark McGwire: 65 +- Sammy Sosa: 63 +- Ken Griffy: 58 diff --git a/test/YAMLParser/spec-02-27.data b/test/YAMLParser/spec-02-27.data new file mode 100644 index 00000000000..a190ff19db0 --- /dev/null +++ b/test/YAMLParser/spec-02-27.data @@ -0,0 +1,31 @@ +# RUN: yaml-bench -canonical %s + +--- ! +invoice: 34843 +date : 2001-01-23 +bill-to: &id001 + given : Chris + family : Dumars + address: + lines: | + 458 Walkman Dr. + Suite #292 + city : Royal Oak + state : MI + postal : 48046 +ship-to: *id001 +product: + - sku : BL394D + quantity : 4 + description : Basketball + price : 450.00 + - sku : BL4438H + quantity : 1 + description : Super Hoop + price : 2392.00 +tax : 251.42 +total: 4443.52 +comments: + Late afternoon is best. + Backup contact is Nancy + Billsmer @ 338-4338. diff --git a/test/YAMLParser/spec-02-28.data b/test/YAMLParser/spec-02-28.data new file mode 100644 index 00000000000..695c27f5d55 --- /dev/null +++ b/test/YAMLParser/spec-02-28.data @@ -0,0 +1,28 @@ +# RUN: yaml-bench -canonical %s + +--- +Time: 2001-11-23 15:01:42 -5 +User: ed +Warning: + This is an error message + for the log file +--- +Time: 2001-11-23 15:02:31 -5 +User: ed +Warning: + A slightly different error + message. 
+--- +Date: 2001-11-23 15:03:17 -5 +User: ed +Fatal: + Unknown variable "bar" +Stack: + - file: TopClass.py + line: 23 + code: | + x = MoreObject("345\n") + - file: MoreClass.py + line: 58 + code: |- + foo = bar diff --git a/test/YAMLParser/spec-05-01-utf8.data b/test/YAMLParser/spec-05-01-utf8.data new file mode 100644 index 00000000000..349da06fab2 --- /dev/null +++ b/test/YAMLParser/spec-05-01-utf8.data @@ -0,0 +1,3 @@ +# RUN: yaml-bench -canonical %s + +# Comment only. diff --git a/test/YAMLParser/spec-05-02-utf8.data b/test/YAMLParser/spec-05-02-utf8.data new file mode 100644 index 00000000000..b306bdb719e --- /dev/null +++ b/test/YAMLParser/spec-05-02-utf8.data @@ -0,0 +1,7 @@ +# RUN: yaml-bench -canonical %s |& FileCheck %s + +# Invalid use of BOM +# inside a +# document. + +# CHECK: error diff --git a/test/YAMLParser/spec-05-03.data b/test/YAMLParser/spec-05-03.data new file mode 100644 index 00000000000..461e98d2c2b --- /dev/null +++ b/test/YAMLParser/spec-05-03.data @@ -0,0 +1,9 @@ +# RUN: yaml-bench -canonical %s + +sequence: +- one +- two +mapping: + ? sky + : blue + ? sea : green diff --git a/test/YAMLParser/spec-05-04.data b/test/YAMLParser/spec-05-04.data new file mode 100644 index 00000000000..52850f435bf --- /dev/null +++ b/test/YAMLParser/spec-05-04.data @@ -0,0 +1,4 @@ +# RUN: yaml-bench -canonical %s + +sequence: [ one, two, ] +mapping: { sky: blue, sea: green } diff --git a/test/YAMLParser/spec-05-05.data b/test/YAMLParser/spec-05-05.data new file mode 100644 index 00000000000..499ee8ffb8f --- /dev/null +++ b/test/YAMLParser/spec-05-05.data @@ -0,0 +1,3 @@ +# RUN: yaml-bench -canonical %s + +# Comment only. 
diff --git a/test/YAMLParser/spec-05-06.data b/test/YAMLParser/spec-05-06.data new file mode 100644 index 00000000000..729141acf4b --- /dev/null +++ b/test/YAMLParser/spec-05-06.data @@ -0,0 +1,4 @@ +# RUN: yaml-bench -canonical %s + +anchored: !local &anchor value +alias: *anchor diff --git a/test/YAMLParser/spec-05-07.data b/test/YAMLParser/spec-05-07.data new file mode 100644 index 00000000000..fc80a0d4152 --- /dev/null +++ b/test/YAMLParser/spec-05-07.data @@ -0,0 +1,6 @@ +# RUN: yaml-bench -canonical %s + +literal: | + text +folded: > + text diff --git a/test/YAMLParser/spec-05-08.data b/test/YAMLParser/spec-05-08.data new file mode 100644 index 00000000000..9f2b7ece53f --- /dev/null +++ b/test/YAMLParser/spec-05-08.data @@ -0,0 +1,4 @@ +# RUN: yaml-bench -canonical %s + +single: 'text' +double: "text" diff --git a/test/YAMLParser/spec-05-09.data b/test/YAMLParser/spec-05-09.data new file mode 100644 index 00000000000..fc061fb2982 --- /dev/null +++ b/test/YAMLParser/spec-05-09.data @@ -0,0 +1,4 @@ +# RUN: yaml-bench -canonical %s + +%YAML 1.1 +--- text diff --git a/test/YAMLParser/spec-05-10.data b/test/YAMLParser/spec-05-10.data new file mode 100644 index 00000000000..6788f0bfc31 --- /dev/null +++ b/test/YAMLParser/spec-05-10.data @@ -0,0 +1,6 @@ +# RUN: yaml-bench -canonical %s |& FileCheck %s + +commercial-at: @text +grave-accent: `text + +# CHECK: error diff --git a/test/YAMLParser/spec-05-11.data b/test/YAMLParser/spec-05-11.data new file mode 100644 index 00000000000..7cba5562d5f --- /dev/null +++ b/test/YAMLParser/spec-05-11.data @@ -0,0 +1,5 @@ +# RUN: yaml-bench -canonical %s + +| + Generic line break (no glyph) + Generic line break (glyphed)… Line separator
 Paragraph separator
 diff --git a/test/YAMLParser/spec-05-12.data b/test/YAMLParser/spec-05-12.data new file mode 100644 index 00000000000..7dadff76f8d --- /dev/null +++ b/test/YAMLParser/spec-05-12.data @@ -0,0 +1,16 @@ +# RUN: yaml-bench -canonical %s |& FileCheck %s +# +# We don't currently reject tabs as indentation. +# XFAIL: * + +# Tabs do's and don'ts: +# comment: +quoted: "Quoted " +block: | + void main() { + printf("Hello, world!\n"); + } +elsewhere: # separation + indentation, in plain scalar + +# CHECK: error diff --git a/test/YAMLParser/spec-05-13.data b/test/YAMLParser/spec-05-13.data new file mode 100644 index 00000000000..db62e866a75 --- /dev/null +++ b/test/YAMLParser/spec-05-13.data @@ -0,0 +1,5 @@ +# RUN: yaml-bench -canonical %s + + "Text containing + both space and + tab characters" diff --git a/test/YAMLParser/spec-05-14.data b/test/YAMLParser/spec-05-14.data new file mode 100644 index 00000000000..65451651b69 --- /dev/null +++ b/test/YAMLParser/spec-05-14.data @@ -0,0 +1,9 @@ +# RUN: yaml-bench -canonical %s + +"Fun with \\ +\" \a \b \e \f \ +\n \r \t \v \0 \ +\ \_ \N \L \P \ +\x41 \u0041 \U00000041" + +# CHECK: !!str "Fun with \\\n\" \a \b \e \f \n \r \t \v \0 \_ \N \L \P A A A" diff --git a/test/YAMLParser/spec-05-15.data b/test/YAMLParser/spec-05-15.data new file mode 100644 index 00000000000..cd8421ad279 --- /dev/null +++ b/test/YAMLParser/spec-05-15.data @@ -0,0 +1,7 @@ +# RUN: yaml-bench -canonical %s |& FileCheck %s + +Bad escapes: + "\c + \xq-" + +# CHECK: error diff --git a/test/YAMLParser/spec-06-01.data b/test/YAMLParser/spec-06-01.data new file mode 100644 index 00000000000..95b26bdb385 --- /dev/null +++ b/test/YAMLParser/spec-06-01.data @@ -0,0 +1,16 @@ +# RUN: yaml-bench -canonical %s + + # Leading comment line spaces are + # neither content nor indentation. 
+ +Not indented: + By one space: | + By four + spaces + Flow style: [ # Leading spaces + By two, # in flow style + Also by two, # are neither +# Tabs are not allowed: +# Still by two # content nor + Still by two # content nor + ] # indentation. diff --git a/test/YAMLParser/spec-06-02.data b/test/YAMLParser/spec-06-02.data new file mode 100644 index 00000000000..40a15c9f3ea --- /dev/null +++ b/test/YAMLParser/spec-06-02.data @@ -0,0 +1,5 @@ +# RUN: yaml-bench -canonical %s + + # Comment + + diff --git a/test/YAMLParser/spec-06-03.data b/test/YAMLParser/spec-06-03.data new file mode 100644 index 00000000000..c1893ef0832 --- /dev/null +++ b/test/YAMLParser/spec-06-03.data @@ -0,0 +1,4 @@ +# RUN: yaml-bench -canonical %s + +key: # Comment + value diff --git a/test/YAMLParser/spec-06-04.data b/test/YAMLParser/spec-06-04.data new file mode 100644 index 00000000000..b61bcc6b955 --- /dev/null +++ b/test/YAMLParser/spec-06-04.data @@ -0,0 +1,6 @@ +# RUN: yaml-bench -canonical %s + +key: # Comment + # lines + value + diff --git a/test/YAMLParser/spec-06-05.data b/test/YAMLParser/spec-06-05.data new file mode 100644 index 00000000000..4bcaa5a8189 --- /dev/null +++ b/test/YAMLParser/spec-06-05.data @@ -0,0 +1,8 @@ +# RUN: yaml-bench -canonical %s + +{ first: Sammy, last: Sosa }: +# Statistics: + hr: # Home runs + 65 + avg: # Average + 0.278 diff --git a/test/YAMLParser/spec-06-06.data b/test/YAMLParser/spec-06-06.data new file mode 100644 index 00000000000..67e39ddf899 --- /dev/null +++ b/test/YAMLParser/spec-06-06.data @@ -0,0 +1,9 @@ +# RUN: yaml-bench -canonical %s + +plain: text + lines +quoted: "text + lines" +block: | + text + lines diff --git a/test/YAMLParser/spec-06-07.data b/test/YAMLParser/spec-06-07.data new file mode 100644 index 00000000000..451bd349e3e --- /dev/null +++ b/test/YAMLParser/spec-06-07.data @@ -0,0 +1,10 @@ +# RUN: yaml-bench -canonical %s + +- foo + + bar +- |- + foo + + bar + diff --git a/test/YAMLParser/spec-06-08.data 
b/test/YAMLParser/spec-06-08.data new file mode 100644 index 00000000000..aa06f847ea3 --- /dev/null +++ b/test/YAMLParser/spec-06-08.data @@ -0,0 +1,4 @@ +# RUN: yaml-bench -canonical %s + +>- + specific
 trimmed… … …… as… space diff --git a/test/YAMLParser/spec-07-01.data b/test/YAMLParser/spec-07-01.data new file mode 100644 index 00000000000..21bc5e59d59 --- /dev/null +++ b/test/YAMLParser/spec-07-01.data @@ -0,0 +1,5 @@ +# RUN: yaml-bench -canonical %s + +%FOO bar baz # Should be ignored + # with a warning. +--- "foo" diff --git a/test/YAMLParser/spec-07-02.data b/test/YAMLParser/spec-07-02.data new file mode 100644 index 00000000000..bf0e758c880 --- /dev/null +++ b/test/YAMLParser/spec-07-02.data @@ -0,0 +1,6 @@ +# RUN: yaml-bench -canonical %s + +%YAML 1.2 # Attempt parsing + # with a warning +--- +"foo" diff --git a/test/YAMLParser/spec-07-03.data b/test/YAMLParser/spec-07-03.data new file mode 100644 index 00000000000..7ca94830169 --- /dev/null +++ b/test/YAMLParser/spec-07-03.data @@ -0,0 +1,7 @@ +# RUN: yaml-bench -canonical %s |& FileCheck %s + +%YAML 1.1 +%YAML 1.1 +foo + +# CHECK: error diff --git a/test/YAMLParser/spec-07-04.data b/test/YAMLParser/spec-07-04.data new file mode 100644 index 00000000000..beba7d06ecf --- /dev/null +++ b/test/YAMLParser/spec-07-04.data @@ -0,0 +1,5 @@ +# RUN: yaml-bench -canonical %s + +%TAG !yaml! tag:yaml.org,2002: +--- +!yaml!str "foo" diff --git a/test/YAMLParser/spec-07-05.data b/test/YAMLParser/spec-07-05.data new file mode 100644 index 00000000000..279b54afa14 --- /dev/null +++ b/test/YAMLParser/spec-07-05.data @@ -0,0 +1,10 @@ +# RUN: yaml-bench -canonical %s |& FileCheck %s +# +# We don't currently parse TAG directives. +# XFAIL: * + +%TAG ! !foo +%TAG ! !foo +bar + +# CHECK: error diff --git a/test/YAMLParser/spec-07-06.data b/test/YAMLParser/spec-07-06.data new file mode 100644 index 00000000000..9f27f91f311 --- /dev/null +++ b/test/YAMLParser/spec-07-06.data @@ -0,0 +1,7 @@ +# RUN: yaml-bench -canonical %s + +%TAG ! !foo +%TAG !yaml! 
tag:yaml.org,2002: +--- +- !bar "baz" +- !yaml!str "string" diff --git a/test/YAMLParser/spec-07-07a.data b/test/YAMLParser/spec-07-07a.data new file mode 100644 index 00000000000..e51f8f7d694 --- /dev/null +++ b/test/YAMLParser/spec-07-07a.data @@ -0,0 +1,4 @@ +# RUN: yaml-bench -canonical %s + +# Private application: +!foo "bar" diff --git a/test/YAMLParser/spec-07-07b.data b/test/YAMLParser/spec-07-07b.data new file mode 100644 index 00000000000..003d5755726 --- /dev/null +++ b/test/YAMLParser/spec-07-07b.data @@ -0,0 +1,6 @@ +# RUN: yaml-bench -canonical %s + +# Migrated to global: +%TAG ! tag:ben-kiki.org,2000:app/ +--- +!foo "bar" diff --git a/test/YAMLParser/spec-07-08.data b/test/YAMLParser/spec-07-08.data new file mode 100644 index 00000000000..7197404b384 --- /dev/null +++ b/test/YAMLParser/spec-07-08.data @@ -0,0 +1,11 @@ +# RUN: yaml-bench -canonical %s + +# Explicitly specify default settings: +%TAG ! ! +%TAG !! tag:yaml.org,2002: +# Named handles have no default: +%TAG !o! tag:ben-kiki.org,2000: +--- +- !foo "bar" +- !!str "string" +- !o!type "baz" diff --git a/test/YAMLParser/spec-07-09.data b/test/YAMLParser/spec-07-09.data new file mode 100644 index 00000000000..1f98ba04146 --- /dev/null +++ b/test/YAMLParser/spec-07-09.data @@ -0,0 +1,13 @@ +# RUN: yaml-bench -canonical %s + +--- +foo +... +# Repeated end marker. +... +--- +bar +# No end marker. +--- +baz +... diff --git a/test/YAMLParser/spec-07-10.data b/test/YAMLParser/spec-07-10.data new file mode 100644 index 00000000000..a1766834781 --- /dev/null +++ b/test/YAMLParser/spec-07-10.data @@ -0,0 +1,13 @@ +# RUN: yaml-bench -canonical %s + +"Root flow + scalar" +--- !!str > + Root block + scalar +--- +# Root collection: +foo : bar +... # Is optional. +--- +# Explicit document may be empty. 
diff --git a/test/YAMLParser/spec-07-11.data b/test/YAMLParser/spec-07-11.data new file mode 100644 index 00000000000..ce14b7ebe4d --- /dev/null +++ b/test/YAMLParser/spec-07-11.data @@ -0,0 +1,4 @@ +# RUN: yaml-bench -canonical %s + +# A stream may contain +# no documents. diff --git a/test/YAMLParser/spec-07-12a.data b/test/YAMLParser/spec-07-12a.data new file mode 100644 index 00000000000..7327f8188e0 --- /dev/null +++ b/test/YAMLParser/spec-07-12a.data @@ -0,0 +1,5 @@ +# RUN: yaml-bench -canonical %s + +# Implicit document. Root +# collection (mapping) node. +foo : bar diff --git a/test/YAMLParser/spec-07-12b.data b/test/YAMLParser/spec-07-12b.data new file mode 100644 index 00000000000..d759abea7d4 --- /dev/null +++ b/test/YAMLParser/spec-07-12b.data @@ -0,0 +1,6 @@ +# RUN: yaml-bench -canonical %s + +# Explicit document. Root +# scalar (literal) node. +--- | + Text content diff --git a/test/YAMLParser/spec-07-13.data b/test/YAMLParser/spec-07-13.data new file mode 100644 index 00000000000..ab74df10187 --- /dev/null +++ b/test/YAMLParser/spec-07-13.data @@ -0,0 +1,11 @@ +# RUN: yaml-bench -canonical %s + +! "First document" +--- +!foo "No directives" +%TAG ! 
!foo +--- +!bar "With directives" +%YAML 1.1 +--- +!baz "Reset settings" diff --git a/test/YAMLParser/spec-08-01.data b/test/YAMLParser/spec-08-01.data new file mode 100644 index 00000000000..5abbfa80949 --- /dev/null +++ b/test/YAMLParser/spec-08-01.data @@ -0,0 +1,4 @@ +# RUN: yaml-bench -canonical %s + +!!str &a1 "foo" : !!str bar +&a2 baz : *a1 diff --git a/test/YAMLParser/spec-08-02.data b/test/YAMLParser/spec-08-02.data new file mode 100644 index 00000000000..8a75783a709 --- /dev/null +++ b/test/YAMLParser/spec-08-02.data @@ -0,0 +1,4 @@ +# RUN: yaml-bench -canonical %s + +First occurrence: &anchor Value +Second occurrence: *anchor diff --git a/test/YAMLParser/spec-08-03.data b/test/YAMLParser/spec-08-03.data new file mode 100644 index 00000000000..8c715305a81 --- /dev/null +++ b/test/YAMLParser/spec-08-03.data @@ -0,0 +1,4 @@ +# RUN: yaml-bench -canonical %s + +! foo : + ! baz diff --git a/test/YAMLParser/spec-08-04.data b/test/YAMLParser/spec-08-04.data new file mode 100644 index 00000000000..f13538bc87e --- /dev/null +++ b/test/YAMLParser/spec-08-04.data @@ -0,0 +1,9 @@ +# RUN: yaml-bench -canonical %s |& FileCheck %s +# +# We don't currently look at the content of literal tags. +# XFAIL: * + +- ! foo +- !<$:?> bar + +# CHECK: error diff --git a/test/YAMLParser/spec-08-05.data b/test/YAMLParser/spec-08-05.data new file mode 100644 index 00000000000..0613446c897 --- /dev/null +++ b/test/YAMLParser/spec-08-05.data @@ -0,0 +1,7 @@ +# RUN: yaml-bench -canonical %s + +%TAG !o! tag:ben-kiki.org,2000: +--- +- !local foo +- !!str bar +- !o!type baz diff --git a/test/YAMLParser/spec-08-06.data b/test/YAMLParser/spec-08-06.data new file mode 100644 index 00000000000..a811bfdefe3 --- /dev/null +++ b/test/YAMLParser/spec-08-06.data @@ -0,0 +1,12 @@ +# RUN: yaml-bench -canonical %s |& FileCheck %s +# +# We don't currently validate tags. +# XFAIL: * + +%TAG !o! tag:ben-kiki.org,2000: +--- +- !$a!b foo +- !o! 
bar +- !h!type baz + +# CHECK: error diff --git a/test/YAMLParser/spec-08-07.data b/test/YAMLParser/spec-08-07.data new file mode 100644 index 00000000000..fc3f2df7f05 --- /dev/null +++ b/test/YAMLParser/spec-08-07.data @@ -0,0 +1,6 @@ +# RUN: yaml-bench -canonical %s + +# Assuming conventional resolution: +- "12" +- 12 +- ! 12 diff --git a/test/YAMLParser/spec-08-08.data b/test/YAMLParser/spec-08-08.data new file mode 100644 index 00000000000..460029f6ace --- /dev/null +++ b/test/YAMLParser/spec-08-08.data @@ -0,0 +1,15 @@ +# RUN: yaml-bench -canonical %s + +--- +foo: + "bar + baz" +--- +"foo + bar" +--- +foo + bar +--- | + foo +... diff --git a/test/YAMLParser/spec-08-09.data b/test/YAMLParser/spec-08-09.data new file mode 100644 index 00000000000..1c825859431 --- /dev/null +++ b/test/YAMLParser/spec-08-09.data @@ -0,0 +1,13 @@ +# RUN: yaml-bench -canonical %s + +--- +scalars: + plain: !!str some text + quoted: + single: 'some text' + double: "some text" +collections: + sequence: !!seq [ !!str entry, + # Mapping entry: + key: value ] + mapping: { key: value } diff --git a/test/YAMLParser/spec-08-10.data b/test/YAMLParser/spec-08-10.data new file mode 100644 index 00000000000..74054eb0883 --- /dev/null +++ b/test/YAMLParser/spec-08-10.data @@ -0,0 +1,17 @@ +# RUN: yaml-bench -canonical %s + +block styles: + scalars: + literal: !!str | + #!/usr/bin/perl + print "Hello, world!\n"; + folded: > + This sentence + is false. 
+ collections: !!map + sequence: !!seq # Entry: + - entry # Plain + # Mapping entry: + - key: value + mapping: + key: value diff --git a/test/YAMLParser/spec-08-11.data b/test/YAMLParser/spec-08-11.data new file mode 100644 index 00000000000..8a75783a709 --- /dev/null +++ b/test/YAMLParser/spec-08-11.data @@ -0,0 +1,4 @@ +# RUN: yaml-bench -canonical %s + +First occurrence: &anchor Value +Second occurrence: *anchor diff --git a/test/YAMLParser/spec-08-12.data b/test/YAMLParser/spec-08-12.data new file mode 100644 index 00000000000..69e78b42d27 --- /dev/null +++ b/test/YAMLParser/spec-08-12.data @@ -0,0 +1,10 @@ +# RUN: yaml-bench -canonical %s + +[ + Without properties, + &anchor "Anchored", + !!str 'Tagged', + *anchor, # Alias node + !!str , # Empty plain scalar + '', # Empty plain scalar +] diff --git a/test/YAMLParser/spec-08-13.data b/test/YAMLParser/spec-08-13.data new file mode 100644 index 00000000000..931d56a0cfe --- /dev/null +++ b/test/YAMLParser/spec-08-13.data @@ -0,0 +1,6 @@ +# RUN: yaml-bench -canonical %s + +{ + ? foo :, + ? : bar, +} diff --git a/test/YAMLParser/spec-08-14.data b/test/YAMLParser/spec-08-14.data new file mode 100644 index 00000000000..61c448351ae --- /dev/null +++ b/test/YAMLParser/spec-08-14.data @@ -0,0 +1,7 @@ +# RUN: yaml-bench -canonical %s + +- "flow in block" +- > + Block scalar +- !!map # Block collection + foo : bar diff --git a/test/YAMLParser/spec-08-15.data b/test/YAMLParser/spec-08-15.data new file mode 100644 index 00000000000..f21e84a4314 --- /dev/null +++ b/test/YAMLParser/spec-08-15.data @@ -0,0 +1,7 @@ +# RUN: yaml-bench -canonical %s + +- # Empty plain scalar +- ? foo + : + ? + : bar diff --git a/test/YAMLParser/spec-09-01.data b/test/YAMLParser/spec-09-01.data new file mode 100644 index 00000000000..8999b496162 --- /dev/null +++ b/test/YAMLParser/spec-09-01.data @@ -0,0 +1,8 @@ +# RUN: yaml-bench -canonical %s + +"simple key" : { + "also simple" : value, + ? 
"not a + simple key" : "any + value" +} diff --git a/test/YAMLParser/spec-09-02.data b/test/YAMLParser/spec-09-02.data new file mode 100644 index 00000000000..f69037820eb --- /dev/null +++ b/test/YAMLParser/spec-09-02.data @@ -0,0 +1,14 @@ +# RUN: yaml-bench -canonical %s |& FileCheck %s +# +# Indent trimming is not yet implemented. +# XFAIL: * + + "as space + trimmed + + specific + + escaped \ + none" + +# CHECK: !!str "as space trimmed\nspecific\nescaped\tnone" diff --git a/test/YAMLParser/spec-09-03.data b/test/YAMLParser/spec-09-03.data new file mode 100644 index 00000000000..3fb0d8b184a --- /dev/null +++ b/test/YAMLParser/spec-09-03.data @@ -0,0 +1,8 @@ +# RUN: yaml-bench -canonical %s + +- " + last" +- " + last" +- " first + last" diff --git a/test/YAMLParser/spec-09-04.data b/test/YAMLParser/spec-09-04.data new file mode 100644 index 00000000000..4178ec6befb --- /dev/null +++ b/test/YAMLParser/spec-09-04.data @@ -0,0 +1,6 @@ +# RUN: yaml-bench -canonical %s + + "first + inner 1 + \ inner 2 \ + last" diff --git a/test/YAMLParser/spec-09-05.data b/test/YAMLParser/spec-09-05.data new file mode 100644 index 00000000000..e482d536623 --- /dev/null +++ b/test/YAMLParser/spec-09-05.data @@ -0,0 +1,10 @@ +# RUN: yaml-bench -canonical %s + +- "first + " +- "first + + last" +- "first + inner + \ last" diff --git a/test/YAMLParser/spec-09-06.data b/test/YAMLParser/spec-09-06.data new file mode 100644 index 00000000000..edc0cbba900 --- /dev/null +++ b/test/YAMLParser/spec-09-06.data @@ -0,0 +1,3 @@ +# RUN: yaml-bench -canonical %s + + 'here''s to "quotes"' diff --git a/test/YAMLParser/spec-09-07.data b/test/YAMLParser/spec-09-07.data new file mode 100644 index 00000000000..3c010ca5b93 --- /dev/null +++ b/test/YAMLParser/spec-09-07.data @@ -0,0 +1,8 @@ +# RUN: yaml-bench -canonical %s + +'simple key' : { + 'also simple' : value, + ? 
'not a + simple key' : 'any + value' +} diff --git a/test/YAMLParser/spec-09-08.data b/test/YAMLParser/spec-09-08.data new file mode 100644 index 00000000000..d114e58fcac --- /dev/null +++ b/test/YAMLParser/spec-09-08.data @@ -0,0 +1,3 @@ +# RUN: yaml-bench -canonical %s + + 'as space … trimmed …… specific
… none' diff --git a/test/YAMLParser/spec-09-09.data b/test/YAMLParser/spec-09-09.data new file mode 100644 index 00000000000..2fec1b536ef --- /dev/null +++ b/test/YAMLParser/spec-09-09.data @@ -0,0 +1,8 @@ +# RUN: yaml-bench -canonical %s + +- ' + last' +- ' + last' +- ' first + last' diff --git a/test/YAMLParser/spec-09-10.data b/test/YAMLParser/spec-09-10.data new file mode 100644 index 00000000000..faabfb06b5e --- /dev/null +++ b/test/YAMLParser/spec-09-10.data @@ -0,0 +1,5 @@ +# RUN: yaml-bench -canonical %s + + 'first + inner + last' diff --git a/test/YAMLParser/spec-09-11.data b/test/YAMLParser/spec-09-11.data new file mode 100644 index 00000000000..3f487ad6b04 --- /dev/null +++ b/test/YAMLParser/spec-09-11.data @@ -0,0 +1,7 @@ +# RUN: yaml-bench -canonical %s + +- 'first + ' +- 'first + + last' diff --git a/test/YAMLParser/spec-09-12.data b/test/YAMLParser/spec-09-12.data new file mode 100644 index 00000000000..d992c589cd6 --- /dev/null +++ b/test/YAMLParser/spec-09-12.data @@ -0,0 +1,10 @@ +# RUN: yaml-bench -canonical %s + +# Outside flow collection: +- ::std::vector +- Up, up, and away! +- -123 +# Inside flow collection: +- [ '::std::vector', + "Up, up, and away!", + -123 ] diff --git a/test/YAMLParser/spec-09-13.data b/test/YAMLParser/spec-09-13.data new file mode 100644 index 00000000000..d48f2d2c47e --- /dev/null +++ b/test/YAMLParser/spec-09-13.data @@ -0,0 +1,8 @@ +# RUN: yaml-bench -canonical %s + +simple key : { + also simple : value, + ? not a + simple key : any + value +} diff --git a/test/YAMLParser/spec-09-14.data b/test/YAMLParser/spec-09-14.data new file mode 100644 index 00000000000..890f6bf2e71 --- /dev/null +++ b/test/YAMLParser/spec-09-14.data @@ -0,0 +1,21 @@ +# RUN: yaml-bench -canonical %s |& FileCheck %s +# +# Not quite sure why this doesn't fail. +# XFAIL: * + +--- +--- ||| : foo +... >>>: bar +--- +[ +--- +, +... , +{ +--- : +... # Nested +} +] +... 
+ +# CHECK: error diff --git a/test/YAMLParser/spec-09-15.data b/test/YAMLParser/spec-09-15.data new file mode 100644 index 00000000000..4111d1ba2cb --- /dev/null +++ b/test/YAMLParser/spec-09-15.data @@ -0,0 +1,15 @@ +# RUN: yaml-bench -canonical %s + +--- +"---" : foo +...: bar +--- +[ +---, +..., +{ +? --- +: ... +} +] +... diff --git a/test/YAMLParser/spec-09-16.data b/test/YAMLParser/spec-09-16.data new file mode 100644 index 00000000000..e595f47bece --- /dev/null +++ b/test/YAMLParser/spec-09-16.data @@ -0,0 +1,5 @@ +# RUN: yaml-bench -canonical %s + +# Tabs are confusing: +# as space/trimmed/specific/none + as space … trimmed …… specific
… none diff --git a/test/YAMLParser/spec-09-17.data b/test/YAMLParser/spec-09-17.data new file mode 100644 index 00000000000..1bacf4d68b1 --- /dev/null +++ b/test/YAMLParser/spec-09-17.data @@ -0,0 +1,5 @@ +# RUN: yaml-bench -canonical %s + + first line + + more line diff --git a/test/YAMLParser/spec-09-18.data b/test/YAMLParser/spec-09-18.data new file mode 100644 index 00000000000..ac623f9973f --- /dev/null +++ b/test/YAMLParser/spec-09-18.data @@ -0,0 +1,11 @@ +# RUN: yaml-bench -canonical %s + +- | # Just the style + literal +- >1 # Indentation indicator + folded +- |+ # Chomping indicator + keep + +- >-1 # Both indicators + strip diff --git a/test/YAMLParser/spec-09-19.data b/test/YAMLParser/spec-09-19.data new file mode 100644 index 00000000000..52aa157137b --- /dev/null +++ b/test/YAMLParser/spec-09-19.data @@ -0,0 +1,6 @@ +# RUN: yaml-bench -canonical %s + +- | + literal +- > + folded diff --git a/test/YAMLParser/spec-09-20.data b/test/YAMLParser/spec-09-20.data new file mode 100644 index 00000000000..86fc7ab9a2e --- /dev/null +++ b/test/YAMLParser/spec-09-20.data @@ -0,0 +1,13 @@ +# RUN: yaml-bench -canonical %s + +- | + detected +- > + + + # detected +- |1 + explicit +- > + + detected diff --git a/test/YAMLParser/spec-09-21.data b/test/YAMLParser/spec-09-21.data new file mode 100644 index 00000000000..2bcc28337f9 --- /dev/null +++ b/test/YAMLParser/spec-09-21.data @@ -0,0 +1,12 @@ +# RUN: yaml-bench -canonical %s |& FileCheck %s + +- | + + text +- > + text + text +- |1 + text + +# CHECK: error diff --git a/test/YAMLParser/spec-09-22.data b/test/YAMLParser/spec-09-22.data new file mode 100644 index 00000000000..b95faa50b5d --- /dev/null +++ b/test/YAMLParser/spec-09-22.data @@ -0,0 +1,6 @@ +# RUN: yaml-bench -canonical %s + +strip: |- + text
clip: | + text…keep: |+ + text
 diff --git a/test/YAMLParser/spec-09-23.data b/test/YAMLParser/spec-09-23.data new file mode 100644 index 00000000000..94f839818b6 --- /dev/null +++ b/test/YAMLParser/spec-09-23.data @@ -0,0 +1,13 @@ +# RUN: yaml-bench -canonical %s + + # Strip + # Comments: +strip: |- + # text
 
 # Clip + # comments: +…clip: | + # text… 
 # Keep + # comments: +…keep: |+ + # text
… # Trail + # comments. diff --git a/test/YAMLParser/spec-09-24.data b/test/YAMLParser/spec-09-24.data new file mode 100644 index 00000000000..f08eae6a80e --- /dev/null +++ b/test/YAMLParser/spec-09-24.data @@ -0,0 +1,8 @@ +# RUN: yaml-bench -canonical %s + +strip: >- + +clip: > + +keep: |+ + diff --git a/test/YAMLParser/spec-09-25.data b/test/YAMLParser/spec-09-25.data new file mode 100644 index 00000000000..b15edb523d2 --- /dev/null +++ b/test/YAMLParser/spec-09-25.data @@ -0,0 +1,5 @@ +# RUN: yaml-bench -canonical %s + +| # Simple block scalar + literal + text diff --git a/test/YAMLParser/spec-09-26.data b/test/YAMLParser/spec-09-26.data new file mode 100644 index 00000000000..286740ed39c --- /dev/null +++ b/test/YAMLParser/spec-09-26.data @@ -0,0 +1,10 @@ +# RUN: yaml-bench -canonical %s + +| + + + literal + + text + + # Comment diff --git a/test/YAMLParser/spec-09-27.data b/test/YAMLParser/spec-09-27.data new file mode 100644 index 00000000000..286740ed39c --- /dev/null +++ b/test/YAMLParser/spec-09-27.data @@ -0,0 +1,10 @@ +# RUN: yaml-bench -canonical %s + +| + + + literal + + text + + # Comment diff --git a/test/YAMLParser/spec-09-28.data b/test/YAMLParser/spec-09-28.data new file mode 100644 index 00000000000..286740ed39c --- /dev/null +++ b/test/YAMLParser/spec-09-28.data @@ -0,0 +1,10 @@ +# RUN: yaml-bench -canonical %s + +| + + + literal + + text + + # Comment diff --git a/test/YAMLParser/spec-09-29.data b/test/YAMLParser/spec-09-29.data new file mode 100644 index 00000000000..e8906ff64a1 --- /dev/null +++ b/test/YAMLParser/spec-09-29.data @@ -0,0 +1,6 @@ +# RUN: yaml-bench -canonical %s + +> # Simple folded scalar + folded + text + lines diff --git a/test/YAMLParser/spec-09-30.data b/test/YAMLParser/spec-09-30.data new file mode 100644 index 00000000000..a2d8bf49504 --- /dev/null +++ b/test/YAMLParser/spec-09-30.data @@ -0,0 +1,16 @@ +# RUN: yaml-bench -canonical %s + +> + folded + line + + next + line + + * bullet + * list + + last + line + +# Comment 
diff --git a/test/YAMLParser/spec-09-31.data b/test/YAMLParser/spec-09-31.data new file mode 100644 index 00000000000..a2d8bf49504 --- /dev/null +++ b/test/YAMLParser/spec-09-31.data @@ -0,0 +1,16 @@ +# RUN: yaml-bench -canonical %s + +> + folded + line + + next + line + + * bullet + * list + + last + line + +# Comment diff --git a/test/YAMLParser/spec-09-32.data b/test/YAMLParser/spec-09-32.data new file mode 100644 index 00000000000..a2d8bf49504 --- /dev/null +++ b/test/YAMLParser/spec-09-32.data @@ -0,0 +1,16 @@ +# RUN: yaml-bench -canonical %s + +> + folded + line + + next + line + + * bullet + * list + + last + line + +# Comment diff --git a/test/YAMLParser/spec-09-33.data b/test/YAMLParser/spec-09-33.data new file mode 100644 index 00000000000..a2d8bf49504 --- /dev/null +++ b/test/YAMLParser/spec-09-33.data @@ -0,0 +1,16 @@ +# RUN: yaml-bench -canonical %s + +> + folded + line + + next + line + + * bullet + * list + + last + line + +# Comment diff --git a/test/YAMLParser/spec-10-01.data b/test/YAMLParser/spec-10-01.data new file mode 100644 index 00000000000..549a54db42f --- /dev/null +++ b/test/YAMLParser/spec-10-01.data @@ -0,0 +1,4 @@ +# RUN: yaml-bench -canonical %s + +- [ inner, inner, ] +- [inner,last] diff --git a/test/YAMLParser/spec-10-02.data b/test/YAMLParser/spec-10-02.data new file mode 100644 index 00000000000..662427a0c06 --- /dev/null +++ b/test/YAMLParser/spec-10-02.data @@ -0,0 +1,10 @@ +# RUN: yaml-bench -canonical %s + +[ +"double + quoted", 'single + quoted', +plain + text, [ nested ], +single: pair , +] diff --git a/test/YAMLParser/spec-10-03.data b/test/YAMLParser/spec-10-03.data new file mode 100644 index 00000000000..43f300e40c3 --- /dev/null +++ b/test/YAMLParser/spec-10-03.data @@ -0,0 +1,6 @@ +# RUN: yaml-bench -canonical %s + +block: # Block + # sequence +- one +- two : three diff --git a/test/YAMLParser/spec-10-04.data b/test/YAMLParser/spec-10-04.data new file mode 100644 index 00000000000..733a570efe8 --- /dev/null +++ 
b/test/YAMLParser/spec-10-04.data @@ -0,0 +1,6 @@ +# RUN: yaml-bench -canonical %s + +block: +- one +- + - two diff --git a/test/YAMLParser/spec-10-05.data b/test/YAMLParser/spec-10-05.data new file mode 100644 index 00000000000..3848b2a2006 --- /dev/null +++ b/test/YAMLParser/spec-10-05.data @@ -0,0 +1,9 @@ +# RUN: yaml-bench -canonical %s + +- # Empty +- | + block node +- - one # in-line + - two # sequence +- one: two # in-line + # mapping diff --git a/test/YAMLParser/spec-10-06.data b/test/YAMLParser/spec-10-06.data new file mode 100644 index 00000000000..40efb2b916c --- /dev/null +++ b/test/YAMLParser/spec-10-06.data @@ -0,0 +1,4 @@ +# RUN: yaml-bench -canonical %s + +- { inner : entry , also: inner , } +- {inner: entry,last : entry} diff --git a/test/YAMLParser/spec-10-07.data b/test/YAMLParser/spec-10-07.data new file mode 100644 index 00000000000..7aa350e40bb --- /dev/null +++ b/test/YAMLParser/spec-10-07.data @@ -0,0 +1,9 @@ +# RUN: yaml-bench -canonical %s + +{ +? : value, # Empty key +? explicit + key: value, +simple key : value, +[ collection, simple, key ]: value +} diff --git a/test/YAMLParser/spec-10-08.data b/test/YAMLParser/spec-10-08.data new file mode 100644 index 00000000000..5b981e98339 --- /dev/null +++ b/test/YAMLParser/spec-10-08.data @@ -0,0 +1,13 @@ +# RUN: yaml-bench -canonical %s |& FileCheck %s +# +# This fails because even without a key token, some contexts (in this case flow +# maps) allow implicit null keys, which mix with this in weird ways. 
+# XFAIL: * + +{ +multi-line + simple key : value, +very long ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................(>1KB).........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
.......................................................................................................................... key: value +} + +# CHECK: error diff --git a/test/YAMLParser/spec-10-09.data b/test/YAMLParser/spec-10-09.data new file mode 100644 index 00000000000..a6b1fd00dde --- /dev/null +++ b/test/YAMLParser/spec-10-09.data @@ -0,0 +1,6 @@ +# RUN: yaml-bench -canonical %s + +{ +key : value, +empty: # empty value↓ +} diff --git a/test/YAMLParser/spec-10-10.data b/test/YAMLParser/spec-10-10.data new file mode 100644 index 00000000000..c97901ddfbe --- /dev/null +++ b/test/YAMLParser/spec-10-10.data @@ -0,0 +1,10 @@ +# RUN: yaml-bench -canonical %s + +{ +? explicit key1 : explicit value, +? explicit key2 : , # Explicit empty +? explicit key3, # Empty value +simple key1 : explicit value, +simple key2 : , # Explicit empty +simple key3, # Empty value +} diff --git a/test/YAMLParser/spec-10-11.data b/test/YAMLParser/spec-10-11.data new file mode 100644 index 00000000000..51bd06f0202 --- /dev/null +++ b/test/YAMLParser/spec-10-11.data @@ -0,0 +1,9 @@ +# RUN: yaml-bench -canonical %s + +[ +? explicit key1 : explicit value, +? explicit key2 : , # Explicit empty +? explicit key3, # Implicit empty +simple key1 : explicit value, +simple key2 : , # Explicit empty +] diff --git a/test/YAMLParser/spec-10-12.data b/test/YAMLParser/spec-10-12.data new file mode 100644 index 00000000000..65a90b3f2c5 --- /dev/null +++ b/test/YAMLParser/spec-10-12.data @@ -0,0 +1,5 @@ +# RUN: yaml-bench -canonical %s + +block: # Block + # mapping + key: value diff --git a/test/YAMLParser/spec-10-13.data b/test/YAMLParser/spec-10-13.data new file mode 100644 index 00000000000..ccadeb1e7d5 --- /dev/null +++ b/test/YAMLParser/spec-10-13.data @@ -0,0 +1,7 @@ +# RUN: yaml-bench -canonical %s + +? explicit key # implicit value +? 
| + block key +: - one # explicit in-line + - two # block value diff --git a/test/YAMLParser/spec-10-14.data b/test/YAMLParser/spec-10-14.data new file mode 100644 index 00000000000..866ec1f7b2c --- /dev/null +++ b/test/YAMLParser/spec-10-14.data @@ -0,0 +1,6 @@ +# RUN: yaml-bench -canonical %s + +plain key: # empty value +"quoted key": +- one # explicit next-line +- two # block value diff --git a/test/YAMLParser/spec-10-15.data b/test/YAMLParser/spec-10-15.data new file mode 100644 index 00000000000..7d061bddd19 --- /dev/null +++ b/test/YAMLParser/spec-10-15.data @@ -0,0 +1,5 @@ +# RUN: yaml-bench -canonical %s + +- sun: yellow +- ? earth: blue + : moon: white diff --git a/test/YAMLParser/str.data b/test/YAMLParser/str.data new file mode 100644 index 00000000000..bf013b6f52c --- /dev/null +++ b/test/YAMLParser/str.data @@ -0,0 +1,3 @@ +# RUN: yaml-bench -canonical %s + +- abcd diff --git a/test/YAMLParser/timestamp-bugs.data b/test/YAMLParser/timestamp-bugs.data new file mode 100644 index 00000000000..bf41a21b22d --- /dev/null +++ b/test/YAMLParser/timestamp-bugs.data @@ -0,0 +1,8 @@ +# RUN: yaml-bench -canonical %s + +- 2001-12-14 21:59:43.10 -5:30 +- 2001-12-14 21:59:43.10 +5:30 +- 2001-12-14 21:59:43.00101 +- 2001-12-14 21:59:43+1 +- 2001-12-14 21:59:43-1:30 +- 2005-07-08 17:35:04.517600 diff --git a/test/YAMLParser/timestamp.data b/test/YAMLParser/timestamp.data new file mode 100644 index 00000000000..79945451b54 --- /dev/null +++ b/test/YAMLParser/timestamp.data @@ -0,0 +1,7 @@ +# RUN: yaml-bench -canonical %s + +- 2001-12-15T02:59:43.1Z +- 2001-12-14t21:59:43.10-05:00 +- 2001-12-14 21:59:43.10 -5 +- 2001-12-15 2:59:43.10 +- 2002-12-14 diff --git a/test/YAMLParser/utf8-implicit.data b/test/YAMLParser/utf8-implicit.data new file mode 100644 index 00000000000..ee2791fb062 --- /dev/null +++ b/test/YAMLParser/utf8-implicit.data @@ -0,0 +1,3 @@ +# RUN: yaml-bench -canonical %s + +--- implicit UTF-8 diff --git a/test/YAMLParser/utf8.data b/test/YAMLParser/utf8.data 
new file mode 100644 index 00000000000..3935e9d1217 --- /dev/null +++ b/test/YAMLParser/utf8.data @@ -0,0 +1,3 @@ +# RUN: yaml-bench -canonical %s + +--- UTF-8 diff --git a/test/YAMLParser/value.data b/test/YAMLParser/value.data new file mode 100644 index 00000000000..311ccd4f22e --- /dev/null +++ b/test/YAMLParser/value.data @@ -0,0 +1,3 @@ +# RUN: yaml-bench -canonical %s + +- = diff --git a/test/YAMLParser/yaml.data b/test/YAMLParser/yaml.data new file mode 100644 index 00000000000..3ce5e4b73e2 --- /dev/null +++ b/test/YAMLParser/yaml.data @@ -0,0 +1,5 @@ +# RUN: yaml-bench -canonical %s + +- !!yaml '!' +- !!yaml '&' +- !!yaml '*' diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt index ce0f5cd8226..5d691728d80 100644 --- a/unittests/CMakeLists.txt +++ b/unittests/CMakeLists.txt @@ -175,4 +175,5 @@ add_llvm_unittest(Support Support/TimeValue.cpp Support/TypeBuilderTest.cpp Support/ValueHandleTest.cpp + Support/YAMLParserTest.cpp ) diff --git a/unittests/Support/YAMLParserTest.cpp b/unittests/Support/YAMLParserTest.cpp new file mode 100644 index 00000000000..e88427ac09d --- /dev/null +++ b/unittests/Support/YAMLParserTest.cpp @@ -0,0 +1,179 @@ +//===- unittest/Support/YAMLParserTest ------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/YAMLParser.h" +#include "gtest/gtest.h" + +namespace llvm { + +// Checks that the given input gives a parse error. Makes sure that an error +// text is available and the parse fails. 
+static void ExpectParseError(StringRef Message, StringRef Input) { + SourceMgr SM; + yaml::Stream Stream(Input, SM); + EXPECT_FALSE(Stream.validate()) << Message << ": " << Input; + EXPECT_TRUE(Stream.failed()) << Message << ": " << Input; +} + +// Checks that the given input can be parsed without error. +static void ExpectParseSuccess(StringRef Message, StringRef Input) { + SourceMgr SM; + yaml::Stream Stream(Input, SM); + EXPECT_TRUE(Stream.validate()) << Message << ": " << Input; +} + +TEST(YAMLParser, ParsesEmptyArray) { + ExpectParseSuccess("Empty array", "[]"); +} + +TEST(YAMLParser, FailsIfNotClosingArray) { + ExpectParseError("Not closing array", "["); + ExpectParseError("Not closing array", " [ "); + ExpectParseError("Not closing array", " [x"); +} + +TEST(YAMLParser, ParsesEmptyArrayWithWhitespace) { + ExpectParseSuccess("Array with spaces", " [ ] "); + ExpectParseSuccess("All whitespaces", "\t\r\n[\t\n \t\r ]\t\r \n\n"); +} + +TEST(YAMLParser, ParsesEmptyObject) { + ExpectParseSuccess("Empty object", "[{}]"); +} + +TEST(YAMLParser, ParsesObject) { + ExpectParseSuccess("Object with an entry", "[{\"a\":\"/b\"}]"); +} + +TEST(YAMLParser, ParsesMultipleKeyValuePairsInObject) { + ExpectParseSuccess("Multiple key, value pairs", + "[{\"a\":\"/b\",\"c\":\"d\",\"e\":\"f\"}]"); +} + +TEST(YAMLParser, FailsIfNotClosingObject) { + ExpectParseError("Missing close on empty", "[{]"); + ExpectParseError("Missing close after pair", "[{\"a\":\"b\"]"); +} + +TEST(YAMLParser, FailsIfMissingColon) { + ExpectParseError("Missing colon between key and value", "[{\"a\"\"/b\"}]"); + ExpectParseError("Missing colon between key and value", "[{\"a\" \"b\"}]"); +} + +TEST(YAMLParser, FailsOnMissingQuote) { + ExpectParseError("Missing open quote", "[{a\":\"b\"}]"); + ExpectParseError("Missing closing quote", "[{\"a\":\"b}]"); +} + +TEST(YAMLParser, ParsesEscapedQuotes) { + ExpectParseSuccess("Parses escaped string in key and value", + "[{\"a\":\"\\\"b\\\" \\\" \\\"\"}]"); +} + 
+TEST(YAMLParser, ParsesEmptyString) { + ExpectParseSuccess("Parses empty string in value", "[{\"a\":\"\"}]"); +} + +TEST(YAMLParser, ParsesMultipleObjects) { + ExpectParseSuccess( + "Multiple objects in array", + "[" + " { \"a\" : \"b\" }," + " { \"a\" : \"b\" }," + " { \"a\" : \"b\" }" + "]"); +} + +TEST(YAMLParser, FailsOnMissingComma) { + ExpectParseError( + "Missing comma", + "[" + " { \"a\" : \"b\" }" + " { \"a\" : \"b\" }" + "]"); +} + +TEST(YAMLParser, ParsesSpacesInBetweenTokens) { + ExpectParseSuccess( + "Various whitespace between tokens", + " \t \n\n \r [ \t \n\n \r" + " \t \n\n \r { \t \n\n \r\"a\"\t \n\n \r :" + " \t \n\n \r \"b\"\t \n\n \r } \t \n\n \r,\t \n\n \r" + " \t \n\n \r { \t \n\n \r\"a\"\t \n\n \r :" + " \t \n\n \r \"b\"\t \n\n \r } \t \n\n \r]\t \n\n \r"); +} + +TEST(YAMLParser, ParsesArrayOfArrays) { + ExpectParseSuccess("Array of arrays", "[[]]"); +} + +TEST(YAMLParser, HandlesEndOfFileGracefully) { + ExpectParseError("In string starting with EOF", "[\""); + ExpectParseError("In string hitting EOF", "[\" "); + ExpectParseError("In string escaping EOF", "[\" \\"); + ExpectParseError("In array starting with EOF", "["); + ExpectParseError("In array element starting with EOF", "[[], "); + ExpectParseError("In array hitting EOF", "[[] "); + ExpectParseError("In array hitting EOF", "[[]"); + ExpectParseError("In object hitting EOF", "{\"\""); +} + +// Checks that the given string can be parsed into an identical string inside +// of an array. 
+static void ExpectCanParseString(StringRef String) { + std::string StringInArray = (llvm::Twine("[\"") + String + "\"]").str(); + SourceMgr SM; + yaml::Stream Stream(StringInArray, SM); + yaml::SequenceNode *ParsedSequence + = dyn_cast<yaml::SequenceNode>(Stream.begin()->getRoot()); + StringRef ParsedString + = dyn_cast<yaml::ScalarNode>( + static_cast<yaml::Node*>(ParsedSequence->begin()))->getRawValue(); + ParsedString = ParsedString.substr(1, ParsedString.size() - 2); + EXPECT_EQ(String, ParsedString.str()); +} + +// Checks that parsing the given string inside an array fails. +static void ExpectCannotParseString(StringRef String) { + std::string StringInArray = (llvm::Twine("[\"") + String + "\"]").str(); + ExpectParseError((Twine("When parsing string \"") + String + "\"").str(), + StringInArray); +} + +TEST(YAMLParser, ParsesStrings) { + ExpectCanParseString(""); + ExpectCannotParseString("\\"); + ExpectCannotParseString("\""); + ExpectCanParseString(" "); + ExpectCanParseString("\\ "); + ExpectCanParseString("\\\""); + ExpectCannotParseString("\"\\"); + ExpectCannotParseString(" \\"); + ExpectCanParseString("\\\\"); + ExpectCannotParseString("\\\\\\"); + ExpectCanParseString("\\\\\\\\"); + ExpectCanParseString("\\\" "); + ExpectCannotParseString("\\\\\" "); + ExpectCanParseString("\\\\\\\" "); + ExpectCanParseString(" \\\\ \\\" \\\\\\\" "); +} + +TEST(YAMLParser, WorksWithIteratorAlgorithms) { + SourceMgr SM; + yaml::Stream Stream("[\"1\", \"2\", \"3\", \"4\", \"5\", \"6\"]", SM); + yaml::SequenceNode *Array + = dyn_cast<yaml::SequenceNode>(Stream.begin()->getRoot()); + EXPECT_EQ(6, std::distance(Array->begin(), Array->end())); +} + +} // end namespace llvm diff --git a/utils/yaml-bench/CMakeLists.txt b/utils/yaml-bench/CMakeLists.txt new file mode 100644 index 00000000000..403182ceee2 --- /dev/null +++ b/utils/yaml-bench/CMakeLists.txt @@ -0,0 +1,5 @@ +add_llvm_utility(yaml-bench + YAMLBench.cpp + ) + +target_link_libraries(yaml-bench LLVMSupport) diff --git a/utils/yaml-bench/Makefile b/utils/yaml-bench/Makefile new file mode
100644 index 00000000000..07e91226c7a --- /dev/null +++ b/utils/yaml-bench/Makefile @@ -0,0 +1,20 @@ +##===- utils/yaml-bench/Makefile ---------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. +TOOLNAME = yaml-bench +USEDLIBS = LLVMSupport.a + +# This tool has no plugins, optimize startup time. +TOOL_NO_EXPORTS = 1 + +# Don't install this utility +NO_INSTALL = 1 + +include $(LEVEL)/Makefile.common diff --git a/utils/yaml-bench/YAMLBench.cpp b/utils/yaml-bench/YAMLBench.cpp new file mode 100644 index 00000000000..e5ee52a16d9 --- /dev/null +++ b/utils/yaml-bench/YAMLBench.cpp @@ -0,0 +1,203 @@ +//===- YAMLBench - Benchmark the YAMLParser implementation ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This program executes the YAMLParser on differently sized YAML texts and +// outputs the run time.
+// +//===----------------------------------------------------------------------===// + + +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/system_error.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/YAMLParser.h" + +using namespace llvm; + +static cl::opt<bool> + DumpTokens( "tokens" + , cl::desc("Print the tokenization of the file.") + , cl::init(false) + ); + +static cl::opt<bool> + DumpCanonical( "canonical" + , cl::desc("Print the canonical YAML for this file.") + , cl::init(false) + ); + +static cl::opt<std::string> + Input(cl::Positional, cl::desc("<input>")); + +static cl::opt<bool> + Verify( "verify" + , cl::desc( + "Run a quick verification useful for regression testing") + , cl::init(false) + ); + +static cl::opt<unsigned> + MemoryLimitMB("memory-limit", cl::desc( + "Do not use more megabytes of memory"), + cl::init(1000)); + +struct indent { + unsigned distance; + indent(unsigned d) : distance(d) {} +}; + +static raw_ostream &operator <<(raw_ostream &os, const indent &in) { + for (unsigned i = 0; i < in.distance; ++i) + os << " "; + return os; +} + +static void dumpNode( yaml::Node *n + , unsigned Indent = 0 + , bool SuppressFirstIndent = false) { + if (!n) + return; + if (!SuppressFirstIndent) + outs() << indent(Indent); + StringRef Anchor = n->getAnchor(); + if (!Anchor.empty()) + outs() << "&" << Anchor << " "; + if (yaml::ScalarNode *sn = dyn_cast<yaml::ScalarNode>(n)) { + SmallString<32> Storage; + StringRef Val = sn->getValue(Storage); + outs() << "!!str \"" << yaml::escape(Val) << "\""; + } else if (yaml::SequenceNode *sn = dyn_cast<yaml::SequenceNode>(n)) { + outs() << "!!seq [\n"; + ++Indent; + for (yaml::SequenceNode::iterator i = sn->begin(), e = sn->end(); + i != e; ++i) { + dumpNode(i, Indent); + outs() << ",\n"; + } + --Indent; + outs() << indent(Indent) << "]"; + } else if (yaml::MappingNode *mn = dyn_cast<yaml::MappingNode>(n)) { +
outs() << "!!map {\n"; + ++Indent; + for (yaml::MappingNode::iterator i = mn->begin(), e = mn->end(); + i != e; ++i) { + outs() << indent(Indent) << "? "; + dumpNode(i->getKey(), Indent, true); + outs() << "\n"; + outs() << indent(Indent) << ": "; + dumpNode(i->getValue(), Indent, true); + outs() << ",\n"; + } + --Indent; + outs() << indent(Indent) << "}"; + } else if (yaml::AliasNode *an = dyn_cast<yaml::AliasNode>(n)){ + outs() << "*" << an->getName(); + } else if (dyn_cast<yaml::NullNode>(n)) { + outs() << "!!null null"; + } +} + +static void dumpStream(yaml::Stream &stream) { + for (yaml::document_iterator di = stream.begin(), de = stream.end(); di != de; + ++di) { + outs() << "%YAML 1.2\n" + << "---\n"; + yaml::Node *n = di->getRoot(); + if (n) + dumpNode(n); + else + break; + outs() << "\n...\n"; + } +} + +static void benchmark( llvm::TimerGroup &Group + , llvm::StringRef Name + , llvm::StringRef JSONText) { + llvm::Timer BaseLine((Name + ": Loop").str(), Group); + BaseLine.startTimer(); + char C = 0; + for (llvm::StringRef::iterator I = JSONText.begin(), + E = JSONText.end(); + I != E; ++I) { C += *I; } + BaseLine.stopTimer(); + volatile char DontOptimizeOut = C; (void)DontOptimizeOut; + + llvm::Timer Tokenizing((Name + ": Tokenizing").str(), Group); + Tokenizing.startTimer(); + { + yaml::scanTokens(JSONText); + } + Tokenizing.stopTimer(); + + llvm::Timer Parsing((Name + ": Parsing").str(), Group); + Parsing.startTimer(); + { + llvm::SourceMgr SM; + llvm::yaml::Stream stream(JSONText, SM); + stream.skip(); + } + Parsing.stopTimer(); +} + +static std::string createJSONText(size_t MemoryMB, unsigned ValueSize) { + std::string JSONText; + llvm::raw_string_ostream Stream(JSONText); + Stream << "[\n"; + size_t MemoryBytes = MemoryMB * 1024 * 1024; + while (JSONText.size() < MemoryBytes) { + Stream << " {\n" + << " \"key1\": \"" << std::string(ValueSize, '*') << "\",\n" + << " \"key2\": \"" << std::string(ValueSize, '*') << "\",\n" + << " \"key3\": \"" << std::string(ValueSize, '*') << "\"\n" +
<< " }"; + Stream.flush(); + if (JSONText.size() < MemoryBytes) Stream << ","; + Stream << "\n"; + } + Stream << "]\n"; + Stream.flush(); + return JSONText; +} + +int main(int argc, char **argv) { + llvm::cl::ParseCommandLineOptions(argc, argv); + if (Input.getNumOccurrences()) { + OwningPtr<MemoryBuffer> Buf; + if (MemoryBuffer::getFileOrSTDIN(Input, Buf)) + return 1; + + llvm::SourceMgr sm; + if (DumpTokens) { + yaml::dumpTokens(Buf->getBuffer(), outs()); + } + + if (DumpCanonical) { + yaml::Stream stream(Buf->getBuffer(), sm); + dumpStream(stream); + } + } + + if (Verify) { + llvm::TimerGroup Group("YAML parser benchmark"); + benchmark(Group, "Fast", createJSONText(10, 500)); + } else if (!DumpCanonical && !DumpTokens) { + llvm::TimerGroup Group("YAML parser benchmark"); + benchmark(Group, "Small Values", createJSONText(MemoryLimitMB, 5)); + benchmark(Group, "Medium Values", createJSONText(MemoryLimitMB, 500)); + benchmark(Group, "Large Values", createJSONText(MemoryLimitMB, 50000)); + } + + return 0; +}