1 //===--- JSONParser.h - Simple JSON parser ----------------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a JSON parser.
12 // See http://www.json.org/ for an overview.
13 // See http://www.ietf.org/rfc/rfc4627.txt for the full standard.
15 // FIXME: Currently this supports a subset of JSON. Specifically, support
16 // for numbers, booleans and null for values is missing.
18 //===----------------------------------------------------------------------===//
20 #ifndef LLVM_SUPPORT_JSON_PARSER_H
21 #define LLVM_SUPPORT_JSON_PARSER_H
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Support/Allocator.h"
25 #include "llvm/Support/Casting.h"
26 #include "llvm/Support/ErrorHandling.h"
27 #include "llvm/Support/SourceMgr.h"
34 class JSONKeyValuePair;
36 /// \brief Base class for a parsable JSON atom.
38 /// This class has no semantics other than being a unit of JSON data which can
39 /// be parsed out of a JSON document.
42 /// \brief The possible kinds of JSON atoms.
/// This is the tag that the classof() helpers of the derived classes compare
/// against.
43 enum Kind { JK_KeyValuePair, JK_Array, JK_Object, JK_String };
45 /// \brief Returns the kind this atom was constructed with.
46 Kind getKind() const { return MyKind; }
/// \brief dyn_cast helper; always true, since every atom is a JSONAtom.
48 static bool classof(const JSONAtom *Atom) { return true; }
/// \brief Creates an atom tagged with the given kind.
51 JSONAtom(Kind MyKind) : MyKind(MyKind) {}
57 /// \brief A parser for JSON text.
59 /// Use an object of JSONParser to iterate over the values of a JSON text.
60 /// All objects are parsed during the iteration, so you can only iterate once
61 /// over the JSON text, but the cost of partial iteration is minimized.
62 /// Create a new JSONParser if you want to iterate multiple times.
65 /// \brief Create a JSONParser for the given input.
67 /// Parsing is started via parseRoot(). Access to the object returned from
68 /// parseRoot() will parse the input lazily.
69 JSONParser(StringRef Input, SourceMgr *SM);
71 /// \brief Returns the outermost JSON value (either an array or an object).
73 /// Can return NULL if the input does not start with an array or an object.
74 /// The object is not parsed yet - the caller must iterate over the
75 /// returned object to trigger parsing.
77 /// A JSONValue can be either a JSONString, JSONObject or JSONArray.
78 JSONValue *parseRoot();
80 /// \brief Parses the JSON text and returns whether it is valid JSON.
82 /// In case validate() returns false, failed() will return true and
83 /// getErrorMessage() will return the parsing error.
86 /// \brief Returns true if an error occurred during parsing.
88 /// If there was an error while parsing an object that was created by
89 /// iterating over the result of 'parseRoot', 'failed' will return true.
93 /// \brief These methods manage the implementation details of parsing new JSON
96 JSONString *parseString();
97 JSONValue *parseValue();
98 JSONKeyValuePair *parseKeyValuePair();
101 /// \brief Helpers to parse the elements out of both forms of containers.
103 const JSONAtom *parseElement(JSONAtom::Kind ContainerKind);
104 StringRef::iterator parseFirstElement(JSONAtom::Kind ContainerKind,
105 char StartChar, char EndChar,
106 const JSONAtom *&Element);
107 StringRef::iterator parseNextElement(JSONAtom::Kind ContainerKind,
109 const JSONAtom *&Element);
112 /// \brief Whitespace parsing.
114 void nextNonWhitespace();
118 /// \brief These methods are used for error handling.
120 void setExpectedError(StringRef Expected, StringRef Found);
121 void setExpectedError(StringRef Expected, char Found);
122 bool errorIfAtEndOfFile(StringRef Message);
123 bool errorIfNotAt(char C, StringRef Message);
126 /// \brief Skips all elements in the given container.
127 bool skipContainer(const JSONContainer &Container);
129 /// \brief Skips to the next position behind the given JSON atom.
130 bool skip(const JSONAtom &Atom);
132 /// All nodes are allocated by the parser and will be deallocated when the
133 /// parser is destroyed.
134 BumpPtrAllocator ValueAllocator;
136 /// \brief The original input to the parser.
137 MemoryBuffer *InputBuffer;
139 /// \brief The source manager used for diagnostics and buffer management.
142 /// \brief The current position in the parse stream.
143 StringRef::iterator Position;
145 /// \brief The end position for fast EOF checks without introducing
146 /// unnecessary dereferences.
147 StringRef::iterator End;
149 /// \brief If true, an error has occurred.
// JSONContainer calls parseFirstElement(), parseNextElement() and skip() on
// the parser while it is being iterated.
152 friend class JSONContainer;
156 /// \brief Base class for JSON value objects.
158 /// This object represents an abstract JSON value. It is the root node behind
159 /// the group of JSON entities that can represent top-level values in a JSON
160 /// document. It has no API, and is just a placeholder in the type hierarchy of
162 class JSONValue : public JSONAtom {
/// \brief Forwards the concrete kind of the value to the JSONAtom base.
164 JSONValue(Kind MyKind) : JSONAtom(MyKind) {}
167 /// \brief dyn_cast helpers
// Classification is decided by switching over the atom's kind tag.
169 static bool classof(const JSONAtom *Atom) {
170 switch (Atom->getKind()) {
175 case JK_KeyValuePair:
178 llvm_unreachable("Invalid JSONAtom kind");
// Anything statically known to be a JSONValue trivially is one.
180 static bool classof(const JSONValue *Value) { return true; }
184 /// \brief Gives access to the text of a JSON string.
186 /// FIXME: Implement a method to return the unescaped text.
187 class JSONString : public JSONValue {
189 /// \brief Returns the underlying parsed text of the string.
191 /// This is the raw text exactly as it was stored at construction; no
/// unescaping is applied to it here (see the FIXME on this class).
192 /// See http://www.ietf.org/rfc/rfc4627.txt for details.
193 StringRef getRawText() const { return RawText; };
/// \brief Creates a string atom that stores the given text unmodified.
196 JSONString(StringRef RawText) : JSONValue(JK_String), RawText(RawText) {}
200 friend class JSONParser;
203 /// \brief dyn_cast helpers
205 static bool classof(const JSONAtom *Atom) {
206 return Atom->getKind() == JK_String;
208 static bool classof(const JSONString *String) { return true; }
212 /// \brief A (key, value) tuple of type (JSONString *, JSONValue *).
214 /// Note that JSONKeyValuePair is not a JSONValue; it is a bare JSONAtom.
215 /// JSONKeyValuePairs can be elements of a JSONObject, but not of a JSONArray.
216 /// They are not viable as top-level values either.
217 class JSONKeyValuePair : public JSONAtom {
/// \brief The key of the pair; the pointer is fixed at construction.
219 const JSONString * const Key;
/// \brief The value of the pair; the pointer is fixed at construction.
220 const JSONValue * const Value;
/// \brief Creates a pair atom that refers to the given key and value.
223 JSONKeyValuePair(const JSONString *Key, const JSONValue *Value)
224 : JSONAtom(JK_KeyValuePair), Key(Key), Value(Value) {}
226 friend class JSONParser;
229 /// \brief dyn_cast helpers
231 static bool classof(const JSONAtom *Atom) {
232 return Atom->getKind() == JK_KeyValuePair;
234 static bool classof(const JSONKeyValuePair *KeyValuePair) { return true; }
238 /// \brief Implementation of JSON containers (arrays and objects).
240 /// JSONContainers drive the lazy parsing of JSON arrays and objects via
241 /// forward iterators.
242 class JSONContainer : public JSONValue {
244 /// \brief An iterator that parses the underlying container during iteration.
246 /// Iterators on the same collection use shared state, so when multiple copies
247 /// of an iterator exist, only one is allowed to be used for iteration;
248 /// iterating multiple copies of an iterator of the same collection will lead
249 /// to undefined behavior.
/// \brief A copy refers to the same container as the iterator it was copied
/// from.
252 AtomIterator(const AtomIterator &I) : Container(I.Container) {}
254 /// \brief Iterator interface.
// If either side is an end iterator, the two are equal only when both are;
// otherwise they are equal when their containers are at the same position.
256 bool operator==(const AtomIterator &I) const {
257 if (isEnd() || I.isEnd())
258 return isEnd() == I.isEnd();
259 return Container->Position == I.Container->Position;
261 bool operator!=(const AtomIterator &I) const {
262 return !(*this == I);
// Advancing asks the container to parse its next element.
264 AtomIterator &operator++() {
265 Container->parseNextElement();
// Dereferencing yields the container's current atom.
268 const JSONAtom *operator*() {
269 return Container->Current;
274 /// \brief Create an iterator for which 'isEnd' returns true.
275 AtomIterator() : Container(0) {}
277 /// \brief Create an iterator for the given container.
278 AtomIterator(const JSONContainer *Container) : Container(Container) {}
// The end is reached when there is no container, or when the container's
// position equals a default-constructed iterator.
281 return Container == 0 || Container->Position == StringRef::iterator();
284 const JSONContainer * const Container;
286 friend class JSONContainer;
290 /// \brief An iterator for the specified AtomT.
292 /// Used for the implementation of iterators for JSONArray and JSONObject.
293 template <typename AtomT>
294 class IteratorTemplate : public std::iterator<std::forward_iterator_tag,
297 explicit IteratorTemplate(const AtomIterator& AtomI)
// Comparison is delegated to the wrapped AtomIterator.
300 bool operator==(const IteratorTemplate &I) const {
301 return AtomI == I.AtomI;
303 bool operator!=(const IteratorTemplate &I) const { return !(*this == I); }
305 IteratorTemplate &operator++() {
// The current atom is converted to AtomT via dyn_cast.
310 const AtomT *operator*() { return dyn_cast<AtomT>(*AtomI); }
/// \brief Creates a container that has not started parsing yet: the position
/// is default-constructed, there is no current element and 'Started' is false.
316 JSONContainer(JSONParser *Parser, char StartChar, char EndChar,
317 JSONAtom::Kind ContainerKind)
318 : JSONValue(ContainerKind), Parser(Parser),
319 Position(), Current(0), Started(false),
320 StartChar(StartChar), EndChar(EndChar) {}
322 /// \brief Returns a lazy parsing iterator over the container.
324 /// As the iterator drives the parse stream, atom_begin() must only be called
325 /// once per container.
326 AtomIterator atom_begin() const {
328 report_fatal_error("Cannot parse container twice.");
330 // Set up the position and current element when we begin iterating over the
332 Position = Parser->parseFirstElement(getKind(), StartChar, EndChar, Current);
333 return AtomIterator(this);
/// \brief Returns an iterator for which 'isEnd' returns true.
335 AtomIterator atom_end() const {
336 return AtomIterator();
340 AtomIterator atom_current() const {
344 return AtomIterator(this);
347 /// \brief Parse the next element in the container into the Current element.
349 /// This routine is called as an iterator into this container walks through
350 /// its elements. It mutates the container's internal current node to point to
351 /// the next atom of the container.
352 void parseNextElement() const {
// Move the parser behind the current atom before parsing the element that
// follows it.
353 Parser->skip(*Current);
354 Position = Parser->parseNextElement(getKind(), EndChar, Current);
357 // For parsing, JSONContainers call back into the JSONParser.
358 JSONParser * const Parser;
360 // 'Position', 'Current' and 'Started' store the state of the parse stream
361 // for iterators on the container; they don't change the container's elements
362 // and are thus marked as mutable.
363 mutable StringRef::iterator Position;
364 mutable const JSONAtom *Current;
365 mutable bool Started;
// The start delimiter that is handed to the parser in atom_begin().
367 const char StartChar;
370 friend class JSONParser;
373 /// \brief dyn_cast helpers
375 static bool classof(const JSONAtom *Atom) {
376 switch (Atom->getKind()) {
380 case JK_KeyValuePair:
384 llvm_unreachable("Invalid JSONAtom kind");
386 static bool classof(const JSONContainer *Container) { return true; }
390 /// \brief A simple JSON array.
391 class JSONArray : public JSONContainer {
/// \brief Iterator over the array's elements, viewed as JSONValue pointers.
393 typedef IteratorTemplate<JSONValue> const_iterator;
395 /// \brief Returns a lazy parsing iterator over the container.
397 /// As the iterator drives the parse stream, begin() must only be called
398 /// once per container.
399 const_iterator begin() const { return const_iterator(atom_begin()); }
/// \brief Returns the end iterator, which wraps atom_end().
400 const_iterator end() const { return const_iterator(atom_end()); }
/// \brief Creates an array container delimited by '[' and ']'.
403 JSONArray(JSONParser *Parser)
404 : JSONContainer(Parser, '[', ']', JSONAtom::JK_Array) {}
407 /// \brief dyn_cast helpers
409 static bool classof(const JSONAtom *Atom) {
410 return Atom->getKind() == JSONAtom::JK_Array;
412 static bool classof(const JSONArray *Array) { return true; }
415 friend class JSONParser;
418 /// \brief A JSON object: an iterable list of JSON key-value pairs.
419 class JSONObject : public JSONContainer {
/// \brief Iterator over the object's elements, viewed as JSONKeyValuePair
/// pointers.
421 typedef IteratorTemplate<JSONKeyValuePair> const_iterator;
423 /// \brief Returns a lazy parsing iterator over the container.
425 /// As the iterator drives the parse stream, begin() must only be called
426 /// once per container.
427 const_iterator begin() const { return const_iterator(atom_begin()); }
/// \brief Returns the end iterator, which wraps atom_end().
428 const_iterator end() const { return const_iterator(atom_end()); }
/// \brief Creates an object container delimited by '{' and '}'.
431 JSONObject(JSONParser *Parser)
432 : JSONContainer(Parser, '{', '}', JSONAtom::JK_Object) {}
435 /// \brief dyn_cast helpers
437 static bool classof(const JSONAtom *Atom) {
438 return Atom->getKind() == JSONAtom::JK_Object;
440 static bool classof(const JSONObject *Object) { return true; }
443 friend class JSONParser;
446 } // end namespace llvm
448 #endif // LLVM_SUPPORT_JSON_PARSER_H