1 //===--- JsonParser.h - Simple JSON parser ----------------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a JSON parser.
12 // See http://www.json.org/ for an overview.
13 // See http://www.ietf.org/rfc/rfc4627.txt for the full standard.
15 // FIXME: Currently this supports a subset of JSON. Specifically, support
16 // for numbers, booleans and null for values is missing.
18 //===----------------------------------------------------------------------===//
20 #ifndef LLVM_CLANG_TOOLING_JSON_PARSER_H
21 #define LLVM_CLANG_TOOLING_JSON_PARSER_H
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Support/Allocator.h"
25 #include "llvm/Support/ErrorHandling.h"
33 class JSONKeyValuePair;
35 /// \brief Base class for a parsable JSON atom.
37 /// This class has no semantics other than being a unit of JSON data which can
38 /// be parsed out of a JSON document.
41 /// \brief Possible types of JSON objects.
42 enum Kind { JK_KeyValuePair, JK_Array, JK_Object, JK_String };
44 /// \brief Returns the type of this value.
45 Kind getKind() const { return MyKind; }
/// \brief dyn_cast helper: accepts any JSONAtom pointer and always returns true.
47 static bool classof(const JSONAtom *Atom) { return true; }
/// \brief Creates an atom and records \p MyKind as the kind getKind() reports.
50 JSONAtom(Kind MyKind) : MyKind(MyKind) {}
53 /// \brief Parses to the end of the object and returns whether parsing
59 friend class JSONParser;
60 friend class JSONKeyValuePair;
61 template <typename, char, char, JSONAtom::Kind> friend class JSONContainer;
64 /// \brief A parser for JSON text.
66 /// Use an object of JSONParser to iterate over the values of a JSON text.
67 /// All objects are parsed during the iteration, so you can only iterate once
68 /// over the JSON text, but the cost of partial iteration is minimized.
69 /// Create a new JSONParser if you want to iterate multiple times.
72 /// \brief Create a JSONParser for the given input.
74 /// Parsing is started via parseRoot(). Access to the object returned from
75 /// parseRoot() will parse the input lazily.
76 JSONParser(StringRef Input);
78 /// \brief Returns the outermost JSON value (either an array or an object).
80 /// Can return NULL if the input does not start with an array or an object.
81 /// The object is not parsed yet - the caller must either iterate over the
82 /// returned object or call 'skip' to trigger parsing.
84 /// A JSONValue can be either a JSONString, JSONObject or JSONArray.
85 JSONValue *parseRoot();
87 /// \brief Parses the JSON text and returns whether it is valid JSON.
89 /// In case validate() returns false, failed() will return true and
90 /// getErrorMessage() will return the parsing error.
93 /// \brief Returns true if an error occurs during parsing.
95 /// If there was an error while parsing an object that was created by
96 /// iterating over the result of 'parseRoot', 'failed' will return true.
99 /// \brief Returns an error message when 'failed' returns true.
100 std::string getErrorMessage() const;
103 /// \brief These methods manage the implementation details of parsing new JSON
106 JSONString *parseString();
107 JSONValue *parseValue();
108 JSONKeyValuePair *parseKeyValuePair();
111 /// \brief Templated helpers to parse the elements out of both forms of JSON
114 template <typename AtomT> AtomT *parseElement();
115 template <typename AtomT, char StartChar, char EndChar>
116 StringRef::iterator parseFirstElement(const AtomT *&Element);
117 template <typename AtomT, char EndChar>
118 StringRef::iterator parseNextElement(const AtomT *&Element);
121 /// \brief Whitespace parsing.
123 void nextNonWhitespace();
127 /// \brief These methods are used for error handling.
129 void setExpectedError(StringRef Expected, StringRef Found);
130 void setExpectedError(StringRef Expected, char Found);
131 bool errorIfAtEndOfFile(StringRef Message);
132 bool errorIfNotAt(char C, StringRef Message);
135 /// All nodes are allocated by the parser and will be deallocated when the
136 /// parser is destroyed.
137 BumpPtrAllocator ValueAllocator;
139 /// \brief The original input to the parser.
140 const StringRef Input;
142 /// \brief The current position in the parse stream.
143 StringRef::iterator Position;
145 /// \brief If non-empty, an error has occurred.
146 std::string ErrorMessage;
148 template <typename AtomT, char StartChar, char EndChar,
149 JSONAtom::Kind ContainerKind>
150 friend class JSONContainer;
154 /// \brief Base class for JSON value objects.
156 /// This object represents an abstract JSON value. It is the root node behind
157 /// the group of JSON entities that can represent top-level values in a JSON
158 /// document. It has no API, and is just a placeholder in the type hierarchy of
160 class JSONValue : public JSONAtom {
/// \brief Creates a value of the given kind by forwarding \p MyKind to JSONAtom.
162 JSONValue(Kind MyKind) : JSONAtom(MyKind) {}
165 /// \brief dyn_cast helpers
167 static bool classof(const JSONAtom *Atom) {
168 switch (Atom->getKind()) {
173 case JK_KeyValuePair:
176 llvm_unreachable("Invalid JSONAtom kind");
/// A JSONValue pointer is trivially a JSONValue: always returns true.
178 static bool classof(const JSONValue *Value) { return true; }
182 /// \brief Gives access to the text of a JSON string.
184 /// FIXME: Implement a method to return the unescaped text.
185 class JSONString : public JSONValue {
187 /// \brief Returns the underlying parsed text of the string.
189 /// This is the raw JSON text; escape sequences are left undecoded.
190 /// See http://www.ietf.org/rfc/rfc4627.txt for details.
191 StringRef getRawText() const { return RawText; };
/// \brief Creates a JK_String value whose raw text is \p RawText.
194 JSONString(StringRef RawText) : JSONValue(JK_String), RawText(RawText) {}
196 /// \brief Skips to the next position in the parse stream.
/// This overload performs no work of its own and always returns true.
197 bool skip() const { return true; };
201 friend class JSONAtom;
202 friend class JSONParser;
205 /// \brief dyn_cast helpers
207 static bool classof(const JSONAtom *Atom) {
208 return Atom->getKind() == JK_String;
/// A JSONString pointer is trivially a JSONString: always returns true.
210 static bool classof(const JSONString *String) { return true; }
214 /// \brief A (key, value) tuple of type (JSONString *, JSONValue *).
216 /// Note that JSONKeyValuePair is not a JSONValue, it is a bare JSONAtom.
217 /// JSONKeyValuePairs can be elements of a JSONObject, but not of a JSONArray.
218 /// They are not viable as top-level values either.
219 class JSONKeyValuePair : public JSONAtom {
221 const JSONString * const Key;
222 const JSONValue * const Value;
/// \brief Creates a JK_KeyValuePair atom holding the given \p Key and \p Value
/// pointers.
225 JSONKeyValuePair(const JSONString *Key, const JSONValue *Value)
226 : JSONAtom(JK_KeyValuePair), Key(Key), Value(Value) {}
228 /// \brief Skips to the next position in the parse stream.
/// Delegates to the pair's value: returns whatever Value->skip() returns.
229 bool skip() const { return Value->skip(); };
231 friend class JSONAtom;
232 friend class JSONParser;
233 template <typename, char, char, JSONAtom::Kind> friend class JSONContainer;
236 /// \brief dyn_cast helpers
238 static bool classof(const JSONAtom *Atom) {
239 return Atom->getKind() == JK_KeyValuePair;
/// A JSONKeyValuePair pointer is trivially a JSONKeyValuePair: always returns
/// true.
241 static bool classof(const JSONKeyValuePair *KeyValuePair) { return true; }
245 /// \brief Implementation of JSON containers (arrays and objects).
247 /// JSONContainers drive the lazy parsing of JSON arrays and objects via
248 /// forward iterators. Call 'skip' to validate parsing of all elements of the
249 /// container and to position the parse stream behind the container.
250 template <typename AtomT, char StartChar, char EndChar,
251 JSONAtom::Kind ContainerKind>
252 class JSONContainer : public JSONValue {
254 /// \brief An iterator that parses the underlying container during iteration.
256 /// Iterators on the same collection use shared state, so when multiple copies
257 /// of an iterator exist, only one is allowed to be used for iteration;
258 /// iterating multiple copies of an iterator of the same collection will lead
259 /// to undefined behavior.
260 class const_iterator : public std::iterator<std::forward_iterator_tag,
/// \brief Copies \p I; the copy refers to the same container as \p I.
263 const_iterator(const const_iterator &I) : Container(I.Container) {}
265 bool operator==(const const_iterator &I) const {
266 if (isEnd() || I.isEnd())
267 return isEnd() == I.isEnd();
268 return Container->Position == I.Container->Position;
/// \brief Exact negation of operator==.
270 bool operator!=(const const_iterator &I) const { return !(*this == I); }
272 const_iterator &operator++() {
273 Container->parseNextElement();
/// \brief Returns the container's current element pointer (Container->Current).
///
/// Dereferences Container, so it must not be called on an iterator whose
/// Container is null (e.g. a default-constructed one).
277 const AtomT *operator*() { return Container->Current; }
280 /// \brief Create an iterator for which 'isEnd' returns true.
281 const_iterator() : Container(0) {}
283 /// \brief Create an iterator for the given container.
284 const_iterator(const JSONContainer *Container) : Container(Container) {}
287 return Container == 0 || Container->Position == StringRef::iterator();
290 const JSONContainer * const Container;
292 friend class JSONContainer;
295 /// \brief Returns a lazy parsing iterator over the container.
297 /// As the iterator drives the parse stream, begin() must only be called
298 /// once per container.
299 const_iterator begin() const {
301 report_fatal_error("Cannot parse container twice.");
303 // Set up the position and current element when we begin iterating over the
305 Position = Parser->parseFirstElement<AtomT, StartChar, EndChar>(Current);
306 return const_iterator(this);
309 const_iterator end() const {
310 return const_iterator();
/// \brief Creates a container of kind ContainerKind that calls back into
/// \p Parser.
///
/// The iteration state starts out as a default-constructed Position, a null
/// Current element and Started == false.
314 JSONContainer(JSONParser *Parser)
315 : JSONValue(ContainerKind), Parser(Parser),
316 Position(), Current(0), Started(false) {}
318 const_iterator current() const {
322 return const_iterator(this);
325 /// \brief Skips to the next position in the parse stream.
327 for (const_iterator I = current(), E = end(); I != E; ++I) {
332 return !Parser->failed();
335 /// \brief Parse the next element in the container into the Current element.
337 /// This routine is called as an iterator into this container walks through
338 /// its elements. It mutates the container's internal current node to point to
339 /// the next atom of the container.
340 void parseNextElement() const {
342 Position = Parser->parseNextElement<AtomT, EndChar>(Current);
345 // For parsing, JSONContainers call back into the JSONParser.
346 JSONParser * const Parser;
348 // 'Position', 'Current' and 'Started' store the state of the parse stream
349 // for iterators on the container, they don't change the container's elements
350 // and are thus marked as mutable.
351 mutable StringRef::iterator Position;
352 mutable const AtomT *Current;
353 mutable bool Started;
355 friend class JSONAtom;
356 friend class JSONParser;
357 friend class const_iterator;
360 /// \brief dyn_cast helpers
362 static bool classof(const JSONAtom *Atom) {
363 return Atom->getKind() == ContainerKind;
/// A pointer to this container type is trivially of this type: always returns
/// true.
365 static bool classof(const JSONContainer *Container) { return true; }
369 /// \brief A simple JSON array.
/// Instantiates JSONContainer with JSONValue elements, '[' / ']' delimiters and
/// the JK_Array kind.
370 typedef JSONContainer<JSONValue, '[', ']', JSONAtom::JK_Array> JSONArray;
372 /// \brief A JSON object: an iterable list of JSON key-value pairs.
373 typedef JSONContainer<JSONKeyValuePair, '{', '}', JSONAtom::JK_Object>
376 /// \brief Template adaptor to dispatch element parsing for values.
377 template <> JSONValue *JSONParser::parseElement();
379 /// \brief Template adaptor to dispatch element parsing for key value pairs.
380 template <> JSONKeyValuePair *JSONParser::parseElement();
382 /// \brief Parses the first element of a JSON array or object, or closes the
385 /// The method assumes that the current position is before the first character
386 /// of the element, with possible white space in between. When successful, it
387 /// returns the new position after parsing the element. Otherwise, if there is
388 /// no next value, it returns a default constructed StringRef::iterator.
389 template <typename AtomT, char StartChar, char EndChar>
390 StringRef::iterator JSONParser::parseFirstElement(const AtomT *&Element) {
391 assert(*Position == StartChar);
394 if (errorIfAtEndOfFile("value or end of container at start of container"))
395 return StringRef::iterator();
397 if (*Position == EndChar)
398 return StringRef::iterator();
400 Element = parseElement<AtomT>();
402 return StringRef::iterator();
407 /// \brief Parses the next element of a JSON array or object, or closes the
410 /// The method assumes that the current position is before the ',' which
411 /// separates the next element from the current element. When successful, it
412 /// returns the new position after parsing the element. Otherwise, if there is
413 /// no next value, it returns a default constructed StringRef::iterator.
414 template <typename AtomT, char EndChar>
415 StringRef::iterator JSONParser::parseNextElement(const AtomT *&Element) {
418 if (errorIfAtEndOfFile("',' or end of container for next element"))
424 if (errorIfAtEndOfFile("element in container"))
425 return StringRef::iterator();
427 Element = parseElement<AtomT>();
429 return StringRef::iterator();
434 return StringRef::iterator();
437 setExpectedError("',' or end of container for next element", *Position);
438 return StringRef::iterator();
442 } // end namespace llvm
444 #endif // LLVM_CLANG_TOOLING_JSON_PARSER_H