1 //===- TGPreprocessor.cpp - Preprocessor for TableGen ---------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Implement the Preprocessor for TableGen.
12 //===----------------------------------------------------------------------===//
14 #include "TGPreprocessor.h"
15 #include "llvm/ADT/Twine.h"
16 #include "llvm/Support/MemoryBuffer.h"
17 #include "llvm/Support/SourceMgr.h"
18 #include "llvm/Support/ToolOutputFile.h"
19 #include "llvm/TableGen/Error.h"
28 #define snprintf _snprintf
32 typedef std::map<std::string, std::string> TGPPEnvironment;
53 bool MatchSymbol(TGPPTokenKind Kind,
54 const char *BeginOfToken, const char *EndOfToken,
57 bool MatchSymbol(TGPPTokenKind Kind,
58 const char *BeginOfToken, const char *EndOfToken,
61 bool MatchIdNum(TGPPTokenKind Kind,
62 const char *BeginOfToken, const char *EndOfToken);
64 bool MatchIdentifier(TGPPTokenKind Kind,
65 const char *BeginOfToken, const char *EndOfToken);
67 bool MatchNumber(TGPPTokenKind Kind,
68 const char *BeginOfToken, const char *EndOfToken,
72 const MemoryBuffer *CurBuf;
74 bool IsInsideMacroStatement, WasEndOfLine;
76 bool IsEndOfBuffer(const char *Ptr) const {
77 return (!*Ptr && Ptr == CurBuf->getBufferEnd());
81 if (*CurPtr == '\r' || *CurPtr == '\n') {
82 if ((CurPtr[1] == '\r' || CurPtr[1] == '\n') && CurPtr[0] != CurPtr[1])
89 bool MatchPrefix(const char *Prefix, const char *Ptr) const {
90 while (*Ptr == ' ' || *Ptr == '\t')
92 return !strncmp(Prefix, Ptr, strlen(Prefix));
95 TGPPLexer(const SourceMgr &SM)
96 : CurBuf(SM.getMemoryBuffer(0)),
97 CurPtr(CurBuf->getBufferStart()),
98 IsInsideMacroStatement(false),
102 TGPPTokenKind NextToken(const char **BeginOfToken, const char **EndOfToken);
105 // preprocessor records
110 std::vector<std::string> IndexVars;
111 std::vector<TGPPRange> IndexRanges;
112 TGPPRecords LoopBody;
114 // tgpprecord_variable, tgpprecord_literal
117 bool EvaluateFor(const TGPPEnvironment &Env, raw_fd_ostream &OS) const;
// Evaluate a variable record: look up this record's Str in Env and stream the
// bound value to OS. When no binding exists an error is reported instead.
// NOTE(review): the return statements are elided in this listing; callers
// test the result of Evaluate() as a bool, so presumably a true result means
// failure -- confirm against the full file.
119 bool EvaluateVariable(const TGPPEnvironment &Env, raw_fd_ostream &OS) const {
120 TGPPEnvironment::const_iterator it_val = Env.find(Str);
121 if (it_val == Env.end()) {
// Unbound variable: report it by name.
122 PrintError("Var is not bound to any value: " + Str);
// Bound variable: emit its value.
125 OS << it_val->second;
129 bool EvaluateLiteral(const TGPPEnvironment &Env, raw_fd_ostream &OS) const {
// Construct a record of kind K with a default-constructed Str.
135 TGPPRecord(TGPPRecordKind K) : Kind(K) {}
// Construct a record of kind K whose Str is a copy of S.
136 TGPPRecord(TGPPRecordKind K, const std::string &S) : Kind(K), Str(S) {}
// Expose the loop body so that a caller can append records to it.
138 TGPPRecords *GetLoopBody() { return &LoopBody; }
// Add one loop index: V and R are appended to IndexVars and IndexRanges in
// lockstep, so entry i of one vector pairs with entry i of the other.
140 void AppendIndex(const std::string &V, const TGPPRange &R) {
141 IndexVars.push_back(V);
142 IndexRanges.push_back(R);
145 bool Evaluate(const TGPPEnvironment &Env, raw_fd_ostream &OS) const;
152 std::vector<std::string> Vals;
154 // tgpprange_sequence
// Default construction yields a list-kind range.
158 TGPPRange() : Kind(tgpprange_list) {}
// Construct a sequence-kind range with bounds F and T (size() counts both
// ends, i.e. the range is inclusive).
159 TGPPRange(long int F, long int T)
160 : Kind(tgpprange_sequence), From(F), To(T) {}
162 size_t size() const {
163 if (Kind == tgpprange_list)
166 return To - From + 1;
// Return the i-th value of the range as a string.
// NOTE(review): the list branch and the declaration of buf are elided in this
// listing; only the sequence formatting is visible here.
169 std::string at(size_t i) const {
170 if (Kind == tgpprange_list)
// Sequence case: the i-th value is From + i, formatted in decimal.
174 snprintf(buf, sizeof(buf), "%ld", From + (long int)i);
175 return std::string(buf);
179 void push_back(const std::string &S) {
180 if (Kind == tgpprange_list)
186 using namespace llvm;
188 bool llvm::MatchSymbol(TGPPTokenKind Kind,
189 const char *BeginOfToken, const char *EndOfToken,
191 return Kind == tgpptoken_symbol &&
192 BeginOfToken + 1 == EndOfToken &&
193 Symbol == *BeginOfToken;
196 bool llvm::MatchSymbol(TGPPTokenKind Kind,
197 const char *BeginOfToken, const char *EndOfToken,
198 const char *Symbol) {
199 return Kind == tgpptoken_symbol &&
200 BeginOfToken + strlen(Symbol) == EndOfToken &&
201 !strncmp(Symbol, BeginOfToken, EndOfToken - BeginOfToken);
204 bool llvm::MatchIdNum(TGPPTokenKind Kind,
205 const char *BeginOfToken, const char *EndOfToken) {
206 if (Kind != tgpptoken_symbol)
208 for (const char *i = BeginOfToken; i != EndOfToken; ++i)
209 if (*i != '_' && !isalnum(*i))
214 bool llvm::MatchIdentifier(TGPPTokenKind Kind,
215 const char *BeginOfToken, const char *EndOfToken) {
216 if (Kind != tgpptoken_symbol)
219 const char *i = BeginOfToken;
220 if (*i != '_' && !isalpha(*i))
222 for (++i; i != EndOfToken; ++i)
223 if (*i != '_' && !isalnum(*i))
229 bool llvm::MatchNumber(TGPPTokenKind Kind,
230 const char *BeginOfToken, const char *EndOfToken,
232 if (Kind != tgpptoken_symbol)
235 *Val = strtol(BeginOfToken, &e, 10);
236 return e == EndOfToken;
// Lex the next preprocessor token starting at CurPtr. On the paths visible
// here, *BeginOfToken / *EndOfToken are set to the token's text and one of
// tgpptoken_end, tgpptoken_newline, tgpptoken_symbol, tgpptoken_literal or
// tgpptoken_error is returned.
// NOTE(review): many lines of this function are elided in this listing (brace
// lines, pointer increments, some conditions); the comments below describe
// only what the visible lines establish.
239 TGPPTokenKind TGPPLexer::
240 NextToken(const char **BeginOfToken, const char **EndOfToken) {
// Remember whether the previous call ended a line, then reset the flag.
241 bool IsBeginOfLine = WasEndOfLine;
242 WasEndOfLine = false;
244 if (IsEndOfBuffer(CurPtr))
245 return tgpptoken_end;
// --- Inside a '#for' / '#end' statement: lex punctuation, names, numbers ---
247 else if (IsInsideMacroStatement) {
248 while (*CurPtr == ' ' || *CurPtr == '\t') // trim space, if any
251 const char *BeginOfSymbol = CurPtr;
// Leaving macro-statement mode and returning a newline token; the condition
// guarding this is elided. The visible lines on this path do not assign
// *BeginOfToken / *EndOfToken.
255 IsInsideMacroStatement = false;
257 return tgpptoken_newline;
// Single-character punctuation token.
260 else if (*CurPtr == '[' || *CurPtr == ']' ||
261 *CurPtr == '(' || *CurPtr == ')' ||
262 *CurPtr == ',' || *CurPtr == '=') {
263 *BeginOfToken = BeginOfSymbol;
264 *EndOfToken = ++CurPtr;
265 return tgpptoken_symbol;
// Identifier-like token: [_A-Za-z][_A-Za-z0-9]*.
// NOTE(review): isalpha/isalnum/isdigit receive a plain char here; values
// outside the unsigned char range are undefined for <cctype> -- consider a
// cast to unsigned char.
268 else if (*CurPtr == '_' || isalpha(*CurPtr)) {
270 while (*CurPtr == '_' || isalnum(*CurPtr))
272 *BeginOfToken = BeginOfSymbol;
273 *EndOfToken = CurPtr;
274 return tgpptoken_symbol;
// Number-like token: a sign character or a digit, then digits.
277 else if (*CurPtr == '+' || *CurPtr == '-' || isdigit(*CurPtr)) {
279 while (isdigit(*CurPtr))
281 *BeginOfToken = BeginOfSymbol;
282 *EndOfToken = CurPtr;
283 return tgpptoken_symbol;
// Anything else inside a macro statement is an error.
287 PrintError(BeginOfSymbol, "Unrecognizable token");
288 return tgpptoken_error;
// --- '#' outside a macro statement ---
292 else if (*CurPtr == '#') {
// '#for' / '#end' (first half of the condition elided): switch to
// macro-statement mode and lex again in that mode.
294 (MatchPrefix("for", CurPtr + 1) ||
295 MatchPrefix("end", CurPtr + 1))) {
297 IsInsideMacroStatement = true;
298 return NextToken(BeginOfToken, EndOfToken);
301 // special token #"# is translated to the literal "
302 else if (CurPtr[1] == '"' && CurPtr[2] == '#') {
// The token covers just the quote character between the two '#'.
303 *BeginOfToken = ++CurPtr;
304 *EndOfToken = ++CurPtr;
306 return tgpptoken_literal;
// Otherwise this is a '#variable#' reference; the token is the name without
// the surrounding '#' characters.
310 const char *BeginOfVar = ++CurPtr; // trim '#'
311 if (*CurPtr != '_' && !isalpha(*CurPtr)) {
312 PrintError(BeginOfVar, "Variable must start with [_A-Za-z]: ");
313 return tgpptoken_error;
315 while (*CurPtr == '_' || isalnum(*CurPtr))
317 if (*CurPtr != '#') {
318 PrintError(BeginOfVar, "Variable must end with #");
319 return tgpptoken_error;
321 *BeginOfToken = BeginOfVar;
322 *EndOfToken = CurPtr++; // trim '#'
323 return tgpptoken_symbol;
// --- Literal text: scan forward while tracking whether the cursor is inside
// a C comment (counted by nesting level), a '//' comment, or a string
// literal. The actions taken in each state are elided in this listing.
327 const char *BeginOfLiteral = CurPtr;
328 int CCommentLevel = 0;
329 bool BCPLComment = false;
330 bool StringLiteral = false;
331 for (; !IsEndOfBuffer(CurPtr); ++CurPtr) {
332 if (CCommentLevel > 0) {
333 if (CurPtr[0] == '/' && CurPtr[1] == '*') {
336 } else if (CurPtr[0] == '*' && CurPtr[1] == '/') {
339 } else if (IsNewLine())
343 else if (BCPLComment) {
350 else if (StringLiteral) {
351 // no string escape sequence in TableGen?
353 StringLiteral = false;
356 else if (CurPtr[0] == '/' && CurPtr[1] == '*') {
361 else if (CurPtr[0] == '/' && CurPtr[1] == '/') {
366 else if (*CurPtr == '"')
367 StringLiteral = true;
369 else if (IsNewLine()) {
375 else if (*CurPtr == '#')
// The literal spans from where scanning began to the current cursor.
379 *BeginOfToken = BeginOfLiteral;
380 *EndOfToken = CurPtr;
381 return tgpptoken_literal;
385 EvaluateFor(const TGPPEnvironment &Env, raw_fd_ostream &OS) const {
386 std::vector<TGPPRange>::const_iterator ri, re;
388 // calculate the min size
389 ri = IndexRanges.begin();
390 re = IndexRanges.begin();
391 size_t n = ri->size();
392 for (; ri != re; ++ri) {
393 size_t m = ri->size();
398 for (size_t which_val = 0; which_val < n; ++which_val) {
399 // construct nested environment
400 TGPPEnvironment NestedEnv(Env);
401 std::vector<std::string>::const_iterator vi = IndexVars.begin();
402 for (ri = IndexRanges.begin(), re = IndexRanges.end();
403 ri != re; ++vi, ++ri) {
404 NestedEnv.insert(std::make_pair(*vi, ri->at(which_val)));
407 for (TGPPRecords::const_iterator i = LoopBody.begin(), e = LoopBody.end();
409 if (i->Evaluate(NestedEnv, OS))
417 Evaluate(const TGPPEnvironment &Env, raw_fd_ostream &OS) const {
420 return EvaluateFor(Env, OS);
421 case tgpprecord_variable:
422 return EvaluateVariable(Env, OS);
423 case tgpprecord_literal:
424 return EvaluateLiteral(Env, OS);
426 PrintError("Unknown kind of record: " + Kind);
// Parse a run of tokens into CurRecords until the block ends. TopLevel says
// whether this is the outermost block, in which case an '#end' is an error.
// NOTE(review): brace lines, return statements and the switch header are
// elided in this listing; comments describe the visible lines only.
432 bool TGPreprocessor::ParseBlock(bool TopLevel) {
434 const char *BeginOfToken, *EndOfToken;
435 while ((Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken)) !=
// NOTE(review): Symbol is built before Kind is inspected. The visible
// newline/error returns of the lexer do not assign the token pointers --
// confirm they are always valid at this point.
437 std::string Symbol(BeginOfToken, EndOfToken);
439 case tgpptoken_symbol:
// '#for' opens a nested loop (handling elided).
440 if (Symbol == "for") {
// '#end' closes the current block; it is rejected at top level and must be
// the last token on its line.
443 } else if (Symbol == "end") {
445 PrintError(BeginOfToken, "No block to end here");
448 if ((Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken)) !=
450 PrintError(BeginOfToken, "Tokens after #end");
454 } else if (Symbol == "NAME") {
455 // treat '#NAME#' as a literal
456 CurRecords->push_back(
457 TGPPRecord(tgpprecord_literal,
458 std::string("#NAME#")));
// Any other symbol is recorded as a variable reference.
460 CurRecords->push_back(
461 TGPPRecord(tgpprecord_variable,
462 std::string(BeginOfToken, EndOfToken)));
// Literal text is recorded as-is.
465 case tgpptoken_literal:
466 CurRecords->push_back(
467 TGPPRecord(tgpprecord_literal,
468 std::string(BeginOfToken, EndOfToken)));
// Parse a '#for' statement: one or more comma-separated 'identifier = range'
// clauses ending at a newline, followed by the loop body. The finished loop
// record is appended to the record list that was current on entry.
// NOTE(review): the loop construct, brace lines and return statements are
// elided in this listing.
477 bool TGPreprocessor::ParseForLoop() {
478 TGPPRecord ForLoopRecord(tgpprecord_for);
482 const char *BeginOfToken, *EndOfToken;
// Index variable name.
484 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
485 if (!MatchIdentifier(Kind, BeginOfToken, EndOfToken)) {
486 PrintError(BeginOfToken, "Not an identifier");
489 std::string IndexVar(BeginOfToken, EndOfToken);
// '=' between the name and its range.
491 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
492 if (!MatchSymbol(Kind, BeginOfToken, EndOfToken, '=')) {
493 PrintError(BeginOfToken, "Need a '=' here");
// The range itself; the (variable, range) pair is added to the loop record.
498 if (ParseRange(&Range))
500 ForLoopRecord.AppendIndex(IndexVar, Range);
// After a clause the next token is checked for a newline and then for a
// ',' (the actions taken are elided).
502 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
503 if (Kind == tgpptoken_newline)
505 if (!MatchSymbol(Kind, BeginOfToken, EndOfToken, ',')) {
506 PrintError(BeginOfToken, "Need a ',' here");
// Redirect CurRecords to the loop body while the nested block is parsed,
// then restore it and append the finished loop record.
512 TGPPRecords *LastCurRecords = CurRecords;
513 CurRecords = ForLoopRecord.GetLoopBody();
515 if (ParseBlock(false))
518 CurRecords = LastCurRecords;
519 CurRecords->push_back(ForLoopRecord);
// Parse a loop index range into *Range. Two forms are recognised:
//   '[' item (',' item)* ']'      -- list of identifiers or numbers
//   'sequence' '(' from ',' to ')' -- integer sequence
// Anything else is reported as an illegal range.
// NOTE(review): loop constructs, brace lines and return statements are elided
// in this listing.
523 bool TGPreprocessor::ParseRange(TGPPRange *Range) {
525 const char *BeginOfToken, *EndOfToken;
527 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
// --- List form ---
529 if (MatchSymbol(Kind, BeginOfToken, EndOfToken, '[')) {
// Each item must consist of identifier/number characters; it is appended to
// the range as text.
531 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
532 if (!MatchIdNum(Kind, BeginOfToken, EndOfToken)) {
533 PrintError(BeginOfToken, "Need a identifier or a number here");
536 Range->push_back(std::string(BeginOfToken, EndOfToken));
// After an item the next token is checked for ']' and then for ','.
538 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
539 if (MatchSymbol(Kind, BeginOfToken, EndOfToken, ']'))
541 if (!MatchSymbol(Kind, BeginOfToken, EndOfToken, ',')) {
542 PrintError(BeginOfToken, "Need a comma here");
// --- Sequence form ---
549 else if (MatchSymbol(Kind, BeginOfToken, EndOfToken, "sequence")) {
552 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
553 if (!MatchSymbol(Kind, BeginOfToken, EndOfToken, '(')) {
554 PrintError(BeginOfToken, "Need a left parentheses here");
// First bound.
558 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
559 if (!MatchNumber(Kind, BeginOfToken, EndOfToken, &from)) {
560 PrintError(BeginOfToken, "Not a number");
564 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
565 if (!MatchSymbol(Kind, BeginOfToken, EndOfToken, ',')) {
566 PrintError(BeginOfToken, "Need a comma here");
// Second bound.
570 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
571 if (!MatchNumber(Kind, BeginOfToken, EndOfToken, &to)) {
572 PrintError(BeginOfToken, "Not a number");
576 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
577 if (!MatchSymbol(Kind, BeginOfToken, EndOfToken, ')')) {
578 PrintError(BeginOfToken, "Need a right parentheses here");
// Replace *Range with a sequence range built from the two parsed bounds.
582 *Range = TGPPRange(from, to);
// Neither form matched.
586 PrintError(BeginOfToken, "illegal range of loop index");
// Run the preprocessor: parse the top-level block into TopLevelRecords, then
// evaluate each of those records in turn, writing to Out.os().
// NOTE(review): the Lexer/Env setup and the return statements are elided in
// this listing.
590 bool TGPreprocessor::PreprocessFile() {
591 TGPPLexer TheLexer(SrcMgr);
592 TGPPRecords TopLevelRecords;
// Parsing appends to whatever CurRecords points at; start at the top level.
595 CurRecords = &TopLevelRecords;
596 if (ParseBlock(true))
// Evaluation pass over the parsed records.
600 for (TGPPRecords::const_iterator i = TopLevelRecords.begin(),
601 e = TopLevelRecords.end();
603 if (i->Evaluate(Env, Out.os()))