1 //===- TGPreprocessor.cpp - Preprocessor for TableGen ---------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Implement the Preprocessor for TableGen.
12 //===----------------------------------------------------------------------===//
14 #include "TGPreprocessor.h"
15 #include "llvm/ADT/Twine.h"
16 #include "llvm/Support/MemoryBuffer.h"
17 #include "llvm/Support/SourceMgr.h"
18 #include "llvm/Support/ToolOutputFile.h"
19 #include "llvm/TableGen/Error.h"
28 typedef std::map<std::string, std::string> TGPPEnvironment;
// Forward declarations of the token-matching helpers used by the parser.
// Each one receives the token kind together with the BeginOfToken/EndOfToken
// pointers filled in by TGPPLexer::NextToken; the definitions appear later in
// this file. (The final parameter line of some declarations is not visible
// in this view.)

// Single-character symbol match.
49 bool MatchSymbol(TGPPTokenKind Kind,
50 const char *BeginOfToken, const char *EndOfToken,
// Multi-character (C string) symbol match.
53 bool MatchSymbol(TGPPTokenKind Kind,
54 const char *BeginOfToken, const char *EndOfToken,
// Token made only of '_' and alphanumeric characters.
57 bool MatchIdNum(TGPPTokenKind Kind,
58 const char *BeginOfToken, const char *EndOfToken);
// Token shaped like an identifier: first character '_' or a letter.
60 bool MatchIdentifier(TGPPTokenKind Kind,
61 const char *BeginOfToken, const char *EndOfToken);
// Token that parses completely as a base-10 integer.
63 bool MatchNumber(TGPPTokenKind Kind,
64 const char *BeginOfToken, const char *EndOfToken,
// Buffer being lexed; the constructor takes it from SM.getMemoryBuffer(0).
68 const MemoryBuffer *CurBuf;
// IsInsideMacroStatement: set by NextToken when it recognises "#for"/"#end"
// and cleared again when it returns tgpptoken_newline.
// WasEndOfLine: read (and then reset) at the start of every NextToken call
// to learn whether the previous token ended a line.
70 bool IsInsideMacroStatement, WasEndOfLine;
// True when Ptr points at a NUL byte that is also the end of CurBuf.
72 bool IsEndOfBuffer(const char *Ptr) const {
73 return (!*Ptr && Ptr == CurBuf->getBufferEnd());
// Newline detection on *CurPtr: '\r' or '\n' starts a line break, and a
// following *different* newline character ("\r\n" or "\n\r") is recognised
// as part of the same break. The enclosing function header and the action
// taken for the pair are not visible in this view.
77 if (*CurPtr == '\r' || *CurPtr == '\n') {
78 if ((CurPtr[1] == '\r' || CurPtr[1] == '\n') && CurPtr[0] != CurPtr[1])
// Tests whether the text at Ptr, after leading spaces/tabs, begins with
// Prefix. The loop body is not visible here - presumably it advances Ptr.
85 bool MatchPrefix(const char *Prefix, const char *Ptr) const {
86 while (*Ptr == ' ' || *Ptr == '\t')
88 return !strncmp(Prefix, Ptr, strlen(Prefix));
// Starts lexing at the beginning of the source manager's buffer 0, outside
// of any macro statement.
91 TGPPLexer(const SourceMgr &SM)
92 : CurBuf(SM.getMemoryBuffer(0)),
93 CurPtr(CurBuf->getBufferStart()),
94 IsInsideMacroStatement(false),
// Returns the kind of the next token and, for symbol/literal tokens, its
// character range through the two out-parameters.
98 TGPPTokenKind NextToken(const char **BeginOfToken, const char **EndOfToken);
101 // preprocessor records
// State used by for-loop records. IndexVars and IndexRanges are parallel
// vectors (AppendIndex pushes onto both); LoopBody holds the records that
// are evaluated once per loop iteration.
106 std::vector<std::string> IndexVars;
107 std::vector<TGPPRange> IndexRanges;
108 TGPPRecords LoopBody;
110 // tgpprecord_variable, tgpprecord_literal
// Expands a for-loop record; defined out of line.
113 bool EvaluateFor(const TGPPEnvironment &Env, raw_fd_ostream &OS) const;
// Looks Str up in Env. An unbound name is reported through PrintError;
// otherwise the bound value is written to OS. The return statements are not
// visible in this view.
115 bool EvaluateVariable(const TGPPEnvironment &Env, raw_fd_ostream &OS) const {
116 TGPPEnvironment::const_iterator it_val = Env.find(Str);
117 if (it_val == Env.end()) {
118 PrintError("Var is not bound to any value: " + Str);
121 OS << it_val->second;
// Evaluates a literal record (body not visible in this view).
125 bool EvaluateLiteral(const TGPPEnvironment &Env, raw_fd_ostream &OS) const {
// Record with a kind only (used for for-loop records).
131 TGPPRecord(TGPPRecordKind K) : Kind(K) {}
// Record with a kind and its text (variable name or literal text).
132 TGPPRecord(TGPPRecordKind K, const std::string &S) : Kind(K), Str(S) {}
// Gives the parser direct access to the loop body so it can append to it.
134 TGPPRecords *GetLoopBody() { return &LoopBody; }
// Registers one loop index: variable name V iterating over range R.
136 void AppendIndex(const std::string &V, const TGPPRange &R) {
137 IndexVars.push_back(V);
138 IndexRanges.push_back(R);
// Dispatches on the record kind; defined out of line.
141 bool Evaluate(const TGPPEnvironment &Env, raw_fd_ostream &OS) const;
// Values of a list-kind range, in iteration order.
148 std::vector<std::string> Vals;
150 // tgpprange_sequence
// (The declaration of the From/To bounds used by the sequence constructor
// is not visible in this view.)
// Default construction yields an empty list range that push_back can fill.
154 TGPPRange() : Kind(tgpprange_list) {}
// Integer sequence range with bounds F and T.
155 TGPPRange(long int F, long int T)
156 : Kind(tgpprange_sequence), From(F), To(T) {}
158 size_t size() const {
159 if (Kind == tgpprange_list)
162 return To - From + 1;
// Returns the i-th value of the range as a string. The list-kind branch is
// not visible in this view; for a sequence the value From + i is formatted
// as a decimal number.
165 std::string at(size_t i) const {
166 if (Kind == tgpprange_list)
170 snprintf(buf, sizeof(buf), "%ld", From + (long int)i);
171 return std::string(buf);
// Appends a value; only meaningful for list-kind ranges (the guarded
// statement is not visible in this view).
175 void push_back(const std::string &S) {
176 if (Kind == tgpprange_list)
182 using namespace llvm;
184 bool llvm::MatchSymbol(TGPPTokenKind Kind,
185 const char *BeginOfToken, const char *EndOfToken,
187 return Kind == tgpptoken_symbol &&
188 BeginOfToken + 1 == EndOfToken &&
189 Symbol == *BeginOfToken;
192 bool llvm::MatchSymbol(TGPPTokenKind Kind,
193 const char *BeginOfToken, const char *EndOfToken,
194 const char *Symbol) {
195 return Kind == tgpptoken_symbol &&
196 BeginOfToken + strlen(Symbol) == EndOfToken &&
197 !strncmp(Symbol, BeginOfToken, EndOfToken - BeginOfToken);
200 bool llvm::MatchIdNum(TGPPTokenKind Kind,
201 const char *BeginOfToken, const char *EndOfToken) {
202 if (Kind != tgpptoken_symbol)
204 for (const char *i = BeginOfToken; i != EndOfToken; ++i)
205 if (*i != '_' && !isalnum(*i))
210 bool llvm::MatchIdentifier(TGPPTokenKind Kind,
211 const char *BeginOfToken, const char *EndOfToken) {
212 if (Kind != tgpptoken_symbol)
215 const char *i = BeginOfToken;
216 if (*i != '_' && !isalpha(*i))
218 for (++i; i != EndOfToken; ++i)
219 if (*i != '_' && !isalnum(*i))
225 bool llvm::MatchNumber(TGPPTokenKind Kind,
226 const char *BeginOfToken, const char *EndOfToken,
228 if (Kind != tgpptoken_symbol)
231 *Val = strtol(BeginOfToken, &e, 10);
232 return e == EndOfToken;
// Lexes the next preprocessor token starting at CurPtr.
//
// Returns the token kind. For tgpptoken_symbol and tgpptoken_literal the
// half-open character range of the token is stored through BeginOfToken and
// EndOfToken. None of the visible tgpptoken_end / tgpptoken_newline /
// tgpptoken_error returns is preceded by a visible assignment to them.
//
// Three modes are distinguishable in the visible code:
//  1. inside a macro statement (after "#for"/"#end", until the newline):
//     blanks are skipped and punctuation, identifiers and signed numbers are
//     returned as symbols;
//  2. at a '#': either a macro statement starts, or the escape #"# / a
//     #variable# reference is lexed;
//  3. anything else: plain text is scanned as a literal while tracking
//     comments and string literals.
// Several lines of this function are missing from this view, so the notes
// below describe only what the visible statements establish.
235 TGPPTokenKind TGPPLexer::
236 NextToken(const char **BeginOfToken, const char **EndOfToken) {
// Remember whether the previous token ended a line, then reset the flag.
237 bool IsBeginOfLine = WasEndOfLine;
238 WasEndOfLine = false;
240 if (IsEndOfBuffer(CurPtr))
241 return tgpptoken_end;
// --- Mode 1: tokens of a "#for ..." / "#end" statement -------------------
243 else if (IsInsideMacroStatement) {
244 while (*CurPtr == ' ' || *CurPtr == '\t') // trim space, if any
247 const char *BeginOfSymbol = CurPtr;
// End of the statement line: leave macro-statement mode.
251 IsInsideMacroStatement = false;
253 return tgpptoken_newline;
// Single-character punctuation becomes a one-character symbol token.
256 else if (*CurPtr == '[' || *CurPtr == ']' ||
257 *CurPtr == '(' || *CurPtr == ')' ||
258 *CurPtr == ',' || *CurPtr == '=') {
259 *BeginOfToken = BeginOfSymbol;
260 *EndOfToken = ++CurPtr;
261 return tgpptoken_symbol;
// Identifier / keyword: [_A-Za-z][_A-Za-z0-9]*.
// NOTE(review): isalpha/isalnum/isdigit receive a plain char here and below;
// confirm that input bytes >= 0x80 cannot reach these calls.
264 else if (*CurPtr == '_' || isalpha(*CurPtr)) {
266 while (*CurPtr == '_' || isalnum(*CurPtr))
268 *BeginOfToken = BeginOfSymbol;
269 *EndOfToken = CurPtr;
270 return tgpptoken_symbol;
// Number: starts with a sign or a digit, continues with digits.
273 else if (*CurPtr == '+' || *CurPtr == '-' || isdigit(*CurPtr)) {
275 while (isdigit(*CurPtr))
277 *BeginOfToken = BeginOfSymbol;
278 *EndOfToken = CurPtr;
279 return tgpptoken_symbol;
// Anything else inside a macro statement is an error.
283 PrintError(BeginOfSymbol, "Unrecognizable token");
284 return tgpptoken_error;
// --- Mode 2: '#' introduces a statement, an escape or a variable ----------
288 else if (*CurPtr == '#') {
// "#for" / "#end" (blanks allowed after the '#'): switch to macro-statement
// mode and lex again so the keyword comes back as a symbol. The first part
// of this condition is not visible in this view.
290 (MatchPrefix("for", CurPtr + 1) ||
291 MatchPrefix("end", CurPtr + 1))) {
293 IsInsideMacroStatement = true;
294 return NextToken(BeginOfToken, EndOfToken);
297 // the special token #"# is translated to a literal "
// The returned range covers only the '"' between the two '#'.
298 else if (CurPtr[1] == '"' && CurPtr[2] == '#') {
299 *BeginOfToken = ++CurPtr;
300 *EndOfToken = ++CurPtr;
302 return tgpptoken_literal;
// #name#: a variable reference. The returned range excludes both '#'.
306 const char *BeginOfVar = ++CurPtr; // trim '#'
307 if (*CurPtr != '_' && !isalpha(*CurPtr)) {
308 PrintError(BeginOfVar, "Variable must start with [_A-Za-z]: ");
309 return tgpptoken_error;
311 while (*CurPtr == '_' || isalnum(*CurPtr))
313 if (*CurPtr != '#') {
314 PrintError(BeginOfVar, "Variable must end with #");
315 return tgpptoken_error;
317 *BeginOfToken = BeginOfVar;
318 *EndOfToken = CurPtr++; // trim '#'
319 return tgpptoken_symbol;
// --- Mode 3: literal text --------------------------------------------------
// Scan forward while tracking whether the cursor is inside a (nestable)
// /* */ comment, a // comment or a string literal. The statements that
// update this state and leave the loop are mostly not visible in this view.
323 const char *BeginOfLiteral = CurPtr;
324 int CCommentLevel = 0;
325 bool BCPLComment = false;
326 bool StringLiteral = false;
327 for (; !IsEndOfBuffer(CurPtr); ++CurPtr) {
// Inside a C comment: watch for nested openers, closers and newlines.
328 if (CCommentLevel > 0) {
329 if (CurPtr[0] == '/' && CurPtr[1] == '*') {
332 } else if (CurPtr[0] == '*' && CurPtr[1] == '/') {
335 } else if (IsNewLine())
// Inside a // comment.
339 else if (BCPLComment) {
// Inside a string literal.
346 else if (StringLiteral) {
347 // no string escape sequence in TableGen?
349 StringLiteral = false;
// Outside comments and strings: detect where one of them starts.
352 else if (CurPtr[0] == '/' && CurPtr[1] == '*') {
357 else if (CurPtr[0] == '/' && CurPtr[1] == '/') {
362 else if (*CurPtr == '"')
363 StringLiteral = true;
365 else if (IsNewLine()) {
// A '#' in plain text is special-cased here (action not visible).
371 else if (*CurPtr == '#')
// Everything scanned since BeginOfLiteral forms one literal token.
375 *BeginOfToken = BeginOfLiteral;
376 *EndOfToken = CurPtr;
377 return tgpptoken_literal;
381 EvaluateFor(const TGPPEnvironment &Env, raw_fd_ostream &OS) const {
382 std::vector<TGPPRange>::const_iterator ri, re;
384 // calculate the min size
385 ri = IndexRanges.begin();
386 re = IndexRanges.begin();
387 size_t n = ri->size();
388 for (; ri != re; ++ri) {
389 size_t m = ri->size();
394 for (size_t which_val = 0; which_val < n; ++which_val) {
395 // construct nested environment
396 TGPPEnvironment NestedEnv(Env);
397 std::vector<std::string>::const_iterator vi = IndexVars.begin();
398 for (ri = IndexRanges.begin(), re = IndexRanges.end();
399 ri != re; ++vi, ++ri) {
400 NestedEnv.insert(std::make_pair(*vi, ri->at(which_val)));
403 for (TGPPRecords::const_iterator i = LoopBody.begin(), e = LoopBody.end();
405 if (i->Evaluate(NestedEnv, OS))
413 Evaluate(const TGPPEnvironment &Env, raw_fd_ostream &OS) const {
416 return EvaluateFor(Env, OS);
417 case tgpprecord_variable:
418 return EvaluateVariable(Env, OS);
419 case tgpprecord_literal:
420 return EvaluateLiteral(Env, OS);
422 PrintError("Unknown kind of record: " + Kind);
// Parses a sequence of tokens into records appended to CurRecords.
//
// TopLevel distinguishes the outermost block from a loop body; judging by
// the "No block to end here" diagnostic it decides whether "#end" is legal
// (the condition itself is not visible in this view).
428 bool TGPreprocessor::ParseBlock(bool TopLevel) {
430 const char *BeginOfToken, *EndOfToken;
// Pull tokens from the lexer; the terminating comparison is not visible.
431 while ((Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken)) !=
// NOTE(review): Symbol is built from the token pointers for every token
// reaching this point, but the lexer's visible error returns do not set
// them - confirm an error token cannot get here with stale pointers.
433 std::string Symbol(BeginOfToken, EndOfToken);
435 case tgpptoken_symbol:
// "for" opens a nested loop (handling not visible in this view).
436 if (Symbol == "for") {
// "end" closes the current block and must be the last token on its line.
439 } else if (Symbol == "end") {
441 PrintError(BeginOfToken, "No block to end here");
444 if ((Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken)) !=
446 PrintError(BeginOfToken, "Tokens after #end");
450 } else if (Symbol == "NAME") {
451 // treat '#NAME#' as a literal
// The text "#NAME#" is emitted unchanged instead of being looked up.
452 CurRecords->push_back(
453 TGPPRecord(tgpprecord_literal,
454 std::string("#NAME#")));
// Any other symbol is a variable reference resolved at evaluation time.
456 CurRecords->push_back(
457 TGPPRecord(tgpprecord_variable,
458 std::string(BeginOfToken, EndOfToken)));
// Plain text is copied through as a literal record.
461 case tgpptoken_literal:
462 CurRecords->push_back(
463 TGPPRecord(tgpprecord_literal,
464 std::string(BeginOfToken, EndOfToken)));
// Parses the remainder of a "#for" statement and its body.
//
// The header has the shape  IDENT '=' RANGE { ',' IDENT '=' RANGE } NEWLINE.
// Each IDENT/RANGE pair is registered on the loop record. The body is then
// parsed with CurRecords temporarily redirected to the record's loop body,
// and the finished record is appended to the enclosing record list.
// Control-flow lines (loop, returns) are not visible in this view.
473 bool TGPreprocessor::ParseForLoop() {
474 TGPPRecord ForLoopRecord(tgpprecord_for);
478 const char *BeginOfToken, *EndOfToken;
// Index variable name.
480 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
481 if (!MatchIdentifier(Kind, BeginOfToken, EndOfToken)) {
482 PrintError(BeginOfToken, "Not an identifier");
485 std::string IndexVar(BeginOfToken, EndOfToken);
// '=' between the variable and its range.
487 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
488 if (!MatchSymbol(Kind, BeginOfToken, EndOfToken, '=')) {
489 PrintError(BeginOfToken, "Need a '=' here");
// The range the variable iterates over.
494 if (ParseRange(&Range))
496 ForLoopRecord.AppendIndex(IndexVar, Range);
// A newline ends the header; a ',' introduces another index variable.
498 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
499 if (Kind == tgpptoken_newline)
501 if (!MatchSymbol(Kind, BeginOfToken, EndOfToken, ',')) {
502 PrintError(BeginOfToken, "Need a ',' here");
// Parse the body into the loop record, then restore the outer record list.
508 TGPPRecords *LastCurRecords = CurRecords;
509 CurRecords = ForLoopRecord.GetLoopBody();
511 if (ParseBlock(false))
514 CurRecords = LastCurRecords;
// push_back stores a copy of the record, including its parsed body.
515 CurRecords->push_back(ForLoopRecord);
// Parses the range of a loop index into *Range. Two forms are accepted:
//   '[' ITEM { ',' ITEM } ']'     - explicit list; each ITEM consists of
//                                   '_' and alphanumeric characters
//   sequence '(' NUM ',' NUM ')'  - integer sequence between two bounds
// Anything else is reported as "illegal range of loop index".
// Return statements and loop structure are not visible in this view.
519 bool TGPreprocessor::ParseRange(TGPPRange *Range) {
521 const char *BeginOfToken, *EndOfToken;
523 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
// --- list form ---------------------------------------------------------------
525 if (MatchSymbol(Kind, BeginOfToken, EndOfToken, '[')) {
527 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
528 if (!MatchIdNum(Kind, BeginOfToken, EndOfToken)) {
529 PrintError(BeginOfToken, "Need a identifier or a number here");
// Items are stored as text in the caller's range object.
532 Range->push_back(std::string(BeginOfToken, EndOfToken));
// After an item: ']' ends the list, ',' continues it.
534 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
535 if (MatchSymbol(Kind, BeginOfToken, EndOfToken, ']'))
537 if (!MatchSymbol(Kind, BeginOfToken, EndOfToken, ',')) {
538 PrintError(BeginOfToken, "Need a comma here");
// --- sequence form -----------------------------------------------------------
545 else if (MatchSymbol(Kind, BeginOfToken, EndOfToken, "sequence")) {
548 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
549 if (!MatchSymbol(Kind, BeginOfToken, EndOfToken, '(')) {
550 PrintError(BeginOfToken, "Need a left parentheses here");
// First bound.
554 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
555 if (!MatchNumber(Kind, BeginOfToken, EndOfToken, &from)) {
556 PrintError(BeginOfToken, "Not a number");
560 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
561 if (!MatchSymbol(Kind, BeginOfToken, EndOfToken, ',')) {
562 PrintError(BeginOfToken, "Need a comma here");
// Second bound.
566 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
567 if (!MatchNumber(Kind, BeginOfToken, EndOfToken, &to)) {
568 PrintError(BeginOfToken, "Not a number");
572 Kind = Lexer->NextToken(&BeginOfToken, &EndOfToken);
573 if (!MatchSymbol(Kind, BeginOfToken, EndOfToken, ')')) {
574 PrintError(BeginOfToken, "Need a right parentheses here");
// Replace the caller's range with the parsed sequence.
578 *Range = TGPPRange(from, to);
// Neither '[' nor "sequence".
582 PrintError(BeginOfToken, "illegal range of loop index");
// Runs the preprocessor in two phases: parse the input held by SrcMgr into
// a list of top-level records, then evaluate every record against Env,
// writing the expansion to Out.os().
// The return statements and the hookup of the lexer to the parser are not
// visible in this view.
586 bool TGPreprocessor::PreprocessFile() {
587 TGPPLexer TheLexer(SrcMgr);
588 TGPPRecords TopLevelRecords;
// Phase 1: parse; top-level records are collected in TopLevelRecords.
591 CurRecords = &TopLevelRecords;
592 if (ParseBlock(true))
// Phase 2: evaluate the records in order.
596 for (TGPPRecords::const_iterator i = TopLevelRecords.begin(),
597 e = TopLevelRecords.end();
599 if (i->Evaluate(Env, Out.os()))