Repair/RepairInterpreter/token.cc

   1 #include <string.h>
   2 #include <stdlib.h>
   3 #include <stdio.h>
   4 #include "common.h"
   5 #include "token.h"
   6
   7
   8 // class Token
   9
  10 Token::Token(char *s, int tt) {
  11   str=s;
  12   token_type=tt;
  13 }
  14
  15 Token::Token(const Token & t) {
  16   token_type=t.token_type;
  17   str=copystr(t.str);
  18 }
  19
  20 Token::Token() {
  21   token_type=-1;
  22   str=NULL;
  23 }
  24
  25
  26 Token& Token::operator=(const Token &right) {
  27   if (&right != this) {
  28     token_type=right.token_type;
  29     if (str!=NULL)
  30     delete[](str);
  31     str=copystr(right.str);
  32   }
  33   return *this;
  34 }
  35
  36
  37 Token::~Token() {
  38   if (str!=NULL)
  39     delete[](str);
  40 }
  41
  42
  43
  44
  45 // class Reader
  46
  47 Reader::Reader(istream * is) {
  48   readerin=is;
  49   pos=0;
  50 }
  51
  52
  53 Token Reader::peakahead() {
  54   Token t=checktoken();
  55   if (t.token_type!=-1) {
  56     return t;
  57   }
  58   while(true) {
  59     int nextchar=readerin->get();
  60     switch(nextchar) {
  61     case ' ':
  62       break;
  63     case '\t':
  64       break;
  65     case '/':
  66       if (readerin->peek()=='*') {
  67         /*have comment region */
  68         readerin->get();
  69         int state=0;
  70         for(int ch=readerin->get();ch!=EOF&&state!=2;ch=readerin->get()) {
  71           switch(ch) {
  72           case '*':
  73             state=1;
  74             break;
  75           case '/':
  76             if (state==1)
  77               state=2;
  78             break;
  79           default:
  80             state=0;
  81           }
  82         }
  83         if (state!=2) error();
  84         break;
  85       }
  86     default:
  87       buf[pos++]=nextchar;
  88       Token t=checktoken();
  89       if (t.token_type!=-1)
  90         return t;
  91     }
  92   }
  93 }
  94
  95
  96 Token Reader::readnext() {
  97   Token t=peakahead();
  98   pos=0;
  99   return t;
 100 }
 101
 102 Token Reader::checktoken() {
 103   buf[pos]=0;
 104   if (pos==0) return Token();
 105   switch(buf[0]) {
 106   case '[':
 107     return Token(copystr(buf),TOKEN_OPENBRACK);
 108   case ']':
 109     return Token(copystr(buf),TOKEN_CLOSEBRACK);
 110   case '{':
 111     return Token(copystr(buf),TOKEN_OPENBRACE);
 112   case '}':
 113     return Token(copystr(buf),TOKEN_CLOSEBRACE);
 114   case '(':
 115     return Token(copystr(buf),TOKEN_OPENPAREN);
 116   case ')':
 117     return Token(copystr(buf),TOKEN_CLOSEPAREN);
 118   case ',':
 119     return Token(copystr(buf),TOKEN_COMMA);
 120   case ';':
 121     return Token(copystr(buf),TOKEN_SEMI);
 122   case ':':
 123     return Token(copystr(buf),TOKEN_COLON);
 124   case '=':
 125     if (pos==1) {
 126       if (readerin->peek()!='>')
 127         return Token(copystr(buf),TOKEN_EQUALS);
 128     } else
 129       return Token(copystr(buf),TOKEN_IMPLIES);
 130     break;
 131   case '<':
 132     if (pos==1) {
 133       if (readerin->peek()!='=')
 134         return Token(copystr(buf),TOKEN_LT);
 135     } else
 136       return Token(copystr(buf),TOKEN_LTE);
 137     break;
 138   case '>':
 139     if (pos==1) {
 140       if (readerin->peek()!='=')
 141         return Token(copystr(buf),TOKEN_GT);
 142     } else
 143       return Token(copystr(buf),TOKEN_GTE);
 144     break;
 145   case '.':
 146     if (pos==1) {
 147       if (readerin->peek()!='~')
 148         return Token(copystr(buf),TOKEN_DOT);
 149     } else
 150       return Token(copystr(buf),TOKEN_DOTINV);
 151     break;
 152   case '|':
 153     return Token(copystr(buf),TOKEN_BAR);
 154   case '!':
 155     return Token(copystr(buf),TOKEN_NOT);
 156   case '-':
 157     if (pos==1) {
 158       if (readerin->peek()!='>')
 159         return Token(copystr(buf),TOKEN_SUB);
 160     } else
 161       return Token(copystr(buf),TOKEN_ARROW);
 162     break;
 163   case '+':
 164     return Token(copystr(buf),TOKEN_ADD);
 165   case '*':
 166     return Token(copystr(buf),TOKEN_MULT);
 167   case '/':
 168     return Token(copystr(buf),TOKEN_DIV);
 169   case '\n':
 170     return Token(copystr(buf),TOKEN_EOL);
 171   case EOF:
 172     return Token(copystr(buf),TOKEN_EOF);
 173   default:
 174     if(breakchar(readerin->peek())) {
 175       /*we've got token*/
 176       if (strcmp(buf,"in")==0)
 177         return Token(copystr(buf),TOKEN_IN);
 178       if (strcmp(buf,"isvalid")==0)
 179         return Token(copystr(buf),TOKEN_ISVALID);
 180       if (strcmp(buf,"and")==0)
 181         return Token(copystr(buf),TOKEN_AND);
 182       if (strcmp(buf,"or")==0)
 183         return Token(copystr(buf),TOKEN_OR);
 184       if (strcmp(buf,"crash")==0)
 185         return Token(copystr(buf),TOKEN_CRASH);
 186       if (strcmp(buf,"cast")==0)
 187         return Token(copystr(buf),TOKEN_CAST);
 188       if (strcmp(buf,"NULL")==0)
 189         return Token(copystr(buf),TOKEN_NULL);
 190       if (strcmp(buf,"partition")==0)
 191         return Token(copystr(buf),TOKEN_PARTITION);
 192       if (strcmp(buf,"many")==0)
 193         return Token(copystr(buf),TOKEN_MANY);
 194       if (strcmp(buf,"set")==0)
 195         return Token(copystr(buf),TOKEN_SET);
 196       if (strcmp(buf,"structure")==0)
 197         return Token(copystr(buf),TOKEN_STRUCTURE);
 198       if (strcmp(buf,"reserved")==0)
 199         return Token(copystr(buf),TOKEN_RESERVED);
 200       if (strcmp(buf,"label")==0)
 201         return Token(copystr(buf),TOKEN_LABEL);
 202       if (strcmp(buf,"int")==0)
 203         return Token(copystr(buf),TOKEN_INT);
 204       if (strcmp(buf,"short")==0)
 205         return Token(copystr(buf),TOKEN_SHORT);
 206       if (strcmp(buf,"bit")==0)
 207         return Token(copystr(buf),TOKEN_BIT);
 208       if (strcmp(buf,"byte")==0)
 209         return Token(copystr(buf),TOKEN_BYTE);
 210       if (strcmp(buf,"subtype")==0)
 211         return Token(copystr(buf),TOKEN_SUBTYPE);
 212       if (strcmp(buf,"of")==0)
 213         return Token(copystr(buf),TOKEN_OF);
 214       if (strcmp(buf,"element")==0)
 215         return Token(copystr(buf),TOKEN_ELEMENT);
 216       if (strcmp(buf,"forall")==0)
 217         return Token(copystr(buf),TOKEN_FORALL);
 218       if (strcmp(buf,"for")==0)
 219         return Token(copystr(buf),TOKEN_FOR);
 220       if (strcmp(buf,"sizeof")==0)
 221         return Token(copystr(buf),TOKEN_SIZEOF);
 222       if (strcmp(buf,"literal")==0)
 223         return Token(copystr(buf),TOKEN_LITERAL);
 224       if (strcmp(buf,"param")==0)
 225         return Token(copystr(buf),TOKEN_PARAM);
 226       if (strcmp(buf,"1")==0)
 227         return Token(copystr(buf),TOKEN_ONE);
 228       if (strcmp(buf,"true")==0)
 229         return Token(copystr(buf),TOKEN_TRUE);
 230       if (strcmp(buf,"to")==0)
 231         return Token(copystr(buf),TOKEN_TO);
 232       if (strcmp(buf,"delay")==0)
 233         return Token(copystr(buf),TOKEN_DELAY);
 234       if (strcmp(buf,"static")==0)
 235         return Token(copystr(buf),TOKEN_STATIC);
 236       return Token(copystr(buf),0);
 237     }
 238   }
 239   return Token();
 240 }
 241
 242
 243 // return true if the given char is a separator
 244 bool Reader::breakchar(int chr) {
 245   switch(chr) {
 246   case ' ':
 247     return true;
 248   case '|':
 249     return true;
 250   case '-':
 251     return true;
 252   case '+':
 253     return true;
 254   case '*':
 255     return true;
 256   case '/':
 257     return true;
 258   case ']':
 259     return true;
 260   case ')':
 261     return true;
 262   case ';':
 263     return true;
 264   case ':':
 265     return true;
 266   case '}':
 267     return true;
 268   case '[':
 269     return true;
 270   case '(':
 271     return true;
 272   case '{':
 273     return true;
 274   case '<':
 275     return true;
 276   case '=':
 277     return true;
 278   case '\n':
 279     return true;
 280   case '>':
 281     return true;
 282   case '.':
 283     return true;
 284   case ',':
 285       return true;
 286   default:
 287     return false;
 288   }
 289 }
 290
 291
 292
 293 void Reader::error() {
 294   printf("%s\n",buf);
 295 }
 296
 297
 298
 299 void tokenname(int t) {
 300   switch(t) {
 301   case TOKEN_OPENBRACK:
 302     printf("[");
 303     break;
 304   case TOKEN_CLOSEBRACK:
 305     printf("]");
 306     break;
 307   case TOKEN_FORALL:
 308     printf("forall");
 309     break;
 310   case TOKEN_IN:
 311     printf("in");
 312     break;
 313   case TOKEN_OPENBRACE:
 314     printf("{");
 315     break;
 316   case TOKEN_CLOSEBRACE:
 317     printf("}");
 318     break;
 319   case TOKEN_COMMA:
 320     printf(",");
 321     break;
 322   case TOKEN_SIZEOF:
 323     printf("sizeof");
 324     break;
 325   case TOKEN_OPENPAREN:
 326     printf("(");
 327     break;
 328   case TOKEN_CLOSEPAREN:
 329     printf(")");
 330     break;
 331   case TOKEN_LT:
 332     printf("<");
 333     break;
 334   case TOKEN_LTE:
 335     printf("<=");
 336     break;
 337   case TOKEN_EQUALS:
 338     printf("=");
 339     break;
 340   case TOKEN_GTE:
 341     printf(">=");
 342     break;
 343   case TOKEN_GT:
 344     printf(">");
 345     break;
 346   case TOKEN_ONE:
 347     printf("1");
 348     break;
 349   case TOKEN_DOT:
 350     printf(".");
 351     break;
 352   case TOKEN_DOTINV:
 353     printf(".~");
 354     break;
 355   case TOKEN_NOT:
 356     printf("!");
 357     break;
 358   case TOKEN_LITERAL:
 359     printf("literal");
 360     break;
 361   case TOKEN_PARAM:
 362     printf("param");
 363     break;
 364   case TOKEN_SUB:
 365     printf("-");
 366     break;
 367   case TOKEN_ADD:
 368     printf("+");
 369     break;
 370   case TOKEN_MULT:
 371     printf("*");
 372     break;
 373   case TOKEN_AND:
 374     printf("and");
 375     break;
 376   case TOKEN_OR:
 377     printf("or");
 378     break;
 379   case TOKEN_EOL:
 380     printf("EOL");
 381     break;
 382   case TOKEN_EOF:
 383     printf("EOF");
 384     break;
 385   case TOKEN_IMPLIES:
 386     printf("=>");
 387     break;
 388   case TOKEN_TRUE:
 389     printf("true");
 390     break;
 391   case TOKEN_FOR:
 392     printf("for");
 393     break;
 394   case TOKEN_TO:
 395     printf("to");
 396     break;
 397   case TOKEN_STRUCTURE:
 398     printf("structure");
 399     break;
 400   case TOKEN_RESERVED:
 401     printf("reserved");
 402     break;
 403   case TOKEN_LABEL:
 404     printf("label");
 405     break;
 406   case TOKEN_INT:
 407     printf("int");
 408     break;
 409   case TOKEN_BIT:
 410     printf("bit");
 411     break;
 412   case TOKEN_BYTE:
 413     printf("byte");
 414     break;
 415   case TOKEN_SUBTYPE:
 416     printf("subtype");
 417     break;
 418   case TOKEN_OF:
 419     printf("of");
 420     break;
 421   case TOKEN_SEMI:
 422     printf(";");
 423     break;
 424   case TOKEN_COLON:
 425     printf(":");
 426     break;
 427   case TOKEN_SET:
 428     printf("set");
 429     break;
 430   case TOKEN_ARROW:
 431     printf("->");
 432     break;
 433   case TOKEN_MANY:
 434     printf("many");
 435     break;
 436   case TOKEN_BAR:
 437     printf("|");
 438     break;
 439   case TOKEN_PARTITION:
 440     printf("partition");
 441     break;
 442   case TOKEN_ELEMENT:
 443     printf("element");
 444     break;
 445   default:
 446     printf("undefined token");
 447   }
 448 }