00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #include "basix/mmx_syntax.hpp"
00017 #include "basix/parse_tools.hpp"
00018 namespace mmx {
00019
// Shorthands for the lexer state stored in a parse_instance.
// Every macro below expects a pointer variable named `obj` to be in scope.

// Input buffer and its length in characters.
#define s (obj->lex_string)
#define n (obj->lex_length)

// Offsets into the buffer: start of the token being scanned, position
// recorded on entry to mmx_lex (before blanks are skipped), and the
// current scanning position.
#define start (obj->lex_start)
#define prev (obj->lex_prev)
#define pos (obj->lex_pos)

// Line counters matching the three offsets above.
#define start_line (obj->lex_start_line)
#define prev_line (obj->lex_prev_line)
#define line (obj->lex_line)

// Offset of the first character of the line each of the three offsets is on;
// used to turn an offset into a column (offset - begin_line).
#define start_begin_line (obj->lex_start_begin_line)
#define prev_begin_line (obj->lex_prev_begin_line)
#define begin_line (obj->lex_begin_line)

// Passed through to source_location; presumably the name of the input
// being lexed and a running input number -- confirm in parse_instance.
#define file (obj->lex_file_name)
#define input (obj->lex_input_number)

// NOTE: these are brace blocks rather than do { } while (0) on purpose:
// mmx_lex uses INC_LINE directly in front of an `else` without a semicolon.
#define INC_POS { pos++; }
#define ADD_POS(z) { pos += (z); }
// Step over a line break: the next character starts a new line.
#define INC_LINE { pos++; line++; begin_line = pos; }
#define SET_PREV { prev = pos; prev_line = line; prev_begin_line = begin_line; }
#define SAVE_START { start = pos; start_line = line; start_begin_line = begin_line; }
#define RESTORE_START { pos = start; line = start_line; begin_line = start_begin_line; }

// Store the lexeme [start, pos) in *lval as a literal, attach its source
// location (offset, line, column for both ends) and return the token code
// from the enclosing function. Expects `lval` to be in scope.
#define produce(code) { \
  *lval = lit(string (s + start, pos - start)); \
  source_insert (*lval, source_location (*lval, file, input, \
    source_position(start, start_line, start - start_begin_line),\
    source_position(pos, line, pos - begin_line))); \
  return code; }

// If the next character is c, consume it and produce the token `code`.
#define test(c,code) \
  if ((pos<n) && (s[pos]==(c))) { INC_POS; produce (code); }

// If the next character is c and the whole lexeme is the keyword k,
// produce the token `code`. The bounds check mirrors the one in test():
// pos may be equal to n here, in which case s[pos] must not be read.
#define keyword(c,k,code) \
  if ((pos<n) && (s[pos]==(c)) && test_keyword(k,obj)) produce (code);

// True for characters that may occur in an identifier: digits, ASCII
// letters, '_', '?' and '$'. Evaluates its argument several times.
#define is_alpha(c) \
  ((((c)>='0') && ((c)<='9')) || \
   (((c)>='A') && ((c)<='Z')) || \
   (((c)>='a') && ((c)<='z')) || \
   (((c)=='_') || ((c)=='?') || ((c)=='$')))
00063
00064 static bool
00065 test_keyword (const char* k, parse_instance* obj) {
00066 RESTORE_START;
00067 while ((*k)!='\0') {
00068 if (pos>=n) return false;
00069 if (s[pos]!=(*k)) return false;
00070 INC_POS;
00071 k++;
00072 }
00073 if (pos>=n) return true;
00074 if (is_alpha (s[pos])) return false;
00075 return true;
00076 }
00077
00078 int
00079 mmx_lex (generic *lval, parse_instance* obj) {
00080 if (pos == n) {
00081 s= NULL;
00082 *lval= generic ();
00083 return 0;
00084 }
00085
00086 SET_PREV;
00087 char c= s[pos];
00088 while ((c<=' ') || (c>'~')) {
00089 if ((c == '\n') || (c == '\r'))
00090 INC_LINE
00091 else
00092 INC_POS;
00093 if (pos == n) {
00094 s= NULL;
00095 *lval= generic ();
00096 return 0;
00097 }
00098 c= s[pos];
00099 }
00100
00101 SAVE_START;
00102 INC_POS;
00103 switch (c) {
00104 case '!':
00105 test ('=', NOT_EQUAL);
00106 if ((pos<n) && (s[pos]=='<')) {
00107 INC_POS;
00108 test ('=', NOT_LEQ);
00109 produce (NOT_LESS);
00110 }
00111 if ((pos<n) && (s[pos]=='>')) {
00112 INC_POS;
00113 test ('=', NOT_GEQ);
00114 produce (NOT_GREATER);
00115 }
00116 produce (NOT);
00117 case '\042':
00118 while (pos<n) {
00119 if ((s[pos]=='\\') && ((pos+2)<n)) { ADD_POS(2); continue; }
00120 if (s[pos]=='\n' || (s[pos]=='\r')) { INC_LINE; continue; }
00121 if (s[pos]=='\042') { INC_POS; break; }
00122 INC_POS;
00123 }
00124 produce (STRING);
00125 case '#':
00126 produce (SIZE);
00127 case '$':
00128 goto identifier;
00129 case '%':
00130 produce (PERCENT);
00131 case '&':
00132 produce (AMPERSAND);
00133 case '\047':
00134 produce (QUOTE);
00135 case '(':
00136 case ')':
00137 produce (c);
00138 case '*':
00139 test ('=', TIMES_ASSIGN);
00140 produce (TIMES);
00141 case '+':
00142 test ('=', PLUS_ASSIGN);
00143 test ('+', INC);
00144 produce (PLUS);
00145 case ',':
00146 produce (c);
00147 case '-':
00148 test ('=', MINUS_ASSIGN);
00149 test ('-', DEC);
00150 test ('>', INTO);
00151 produce (MINUS);
00152 case '.':
00153 test ('.', RANGE);
00154 produce (ACCESS);
00155 case '/':
00156 test ('\\', AND);
00157 if (s[pos]=='/') {
00158 INC_POS;
00159 while ((pos<n) && (s[pos]!='\n')) {
00160 INC_POS;
00161 }
00162 if (pos<n) { INC_LINE; }
00163 return mmx_lex (lval, obj);
00164 }
00165 if (s[pos]=='{') {
00166 nat level= 1;
00167 INC_POS;
00168 while ((pos+1<n) && (level>0)) {
00169 if (s[pos]=='\n' || (s[pos]=='\r')) {
00170 INC_LINE; continue; }
00171 if ((s[pos]=='/') && (s[pos+1]=='{')) {
00172 level++; ADD_POS(2); continue; }
00173 if ((s[pos]=='}') && (s[pos+1]=='/')) {
00174 level--; ADD_POS(2); continue; }
00175 INC_POS;
00176 }
00177 if (level>0) pos=n;
00178 return mmx_lex (lval, obj);
00179 }
00180 if (s[pos] == '\"') {
00181 INC_POS;
00182 while ((pos+1<n) && ((s[pos]!='\"') || (s[pos+1]!='/'))) {
00183 if (s[pos]=='\n' || (s[pos]=='\r')) {
00184 INC_LINE; }
00185 else {
00186 INC_POS; }
00187 }
00188 if (pos+1 < n) {
00189 ADD_POS(2);
00190 }
00191 produce (STRING);
00192 }
00193 test ('=', OVER_ASSIGN);
00194 produce (OVER);
00195 case '0':
00196 case '1':
00197 case '2':
00198 case '3':
00199 case '4':
00200 case '5':
00201 case '6':
00202 case '7':
00203 case '8':
00204 case '9':
00205 goto identifier;
00206 case ':':
00207 if ((pos<n) && (s[pos]=='=')) {
00208 if ((pos+1<n) && (s[pos+1]=='>')) {
00209 ADD_POS(2); produce (ASSIGN_MACRO); }
00210 INC_POS; produce (ASSIGN);
00211 }
00212 test ('>', TRANSTYPE);
00213 if ((pos<n) && (s[pos]==':')) {
00214 if ((pos+1<n) && (s[pos+1]=='>')) {
00215 ADD_POS(2); produce (VARTRANSTYPE); }
00216 INC_POS; produce (VARTYPE);
00217 }
00218 if ((pos+1<n) && (s[pos]=='-') && (s[pos+1]=='>')) {
00219 ADD_POS(2); produce (MAPSTO);
00220 }
00221 produce (TYPE);
00222 case ';':
00223 produce (c);
00224 case '<':
00225 if ((pos<n) && (s[pos]=='<')) {
00226 if ((pos+1<n) && (s[pos+1]=='<')) {
00227 ADD_POS(2); produce (LEFT_FLUX_BIN); }
00228 if ((pos+1<n) && (s[pos+1]=='*')) {
00229 ADD_POS(2); produce (LEFT_FLUX_VAR); }
00230 if ((pos+1<n) && (s[pos+1]=='%')) {
00231 ADD_POS(2); produce (LEFT_FLUX_STR); }
00232 if ((pos+1<n) && (s[pos+1]=='=')) {
00233 ADD_POS(2); produce (LL_ASSIGN); }
00234 INC_POS; produce (LEFT_FLUX);
00235 }
00236 if ((pos+1<n) && (s[pos]=='=') && (s[pos+1]=='>')) {
00237 ADD_POS(2); produce (EQUIVALENT);
00238 }
00239 test ('=', LEQ);
00240 produce (LESS);
00241 case '=':
00242 if ((pos<n) && (s[pos]=='=')) {
00243 if ((pos+1<n) && (s[pos+1]=='>')) {
00244 ADD_POS(2); produce (DEFINE_MACRO); }
00245 INC_POS; produce (DEFINE);
00246 }
00247 test ('>', IMPLIES);
00248 produce (EQUAL);
00249 case '>':
00250 test ('<', APPEND);
00251 if ((pos<n) && (s[pos]=='>')) {
00252 if ((pos+1<n) && (s[pos+1]=='>')) {
00253 ADD_POS(2); produce (RIGHT_FLUX_BIN); }
00254 if ((pos+1<n) && (s[pos+1]=='=')) {
00255 ADD_POS(2); produce (GG_ASSIGN); }
00256 INC_POS; produce (RIGHT_FLUX);
00257 }
00258 test ('=', GEQ);
00259 produce (GREATER);
00260 case '?':
00261 goto identifier;
00262 case '@':
00263 test ('+', OPLUS);
00264 test ('-', OMINUS);
00265 test ('*', OTIMES);
00266 test ('/', OOVER);
00267 produce (COMPOSE);
00268 case 'A':
00269 case 'B':
00270 case 'C':
00271 case 'D':
00272 case 'E':
00273 case 'F':
00274 case 'G':
00275 case 'H':
00276 case 'I':
00277 case 'J':
00278 case 'K':
00279 case 'L':
00280 case 'M':
00281 case 'N':
00282 case 'O':
00283 case 'P':
00284 case 'Q':
00285 case 'R':
00286 case 'S':
00287 case 'T':
00288 case 'U':
00289 case 'V':
00290 case 'W':
00291 case 'X':
00292 case 'Y':
00293 case 'Z':
00294 goto identifier;
00295 case '[':
00296 produce (c);
00297 case '\\':
00298 test ('/', OR);
00299 produce (c);
00300 case ']':
00301 produce (c);
00302 case '^':
00303 test ('^', FILL);
00304 produce (POWER);
00305 case '_':
00306 goto identifier;
00307 case '`':
00308 produce (BACKQUOTE);
00309 case 'a':
00310 if (pos<n) {
00311 keyword ('b', "abstract", ABSTRACT);
00312 keyword ('n', "and", SEQAND);
00313 keyword ('s', "assume", ASSUME);
00314 keyword ('u', "autofold", AUTOFOLD);
00315 }
00316 goto identifier;
00317 case 'b':
00318 if (pos<n) keyword ('r', "break", BREAK);
00319 goto identifier;
00320 case 'c':
00321 if ((pos+3)<n) {
00322 if (s[pos]=='a') {
00323 INC_POS;
00324 keyword ('s', "case", CASE);
00325 if (s[pos] == 't') {
00326 INC_POS;
00327 keyword ('c', "catch", CATCH);
00328 keyword ('e', "category", CATEGORY);
00329 }
00330 goto identifier;
00331 }
00332 keyword ('l', "class", CLASS);
00333 if ((s[pos]=='o') && (s[pos+1]=='n')) {
00334 ADD_POS(2);
00335 keyword ('c', "concrete", CONCRETE);
00336 if (((pos+1)<n) && s[pos]=='s' && s[pos+1]=='t') {
00337 ADD_POS(2);
00338 keyword ('a', "constant", CONSTANT);
00339 keyword ('r', "constructor", CONSTRUCTOR);
00340 goto identifier;
00341 }
00342 keyword ('t', "continue", CONTINUE);
00343 }
00344 }
00345 goto identifier;
00346 case 'd':
00347 if (pos<n) {
00348 keyword ('e', "destructor", DESTRUCTOR);
00349 if (s[pos]=='i') {
00350 INC_POS;
00351 keyword ('r', "direct", DIRECT);
00352 if (((pos+1)<n) && s[pos]=='s') {
00353 INC_POS;
00354 keyword ('j', "disjunction", DISJUNCTION);
00355 keyword ('p', "dispatch", DISPATCH);
00356 goto identifier;
00357 }
00358 keyword ('v', "div", DIV);
00359 goto identifier;
00360 }
00361 if (s[pos]=='o') {
00362 INC_POS;
00363 if ((pos >= n) || (!is_alpha (s[pos]))) produce (DO);
00364 keyword ('w', "downto", DOWNTO);
00365 }
00366 }
00367 goto identifier;
00368 case 'e':
00369 if ((pos+1)<n) {
00370 keyword ('l', "else", ELSE);
00371 keyword ('v', "evolutive", EVOLUTIVE);
00372 if (s[pos]=='x') {
00373 INC_POS;
00374 keyword ('i', "exists", EXISTS);
00375 if (s[pos]=='p' && pos+1<n) {
00376 INC_POS;
00377 keyword ('l', "explicit", EXPLICIT);
00378 keyword ('o', "export", EXPORT);
00379 }
00380 else if (((pos+2)<n) && (s[pos]=='t') && (s[pos+1]=='e')) {
00381 ADD_POS(2);
00382 keyword ('n', "extend", EXTEND);
00383 keyword ('r', "extern", EXTERN);
00384 }
00385 }
00386 }
00387 goto identifier;
00388 case 'f':
00389 if (pos<n) {
00390 if ((pos+1<n) && (s[pos]=='o') && (s[pos+1]=='r')) {
00391 if ((pos+2<n) && (s[pos+2]=='a')) {
00392 keyword ('o', "forall", FORALL); }
00393 else if ((pos+2<n) && (s[pos+2]=='e')) {
00394 keyword ('o', "foreign", FOREIGN); }
00395 else {
00396 keyword ('o', "for", FOR); }
00397 }
00398 keyword ('r', "from", FROM);
00399 }
00400 goto identifier;
00401 case 'g':
00402 if (pos<n) keyword ('e', "generate", GENERATE);
00403 goto identifier;
00404 case 'h':
00405 if (pos<n) {
00406 keyword ('a', "has", HAS);
00407 keyword ('i', "hidden", HIDDEN);
00408 keyword ('o', "holds", HOLDS);
00409 }
00410 goto identifier;
00411 case 'i':
00412 if (pos<n) {
00413 keyword ('f', "if", IF);
00414 if (s[pos]=='m' && pos+2<n && s[pos+1]=='p') {
00415 ADD_POS(2);
00416 keyword ('o', "import", IMPORT);
00417 keyword ('l', "implicit", IMPLICIT);
00418 }
00419 else if (s[pos]=='n') {
00420 INC_POS;
00421 if ((pos >= n) || (!is_alpha (s[pos]))) produce (IN);
00422 keyword ('d', "indirect", INDIRECT);
00423 keyword ('f', "infix", INFIX);
00424 keyword ('h', "inherit", INHERIT);
00425 keyword ('l', "inline", INLINE);
00426 keyword ('p', "inplace", INPLACE);
00427 if ((pos+3<n) && (s[pos]=='t') && (s[pos+1]=='e') && (s[pos+2]=='r')) {
00428 ADD_POS(3);
00429 keyword ('a', "interactive", INTERACTIVE);
00430 keyword ('n', "intern", INTERN);
00431 }
00432 }
00433 }
00434 goto identifier;
00435 case 'j':
00436 if (pos<n) keyword ('o', "join", JOIN);
00437 goto identifier;
00438 case 'k':
00439 if (pos<n) keyword ('e', "keyword", KEYWORD);
00440 goto identifier;
00441 case 'l':
00442 if (pos<n) {
00443 keyword ('a', "lambda", LAMBDA);
00444 keyword ('i', "literal", LITERAL);
00445 if (((pos+1)<n) && (s[pos]=='o')) {
00446 INC_POS;
00447 keyword ('c', "locked", LOCKED);
00448 keyword ('o', "loop", LOOP);
00449 }
00450 }
00451 goto identifier;
00452 case 'm':
00453 if (pos<n) {
00454 if (((pos+1)<n) && (s[pos]=='a')) {
00455 INC_POS;
00456 keyword ('c', "macro", MACRO);
00457 keyword ('t', "match", MATCH);
00458 goto identifier;
00459 }
00460 keyword ('e', "method", METHOD);
00461 if (((pos+1)<n) && (s[pos]=='o') && (s[pos+1]=='d')) {
00462 if ((pos+2<n) && (s[pos+2]=='u')) {
00463 keyword ('o', "module", MODULE); }
00464 else {
00465 keyword ('o', "mod", MOD); }
00466 }
00467 keyword ('u', "mutable", MUTABLE);
00468 }
00469 goto identifier;
00470 case 'n':
00471 goto identifier;
00472 case 'o':
00473 if (pos<n) {
00474 keyword ('p', "operator", OPERATOR);
00475 keyword ('r', "or", SEQOR);
00476 keyword ('u', "outline", OUTLINE);
00477 }
00478 goto identifier;
00479 case 'p':
00480 if ((pos+1)<n) {
00481 if (s[pos]=='a') {
00482 INC_POS;
00483 keyword ('c', "packed", PACKED);
00484 keyword ('t', "pattern", PATTERN);
00485 goto identifier;
00486 }
00487 keyword ('e', "penalty", PENALTY);
00488 keyword ('o', "postfix", POSTFIX);
00489 if (s[pos]=='r') {
00490 INC_POS;
00491 if ((pos+3<n) && s[pos] == 'e' && s[pos+1] == 'f') {
00492 ADD_POS(2);
00493 keyword ('e', "prefer", PREFER);
00494 keyword ('i', "prefix", PREFIX);
00495 goto identifier;
00496 }
00497 keyword ('i', "private", PRIVATE);
00498 keyword ('o', "protected", PROTECTED);
00499 goto identifier;
00500 }
00501 keyword ('u', "public", PUBLIC);
00502 }
00503 goto identifier;
00504 case 'q':
00505 if (pos<n) keyword ('u', "quo", QUO);
00506 goto identifier;
00507 case 'r':
00508 if ((pos+1)<n) {
00509 keyword ('a', "raise", RAISE);
00510 if (s[pos]=='e') {
00511 INC_POS;
00512 keyword ('m', "rem", REM);
00513 keyword ('t', "return", RETURN);
00514 }
00515 }
00516 goto identifier;
00517 case 's':
00518 if (pos<n) {
00519 keyword ('e', "sequel", SEQUEL);
00520 keyword ('p', "split", SPLIT);
00521 if ((pos+1<n) && (s[pos]=='t')) {
00522 INC_POS;
00523 keyword ('e', "step", STEP);
00524 keyword ('r', "structure", STRUCTURE);
00525 goto identifier;
00526 }
00527 }
00528 goto identifier;
00529 case 't':
00530 if ((pos+1)<n) {
00531 if (s[pos]=='h') {
00532 INC_POS;
00533 keyword ('e', "then", THEN);
00534 keyword ('i', "this", THIS);
00535 goto identifier;
00536 }
00537 keyword ('r', "try", TRY);
00538 keyword ('o', "to", TO);
00539 }
00540 goto identifier;
00541 case 'u':
00542 if (((pos+1)<n) && (s[pos]=='n')) {
00543 INC_POS;
00544 keyword ('p', "unpacked", UNPACKED);
00545 keyword ('t', "until", UNTIL);
00546 }
00547 goto identifier;
00548 case 'v':
00549 if (pos<n) {
00550 keyword ('a', "value", VALUE);
00551 keyword ('i', "virtual", VIRTUAL);
00552 }
00553 goto identifier;
00554 case 'w':
00555 if (pos<n) {
00556 keyword ('h', "while", WHILE);
00557 keyword ('i', "with", WITH);
00558 }
00559 goto identifier;
00560 case 'x':
00561 if (pos<n) keyword ('o', "xor", XOR);
00562 goto identifier;
00563 case 'y':
00564 if (pos<n) keyword ('i', "yield", YIELD);
00565 goto identifier;
00566 case 'z':
00567 goto identifier;
00568 case '{':
00569 produce (c);
00570 case '|':
00571 test ('|', VWHERE);
00572 produce (WHERE);
00573 case '}':
00574 produce (c);
00575 case '~':
00576 test ('>', CONVERTS);
00577 produce (TILDA);
00578 default:
00579 return mmx_lex (lval, obj);
00580
00581 identifier:
00582 while ((pos<n) &&
00583 (is_alpha (s[pos]) ||
00584 (s[pos] == '.' && pos>0 && (pos+1)<n &&
00585 s[pos-1] >= '0' && s[pos-1] <= '9' &&
00586 s[pos+1] >= '0' && s[pos+1] <= '9')))
00587 INC_POS;
00588 produce (IDENTIFIER);
00589 }
00590 }
00591
00592 }