mudgangster

Tiny, scriptable MUD client
Log | Files | Refs | README

llex.cc (16101B)


      1 /*
      2 ** $Id: llex.c,v 2.96.1.1 2017/04/19 17:20:42 roberto Exp $
      3 ** Lexical Analyzer
      4 ** See Copyright Notice in lua.h
      5 */
      6 
      7 #define llex_c
      8 #define LUA_CORE
      9 
     10 #include "lprefix.h"
     11 
     12 
     13 #include <locale.h>
     14 #include <string.h>
     15 
     16 #include "lua.h"
     17 
     18 #include "lctype.h"
     19 #include "ldebug.h"
     20 #include "ldo.h"
     21 #include "lgc.h"
     22 #include "llex.h"
     23 #include "lobject.h"
     24 #include "lparser.h"
     25 #include "lstate.h"
     26 #include "lstring.h"
     27 #include "ltable.h"
     28 #include "lzio.h"
     29 
     30 
     31 
     /*
     ** Advance the input: store the result of 'zgetc' on the stream 'ls->z'
     ** in 'ls->current'; the value of the macro is that stored character.
     ** The parameter is parenthesized so that any pointer expression (not
     ** only a plain identifier) expands correctly. The argument is
     ** evaluated twice, so it must be free of side effects.
     */
     #define next(ls) ((ls)->current = zgetc((ls)->z))
     33 
     34 
     35 
     /*
     ** True when the current character is '\n' or '\r'. The parameter is
     ** parenthesized so that any pointer expression expands correctly; it
     ** is evaluated twice, so it must be free of side effects.
     */
     #define currIsNewline(ls) ((ls)->current == '\n' || (ls)->current == '\r')
     37 
     38 
     /*
     ** Printable spellings of the multi-character tokens (ORDER RESERVED).
     ** 'luaX_token2str' indexes this table with 'token - FIRST_RESERVED' and
     ** 'luaX_init' registers its first NUM_RESERVED entries, so the order of
     ** the entries must not change.
     */
     static const char *const luaX_tokens [] = {
       /* words */
       "and", "break", "do", "else", "elseif", "end", "false", "for",
       "function", "goto", "if", "in", "local", "nil", "not", "or",
       "repeat", "return", "then", "true", "until", "while",
       /* multi-character operators and end of stream */
       "//", "..", "...", "==", ">=", "<=", "~=", "<<", ">>", "::", "<eof>",
       /* numerals, names, and strings */
       "<number>", "<integer>", "<name>", "<string>"
     };
     49 
     50 
     /*
     ** Append the current character to the token buffer (via 'save') and
     ** then advance the input (via 'next'); the value of the macro is the
     ** value of 'next'. The parameter is parenthesized where it is
     ** dereferenced so that any pointer expression expands correctly; it is
     ** evaluated more than once, so it must be free of side effects.
     */
     #define save_and_next(ls) (save(ls, (ls)->current), next(ls))
     52 
     53 
     54 static l_noret lexerror (LexState *ls, const char *msg, int token);
     55 
     56 
     57 static void save (LexState *ls, int c) {
     58   Mbuffer *b = ls->buff;
     59   if (luaZ_bufflen(b) + 1 > luaZ_sizebuffer(b)) {
     60     size_t newsize;
     61     if (luaZ_sizebuffer(b) >= MAX_SIZE/2)
     62       lexerror(ls, "lexical element too long", 0);
     63     newsize = luaZ_sizebuffer(b) * 2;
     64     luaZ_resizebuffer(ls->L, b, newsize);
     65   }
     66   b->buffer[luaZ_bufflen(b)++] = cast(char, c);
     67 }
     68 
     69 
     70 void luaX_init (lua_State *L) {
     71   int i;
     72   TString *e = luaS_newliteral(L, LUA_ENV);  /* create env name */
     73   luaC_fix(L, obj2gco(e));  /* never collect this name */
     74   for (i=0; i<NUM_RESERVED; i++) {
     75     TString *ts = luaS_new(L, luaX_tokens[i]);
     76     luaC_fix(L, obj2gco(ts));  /* reserved words are never collected */
     77     ts->extra = cast_byte(i+1);  /* reserved word */
     78   }
     79 }
     80 
     81 
     82 const char *luaX_token2str (LexState *ls, int token) {
     83   if (token < FIRST_RESERVED) {  /* single-byte symbols? */
     84     lua_assert(token == cast_uchar(token));
     85     return luaO_pushfstring(ls->L, "'%c'", token);
     86   }
     87   else {
     88     const char *s = luaX_tokens[token - FIRST_RESERVED];
     89     if (token < TK_EOS)  /* fixed format (symbols and reserved words)? */
     90       return luaO_pushfstring(ls->L, "'%s'", s);
     91     else  /* names, strings, and numerals */
     92       return s;
     93   }
     94 }
     95 
     96 
     97 static const char *txtToken (LexState *ls, int token) {
     98   switch (token) {
     99     case TK_NAME: case TK_STRING:
    100     case TK_FLT: case TK_INT:
    101       save(ls, '\0');
    102       return luaO_pushfstring(ls->L, "'%s'", luaZ_buffer(ls->buff));
    103     default:
    104       return luaX_token2str(ls, token);
    105   }
    106 }
    107 
    108 
    109 static l_noret lexerror (LexState *ls, const char *msg, int token) {
    110   msg = luaG_addinfo(ls->L, msg, ls->source, ls->linenumber);
    111   if (token)
    112     luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token));
    113   luaD_throw(ls->L, LUA_ERRSYNTAX);
    114 }
    115 
    116 
    117 l_noret luaX_syntaxerror (LexState *ls, const char *msg) {
    118   lexerror(ls, msg, ls->t.token);
    119 }
    120 
    121 
    122 /*
    123 ** creates a new string and anchors it in scanner's table so that
    124 ** it will not be collected until the end of the compilation
    125 ** (by that time it should be anchored somewhere)
    126 */
    127 TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
    128   lua_State *L = ls->L;
    129   TValue *o;  /* entry for 'str' */
    130   TString *ts = luaS_newlstr(L, str, l);  /* create new string */
    131   setsvalue2s(L, L->top++, ts);  /* temporarily anchor it in stack */
    132   o = luaH_set(L, ls->h, L->top - 1);
    133   if (ttisnil(o)) {  /* not in use yet? */
    134     /* boolean value does not need GC barrier;
    135        table has no metatable, so it does not need to invalidate cache */
    136     setbvalue(o, 1);  /* t[string] = true */
    137     luaC_checkGC(L);
    138   }
    139   else {  /* string already present */
    140     ts = tsvalue(keyfromval(o));  /* re-use value previously stored */
    141   }
    142   L->top--;  /* remove string from stack */
    143   return ts;
    144 }
    145 
    146 
    147 /*
    148 ** increment line number and skips newline sequence (any of
    149 ** \n, \r, \n\r, or \r\n)
    150 */
    151 static void inclinenumber (LexState *ls) {
    152   int old = ls->current;
    153   lua_assert(currIsNewline(ls));
    154   next(ls);  /* skip '\n' or '\r' */
    155   if (currIsNewline(ls) && ls->current != old)
    156     next(ls);  /* skip '\n\r' or '\r\n' */
    157   if (++ls->linenumber >= MAX_INT)
    158     lexerror(ls, "chunk has too many lines", 0);
    159 }
    160 
    161 
    162 void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source,
    163                     int firstchar) {
    164   ls->t.token = 0;
    165   ls->L = L;
    166   ls->current = firstchar;
    167   ls->lookahead.token = TK_EOS;  /* no look-ahead token */
    168   ls->z = z;
    169   ls->fs = NULL;
    170   ls->linenumber = 1;
    171   ls->lastline = 1;
    172   ls->source = source;
    173   ls->envn = luaS_newliteral(L, LUA_ENV);  /* get env name */
    174   luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER);  /* initialize buffer */
    175 }
    176 
    177 
    178 
    179 /*
    180 ** =======================================================
    181 ** LEXICAL ANALYZER
    182 ** =======================================================
    183 */
    184 
    185 
    186 static int check_next1 (LexState *ls, int c) {
    187   if (ls->current == c) {
    188     next(ls);
    189     return 1;
    190   }
    191   else return 0;
    192 }
    193 
    194 
    195 /*
    196 ** Check whether current char is in set 'set' (with two chars) and
    197 ** saves it
    198 */
    199 static int check_next2 (LexState *ls, const char *set) {
    200   lua_assert(set[2] == '\0');
    201   if (ls->current == set[0] || ls->current == set[1]) {
    202     save_and_next(ls);
    203     return 1;
    204   }
    205   else return 0;
    206 }
    207 
    208 
    209 /* LUA_NUMBER */
    210 /*
    211 ** this function is quite liberal in what it accepts, as 'luaO_str2num'
    212 ** will reject ill-formed numerals.
    213 */
    214 static int read_numeral (LexState *ls, SemInfo *seminfo) {
    215   TValue obj;
    216   const char *expo = "Ee";
    217   int first = ls->current;
    218   lua_assert(lisdigit(ls->current));
    219   save_and_next(ls);
    220   if (first == '0' && check_next2(ls, "xX"))  /* hexadecimal? */
    221     expo = "Pp";
    222   for (;;) {
    223     if (check_next2(ls, expo))  /* exponent part? */
    224       check_next2(ls, "-+");  /* optional exponent sign */
    225     if (lisxdigit(ls->current))
    226       save_and_next(ls);
    227     else if (ls->current == '.')
    228       save_and_next(ls);
    229     else break;
    230   }
    231   save(ls, '\0');
    232   if (luaO_str2num(luaZ_buffer(ls->buff), &obj) == 0)  /* format error? */
    233     lexerror(ls, "malformed number", TK_FLT);
    234   if (ttisinteger(&obj)) {
    235     seminfo->i = ivalue(&obj);
    236     return TK_INT;
    237   }
    238   else {
    239     lua_assert(ttisfloat(&obj));
    240     seminfo->r = fltvalue(&obj);
    241     return TK_FLT;
    242   }
    243 }
    244 
    245 
    246 /*
    247 ** skip a sequence '[=*[' or ']=*]'; if sequence is well formed, return
    248 ** its number of '='s; otherwise, return a negative number (-1 iff there
    249 ** are no '='s after initial bracket)
    250 */
    251 static int skip_sep (LexState *ls) {
    252   int count = 0;
    253   int s = ls->current;
    254   lua_assert(s == '[' || s == ']');
    255   save_and_next(ls);
    256   while (ls->current == '=') {
    257     save_and_next(ls);
    258     count++;
    259   }
    260   return (ls->current == s) ? count : (-count) - 1;
    261 }
    262 
    263 
    264 static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) {
          /*
          ** Read the body of a long string or long comment whose opening
          ** delimiter has 'sep' '=' characters. On entry the current char is
          ** the second '[' of that delimiter. A NULL 'seminfo' means a
          ** comment: its text is not kept and no value is produced. Otherwise
          ** the text between the delimiters is stored in 'seminfo->ts'.
          ** Reaching end of input before the closing delimiter is an error.
          */
    265   int line = ls->linenumber;  /* initial line (for error message) */
    266   save_and_next(ls);  /* skip 2nd '[' */
    267   if (currIsNewline(ls))  /* string starts with a newline? */
    268     inclinenumber(ls);  /* skip it */
    269   for (;;) {
    270     switch (ls->current) {
    271       case EOZ: {  /* error */
    272         const char *what = (seminfo ? "string" : "comment");
    273         const char *msg = luaO_pushfstring(ls->L,
    274                      "unfinished long %s (starting at line %d)", what, line);
    275         lexerror(ls, msg, TK_EOS);
    276         break;  /* to avoid warnings */
    277       }
    278       case ']': {
                /* 'skip_sep' saves the ']' and any '='s it skips; when the
                   level does not match they simply stay in the buffer as
                   ordinary content and scanning resumes at the current char */
    279         if (skip_sep(ls) == sep) {
    280           save_and_next(ls);  /* skip 2nd ']' */
    281           goto endloop;
    282         }
    283         break;
    284       }
    285       case '\n': case '\r': {
                /* whatever newline sequence is in the input, a single '\n'
                   is stored; 'inclinenumber' consumes the whole sequence */
    286         save(ls, '\n');
    287         inclinenumber(ls);
    288         if (!seminfo) luaZ_resetbuffer(ls->buff);  /* avoid wasting space */
    289         break;
    290       }
    291       default: {
    292         if (seminfo) save_and_next(ls);
    293         else next(ls);
    294       }
    295     }
    296   } endloop:
          /* the buffer holds opening delimiter + content + closing delimiter;
             each delimiter is 2 + sep chars long and is left out of the string */
    297   if (seminfo)
    298     seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep),
    299                                      luaZ_bufflen(ls->buff) - 2*(2 + sep));
    300 }
    301 
    302 
    303 static void esccheck (LexState *ls, int c, const char *msg) {
    304   if (!c) {
    305     if (ls->current != EOZ)
    306       save_and_next(ls);  /* add current to buffer for error message */
    307     lexerror(ls, msg, TK_STRING);
    308   }
    309 }
    310 
    311 
    312 static int gethexa (LexState *ls) {
    313   save_and_next(ls);
    314   esccheck (ls, lisxdigit(ls->current), "hexadecimal digit expected");
    315   return luaO_hexavalue(ls->current);
    316 }
    317 
    318 
    319 static int readhexaesc (LexState *ls) {
    320   int r = gethexa(ls);
    321   r = (r << 4) + gethexa(ls);
    322   luaZ_buffremove(ls->buff, 2);  /* remove saved chars from buffer */
    323   return r;
    324 }
    325 
    326 
    327 static unsigned long readutf8esc (LexState *ls) {
    328   unsigned long r;
    329   int i = 4;  /* chars to be removed: '\', 'u', '{', and first digit */
    330   save_and_next(ls);  /* skip 'u' */
    331   esccheck(ls, ls->current == '{', "missing '{'");
    332   r = gethexa(ls);  /* must have at least one digit */
    333   while ((save_and_next(ls), lisxdigit(ls->current))) {
    334     i++;
    335     r = (r << 4) + luaO_hexavalue(ls->current);
    336     esccheck(ls, r <= 0x10FFFF, "UTF-8 value too large");
    337   }
    338   esccheck(ls, ls->current == '}', "missing '}'");
    339   next(ls);  /* skip '}' */
    340   luaZ_buffremove(ls->buff, i);  /* remove saved chars from buffer */
    341   return r;
    342 }
    343 
    344 
    345 static void utf8esc (LexState *ls) {
    346   char buff[UTF8BUFFSZ];
    347   int n = luaO_utf8esc(buff, readutf8esc(ls));
    348   for (; n > 0; n--)  /* add 'buff' to string */
    349     save(ls, buff[UTF8BUFFSZ - n]);
    350 }
    351 
    352 
    353 static int readdecesc (LexState *ls) {
    354   int i;
    355   int r = 0;  /* result accumulator */
    356   for (i = 0; i < 3 && lisdigit(ls->current); i++) {  /* read up to 3 digits */
    357     r = 10*r + ls->current - '0';
    358     save_and_next(ls);
    359   }
    360   esccheck(ls, r <= UCHAR_MAX, "decimal escape too large");
    361   luaZ_buffremove(ls->buff, i);  /* remove read digits from buffer */
    362   return r;
    363 }
    364 
    365 
    366 static void read_string (LexState *ls, int del, SemInfo *seminfo) {
          /*
          ** Read a short literal string delimited by 'del' (on entry the
          ** current char is the opening delimiter) and store its content,
          ** with escape sequences already translated, in 'seminfo->ts'.
          ** Both delimiters are kept in the buffer while scanning, so that
          ** error messages can show the string, and are left out of the
          ** final string. An unescaped newline or the end of input before
          ** the closing delimiter is an error.
          */
    367   save_and_next(ls);  /* keep delimiter (for error messages) */
    368   while (ls->current != del) {
    369     switch (ls->current) {
    370       case EOZ:
    371         lexerror(ls, "unfinished string", TK_EOS);
    372         break;  /* to avoid warnings */
    373       case '\n':
    374       case '\r':
    375         lexerror(ls, "unfinished string", TK_STRING);
    376         break;  /* to avoid warnings */
    377       case '\\': {  /* escape sequences */
                /*
                ** Each escape ends at one of three labels:
                **   read_save: the escape char is still current; skip it,
                **              then replace the saved '\\' by 'c';
                **   only_save: the input is already past the escape; just
                **              replace the saved '\\' by 'c';
                **   no_save:   nothing (more) to put in the buffer.
                */
    378         int c;  /* final character to be saved */
    379         save_and_next(ls);  /* keep '\\' for error messages */
    380         switch (ls->current) {
    381           case 'a': c = '\a'; goto read_save;
    382           case 'b': c = '\b'; goto read_save;
    383           case 'f': c = '\f'; goto read_save;
    384           case 'n': c = '\n'; goto read_save;
    385           case 'r': c = '\r'; goto read_save;
    386           case 't': c = '\t'; goto read_save;
    387           case 'v': c = '\v'; goto read_save;
                  /* 'readhexaesc' stops on the last hexadecimal digit, which
                     'read_save' then skips */
    388           case 'x': c = readhexaesc(ls); goto read_save;
                  /* 'utf8esc' saves the encoded bytes itself; the chars it
                     removes from the buffer include the '\\' */
    389           case 'u': utf8esc(ls);  goto no_save;
                  /* an escaped line break is stored as a single '\n' */
    390           case '\n': case '\r':
    391             inclinenumber(ls); c = '\n'; goto only_save;
    392           case '\\': case '\"': case '\'':
    393             c = ls->current; goto read_save;
    394           case EOZ: goto no_save;  /* will raise an error next loop */
    395           case 'z': {  /* zap following span of spaces */
    396             luaZ_buffremove(ls->buff, 1);  /* remove '\\' */
    397             next(ls);  /* skip the 'z' */
    398             while (lisspace(ls->current)) {
    399               if (currIsNewline(ls)) inclinenumber(ls);
    400               else next(ls);
    401             }
    402             goto no_save;
    403           }
    404           default: {
                    /* anything else must be a decimal escape; 'readdecesc'
                       consumes all of its digits */
    405             esccheck(ls, lisdigit(ls->current), "invalid escape sequence");
    406             c = readdecesc(ls);  /* digital escape '\ddd' */
    407             goto only_save;
    408           }
    409         }
    410        read_save:
    411          next(ls);
    412          /* go through */
    413        only_save:
    414          luaZ_buffremove(ls->buff, 1);  /* remove '\\' */
    415          save(ls, c);
    416          /* go through */
    417        no_save: break;
    418       }
    419       default:
    420         save_and_next(ls);
    421     }
    422   }
    423   save_and_next(ls);  /* skip delimiter */
          /* leave out the opening delimiter (+ 1) and both delimiters from
             the length (- 2) */
    424   seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
    425                                    luaZ_bufflen(ls->buff) - 2);
    426 }
    427 
    428 
    429 static int llex (LexState *ls, SemInfo *seminfo) {
          /*
          ** Scan and return the next token, starting with an empty token
          ** buffer. White space, line breaks and comments are skipped by
          ** going around the loop again; every other case returns. Tokens
          ** below FIRST_RESERVED are returned as their own character code.
          ** 'seminfo' receives the string of names, reserved words and
          ** string literals, and the value of numerals.
          */
    430   luaZ_resetbuffer(ls->buff);
    431   for (;;) {
    432     switch (ls->current) {
    433       case '\n': case '\r': {  /* line breaks */
    434         inclinenumber(ls);
    435         break;
    436       }
    437       case ' ': case '\f': case '\t': case '\v': {  /* spaces */
    438         next(ls);
    439         break;
    440       }
    441       case '-': {  /* '-' or '--' (comment) */
    442         next(ls);
    443         if (ls->current != '-') return '-';
    444         /* else is a comment */
    445         next(ls);
    446         if (ls->current == '[') {  /* long comment? */
                  /* a negative 'sep' means the '[' does not open a long
                     bracket: the rest of the line is a short comment */
    447           int sep = skip_sep(ls);
    448           luaZ_resetbuffer(ls->buff);  /* 'skip_sep' may dirty the buffer */
    449           if (sep >= 0) {
    450             read_long_string(ls, NULL, sep);  /* skip long comment */
    451             luaZ_resetbuffer(ls->buff);  /* previous call may dirty the buff. */
    452             break;
    453           }
    454         }
    455         /* else short comment */
    456         while (!currIsNewline(ls) && ls->current != EOZ)
    457           next(ls);  /* skip until end of line (or end of file) */
    458         break;
    459       }
    460       case '[': {  /* long string or simply '[' */
    461         int sep = skip_sep(ls);
    462         if (sep >= 0) {
    463           read_long_string(ls, seminfo, sep);
    464           return TK_STRING;
    465         }
    466         else if (sep != -1)  /* '[=...' missing second bracket */
    467           lexerror(ls, "invalid long string delimiter", TK_STRING);
                /* sep == -1: no '=' followed, so this is a plain '[' */
    468         return '[';
    469       }
    470       case '=': {
    471         next(ls);
    472         if (check_next1(ls, '=')) return TK_EQ;
    473         else return '=';
    474       }
    475       case '<': {
    476         next(ls);
    477         if (check_next1(ls, '=')) return TK_LE;
    478         else if (check_next1(ls, '<')) return TK_SHL;
    479         else return '<';
    480       }
    481       case '>': {
    482         next(ls);
    483         if (check_next1(ls, '=')) return TK_GE;
    484         else if (check_next1(ls, '>')) return TK_SHR;
    485         else return '>';
    486       }
    487       case '/': {
    488         next(ls);
    489         if (check_next1(ls, '/')) return TK_IDIV;
    490         else return '/';
    491       }
    492       case '~': {
    493         next(ls);
    494         if (check_next1(ls, '=')) return TK_NE;
    495         else return '~';
    496       }
    497       case ':': {
    498         next(ls);
    499         if (check_next1(ls, ':')) return TK_DBCOLON;
    500         else return ':';
    501       }
    502       case '"': case '\'': {  /* short literal strings */
    503         read_string(ls, ls->current, seminfo);
    504         return TK_STRING;
    505       }
    506       case '.': {  /* '.', '..', '...', or number */
                /* the '.' is saved because it may be the first char of a
                   numeral, which 'read_numeral' converts from the buffer */
    507         save_and_next(ls);
    508         if (check_next1(ls, '.')) {
    509           if (check_next1(ls, '.'))
    510             return TK_DOTS;   /* '...' */
    511           else return TK_CONCAT;   /* '..' */
    512         }
    513         else if (!lisdigit(ls->current)) return '.';
    514         else return read_numeral(ls, seminfo);
    515       }
    516       case '0': case '1': case '2': case '3': case '4':
    517       case '5': case '6': case '7': case '8': case '9': {
    518         return read_numeral(ls, seminfo);
    519       }
    520       case EOZ: {
    521         return TK_EOS;
    522       }
    523       default: {
    524         if (lislalpha(ls->current)) {  /* identifier or reserved word? */
    525           TString *ts;
    526           do {
    527             save_and_next(ls);
    528           } while (lislalnum(ls->current));
    529           ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
    530                                   luaZ_bufflen(ls->buff));
    531           seminfo->ts = ts;
                  /* 'luaX_init' stored in 'extra' the 1-based position of
                     each reserved word, hence the '- 1' */
    532           if (isreserved(ts))  /* reserved word? */
    533             return ts->extra - 1 + FIRST_RESERVED;
    534           else {
    535             return TK_NAME;
    536           }
    537         }
    538         else {  /* single-char tokens (+ - / ...) */
    539           int c = ls->current;
    540           next(ls);
    541           return c;
    542         }
    543       }
    544     }
    545   }
    546 }
    547 
    548 
    549 void luaX_next (LexState *ls) {
    550   ls->lastline = ls->linenumber;
    551   if (ls->lookahead.token != TK_EOS) {  /* is there a look-ahead token? */
    552     ls->t = ls->lookahead;  /* use this one */
    553     ls->lookahead.token = TK_EOS;  /* and discharge it */
    554   }
    555   else
    556     ls->t.token = llex(ls, &ls->t.seminfo);  /* read next token */
    557 }
    558 
    559 
    560 int luaX_lookahead (LexState *ls) {
    561   lua_assert(ls->lookahead.token == TK_EOS);
    562   ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
    563   return ls->lookahead.token;
    564 }
    565