diff options
author | Alex Auvolat <alex@adnab.me> | 2016-07-15 22:39:04 +0200 |
---|---|---|
committer | Alex Auvolat <alex@adnab.me> | 2016-07-15 22:39:04 +0200 |
commit | 7a1ea510a9fc43ccbc257601b149a90920332e13 (patch) | |
tree | 75ac252bb8ce029c62d6834e4ca927f59eaf5769 /src/lib/lua/llex.c | |
parent | d415aca695956c79110c88fa58c12bf55c0e2163 (diff) | |
download | kogata-7a1ea510a9fc43ccbc257601b149a90920332e13.tar.gz kogata-7a1ea510a9fc43ccbc257601b149a90920332e13.zip |
Add Lua source, not compiled yet as libc/libm functions remain unimplemented
Diffstat (limited to 'src/lib/lua/llex.c')
-rw-r--r-- | src/lib/lua/llex.c | 565 |
1 files changed, 565 insertions, 0 deletions
diff --git a/src/lib/lua/llex.c b/src/lib/lua/llex.c new file mode 100644 index 0000000..7032827 --- /dev/null +++ b/src/lib/lua/llex.c @@ -0,0 +1,565 @@ +/* +** $Id: llex.c,v 2.96 2016/05/02 14:02:12 roberto Exp $ +** Lexical Analyzer +** See Copyright Notice in lua.h +*/ + +#define llex_c +#define LUA_CORE + +#include "lprefix.h" + + +#include <locale.h> +#include <string.h> + +#include "lua.h" + +#include "lctype.h" +#include "ldebug.h" +#include "ldo.h" +#include "lgc.h" +#include "llex.h" +#include "lobject.h" +#include "lparser.h" +#include "lstate.h" +#include "lstring.h" +#include "ltable.h" +#include "lzio.h" + + + +#define next(ls) (ls->current = zgetc(ls->z)) + + + +#define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r') + + +/* ORDER RESERVED */ +static const char *const luaX_tokens [] = { + "and", "break", "do", "else", "elseif", + "end", "false", "for", "function", "goto", "if", + "in", "local", "nil", "not", "or", "repeat", + "return", "then", "true", "until", "while", + "//", "..", "...", "==", ">=", "<=", "~=", + "<<", ">>", "::", "<eof>", + "<number>", "<integer>", "<name>", "<string>" +}; + + +#define save_and_next(ls) (save(ls, ls->current), next(ls)) + + +static l_noret lexerror (LexState *ls, const char *msg, int token); + + +static void save (LexState *ls, int c) { + Mbuffer *b = ls->buff; + if (luaZ_bufflen(b) + 1 > luaZ_sizebuffer(b)) { + size_t newsize; + if (luaZ_sizebuffer(b) >= MAX_SIZE/2) + lexerror(ls, "lexical element too long", 0); + newsize = luaZ_sizebuffer(b) * 2; + luaZ_resizebuffer(ls->L, b, newsize); + } + b->buffer[luaZ_bufflen(b)++] = cast(char, c); +} + + +void luaX_init (lua_State *L) { + int i; + TString *e = luaS_newliteral(L, LUA_ENV); /* create env name */ + luaC_fix(L, obj2gco(e)); /* never collect this name */ + for (i=0; i<NUM_RESERVED; i++) { + TString *ts = luaS_new(L, luaX_tokens[i]); + luaC_fix(L, obj2gco(ts)); /* reserved words are never collected */ + ts->extra = cast_byte(i+1); /* reserved word */ + } +} + + +const char *luaX_token2str (LexState *ls, int token) { + if (token < FIRST_RESERVED) { /* single-byte symbols? */ + lua_assert(token == cast_uchar(token)); + return luaO_pushfstring(ls->L, "'%c'", token); + } + else { + const char *s = luaX_tokens[token - FIRST_RESERVED]; + if (token < TK_EOS) /* fixed format (symbols and reserved words)? */ + return luaO_pushfstring(ls->L, "'%s'", s); + else /* names, strings, and numerals */ + return s; + } +} + + +static const char *txtToken (LexState *ls, int token) { + switch (token) { + case TK_NAME: case TK_STRING: + case TK_FLT: case TK_INT: + save(ls, '\0'); + return luaO_pushfstring(ls->L, "'%s'", luaZ_buffer(ls->buff)); + default: + return luaX_token2str(ls, token); + } +} + + +static l_noret lexerror (LexState *ls, const char *msg, int token) { + msg = luaG_addinfo(ls->L, msg, ls->source, ls->linenumber); + if (token) + luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token)); + luaD_throw(ls->L, LUA_ERRSYNTAX); +} + + +l_noret luaX_syntaxerror (LexState *ls, const char *msg) { + lexerror(ls, msg, ls->t.token); +} + + +/* +** creates a new string and anchors it in scanner's table so that +** it will not be collected until the end of the compilation +** (by that time it should be anchored somewhere) +*/ +TString *luaX_newstring (LexState *ls, const char *str, size_t l) { + lua_State *L = ls->L; + TValue *o; /* entry for 'str' */ + TString *ts = luaS_newlstr(L, str, l); /* create new string */ + setsvalue2s(L, L->top++, ts); /* temporarily anchor it in stack */ + o = luaH_set(L, ls->h, L->top - 1); + if (ttisnil(o)) { /* not in use yet? */ + /* boolean value does not need GC barrier; + table has no metatable, so it does not need to invalidate cache */ + setbvalue(o, 1); /* t[string] = true */ + luaC_checkGC(L); + } + else { /* string already present */ + ts = tsvalue(keyfromval(o)); /* re-use value previously stored */ + } + L->top--; /* remove string from stack */ + return ts; +} + + +/* +** increment line number and skips newline sequence (any of +** \n, \r, \n\r, or \r\n) +*/ +static void inclinenumber (LexState *ls) { + int old = ls->current; + lua_assert(currIsNewline(ls)); + next(ls); /* skip '\n' or '\r' */ + if (currIsNewline(ls) && ls->current != old) + next(ls); /* skip '\n\r' or '\r\n' */ + if (++ls->linenumber >= MAX_INT) + lexerror(ls, "chunk has too many lines", 0); +} + + +void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source, + int firstchar) { + ls->t.token = 0; + ls->L = L; + ls->current = firstchar; + ls->lookahead.token = TK_EOS; /* no look-ahead token */ + ls->z = z; + ls->fs = NULL; + ls->linenumber = 1; + ls->lastline = 1; + ls->source = source; + ls->envn = luaS_newliteral(L, LUA_ENV); /* get env name */ + luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER); /* initialize buffer */ +} + + + +/* +** ======================================================= +** LEXICAL ANALYZER +** ======================================================= +*/ + + +static int check_next1 (LexState *ls, int c) { + if (ls->current == c) { + next(ls); + return 1; + } + else return 0; +} + + +/* +** Check whether current char is in set 'set' (with two chars) and +** saves it +*/ +static int check_next2 (LexState *ls, const char *set) { + lua_assert(set[2] == '\0'); + if (ls->current == set[0] || ls->current == set[1]) { + save_and_next(ls); + return 1; + } + else return 0; +} + + +/* LUA_NUMBER */ +/* +** this function is quite liberal in what it accepts, as 'luaO_str2num' +** will reject ill-formed numerals. +*/ +static int read_numeral (LexState *ls, SemInfo *seminfo) { + TValue obj; + const char *expo = "Ee"; + int first = ls->current; + lua_assert(lisdigit(ls->current)); + save_and_next(ls); + if (first == '0' && check_next2(ls, "xX")) /* hexadecimal? */ + expo = "Pp"; + for (;;) { + if (check_next2(ls, expo)) /* exponent part? */ + check_next2(ls, "-+"); /* optional exponent sign */ + if (lisxdigit(ls->current)) + save_and_next(ls); + else if (ls->current == '.') + save_and_next(ls); + else break; + } + save(ls, '\0'); + if (luaO_str2num(luaZ_buffer(ls->buff), &obj) == 0) /* format error? */ + lexerror(ls, "malformed number", TK_FLT); + if (ttisinteger(&obj)) { + seminfo->i = ivalue(&obj); + return TK_INT; + } + else { + lua_assert(ttisfloat(&obj)); + seminfo->r = fltvalue(&obj); + return TK_FLT; + } +} + + +/* +** skip a sequence '[=*[' or ']=*]'; if sequence is well formed, return +** its number of '='s; otherwise, return a negative number (-1 iff there +** are no '='s after initial bracket) +*/ +static int skip_sep (LexState *ls) { + int count = 0; + int s = ls->current; + lua_assert(s == '[' || s == ']'); + save_and_next(ls); + while (ls->current == '=') { + save_and_next(ls); + count++; + } + return (ls->current == s) ? count : (-count) - 1; +} + + +static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) { + int line = ls->linenumber; /* initial line (for error message) */ + save_and_next(ls); /* skip 2nd '[' */ + if (currIsNewline(ls)) /* string starts with a newline? */ + inclinenumber(ls); /* skip it */ + for (;;) { + switch (ls->current) { + case EOZ: { /* error */ + const char *what = (seminfo ? "string" : "comment"); + const char *msg = luaO_pushfstring(ls->L, + "unfinished long %s (starting at line %d)", what, line); + lexerror(ls, msg, TK_EOS); + break; /* to avoid warnings */ + } + case ']': { + if (skip_sep(ls) == sep) { + save_and_next(ls); /* skip 2nd ']' */ + goto endloop; + } + break; + } + case '\n': case '\r': { + save(ls, '\n'); + inclinenumber(ls); + if (!seminfo) luaZ_resetbuffer(ls->buff); /* avoid wasting space */ + break; + } + default: { + if (seminfo) save_and_next(ls); + else next(ls); + } + } + } endloop: + if (seminfo) + seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep), + luaZ_bufflen(ls->buff) - 2*(2 + sep)); +} + + +static void esccheck (LexState *ls, int c, const char *msg) { + if (!c) { + if (ls->current != EOZ) + save_and_next(ls); /* add current to buffer for error message */ + lexerror(ls, msg, TK_STRING); + } +} + + +static int gethexa (LexState *ls) { + save_and_next(ls); + esccheck (ls, lisxdigit(ls->current), "hexadecimal digit expected"); + return luaO_hexavalue(ls->current); +} + + +static int readhexaesc (LexState *ls) { + int r = gethexa(ls); + r = (r << 4) + gethexa(ls); + luaZ_buffremove(ls->buff, 2); /* remove saved chars from buffer */ + return r; +} + + +static unsigned long readutf8esc (LexState *ls) { + unsigned long r; + int i = 4; /* chars to be removed: '\', 'u', '{', and first digit */ + save_and_next(ls); /* skip 'u' */ + esccheck(ls, ls->current == '{', "missing '{'"); + r = gethexa(ls); /* must have at least one digit */ + while ((save_and_next(ls), lisxdigit(ls->current))) { + i++; + r = (r << 4) + luaO_hexavalue(ls->current); + esccheck(ls, r <= 0x10FFFF, "UTF-8 value too large"); + } + esccheck(ls, ls->current == '}', "missing '}'"); + next(ls); /* skip '}' */ + luaZ_buffremove(ls->buff, i); /* remove saved chars from buffer */ + return r; +} + + +static void utf8esc (LexState *ls) { + char buff[UTF8BUFFSZ]; + int n = luaO_utf8esc(buff, readutf8esc(ls)); + for (; n > 0; n--) /* add 'buff' to string */ + save(ls, buff[UTF8BUFFSZ - n]); +} + + +static int readdecesc (LexState *ls) { + int i; + int r = 0; /* result accumulator */ + for (i = 0; i < 3 && lisdigit(ls->current); i++) { /* read up to 3 digits */ + r = 10*r + ls->current - '0'; + save_and_next(ls); + } + esccheck(ls, r <= UCHAR_MAX, "decimal escape too large"); + luaZ_buffremove(ls->buff, i); /* remove read digits from buffer */ + return r; +} + + +static void read_string (LexState *ls, int del, SemInfo *seminfo) { + save_and_next(ls); /* keep delimiter (for error messages) */ + while (ls->current != del) { + switch (ls->current) { + case EOZ: + lexerror(ls, "unfinished string", TK_EOS); + break; /* to avoid warnings */ + case '\n': + case '\r': + lexerror(ls, "unfinished string", TK_STRING); + break; /* to avoid warnings */ + case '\\': { /* escape sequences */ + int c; /* final character to be saved */ + save_and_next(ls); /* keep '\\' for error messages */ + switch (ls->current) { + case 'a': c = '\a'; goto read_save; + case 'b': c = '\b'; goto read_save; + case 'f': c = '\f'; goto read_save; + case 'n': c = '\n'; goto read_save; + case 'r': c = '\r'; goto read_save; + case 't': c = '\t'; goto read_save; + case 'v': c = '\v'; goto read_save; + case 'x': c = readhexaesc(ls); goto read_save; + case 'u': utf8esc(ls); goto no_save; + case '\n': case '\r': + inclinenumber(ls); c = '\n'; goto only_save; + case '\\': case '\"': case '\'': + c = ls->current; goto read_save; + case EOZ: goto no_save; /* will raise an error next loop */ + case 'z': { /* zap following span of spaces */ + luaZ_buffremove(ls->buff, 1); /* remove '\\' */ + next(ls); /* skip the 'z' */ + while (lisspace(ls->current)) { + if (currIsNewline(ls)) inclinenumber(ls); + else next(ls); + } + goto no_save; + } + default: { + esccheck(ls, lisdigit(ls->current), "invalid escape sequence"); + c = readdecesc(ls); /* digital escape '\ddd' */ + goto only_save; + } + } + read_save: + next(ls); + /* go through */ + only_save: + luaZ_buffremove(ls->buff, 1); /* remove '\\' */ + save(ls, c); + /* go through */ + no_save: break; + } + default: + save_and_next(ls); + } + } + save_and_next(ls); /* skip delimiter */ + seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1, + luaZ_bufflen(ls->buff) - 2); +} + + +static int llex (LexState *ls, SemInfo *seminfo) { + luaZ_resetbuffer(ls->buff); + for (;;) { + switch (ls->current) { + case '\n': case '\r': { /* line breaks */ + inclinenumber(ls); + break; + } + case ' ': case '\f': case '\t': case '\v': { /* spaces */ + next(ls); + break; + } + case '-': { /* '-' or '--' (comment) */ + next(ls); + if (ls->current != '-') return '-'; + /* else is a comment */ + next(ls); + if (ls->current == '[') { /* long comment? */ + int sep = skip_sep(ls); + luaZ_resetbuffer(ls->buff); /* 'skip_sep' may dirty the buffer */ + if (sep >= 0) { + read_long_string(ls, NULL, sep); /* skip long comment */ + luaZ_resetbuffer(ls->buff); /* previous call may dirty the buff. */ + break; + } + } + /* else short comment */ + while (!currIsNewline(ls) && ls->current != EOZ) + next(ls); /* skip until end of line (or end of file) */ + break; + } + case '[': { /* long string or simply '[' */ + int sep = skip_sep(ls); + if (sep >= 0) { + read_long_string(ls, seminfo, sep); + return TK_STRING; + } + else if (sep != -1) /* '[=...' missing second bracket */ + lexerror(ls, "invalid long string delimiter", TK_STRING); + return '['; + } + case '=': { + next(ls); + if (check_next1(ls, '=')) return TK_EQ; + else return '='; + } + case '<': { + next(ls); + if (check_next1(ls, '=')) return TK_LE; + else if (check_next1(ls, '<')) return TK_SHL; + else return '<'; + } + case '>': { + next(ls); + if (check_next1(ls, '=')) return TK_GE; + else if (check_next1(ls, '>')) return TK_SHR; + else return '>'; + } + case '/': { + next(ls); + if (check_next1(ls, '/')) return TK_IDIV; + else return '/'; + } + case '~': { + next(ls); + if (check_next1(ls, '=')) return TK_NE; + else return '~'; + } + case ':': { + next(ls); + if (check_next1(ls, ':')) return TK_DBCOLON; + else return ':'; + } + case '"': case '\'': { /* short literal strings */ + read_string(ls, ls->current, seminfo); + return TK_STRING; + } + case '.': { /* '.', '..', '...', or number */ + save_and_next(ls); + if (check_next1(ls, '.')) { + if (check_next1(ls, '.')) + return TK_DOTS; /* '...' */ + else return TK_CONCAT; /* '..' */ + } + else if (!lisdigit(ls->current)) return '.'; + else return read_numeral(ls, seminfo); + } + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': { + return read_numeral(ls, seminfo); + } + case EOZ: { + return TK_EOS; + } + default: { + if (lislalpha(ls->current)) { /* identifier or reserved word? */ + TString *ts; + do { + save_and_next(ls); + } while (lislalnum(ls->current)); + ts = luaX_newstring(ls, luaZ_buffer(ls->buff), + luaZ_bufflen(ls->buff)); + seminfo->ts = ts; + if (isreserved(ts)) /* reserved word? */ + return ts->extra - 1 + FIRST_RESERVED; + else { + return TK_NAME; + } + } + else { /* single-char tokens (+ - / ...) */ + int c = ls->current; + next(ls); + return c; + } + } + } + } +} + + +void luaX_next (LexState *ls) { + ls->lastline = ls->linenumber; + if (ls->lookahead.token != TK_EOS) { /* is there a look-ahead token? */ + ls->t = ls->lookahead; /* use this one */ + ls->lookahead.token = TK_EOS; /* and discharge it */ + } + else + ls->t.token = llex(ls, &ls->t.seminfo); /* read next token */ +} + + +int luaX_lookahead (LexState *ls) { + lua_assert(ls->lookahead.token == TK_EOS); + ls->lookahead.token = llex(ls, &ls->lookahead.seminfo); + return ls->lookahead.token; +} + |