2812
+ − 1
/*
** $Id: llex.c,v 2.20.1.1 2007/12/27 13:02:25 roberto Exp $
** Lexical Analyzer
** See Copyright Notice in lua.h
*/
+ − 6
+ − 7
+ − 8
#include <ctype.h>
+ − 9
#include <locale.h>
+ − 10
#include <string.h>
+ − 11
+ − 12
#define llex_c
+ − 13
#define LUA_CORE
+ − 14
+ − 15
#include "lua.h"
+ − 16
+ − 17
#include "ldo.h"
+ − 18
#include "llex.h"
+ − 19
#include "lobject.h"
+ − 20
#include "lparser.h"
+ − 21
#include "lstate.h"
+ − 22
#include "lstring.h"
+ − 23
#include "ltable.h"
+ − 24
#include "lzio.h"
+ − 25
+ − 26
+ − 27
+ − 28
/*
** Advance the lexer by one character: the result of `zgetc' on the input
** stream `ls->z' becomes the new `ls->current'.  The argument is
** parenthesized so that any pointer-valued expression can be passed.
*/
#define next(ls) ((ls)->current = zgetc((ls)->z))
+ − 29
+ − 30
+ − 31
+ − 32
+ − 33
/*
** True when the current character is one of the two line terminators
** (`\n' or `\r').  The argument is parenthesized so that any
** pointer-valued expression can be passed.
*/
#define currIsNewline(ls) ((ls)->current == '\n' || (ls)->current == '\r')
+ − 34
+ − 35
+ − 36
/*
** Printable names of the multi-character tokens.  `luaX_token2str' indexes
** this table with (token - FIRST_RESERVED), and `luaX_init' registers the
** first NUM_RESERVED entries as reserved words, so the order of the
** entries is significant.  The list is NULL-terminated.
*/
/* ORDER RESERVED */
const char *const luaX_tokens [] = {
  /* words */
  "and", "break", "do", "else", "elseif",
  "end", "false", "for", "function", "if",
  "in", "local", "nil", "not", "or", "repeat",
  "return", "then", "true", "until", "while",
  /* multi-character operators */
  "..", "...", "==", ">=", "<=", "~=",
  /* placeholders used in messages for value-carrying tokens and EOF */
  "<number>", "<name>", "<string>", "<eof>",
  NULL
};
+ − 46
+ − 47
+ − 48
/*
** Append the current character to the token buffer (see `save') and then
** advance to the next input character.  The argument is parenthesized
** where it is dereferenced.
*/
#define save_and_next(ls) (save(ls, (ls)->current), next(ls))
+ − 49
+ − 50
+ − 51
/*
** Append character `c' to the token buffer of `ls'.  When the buffer has
** no room for one more character its size is doubled first; if the
** current size is already MAX_SIZET/2 or more, a lexical error
** ("lexical element too long") is raised instead.
*/
static void save (LexState *ls, int c) {
  Mbuffer *buf = ls->buff;
  if (buf->n + 1 > buf->buffsize) {  /* buffer full? */
    size_t doubled;
    if (buf->buffsize >= MAX_SIZET/2)
      luaX_lexerror(ls, "lexical element too long", 0);
    doubled = buf->buffsize * 2;
    luaZ_resizebuffer(ls->L, buf, doubled);
  }
  buf->buffer[buf->n] = cast(char, c);
  buf->n++;
}
+ − 62
+ − 63
+ − 64
/*
** Register the reserved words: for each of the first NUM_RESERVED entries
** of `luaX_tokens', create the corresponding string, fix it so that it is
** never collected, and store (index + 1) in its `reserved' field.
*/
void luaX_init (lua_State *L) {
  int idx = 0;
  while (idx < NUM_RESERVED) {
    const char *word = luaX_tokens[idx];
    TString *ts = luaS_new(L, word);
    luaS_fix(ts);  /* reserved words are never collected */
    lua_assert(strlen(word)+1 <= TOKEN_LEN);
    ts->tsv.reserved = cast_byte(idx+1);  /* non-zero marks a reserved word */
    idx++;
  }
}
+ − 73
+ − 74
+ − 75
/* size of the local buffer that `luaX_lexerror' passes to `luaO_chunkid' */
#define MAXSRC 80
+ − 76
+ − 77
+ − 78
/*
** Return a printable representation of `token'.  Tokens at or above
** FIRST_RESERVED are looked up in `luaX_tokens'; smaller values are
** single characters, shown as "char(N)" when they are control characters
** and as the character itself otherwise.
*/
const char *luaX_token2str (LexState *ls, int token) {
  if (token >= FIRST_RESERVED)
    return luaX_tokens[token-FIRST_RESERVED];
  /* single-character token */
  lua_assert(token == cast(unsigned char, token));
  if (iscntrl(token))
    return luaO_pushfstring(ls->L, "char(%d)", token);
  return luaO_pushfstring(ls->L, "%c", token);
}
+ − 87
+ − 88
+ − 89
/*
** Text used to describe `token' in an error message.  For names, strings
** and numbers this is the content of the token buffer (terminated here
** with '\0'); for any other token it is the result of `luaX_token2str'.
*/
static const char *txtToken (LexState *ls, int token) {
  if (token == TK_NAME || token == TK_STRING || token == TK_NUMBER) {
    save(ls, '\0');
    return luaZ_buffer(ls->buff);
  }
  return luaX_token2str(ls, token);
}
+ − 100
+ − 101
+ − 102
/*
** Build an error message of the form "<chunk>:<line>: <msg>", followed by
** " near <token text>" when `token' is non-zero, and then throw
** LUA_ERRSYNTAX through `luaD_throw'.
*/
void luaX_lexerror (LexState *ls, const char *msg, int token) {
  char chunkid[MAXSRC];
  luaO_chunkid(chunkid, getstr(ls->source), MAXSRC);
  msg = luaO_pushfstring(ls->L, "%s:%d: %s", chunkid, ls->linenumber, msg);
  if (token != 0)
    luaO_pushfstring(ls->L, "%s near " LUA_QS, msg, txtToken(ls, token));
  luaD_throw(ls->L, LUA_ERRSYNTAX);
}
+ − 110
+ − 111
+ − 112
/*
** Raise a lexical error with message `msg', reported near the current
** token of `ls'.
*/
void luaX_syntaxerror (LexState *ls, const char *msg) {
  int current_token = ls->t.token;
  luaX_lexerror(ls, msg, current_token);
}
+ − 115
+ − 116
+ − 117
/*
** Create (or reuse) the string made of the `l' bytes at `str' and make
** sure it has an entry in the table `ls->fs->h': a nil entry is set to
** the boolean 1, so that the string will not be collected.
** Returns the string.
*/
TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
  lua_State *L = ls->L;
  TString *ts = luaS_newlstr(L, str, l);
  TValue *slot = luaH_setstr(L, ls->fs->h, ts);  /* table entry for `str' */
  if (ttisnil(slot)) {
    setbvalue(slot, 1);  /* anchor the string */
  }
  return ts;
}
+ − 125
+ − 126
+ − 127
/*
** Consume one line break and count it.  A line break is `\n' or `\r',
** optionally followed by the other one of the two (`\n\r' or `\r\n').
** Raises a syntax error when the line counter reaches MAX_INT.
*/
static void inclinenumber (LexState *ls) {
  int first = ls->current;
  lua_assert(currIsNewline(ls));
  next(ls);  /* skip the first terminator */
  if (currIsNewline(ls) && ls->current != first)
    next(ls);  /* skip the second half of a two-character line break */
  ls->linenumber++;
  if (ls->linenumber >= MAX_INT)
    luaX_syntaxerror(ls, "chunk has too many lines");
}
+ − 136
+ − 137
+ − 138
/*
** Prepare `ls' to read from stream `z' on behalf of chunk `source':
** reset all lexer fields, size the token buffer to LUA_MINBUFFER and
** read the first character.
*/
void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source) {
  /* where the input comes from */
  ls->L = L;
  ls->z = z;
  ls->source = source;
  ls->fs = NULL;
  /* scanning state */
  ls->decpoint = '.';
  ls->linenumber = 1;
  ls->lastline = 1;
  ls->lookahead.token = TK_EOS;  /* no look-ahead token */
  luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER);  /* initialize buffer */
  next(ls);  /* read first char */
}
+ − 150
+ − 151
+ − 152
+ − 153
/*
** =======================================================
** LEXICAL ANALYZER
** =======================================================
*/
+ − 158
+ − 159
+ − 160
+ − 161
/*
** If the current character belongs to the string `set', save it, advance
** to the next character and return 1; otherwise return 0 and consume
** nothing.
**
** A NUL character in the input is rejected explicitly: `strchr' treats
** the terminating '\0' of `set' as part of the string, so without the
** guard an embedded NUL would "match" every set and be silently absorbed
** into the token being read.
*/
static int check_next (LexState *ls, const char *set) {
  if (ls->current == '\0' || strchr(set, ls->current) == NULL)
    return 0;
  save_and_next(ls);
  return 1;
}
+ − 167
+ − 168
+ − 169
/*
** Replace every occurrence of character `from' in the token buffer
** with character `to'.
*/
static void buffreplace (LexState *ls, char from, char to) {
  char *chars = luaZ_buffer(ls->buff);
  size_t i = luaZ_bufflen(ls->buff);
  for (; i > 0; i--) {
    if (chars[i-1] == from)
      chars[i-1] = to;
  }
}
+ − 175
+ − 176
+ − 177
/*
** Called after a numeral failed to convert: refresh `ls->decpoint' from
** the current locale (falling back to '.'), rewrite the buffer with the
** new separator and try the conversion again.  If it still fails, the
** separator in the buffer is put back to '.' (for the error message) and
** a "malformed number" error is raised.
*/
static void trydecpoint (LexState *ls, SemInfo *seminfo) {
  struct lconv *loc = localeconv();
  char previous = ls->decpoint;
  if (loc != NULL)
    ls->decpoint = loc->decimal_point[0];
  else
    ls->decpoint = '.';
  buffreplace(ls, previous, ls->decpoint);  /* try updated separator */
  if (luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r))
    return;  /* converted with the updated decimal point */
  /* format error with correct decimal point: no more options */
  buffreplace(ls, ls->decpoint, '.');  /* undo change (for error message) */
  luaX_lexerror(ls, "malformed number", TK_NUMBER);
}
+ − 189
+ − 190
+ − 191
/* LUA_NUMBER */
+ − 192
/*
** Read a numeral into the token buffer and store its value in
** `seminfo->r'.  The scan is deliberately permissive (digits and dots,
** an optional exponent mark with optional sign, then any run of
** alphanumerics or '_'); the actual validation is left to `luaO_str2d',
** with `trydecpoint' as a second attempt when the conversion fails.
*/
static void read_numeral (LexState *ls, SemInfo *seminfo) {
  lua_assert(isdigit(ls->current));
  save_and_next(ls);  /* first digit */
  while (isdigit(ls->current) || ls->current == '.')
    save_and_next(ls);
  if (check_next(ls, "Ee")) {  /* exponent mark? */
    check_next(ls, "+-");  /* optional exponent sign */
  }
  while (isalnum(ls->current) || ls->current == '_')
    save_and_next(ls);
  save(ls, '\0');
  buffreplace(ls, '.', ls->decpoint);  /* follow locale for decimal point */
  if (luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r) == 0)  /* format error? */
    trydecpoint(ls, seminfo);  /* retry with updated decimal separator */
}
+ − 206
+ − 207
+ − 208
/*
** Scan a long-bracket opening or closing sequence: the current bracket
** (`[' or `]') followed by any number of `='.  Every character scanned is
** saved in the token buffer.  Returns the number of `=' when the next
** character is the same bracket again, and (-count - 1) otherwise.
*/
static int skip_sep (LexState *ls) {
  int level = 0;
  int bracket = ls->current;
  lua_assert(bracket == '[' || bracket == ']');
  save_and_next(ls);
  for (; ls->current == '='; level++)
    save_and_next(ls);
  if (ls->current == bracket)
    return level;
  return -level - 1;
}
+ − 219
+ − 220
+ − 221
/*
** Read the body of a long string or long comment whose opening bracket
** has `sep' equal signs; on entry the current character is the second
** `[' of the opening bracket.  When `seminfo' is NULL the text is a
** comment and is discarded; otherwise the string between the delimiters
** is stored in `seminfo->ts'.  A line break immediately after the opening
** bracket is skipped.  Reaching the end of input raises a lexical error.
*/
static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) {
  int cont = 0;
  int finished = 0;
  (void)(cont);  /* avoid warnings when `cont' is not used */
  save_and_next(ls);  /* skip 2nd `[' */
  if (currIsNewline(ls))  /* string starts with a newline? */
    inclinenumber(ls);  /* skip it */
  while (!finished) {
    switch (ls->current) {
      case EOZ: {
        luaX_lexerror(ls, (seminfo) ? "unfinished long string" :
                                   "unfinished long comment", TK_EOS);
        break;  /* to avoid warnings */
      }
#if defined(LUA_COMPAT_LSTR)
      case '[': {
        if (skip_sep(ls) == sep) {
          save_and_next(ls);  /* skip 2nd `[' */
          cont++;
#if LUA_COMPAT_LSTR == 1
          if (sep == 0)
            luaX_lexerror(ls, "nesting of [[...]] is deprecated", '[');
#endif
        }
        break;
      }
#endif
      case ']': {
        if (skip_sep(ls) == sep) {
          save_and_next(ls);  /* skip 2nd `]' */
#if defined(LUA_COMPAT_LSTR) && LUA_COMPAT_LSTR == 2
          cont--;
          if (sep == 0 && cont >= 0) break;  /* closes a nested level only */
#endif
          finished = 1;  /* matching closing bracket: leave the loop */
        }
        break;
      }
      case '\n':
      case '\r': {
        save(ls, '\n');
        inclinenumber(ls);
        if (seminfo == NULL)
          luaZ_resetbuffer(ls->buff);  /* comment: avoid wasting space */
        break;
      }
      default: {
        if (seminfo == NULL)
          next(ls);
        else
          save_and_next(ls);
        break;
      }
    }
  }
  if (seminfo != NULL) {
    /* strip the opening and closing brackets, (2 + sep) chars each */
    size_t delim = 2 + sep;
    seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + delim,
                                     luaZ_bufflen(ls->buff) - 2*delim);
  }
}
+ − 274
+ − 275
+ − 276
/*
** Handle one escape sequence inside a quoted string.  On entry the
** current character is the backslash, which is skipped and not saved.
** Letter escapes save the corresponding control character; a backslash
** followed by a line break saves '\n' and counts the line; up to three
** decimal digits are saved as the character with that code (an error is
** raised when the code is larger than UCHAR_MAX); any other character is
** saved as itself.  At end of input nothing is saved: the caller's loop
** sees EOZ and reports the error.
*/
static void read_escape (LexState *ls) {
  int c;
  next(ls);  /* do not save the `\' */
  switch (ls->current) {
    case 'a': c = '\a'; break;
    case 'b': c = '\b'; break;
    case 'f': c = '\f'; break;
    case 'n': c = '\n'; break;
    case 'r': c = '\r'; break;
    case 't': c = '\t'; break;
    case 'v': c = '\v'; break;
    case '\n':
    case '\r': {
      save(ls, '\n');
      inclinenumber(ls);
      return;
    }
    case EOZ:
      return;  /* the caller raises the error */
    default: {
      if (isdigit(ls->current)) {  /* \xxx */
        int ndigits = 0;
        c = 0;
        while (ndigits < 3 && isdigit(ls->current)) {
          c = 10*c + (ls->current-'0');
          next(ls);
          ndigits++;
        }
        if (c > UCHAR_MAX)
          luaX_lexerror(ls, "escape sequence too large", TK_STRING);
        save(ls, c);
      }
      else
        save_and_next(ls);  /* handles \\, \", \', and \? */
      return;
    }
  }
  save(ls, c);
  next(ls);
}


/*
** Read a quoted string delimited by `del' (the current character on
** entry) and store its content, without the delimiters, in `seminfo->ts'.
** An unescaped line break or the end of input inside the string raises
** an "unfinished string" error.
*/
static void read_string (LexState *ls, int del, SemInfo *seminfo) {
  save_and_next(ls);  /* keep the opening delimiter (stripped at the end) */
  while (ls->current != del) {
    int ch = ls->current;
    if (ch == EOZ)
      luaX_lexerror(ls, "unfinished string", TK_EOS);
    else if (ch == '\n' || ch == '\r')
      luaX_lexerror(ls, "unfinished string", TK_STRING);
    else if (ch == '\\')
      read_escape(ls);
    else
      save_and_next(ls);
  }
  save_and_next(ls);  /* skip delimiter */
  seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
                                   luaZ_bufflen(ls->buff) - 2);
}
+ − 330
+ − 331
+ − 332
/*
** Skip a comment.  On entry both dashes have been consumed.  A comment
** that starts with a valid long-bracket opening is read (and discarded)
** as a long comment; anything else runs up to, but not including, the
** next line break or the end of input.
*/
static void skip_comment (LexState *ls) {
  if (ls->current == '[') {
    int sep = skip_sep(ls);
    luaZ_resetbuffer(ls->buff);  /* `skip_sep' may dirty the buffer */
    if (sep >= 0) {
      read_long_string(ls, NULL, sep);  /* long comment */
      luaZ_resetbuffer(ls->buff);
      return;
    }
  }
  /* short comment */
  while (ls->current != EOZ && !currIsNewline(ls))
    next(ls);
}


/*
** Scan an operator that is either a single character or that character
** followed by `='.  Consumes the operator and returns `plain' for the
** one-character form and `with_eq' for the two-character form.
*/
static int check_eq_suffix (LexState *ls, int plain, int with_eq) {
  next(ls);
  if (ls->current != '=')
    return plain;
  next(ls);
  return with_eq;
}


/*
** Scan and return the next token, skipping white space, line breaks and
** comments.  Single-character tokens are returned as their own character
** code; names, strings and numbers additionally store their value in
** `seminfo'.  Returns TK_EOS at the end of the input.
*/
static int llex (LexState *ls, SemInfo *seminfo) {
  luaZ_resetbuffer(ls->buff);
  for (;;) {
    switch (ls->current) {
      case '\n':
      case '\r': {
        inclinenumber(ls);
        continue;
      }
      case '-': {
        next(ls);
        if (ls->current != '-')
          return '-';
        next(ls);  /* second dash: it is a comment */
        skip_comment(ls);
        continue;
      }
      case '[': {
        int sep = skip_sep(ls);
        if (sep >= 0) {
          read_long_string(ls, seminfo, sep);
          return TK_STRING;
        }
        if (sep == -1)
          return '[';
        /* `luaX_lexerror' ends in `luaD_throw', which is expected not to
           return; otherwise control would fall into the next case */
        luaX_lexerror(ls, "invalid long string delimiter", TK_STRING);
      }
      case '=':
        return check_eq_suffix(ls, '=', TK_EQ);
      case '<':
        return check_eq_suffix(ls, '<', TK_LE);
      case '>':
        return check_eq_suffix(ls, '>', TK_GE);
      case '~':
        return check_eq_suffix(ls, '~', TK_NE);
      case '"':
      case '\'': {
        read_string(ls, ls->current, seminfo);
        return TK_STRING;
      }
      case '.': {
        save_and_next(ls);
        if (check_next(ls, "."))
          return check_next(ls, ".") ? TK_DOTS : TK_CONCAT;  /* ... or .. */
        if (!isdigit(ls->current))
          return '.';
        read_numeral(ls, seminfo);  /* numeral starting with a dot */
        return TK_NUMBER;
      }
      case EOZ:
        return TK_EOS;
      default: {
        if (isspace(ls->current)) {
          lua_assert(!currIsNewline(ls));
          next(ls);
          continue;
        }
        if (isdigit(ls->current)) {
          read_numeral(ls, seminfo);
          return TK_NUMBER;
        }
        if (isalpha(ls->current) || ls->current == '_') {
          /* identifier or reserved word */
          TString *ts;
          do {
            save_and_next(ls);
          } while (isalnum(ls->current) || ls->current == '_');
          ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
                                  luaZ_bufflen(ls->buff));
          if (ts->tsv.reserved > 0)  /* reserved word? */
            return ts->tsv.reserved - 1 + FIRST_RESERVED;
          seminfo->ts = ts;
          return TK_NAME;
        }
        {  /* single-char tokens (+ - / ...) */
          int c = ls->current;
          next(ls);
          return c;
        }
      }
    }
  }
}
+ − 444
+ − 445
+ − 446
/*
** Advance to the next token: remember the line of the previous one in
** `lastline', then either promote the pending look-ahead token to the
** current token or, when there is none, scan a new token.
*/
void luaX_next (LexState *ls) {
  ls->lastline = ls->linenumber;
  if (ls->lookahead.token == TK_EOS) {  /* no look-ahead token? */
    ls->t.token = llex(ls, &ls->t.seminfo);  /* read next token */
    return;
  }
  ls->t = ls->lookahead;  /* use the look-ahead token */
  ls->lookahead.token = TK_EOS;  /* and discharge it */
}
+ − 455
+ − 456
+ − 457
/*
** Scan one token ahead and keep it in `ls->lookahead'.  There must be no
** pending look-ahead token when this is called.
*/
void luaX_lookahead (LexState *ls) {
  int tok;
  lua_assert(ls->lookahead.token == TK_EOS);
  tok = llex(ls, &ls->lookahead.seminfo);
  ls->lookahead.token = tok;
}
+ − 461