diff options
| author | Uneven Prankster <unevenprankster@protonmail.com> | 2023-07-12 13:22:29 -0300 |
|---|---|---|
| committer | Uneven Prankster <unevenprankster@protonmail.com> | 2023-07-12 13:22:29 -0300 |
| commit | fa2bdd711212ba6b7a94a20971e8bfa281e73296 (patch) | |
| tree | 6713b3c0379507d49558287b71dd360ce188a2f0 /tinycc/tccpp.c | |
lol
Diffstat (limited to 'tinycc/tccpp.c')
| -rw-r--r-- | tinycc/tccpp.c | 4007 |
1 files changed, 4007 insertions, 0 deletions
diff --git a/tinycc/tccpp.c b/tinycc/tccpp.c new file mode 100644 index 0000000..b95c63d --- /dev/null +++ b/tinycc/tccpp.c @@ -0,0 +1,4007 @@ +/* + * TCC - Tiny C Compiler + * + * Copyright (c) 2001-2004 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define USING_GLOBALS +#include "tcc.h" + +/* #define to 1 to enable (see parse_pp_string()) */ +#define ACCEPT_LF_IN_STRINGS 0 + +/********************************************************/ +/* global variables */ + +ST_DATA int tok_flags; +ST_DATA int parse_flags; + +ST_DATA struct BufferedFile *file; +ST_DATA int tok; +ST_DATA CValue tokc; +ST_DATA const int *macro_ptr; +ST_DATA CString tokcstr; /* current parsed string, if any */ + +/* display benchmark infos */ +ST_DATA int tok_ident; +ST_DATA TokenSym **table_ident; + +/* ------------------------------------------------------------------------- */ + +static TokenSym *hash_ident[TOK_HASH_SIZE]; +static char token_buf[STRING_MAX_SIZE + 1]; +static CString cstr_buf; +static TokenString tokstr_buf; +static unsigned char isidnum_table[256 - CH_EOF]; +static int pp_debug_tok, pp_debug_symv; +static int pp_once; +static int pp_expr; +static int pp_counter; +static void tok_print(const char *msg, const int *str); + +static struct TinyAlloc *toksym_alloc; +static struct TinyAlloc *tokstr_alloc; + +static TokenString *macro_stack; + +static const char tcc_keywords[] = +#define DEF(id, str) str "\0" +#include "tcctok.h" +#undef DEF +; + +/* WARNING: the content of this string encodes token numbers */ +static const unsigned char tok_two_chars[] = +/* outdated -- gr + "<=\236>=\235!=\225&&\240||\241++\244--\242==\224<<\1>>\2+=\253" + "-=\255*=\252/=\257%=\245&=\246^=\336|=\374->\313..\250##\266"; +*/{ + '<','=', TOK_LE, + '>','=', TOK_GE, + '!','=', TOK_NE, + '&','&', TOK_LAND, + '|','|', TOK_LOR, + '+','+', TOK_INC, + '-','-', TOK_DEC, + '=','=', TOK_EQ, + '<','<', TOK_SHL, + '>','>', TOK_SAR, + '+','=', TOK_A_ADD, + '-','=', TOK_A_SUB, + '*','=', TOK_A_MUL, + '/','=', TOK_A_DIV, + '%','=', TOK_A_MOD, + '&','=', TOK_A_AND, + '^','=', TOK_A_XOR, + '|','=', TOK_A_OR, + '-','>', TOK_ARROW, + '.','.', TOK_TWODOTS, + '#','#', TOK_TWOSHARPS, + '#','#', TOK_PPJOIN, + 0 +}; + +static void next_nomacro(void); + +ST_FUNC void skip(int c) +{ + if (tok != c) + tcc_error("'%c' expected (got \"%s\")", c, get_tok_str(tok, &tokc)); + next(); +} + +ST_FUNC void expect(const char *msg) +{ + tcc_error("%s expected", msg); +} + +/* ------------------------------------------------------------------------- */ +/* Custom allocator for tiny objects */ + +#define USE_TAL + +#ifndef USE_TAL +#define tal_free(al, p) tcc_free(p) +#define tal_realloc(al, p, size) tcc_realloc(p, size) +#define tal_new(a,b,c) +#define tal_delete(a) +#else +#if !defined(MEM_DEBUG) +#define tal_free(al, p) tal_free_impl(al, p) +#define tal_realloc(al, p, size) tal_realloc_impl(&al, p, size) +#define TAL_DEBUG_PARAMS +#else +#define TAL_DEBUG 1 +//#define TAL_INFO 1 /* collect and dump allocators stats */ +#define tal_free(al, p) tal_free_impl(al, p, __FILE__, __LINE__) +#define tal_realloc(al, p, size) tal_realloc_impl(&al, p, size, __FILE__, __LINE__) +#define TAL_DEBUG_PARAMS , const char *file, int line +#define TAL_DEBUG_FILE_LEN 40 +#endif + +#define TOKSYM_TAL_SIZE (768 * 1024) /* allocator for tiny TokenSym in table_ident */ +#define TOKSTR_TAL_SIZE (768 * 1024) /* allocator for tiny TokenString instances */ +#define CSTR_TAL_SIZE (256 * 1024) /* allocator for tiny CString instances */ +#define TOKSYM_TAL_LIMIT 256 /* prefer unique limits to distinguish allocators debug msgs */ +#define TOKSTR_TAL_LIMIT 128 /* 32 * sizeof(int) */ +#define CSTR_TAL_LIMIT 1024 + +typedef struct TinyAlloc { + unsigned limit; + unsigned size; + uint8_t *buffer; + uint8_t *p; + unsigned nb_allocs; + struct TinyAlloc *next, *top; +#ifdef TAL_INFO + unsigned nb_peak; + unsigned nb_total; + unsigned nb_missed; + uint8_t *peak_p; +#endif +} TinyAlloc; + +typedef struct tal_header_t { + unsigned size; +#ifdef TAL_DEBUG + int line_num; /* negative line_num used for double free check */ + char file_name[TAL_DEBUG_FILE_LEN + 1]; +#endif +} tal_header_t; + +/* ------------------------------------------------------------------------- */ + +static TinyAlloc *tal_new(TinyAlloc **pal, unsigned limit, unsigned size) +{ + TinyAlloc *al = tcc_mallocz(sizeof(TinyAlloc)); + al->p = al->buffer = tcc_malloc(size); + al->limit = limit; + al->size = size; + if (pal) *pal = al; + return al; +} + +static void tal_delete(TinyAlloc *al) +{ + TinyAlloc *next; + +tail_call: + if (!al) + return; +#ifdef TAL_INFO + fprintf(stderr, "limit=%5d, size=%5g MB, nb_peak=%6d, nb_total=%8d, nb_missed=%6d, usage=%5.1f%%\n", + al->limit, al->size / 1024.0 / 1024.0, al->nb_peak, al->nb_total, al->nb_missed, + (al->peak_p - al->buffer) * 100.0 / al->size); +#endif +#ifdef TAL_DEBUG + if (al->nb_allocs > 0) { + uint8_t *p; + fprintf(stderr, "TAL_DEBUG: memory leak %d chunk(s) (limit= %d)\n", + al->nb_allocs, al->limit); + p = al->buffer; + while (p < al->p) { + tal_header_t *header = (tal_header_t *)p; + if (header->line_num > 0) { + fprintf(stderr, "%s:%d: chunk of %d bytes leaked\n", + header->file_name, header->line_num, header->size); + } + p += header->size + sizeof(tal_header_t); + } +#if MEM_DEBUG-0 == 2 + exit(2); +#endif + } +#endif + next = al->next; + tcc_free(al->buffer); + tcc_free(al); + al = next; + goto tail_call; +} + +static void tal_free_impl(TinyAlloc *al, void *p TAL_DEBUG_PARAMS) +{ + if (!p) + return; +tail_call: + if (al->buffer <= (uint8_t *)p && (uint8_t *)p < al->buffer + al->size) { +#ifdef TAL_DEBUG + tal_header_t *header = (((tal_header_t *)p) - 1); + if (header->line_num < 0) { + fprintf(stderr, "%s:%d: TAL_DEBUG: double frees chunk from\n", + file, line); + fprintf(stderr, "%s:%d: %d bytes\n", + header->file_name, (int)-header->line_num, (int)header->size); + } else + header->line_num = -header->line_num; +#endif + al->nb_allocs--; + if (!al->nb_allocs) + al->p = al->buffer; + } else if (al->next) { + al = al->next; + goto tail_call; + } + else + tcc_free(p); +} + +static void *tal_realloc_impl(TinyAlloc **pal, void *p, unsigned size TAL_DEBUG_PARAMS) +{ + tal_header_t *header; + void *ret; + int is_own; + unsigned adj_size = (size + 3) & -4; + TinyAlloc *al = *pal; + +tail_call: + is_own = (al->buffer <= (uint8_t *)p && (uint8_t *)p < al->buffer + al->size); + if ((!p || is_own) && size <= al->limit) { + if (al->p - al->buffer + adj_size + sizeof(tal_header_t) < al->size) { + header = (tal_header_t *)al->p; + header->size = adj_size; +#ifdef TAL_DEBUG + { int ofs = strlen(file) - TAL_DEBUG_FILE_LEN; + strncpy(header->file_name, file + (ofs > 0 ? ofs : 0), TAL_DEBUG_FILE_LEN); + header->file_name[TAL_DEBUG_FILE_LEN] = 0; + header->line_num = line; } +#endif + ret = al->p + sizeof(tal_header_t); + al->p += adj_size + sizeof(tal_header_t); + if (is_own) { + header = (((tal_header_t *)p) - 1); + if (p) memcpy(ret, p, header->size); +#ifdef TAL_DEBUG + header->line_num = -header->line_num; +#endif + } else { + al->nb_allocs++; + } +#ifdef TAL_INFO + if (al->nb_peak < al->nb_allocs) + al->nb_peak = al->nb_allocs; + if (al->peak_p < al->p) + al->peak_p = al->p; + al->nb_total++; +#endif + return ret; + } else if (is_own) { + al->nb_allocs--; + ret = tal_realloc(*pal, 0, size); + header = (((tal_header_t *)p) - 1); + if (p) memcpy(ret, p, header->size); +#ifdef TAL_DEBUG + header->line_num = -header->line_num; +#endif + return ret; + } + if (al->next) { + al = al->next; + } else { + TinyAlloc *bottom = al, *next = al->top ? al->top : al; + + al = tal_new(pal, next->limit, next->size * 2); + al->next = next; + bottom->top = al; + } + goto tail_call; + } + if (is_own) { + al->nb_allocs--; + ret = tcc_malloc(size); + header = (((tal_header_t *)p) - 1); + if (p) memcpy(ret, p, header->size); +#ifdef TAL_DEBUG + header->line_num = -header->line_num; +#endif + } else if (al->next) { + al = al->next; + goto tail_call; + } else + ret = tcc_realloc(p, size); +#ifdef TAL_INFO + al->nb_missed++; +#endif + return ret; +} + +#endif /* USE_TAL */ + +/* ------------------------------------------------------------------------- */ +/* CString handling */ +static void cstr_realloc(CString *cstr, int new_size) +{ + int size; + + size = cstr->size_allocated; + if (size < 8) + size = 8; /* no need to allocate a too small first string */ + while (size < new_size) + size = size * 2; + cstr->data = tcc_realloc(cstr->data, size); + cstr->size_allocated = size; +} + +/* add a byte */ +ST_INLN void cstr_ccat(CString *cstr, int ch) +{ + int size; + size = cstr->size + 1; + if (size > cstr->size_allocated) + cstr_realloc(cstr, size); + ((unsigned char *)cstr->data)[size - 1] = ch; + cstr->size = size; +} + +ST_INLN char *unicode_to_utf8 (char *b, uint32_t Uc) +{ + if (Uc<0x80) *b++=Uc; + else if (Uc<0x800) *b++=192+Uc/64, *b++=128+Uc%64; + else if (Uc-0xd800u<0x800) goto error; + else if (Uc<0x10000) *b++=224+Uc/4096, *b++=128+Uc/64%64, *b++=128+Uc%64; + else if (Uc<0x110000) *b++=240+Uc/262144, *b++=128+Uc/4096%64, *b++=128+Uc/64%64, *b++=128+Uc%64; + else error: tcc_error("0x%x is not a valid universal character", Uc); + return b; +} + +/* add a unicode character expanded into utf8 */ +ST_INLN void cstr_u8cat(CString *cstr, int ch) +{ + char buf[4], *e; + e = unicode_to_utf8(buf, (uint32_t)ch); + cstr_cat(cstr, buf, e - buf); +} + +ST_FUNC void cstr_cat(CString *cstr, const char *str, int len) +{ + int size; + if (len <= 0) + len = strlen(str) + 1 + len; + size = cstr->size + len; + if (size > cstr->size_allocated) + cstr_realloc(cstr, size); + memmove(((unsigned char *)cstr->data) + cstr->size, str, len); + cstr->size = size; +} + +/* add a wide char */ +ST_FUNC void cstr_wccat(CString *cstr, int ch) +{ + int size; + size = cstr->size + sizeof(nwchar_t); + if (size > cstr->size_allocated) + cstr_realloc(cstr, size); + *(nwchar_t *)(((unsigned char *)cstr->data) + size - sizeof(nwchar_t)) = ch; + cstr->size = size; +} + +ST_FUNC void cstr_new(CString *cstr) +{ + memset(cstr, 0, sizeof(CString)); +} + +/* free string and reset it to NULL */ +ST_FUNC void cstr_free(CString *cstr) +{ + tcc_free(cstr->data); +} + +/* reset string to empty */ +ST_FUNC void cstr_reset(CString *cstr) +{ + cstr->size = 0; +} + +ST_FUNC int cstr_vprintf(CString *cstr, const char *fmt, va_list ap) +{ + va_list v; + int len, size = 80; + for (;;) { + size += cstr->size; + if (size > cstr->size_allocated) + cstr_realloc(cstr, size); + size = cstr->size_allocated - cstr->size; + va_copy(v, ap); + len = vsnprintf((char*)cstr->data + cstr->size, size, fmt, v); + va_end(v); + if (len >= 0 && len < size) + break; + size *= 2; + } + cstr->size += len; + return len; +} + +ST_FUNC int cstr_printf(CString *cstr, const char *fmt, ...) +{ + va_list ap; int len; + va_start(ap, fmt); + len = cstr_vprintf(cstr, fmt, ap); + va_end(ap); + return len; +} + +/* XXX: unicode ? */ +static void add_char(CString *cstr, int c) +{ + if (c == '\'' || c == '\"' || c == '\\') { + /* XXX: could be more precise if char or string */ + cstr_ccat(cstr, '\\'); + } + if (c >= 32 && c <= 126) { + cstr_ccat(cstr, c); + } else { + cstr_ccat(cstr, '\\'); + if (c == '\n') { + cstr_ccat(cstr, 'n'); + } else { + cstr_ccat(cstr, '0' + ((c >> 6) & 7)); + cstr_ccat(cstr, '0' + ((c >> 3) & 7)); + cstr_ccat(cstr, '0' + (c & 7)); + } + } +} + +/* ------------------------------------------------------------------------- */ +/* allocate a new token */ +static TokenSym *tok_alloc_new(TokenSym **pts, const char *str, int len) +{ + TokenSym *ts, **ptable; + int i; + + if (tok_ident >= SYM_FIRST_ANOM) + tcc_error("memory full (symbols)"); + + /* expand token table if needed */ + i = tok_ident - TOK_IDENT; + if ((i % TOK_ALLOC_INCR) == 0) { + ptable = tcc_realloc(table_ident, (i + TOK_ALLOC_INCR) * sizeof(TokenSym *)); + table_ident = ptable; + } + + ts = tal_realloc(toksym_alloc, 0, sizeof(TokenSym) + len); + table_ident[i] = ts; + ts->tok = tok_ident++; + ts->sym_define = NULL; + ts->sym_label = NULL; + ts->sym_struct = NULL; + ts->sym_identifier = NULL; + ts->len = len; + ts->hash_next = NULL; + memcpy(ts->str, str, len); + ts->str[len] = '\0'; + *pts = ts; + return ts; +} + +#define TOK_HASH_INIT 1 +#define TOK_HASH_FUNC(h, c) ((h) + ((h) << 5) + ((h) >> 27) + (c)) + + +/* find a token and add it if not found */ +ST_FUNC TokenSym *tok_alloc(const char *str, int len) +{ + TokenSym *ts, **pts; + int i; + unsigned int h; + + h = TOK_HASH_INIT; + for(i=0;i<len;i++) + h = TOK_HASH_FUNC(h, ((unsigned char *)str)[i]); + h &= (TOK_HASH_SIZE - 1); + + pts = &hash_ident[h]; + for(;;) { + ts = *pts; + if (!ts) + break; + if (ts->len == len && !memcmp(ts->str, str, len)) + return ts; + pts = &(ts->hash_next); + } + return tok_alloc_new(pts, str, len); +} + +ST_FUNC int tok_alloc_const(const char *str) +{ + return tok_alloc(str, strlen(str))->tok; +} + + +/* XXX: buffer overflow */ +/* XXX: float tokens */ +ST_FUNC const char *get_tok_str(int v, CValue *cv) +{ + char *p; + int i, len; + + cstr_reset(&cstr_buf); + p = cstr_buf.data; + + switch(v) { + case TOK_CINT: + case TOK_CUINT: + case TOK_CLONG: + case TOK_CULONG: + case TOK_CLLONG: + case TOK_CULLONG: + /* XXX: not quite exact, but only useful for testing */ +#ifdef _WIN32 + sprintf(p, "%u", (unsigned)cv->i); +#else + sprintf(p, "%llu", (unsigned long long)cv->i); +#endif + break; + case TOK_LCHAR: + cstr_ccat(&cstr_buf, 'L'); + case TOK_CCHAR: + cstr_ccat(&cstr_buf, '\''); + add_char(&cstr_buf, cv->i); + cstr_ccat(&cstr_buf, '\''); + cstr_ccat(&cstr_buf, '\0'); + break; + case TOK_PPNUM: + case TOK_PPSTR: + return (char*)cv->str.data; + case TOK_LSTR: + cstr_ccat(&cstr_buf, 'L'); + case TOK_STR: + cstr_ccat(&cstr_buf, '\"'); + if (v == TOK_STR) { + len = cv->str.size - 1; + for(i=0;i<len;i++) + add_char(&cstr_buf, ((unsigned char *)cv->str.data)[i]); + } else { + len = (cv->str.size / sizeof(nwchar_t)) - 1; + for(i=0;i<len;i++) + add_char(&cstr_buf, ((nwchar_t *)cv->str.data)[i]); + } + cstr_ccat(&cstr_buf, '\"'); + cstr_ccat(&cstr_buf, '\0'); + break; + + case TOK_CFLOAT: + return strcpy(p, "<float>"); + case TOK_CDOUBLE: + return strcpy(p, "<double>"); + case TOK_CLDOUBLE: + return strcpy(p, "<long double>"); + case TOK_LINENUM: + return strcpy(p, "<linenumber"); + + /* above tokens have value, the ones below don't */ + case TOK_LT: + v = '<'; + goto addv; + case TOK_GT: + v = '>'; + goto addv; + case TOK_DOTS: + return strcpy(p, "..."); + case TOK_A_SHL: + return strcpy(p, "<<="); + case TOK_A_SAR: + return strcpy(p, ">>="); + case TOK_EOF: + return strcpy(p, "<eof>"); + case 0: /* anonymous nameless symbols */ + return strcpy(p, "<no name>"); + default: + if (v < TOK_IDENT) { + /* search in two bytes table */ + const unsigned char *q = tok_two_chars; + while (*q) { + if (q[2] == v) { + *p++ = q[0]; + *p++ = q[1]; + *p = '\0'; + return cstr_buf.data; + } + q += 3; + } + if (v >= 127 || (v < 32 && !is_space(v) && v != '\n')) { + sprintf(p, "<\\x%02x>", v); + break; + } + addv: + *p++ = v; + *p = '\0'; + } else if (v < tok_ident) { + return table_ident[v - TOK_IDENT]->str; + } else if (v >= SYM_FIRST_ANOM) { + /* special name for anonymous symbol */ + sprintf(p, "L.%u", v - SYM_FIRST_ANOM); + } else { + /* should never happen */ + return NULL; + } + break; + } + return cstr_buf.data; +} + +static inline int check_space(int t, int *spc) +{ + if (t < 256 && (isidnum_table[t - CH_EOF] & IS_SPC)) { + if (*spc) + return 1; + *spc = 1; + } else + *spc = 0; + return 0; +} + +/* return the current character, handling end of block if necessary + (but not stray) */ +static int handle_eob(void) +{ + BufferedFile *bf = file; + int len; + + /* only tries to read if really end of buffer */ + if (bf->buf_ptr >= bf->buf_end) { + if (bf->fd >= 0) { +#if defined(PARSE_DEBUG) + len = 1; +#else + len = IO_BUF_SIZE; +#endif + len = read(bf->fd, bf->buffer, len); + if (len < 0) + len = 0; + } else { + len = 0; + } + total_bytes += len; + bf->buf_ptr = bf->buffer; + bf->buf_end = bf->buffer + len; + *bf->buf_end = CH_EOB; + } + if (bf->buf_ptr < bf->buf_end) { + return bf->buf_ptr[0]; + } else { + bf->buf_ptr = bf->buf_end; + return CH_EOF; + } +} + +/* read next char from current input file and handle end of input buffer */ +static int next_c(void) +{ + int ch = *++file->buf_ptr; + /* end of buffer/file handling */ + if (ch == CH_EOB && file->buf_ptr >= file->buf_end) + ch = handle_eob(); + return ch; +} + +/* input with '\[\r]\n' handling. */ +static int handle_stray_noerror(int err) +{ + int ch; + while ((ch = next_c()) == '\\') { + ch = next_c(); + if (ch == '\n') { + newl: + file->line_num++; + } else { + if (ch == '\r') { + ch = next_c(); + if (ch == '\n') + goto newl; + *--file->buf_ptr = '\r'; + } + if (err) + tcc_error("stray '\\' in program"); + /* may take advantage of 'BufferedFile.unget[4}' */ + return *--file->buf_ptr = '\\'; + } + } + return ch; +} + +#define ninp() handle_stray_noerror(0) + +/* handle '\\' in strings, comments and skipped regions */ +static int handle_bs(uint8_t **p) +{ + int c; + file->buf_ptr = *p - 1; + c = ninp(); + *p = file->buf_ptr; + return c; +} + +/* skip the stray and handle the \\n case. Output an error if + incorrect char after the stray */ +static int handle_stray(uint8_t **p) +{ + int c; + file->buf_ptr = *p - 1; + c = handle_stray_noerror(!(parse_flags & PARSE_FLAG_ACCEPT_STRAYS)); + *p = file->buf_ptr; + return c; +} + +/* handle the complicated stray case */ +#define PEEKC(c, p)\ +{\ + c = *++p;\ + if (c == '\\')\ + c = handle_stray(&p); \ +} + +static int skip_spaces(void) +{ + int ch; + --file->buf_ptr; + do { + ch = ninp(); + } while (isidnum_table[ch - CH_EOF] & IS_SPC); + return ch; +} + +/* single line C++ comments */ +static uint8_t *parse_line_comment(uint8_t *p) +{ + int c; + for(;;) { + for (;;) { + c = *++p; + redo: + if (c == '\n' || c == '\\') + break; + c = *++p; + if (c == '\n' || c == '\\') + break; + } + if (c == '\n') + break; + c = handle_bs(&p); + if (c == CH_EOF) + break; + if (c != '\\') + goto redo; + } + return p; +} + +/* C comments */ +static uint8_t *parse_comment(uint8_t *p) +{ + int c; + for(;;) { + /* fast skip loop */ + for(;;) { + c = *++p; + redo: + if (c == '\n' || c == '*' || c == '\\') + break; + c = *++p; + if (c == '\n' || c == '*' || c == '\\') + break; + } + /* now we can handle all the cases */ + if (c == '\n') { + file->line_num++; + } else if (c == '*') { + do { + c = *++p; + } while (c == '*'); + if (c == '\\') + c = handle_bs(&p); + if (c == '/') + break; + goto check_eof; + } else { + c = handle_bs(&p); + check_eof: + if (c == CH_EOF) + tcc_error("unexpected end of file in comment"); + if (c != '\\') + goto redo; + } + } + return p + 1; +} + +/* parse a string without interpreting escapes */ +static uint8_t *parse_pp_string(uint8_t *p, int sep, CString *str) +{ + int c; + for(;;) { + c = *++p; + redo: + if (c == sep) { + break; + } else if (c == '\\') { + c = handle_bs(&p); + if (c == CH_EOF) { + unterminated_string: + /* XXX: indicate line number of start of string */ + tok_flags &= ~TOK_FLAG_BOL; + tcc_error("missing terminating %c character", sep); + } else if (c == '\\') { + if (str) + cstr_ccat(str, c); + c = *++p; + /* add char after '\\' unconditionally */ + if (c == '\\') { + c = handle_bs(&p); + if (c == CH_EOF) + goto unterminated_string; + } + goto add_char; + } else { + goto redo; + } + } else if (c == '\n') { + add_lf: + if (ACCEPT_LF_IN_STRINGS) { + file->line_num++; + goto add_char; + } else if (str) { /* not skipping */ + goto unterminated_string; + } else { + //tcc_warning("missing terminating %c character", sep); + return p; + } + } else if (c == '\r') { + c = *++p; + if (c == '\\') + c = handle_bs(&p); + if (c == '\n') + goto add_lf; + if (c == CH_EOF) + goto unterminated_string; + if (str) + cstr_ccat(str, '\r'); + goto redo; + } else { + add_char: + if (str) + cstr_ccat(str, c); + } + } + p++; + return p; +} + +/* skip block of text until #else, #elif or #endif. skip also pairs of + #if/#endif */ +static void preprocess_skip(void) +{ + int a, start_of_line, c, in_warn_or_error; + uint8_t *p; + + p = file->buf_ptr; + a = 0; +redo_start: + start_of_line = 1; + in_warn_or_error = 0; + for(;;) { + redo_no_start: + c = *p; + switch(c) { + case ' ': + case '\t': + case '\f': + case '\v': + case '\r': + p++; + goto redo_no_start; + case '\n': + file->line_num++; + p++; + goto redo_start; + case '\\': + c = handle_bs(&p); + if (c == CH_EOF) + expect("#endif"); + if (c == '\\') + ++p; + goto redo_no_start; + /* skip strings */ + case '\"': + case '\'': + if (in_warn_or_error) + goto _default; + tok_flags &= ~TOK_FLAG_BOL; + p = parse_pp_string(p, c, NULL); + break; + /* skip comments */ + case '/': + if (in_warn_or_error) + goto _default; + ++p; + c = handle_bs(&p); + if (c == '*') { + p = parse_comment(p); + } else if (c == '/') { + p = parse_line_comment(p); + } + break; + case '#': + p++; + if (start_of_line) { + file->buf_ptr = p; + next_nomacro(); + p = file->buf_ptr; + if (a == 0 && + (tok == TOK_ELSE || tok == TOK_ELIF || tok == TOK_ENDIF)) + goto the_end; + if (tok == TOK_IF || tok == TOK_IFDEF || tok == TOK_IFNDEF) + a++; + else if (tok == TOK_ENDIF) + a--; + else if( tok == TOK_ERROR || tok == TOK_WARNING) + in_warn_or_error = 1; + else if (tok == TOK_LINEFEED) + goto redo_start; + else if (parse_flags & PARSE_FLAG_ASM_FILE) + p = parse_line_comment(p - 1); + } +#if !defined(TCC_TARGET_ARM) + else if (parse_flags & PARSE_FLAG_ASM_FILE) + p = parse_line_comment(p - 1); +#else + /* ARM assembly uses '#' for constants */ +#endif + break; +_default: + default: + p++; + break; + } + start_of_line = 0; + } + the_end: ; + file->buf_ptr = p; +} + +#if 0 +/* return the number of additional 'ints' necessary to store the + token */ +static inline int tok_size(const int *p) +{ + switch(*p) { + /* 4 bytes */ + case TOK_CINT: + case TOK_CUINT: + case TOK_CCHAR: + case TOK_LCHAR: + case TOK_CFLOAT: + case TOK_LINENUM: + return 1 + 1; + case TOK_STR: + case TOK_LSTR: + case TOK_PPNUM: + case TOK_PPSTR: + return 1 + ((sizeof(CString) + ((CString *)(p+1))->size + 3) >> 2); + case TOK_CLONG: + case TOK_CULONG: + return 1 + LONG_SIZE / 4; + case TOK_CDOUBLE: + case TOK_CLLONG: + case TOK_CULLONG: + return 1 + 2; + case TOK_CLDOUBLE: +#ifdef TCC_USING_DOUBLE_FOR_LDOUBLE + return 1 + 8 / 4; +#else + return 1 + LDOUBLE_SIZE / 4; +#endif + default: + return 1 + 0; + } +} +#endif + +/* token string handling */ +ST_INLN void tok_str_new(TokenString *s) +{ + s->str = NULL; + s->len = s->lastlen = 0; + s->allocated_len = 0; + s->last_line_num = -1; +} + +ST_FUNC TokenString *tok_str_alloc(void) +{ + TokenString *str = tal_realloc(tokstr_alloc, 0, sizeof *str); + tok_str_new(str); + return str; +} + +ST_FUNC int *tok_str_dup(TokenString *s) +{ + int *str; + + str = tal_realloc(tokstr_alloc, 0, s->len * sizeof(int)); + memcpy(str, s->str, s->len * sizeof(int)); + return str; +} + +ST_FUNC void tok_str_free_str(int *str) +{ + tal_free(tokstr_alloc, str); +} + +ST_FUNC void tok_str_free(TokenString *str) +{ + tok_str_free_str(str->str); + tal_free(tokstr_alloc, str); +} + +ST_FUNC int *tok_str_realloc(TokenString *s, int new_size) +{ + int *str, size; + + size = s->allocated_len; + if (size < 16) + size = 16; + while (size < new_size) + size = size * 2; + if (size > s->allocated_len) { + str = tal_realloc(tokstr_alloc, s->str, size * sizeof(int)); + s->allocated_len = size; + s->str = str; + } + return s->str; +} + +ST_FUNC void tok_str_add(TokenString *s, int t) +{ + int len, *str; + + len = s->len; + str = s->str; + if (len >= s->allocated_len) + str = tok_str_realloc(s, len + 1); + str[len++] = t; + s->len = len; +} + +ST_FUNC void begin_macro(TokenString *str, int alloc) +{ + str->alloc = alloc; + str->prev = macro_stack; + str->prev_ptr = macro_ptr; + str->save_line_num = file->line_num; + macro_ptr = str->str; + macro_stack = str; +} + +ST_FUNC void end_macro(void) +{ + TokenString *str = macro_stack; + macro_stack = str->prev; + macro_ptr = str->prev_ptr; + file->line_num = str->save_line_num; + str->len = 0; /* matters if str not alloced, may be tokstr_buf */ + if (str->alloc != 0) { + if (str->alloc == 2) + str->str = NULL; /* don't free */ + tok_str_free(str); + } +} + +static void tok_str_add2(TokenString *s, int t, CValue *cv) +{ + int len, *str; + + len = s->lastlen = s->len; + str = s->str; + + /* allocate space for worst case */ + if (len + TOK_MAX_SIZE >= s->allocated_len) + str = tok_str_realloc(s, len + TOK_MAX_SIZE + 1); + str[len++] = t; + switch(t) { + case TOK_CINT: + case TOK_CUINT: + case TOK_CCHAR: + case TOK_LCHAR: + case TOK_CFLOAT: + case TOK_LINENUM: +#if LONG_SIZE == 4 + case TOK_CLONG: + case TOK_CULONG: +#endif + str[len++] = cv->tab[0]; + break; + case TOK_PPNUM: + case TOK_PPSTR: + case TOK_STR: + case TOK_LSTR: + { + /* Insert the string into the int array. */ + size_t nb_words = + 1 + (cv->str.size + sizeof(int) - 1) / sizeof(int); + if (len + nb_words >= s->allocated_len) + str = tok_str_realloc(s, len + nb_words + 1); + str[len] = cv->str.size; + memcpy(&str[len + 1], cv->str.data, cv->str.size); + len += nb_words; + } + break; + case TOK_CDOUBLE: + case TOK_CLLONG: + case TOK_CULLONG: +#if LONG_SIZE == 8 + case TOK_CLONG: + case TOK_CULONG: +#endif + str[len++] = cv->tab[0]; + str[len++] = cv->tab[1]; + break; + case TOK_CLDOUBLE: +#if LDOUBLE_SIZE == 8 || defined TCC_USING_DOUBLE_FOR_LDOUBLE + str[len++] = cv->tab[0]; + str[len++] = cv->tab[1]; +#elif LDOUBLE_SIZE == 12 + str[len++] = cv->tab[0]; + str[len++] = cv->tab[1]; + str[len++] = cv->tab[2]; +#elif LDOUBLE_SIZE == 16 + str[len++] = cv->tab[0]; + str[len++] = cv->tab[1]; + str[len++] = cv->tab[2]; + str[len++] = cv->tab[3]; +#else +#error add long double size support +#endif + break; + default: + break; + } + s->len = len; +} + +/* add the current parse token in token string 's' */ +ST_FUNC void tok_str_add_tok(TokenString *s) +{ + CValue cval; + + /* save line number info */ + if (file->line_num != s->last_line_num) { + s->last_line_num = file->line_num; + cval.i = s->last_line_num; + tok_str_add2(s, TOK_LINENUM, &cval); + } + tok_str_add2(s, tok, &tokc); +} + +/* get a token from an integer array and increment pointer. */ +static inline void tok_get(int *t, const int **pp, CValue *cv) +{ + const int *p = *pp; + int n, *tab; + + tab = cv->tab; + switch(*t = *p++) { +#if LONG_SIZE == 4 + case TOK_CLONG: +#endif + case TOK_CINT: + case TOK_CCHAR: + case TOK_LCHAR: + case TOK_LINENUM: + cv->i = *p++; + break; +#if LONG_SIZE == 4 + case TOK_CULONG: +#endif + case TOK_CUINT: + cv->i = (unsigned)*p++; + break; + case TOK_CFLOAT: + tab[0] = *p++; + break; + case TOK_STR: + case TOK_LSTR: + case TOK_PPNUM: + case TOK_PPSTR: + cv->str.size = *p++; + cv->str.data = p; + p += (cv->str.size + sizeof(int) - 1) / sizeof(int); + break; + case TOK_CDOUBLE: + case TOK_CLLONG: + case TOK_CULLONG: +#if LONG_SIZE == 8 + case TOK_CLONG: + case TOK_CULONG: +#endif + n = 2; + goto copy; + case TOK_CLDOUBLE: +#if LDOUBLE_SIZE == 8 || defined TCC_USING_DOUBLE_FOR_LDOUBLE + n = 2; +#elif LDOUBLE_SIZE == 12 + n = 3; +#elif LDOUBLE_SIZE == 16 + n = 4; +#else +# error add long double size support +#endif + copy: + do + *tab++ = *p++; + while (--n); + break; + default: + break; + } + *pp = p; +} + +#if 0 +# define TOK_GET(t,p,c) tok_get(t,p,c) +#else +# define TOK_GET(t,p,c) do { \ + int _t = **(p); \ + if (TOK_HAS_VALUE(_t)) \ + tok_get(t, p, c); \ + else \ + *(t) = _t, ++*(p); \ + } while (0) +#endif + +static int macro_is_equal(const int *a, const int *b) +{ + CValue cv; + int t; + + if (!a || !b) + return 1; + + while (*a && *b) { + cstr_reset(&tokcstr); + TOK_GET(&t, &a, &cv); + cstr_cat(&tokcstr, get_tok_str(t, &cv), 0); + TOK_GET(&t, &b, &cv); + if (strcmp(tokcstr.data, get_tok_str(t, &cv))) + return 0; + } + return !(*a || *b); +} + +/* defines handling */ +ST_INLN void define_push(int v, int macro_type, int *str, Sym *first_arg) +{ + Sym *s, *o; + + o = define_find(v); + s = sym_push2(&define_stack, v, macro_type, 0); + s->d = str; + s->next = first_arg; + table_ident[v - TOK_IDENT]->sym_define = s; + + if (o && !macro_is_equal(o->d, s->d)) + tcc_warning("%s redefined", get_tok_str(v, NULL)); +} + +/* undefined a define symbol. Its name is just set to zero */ +ST_FUNC void define_undef(Sym *s) +{ + int v = s->v; + if (v >= TOK_IDENT && v < tok_ident) + table_ident[v - TOK_IDENT]->sym_define = NULL; +} + +ST_INLN Sym *define_find(int v) +{ + v -= TOK_IDENT; + if ((unsigned)v >= (unsigned)(tok_ident - TOK_IDENT)) + return NULL; + return table_ident[v]->sym_define; +} + +/* free define stack until top reaches 'b' */ +ST_FUNC void free_defines(Sym *b) +{ + while (define_stack != b) { + Sym *top = define_stack; + define_stack = top->prev; + tok_str_free_str(top->d); + define_undef(top); + sym_free(top); + } +} + +/* fake the nth "#if defined test_..." for tcc -dt -run */ +static void maybe_run_test(TCCState *s) +{ + const char *p; + if (s->include_stack_ptr != s->include_stack) + return; + p = get_tok_str(tok, NULL); + if (0 != memcmp(p, "test_", 5)) + return; + if (0 != --s->run_test) + return; + fprintf(s->ppfp, &"\n[%s]\n"[!(s->dflag & 32)], p), fflush(s->ppfp); + define_push(tok, MACRO_OBJ, NULL, NULL); +} + +static CachedInclude * +search_cached_include(TCCState *s1, const char *filename, int add); + +static int parse_include(TCCState *s1, int do_next, int test) +{ + int c, i; + char name[1024], buf[1024], *p; + CachedInclude *e; + + c = skip_spaces(); + if (c == '<' || c == '\"') { + cstr_reset(&tokcstr); + file->buf_ptr = parse_pp_string(file->buf_ptr, c == '<' ? '>' : c, &tokcstr); + i = tokcstr.size; + pstrncpy(name, tokcstr.data, i >= sizeof name ? sizeof name - 1 : i); + next_nomacro(); + } else { + /* computed #include : concatenate tokens until result is one of + the two accepted forms. Don't convert pp-tokens to tokens here. */ + parse_flags = PARSE_FLAG_PREPROCESS + | PARSE_FLAG_LINEFEED + | (parse_flags & PARSE_FLAG_ASM_FILE); + name[0] = 0; + for (;;) { + next(); + p = name, i = strlen(p) - 1; + if (i > 0 + && ((p[0] == '"' && p[i] == '"') + || (p[0] == '<' && p[i] == '>'))) + break; + if (tok == TOK_LINEFEED) + tcc_error("'#include' expects \"FILENAME\" or <FILENAME>"); + pstrcat(name, sizeof name, get_tok_str(tok, &tokc)); + } + c = p[0]; + /* remove '<>|""' */ + memmove(p, p + 1, i - 1), p[i - 1] = 0; + } + + i = do_next ? file->include_next_index : -1; + for (;;) { + ++i; + if (i == 0) { + /* check absolute include path */ + if (!IS_ABSPATH(name)) + continue; + buf[0] = '\0'; + } else if (i == 1) { + /* search in file's dir if "header.h" */ + if (c != '\"') + continue; + p = file->true_filename; + pstrncpy(buf, p, tcc_basename(p) - p); + } else { + int j = i - 2, k = j - s1->nb_include_paths; + if (k < 0) + p = s1->include_paths[j]; + else if (k < s1->nb_sysinclude_paths) + p = s1->sysinclude_paths[k]; + else if (test) + return 0; + else + tcc_error("include file '%s' not found", name); + pstrcpy(buf, sizeof buf, p); + pstrcat(buf, sizeof buf, "/"); + } + pstrcat(buf, sizeof buf, name); + e = search_cached_include(s1, buf, 0); + if (e && (define_find(e->ifndef_macro) || e->once == pp_once)) { + /* no need to parse the include because the 'ifndef macro' + is defined (or had #pragma once) */ +#ifdef INC_DEBUG + printf("%s: skipping cached %s\n", file->filename, buf); +#endif + return 1; + } + if (tcc_open(s1, buf) >= 0) + break; + } + + if (test) { + tcc_close(); + } else { + if (s1->include_stack_ptr >= s1->include_stack + INCLUDE_STACK_SIZE) + tcc_error("#include recursion too deep"); + /* push previous file on stack */ + *s1->include_stack_ptr++ = file->prev; + file->include_next_index = i; +#ifdef INC_DEBUG + printf("%s: including %s\n", file->prev->filename, file->filename); +#endif + /* update target deps */ + if (s1->gen_deps) { + BufferedFile *bf = file; + while (i == 1 && (bf = bf->prev)) + i = bf->include_next_index; + /* skip system include files */ + if (s1->include_sys_deps || i - 2 < s1->nb_include_paths) + dynarray_add(&s1->target_deps, &s1->nb_target_deps, + tcc_strdup(buf)); + } + /* add include file debug info */ + tcc_debug_bincl(s1); + tok_flags |= TOK_FLAG_BOF | TOK_FLAG_BOL; + } + return 1; +} + +/* eval an expression for #if/#elif */ +static int expr_preprocess(TCCState *s1) +{ + int c, t; + TokenString *str; + + str = tok_str_alloc(); + pp_expr = 1; + while (tok != TOK_LINEFEED && tok != TOK_EOF) { + next(); /* do macro subst */ + redo: + if (tok == TOK_DEFINED) { + next_nomacro(); + t = tok; + if (t == '(') + next_nomacro(); + if (tok < TOK_IDENT) + expect("identifier"); + if (s1->run_test) + maybe_run_test(s1); + c = 0; + if (define_find(tok) + || tok == TOK___HAS_INCLUDE + || tok == TOK___HAS_INCLUDE_NEXT) + c = 1; + if (t == '(') { + next_nomacro(); + if (tok != ')') + expect("')'"); + } + tok = TOK_CINT; + tokc.i = c; + } else if (tok == TOK___HAS_INCLUDE || + tok == TOK___HAS_INCLUDE_NEXT) { + t = tok; + next_nomacro(); + if (tok != '(') + expect("("); + c = parse_include(s1, t - TOK___HAS_INCLUDE, 1); + if (tok != ')') + expect("')'"); + tok = TOK_CINT; + tokc.i = c; + } else if (tok >= TOK_IDENT) { + /* if undefined macro, replace with zero, check for func-like */ + t = tok; + tok = TOK_CINT; + tokc.i = 0; + tok_str_add_tok(str); + next(); + if (tok == '(') + tcc_error("function-like macro '%s' is not defined", + get_tok_str(t, NULL)); + goto redo; + } + tok_str_add_tok(str); + } + pp_expr = 0; + tok_str_add(str, -1); /* simulate end of file */ + tok_str_add(str, 0); + /* now evaluate C constant expression */ + begin_macro(str, 1); + next(); + c = expr_const(); + end_macro(); + return c != 0; +} + + +/* parse after #define */ +ST_FUNC void parse_define(void) +{ + Sym *s, *first, **ps; + int v, t, varg, is_vaargs, spc; + int saved_parse_flags = parse_flags; + + v = tok; + if (v < TOK_IDENT || v == TOK_DEFINED) + tcc_error("invalid macro name '%s'", get_tok_str(tok, &tokc)); + /* XXX: should check if same macro (ANSI) */ + first = NULL; + t = MACRO_OBJ; + /* We have to parse the whole define as if not in asm mode, in particular + no line comment with '#' must be ignored. Also for function + macros the argument list must be parsed without '.' being an ID + character. */ + parse_flags = ((parse_flags & ~PARSE_FLAG_ASM_FILE) | PARSE_FLAG_SPACES); + /* '(' must be just after macro definition for MACRO_FUNC */ + next_nomacro(); + parse_flags &= ~PARSE_FLAG_SPACES; + if (tok == '(') { + int dotid = set_idnum('.', 0); + next_nomacro(); + ps = &first; + if (tok != ')') for (;;) { + varg = tok; + next_nomacro(); + is_vaargs = 0; + if (varg == TOK_DOTS) { + varg = TOK___VA_ARGS__; + is_vaargs = 1; + } else if (tok == TOK_DOTS && gnu_ext) { + is_vaargs = 1; + next_nomacro(); + } + if (varg < TOK_IDENT) + bad_list: + tcc_error("bad macro parameter list"); + s = sym_push2(&define_stack, varg | SYM_FIELD, is_vaargs, 0); + *ps = s; + ps = &s->next; + if (tok == ')') + break; + if (tok != ',' || is_vaargs) + goto bad_list; + next_nomacro(); + } + parse_flags |= PARSE_FLAG_SPACES; + next_nomacro(); + t = MACRO_FUNC; + set_idnum('.', dotid); + } + + tokstr_buf.len = 0; + spc = 2; + parse_flags |= PARSE_FLAG_ACCEPT_STRAYS | PARSE_FLAG_SPACES | PARSE_FLAG_LINEFEED; + /* The body of a macro definition should be parsed such that identifiers + are parsed like the file mode determines (i.e. with '.' being an + ID character in asm mode). But '#' should be retained instead of + regarded as line comment leader, so still don't set ASM_FILE + in parse_flags. */ + while (tok != TOK_LINEFEED && tok != TOK_EOF) { + /* remove spaces around ## and after '#' */ + if (TOK_TWOSHARPS == tok) { + if (2 == spc) + goto bad_twosharp; + if (1 == spc) + --tokstr_buf.len; + spc = 3; + tok = TOK_PPJOIN; + } else if ('#' == tok) { + spc = 4; + } else if (check_space(tok, &spc)) { + goto skip; + } + tok_str_add2(&tokstr_buf, tok, &tokc); + skip: + next_nomacro(); + } + + parse_flags = saved_parse_flags; + if (spc == 1) + --tokstr_buf.len; /* remove trailing space */ + tok_str_add(&tokstr_buf, 0); + if (3 == spc) +bad_twosharp: + tcc_error("'##' cannot appear at either end of macro"); + define_push(v, t, tok_str_dup(&tokstr_buf), first); +} + +#ifdef _WIN32 +static unsigned long long calc_file_hash(const char *filename) +{ + unsigned long long hash = 14695981039346656037ull; // FNV_offset_basis; + int fd = open (filename, O_RDONLY | O_BINARY); + + if (fd < 0) + return 0; + for (;;) { + unsigned char temp[IO_BUF_SIZE]; + int i, n = read(fd, temp, sizeof(temp)); + + if (n <= 0) + break; + for (i = 0; i < n; i++) + hash = hash * 1099511628211ull ^ temp[i]; // FNV_prime + } + close(fd); + return hash ? hash : 1ull; +} +#endif + +static CachedInclude *search_cached_include(TCCState *s1, const char *filename, int add) +{ + unsigned int h = 0; + CachedInclude *e; + int i; + struct stat st; +#ifdef _WIN32 + unsigned long long hash = 0; +#endif + + /* This is needed for #pragmae once + * We cannot use stat on windows because st_ino is not set correctly + * so we calculate a hash of file contents. + * This also works for hard/soft links as in gcc/clang. + */ + memset (&st, 0, sizeof(st)); + if (stat (filename, &st)) + goto skip; + h = st.st_size & (CACHED_INCLUDES_HASH_SIZE - 1); +#ifdef _WIN32 + /* Only calculate file hash if file size same. */ + i = s1->cached_includes_hash[h]; + for(;;) { + if (i == 0) + break; + e = s1->cached_includes[i - 1]; + if (e->st.st_size == st.st_size) { + if (0 == PATHCMP(e->filename, filename)) { + hash = e->hash; + break; + } + if (e->hash == 0) + e->hash = calc_file_hash(e->filename); + if (hash == 0) + hash = calc_file_hash(filename); + } + i = e->hash_next; + } +#endif + + i = s1->cached_includes_hash[h]; + for(;;) { + if (i == 0) + break; + e = s1->cached_includes[i - 1]; +#ifdef _WIN32 + if (e->st.st_size == st.st_size && e->hash == hash) +#else + if (st.st_dev == e->st.st_dev && st.st_ino == e->st.st_ino) +#endif + return e; + i = e->hash_next; + } +skip: + if (!add) + return NULL; + + e = tcc_malloc(sizeof(CachedInclude) + strlen(filename)); + e->st = st; +#ifdef _WIN32 + e->hash = hash; +#endif + strcpy(e->filename, filename); + e->ifndef_macro = e->once = 0; + dynarray_add(&s1->cached_includes, &s1->nb_cached_includes, e); + /* add in hash table */ + e->hash_next = s1->cached_includes_hash[h]; + s1->cached_includes_hash[h] = s1->nb_cached_includes; +#ifdef INC_DEBUG + printf("adding cached '%s'\n", filename); +#endif + return e; +} + +static void pragma_parse(TCCState *s1) +{ + next_nomacro(); + if (tok == TOK_push_macro || tok == TOK_pop_macro) { + int t = tok, v; + Sym *s; + + if (next(), tok != '(') + goto pragma_err; + if (next(), tok != TOK_STR) + goto pragma_err; + v = tok_alloc(tokc.str.data, tokc.str.size - 1)->tok; + if (next(), tok != ')') + goto pragma_err; + if (t == TOK_push_macro) { + while (NULL == (s = define_find(v))) + define_push(v, 0, NULL, NULL); + s->type.ref = s; /* set push boundary */ + } else { + for (s = define_stack; s; s = s->prev) + if (s->v == v && s->type.ref == s) { + s->type.ref = NULL; + break; + } + } + if (s) + table_ident[v - TOK_IDENT]->sym_define = s->d ? s : NULL; + else + tcc_warning("unbalanced #pragma pop_macro"); + pp_debug_tok = t, pp_debug_symv = v; + + } else if (tok == TOK_once) { + search_cached_include(s1, file->filename, 1)->once = pp_once; + + } else if (s1->output_type == TCC_OUTPUT_PREPROCESS) { + /* tcc -E: keep pragmas below unchanged */ + unget_tok(' '); + unget_tok(TOK_PRAGMA); + unget_tok('#'); + unget_tok(TOK_LINEFEED); + + } else if (tok == TOK_pack) { + /* This may be: + #pragma pack(1) // set + #pragma pack() // reset to default + #pragma pack(push) // push current + #pragma pack(push,1) // push & set + #pragma pack(pop) // restore previous */ + next(); + skip('('); + if (tok == TOK_ASM_pop) { + next(); + if (s1->pack_stack_ptr <= s1->pack_stack) { + stk_error: + tcc_error("out of pack stack"); + } + s1->pack_stack_ptr--; + } else { + int val = 0; + if (tok != ')') { + if (tok == TOK_ASM_push) { + next(); + if (s1->pack_stack_ptr >= s1->pack_stack + PACK_STACK_SIZE - 1) + goto stk_error; + val = *s1->pack_stack_ptr++; + if (tok != ',') + goto pack_set; + next(); + } + if (tok != TOK_CINT) + goto pragma_err; + val = tokc.i; + if (val < 1 || val > 16 || (val & (val - 1)) != 0) + goto pragma_err; + next(); + } + pack_set: + *s1->pack_stack_ptr = val; + } + if (tok != ')') + goto pragma_err; + + } else if (tok == TOK_comment) { + char *p; int t; + next(); + skip('('); + t = tok; + next(); + skip(','); + if (tok != TOK_STR) + goto pragma_err; + p = tcc_strdup((char *)tokc.str.data); + next(); + if (tok != ')') + goto pragma_err; + if (t == TOK_lib) { + dynarray_add(&s1->pragma_libs, &s1->nb_pragma_libs, p); + } else { + if (t == TOK_option) + tcc_set_options(s1, p); + tcc_free(p); + } + + } else + tcc_warning_c(warn_unsupported)("#pragma %s ignored", get_tok_str(tok, &tokc)); + return; + +pragma_err: + tcc_error("malformed #pragma directive"); + return; +} + +/* is_bof is true if first non space token at beginning of file */ +ST_FUNC void preprocess(int is_bof) +{ + TCCState *s1 = tcc_state; + int c, n, saved_parse_flags; + char buf[1024], *q; + Sym *s; + + saved_parse_flags = parse_flags; + parse_flags = PARSE_FLAG_PREPROCESS + | PARSE_FLAG_TOK_NUM + | PARSE_FLAG_TOK_STR + | PARSE_FLAG_LINEFEED + | (parse_flags & PARSE_FLAG_ASM_FILE) + ; + + next_nomacro(); + redo: + switch(tok) { + case TOK_DEFINE: + pp_debug_tok = tok; + next_nomacro(); + pp_debug_symv = tok; + parse_define(); + break; + case TOK_UNDEF: + pp_debug_tok = tok; + next_nomacro(); + pp_debug_symv = tok; + s = define_find(tok); + /* undefine symbol by putting an invalid name */ + if (s) + define_undef(s); + break; + case TOK_INCLUDE: + case TOK_INCLUDE_NEXT: + parse_include(s1, tok - TOK_INCLUDE, 0); + break; + case TOK_IFNDEF: + c = 1; + goto do_ifdef; + case TOK_IF: + c = expr_preprocess(s1); + goto do_if; + case TOK_IFDEF: + c = 0; + do_ifdef: + next_nomacro(); + if (tok < TOK_IDENT) + tcc_error("invalid argument for '#if%sdef'", c ? "n" : ""); + if (is_bof) { + if (c) { +#ifdef INC_DEBUG + printf("#ifndef %s\n", get_tok_str(tok, NULL)); +#endif + file->ifndef_macro = tok; + } + } + if (define_find(tok) + || tok == TOK___HAS_INCLUDE + || tok == TOK___HAS_INCLUDE_NEXT) + c ^= 1; + do_if: + if (s1->ifdef_stack_ptr >= s1->ifdef_stack + IFDEF_STACK_SIZE) + tcc_error("memory full (ifdef)"); + *s1->ifdef_stack_ptr++ = c; + goto test_skip; + case TOK_ELSE: + if (s1->ifdef_stack_ptr == s1->ifdef_stack) + tcc_error("#else without matching #if"); + if (s1->ifdef_stack_ptr[-1] & 2) + tcc_error("#else after #else"); + c = (s1->ifdef_stack_ptr[-1] ^= 3); + goto test_else; + case TOK_ELIF: + if (s1->ifdef_stack_ptr == s1->ifdef_stack) + tcc_error("#elif without matching #if"); + c = s1->ifdef_stack_ptr[-1]; + if (c > 1) + tcc_error("#elif after #else"); + /* last #if/#elif expression was true: we skip */ + if (c == 1) { + c = 0; + } else { + c = expr_preprocess(s1); + s1->ifdef_stack_ptr[-1] = c; + } + test_else: + if (s1->ifdef_stack_ptr == file->ifdef_stack_ptr + 1) + file->ifndef_macro = 0; + test_skip: + if (!(c & 1)) { + preprocess_skip(); + is_bof = 0; + goto redo; + } + break; + case TOK_ENDIF: + if (s1->ifdef_stack_ptr <= file->ifdef_stack_ptr) + tcc_error("#endif without matching #if"); + s1->ifdef_stack_ptr--; + /* '#ifndef macro' was at the start of file. Now we check if + an '#endif' is exactly at the end of file */ + if (file->ifndef_macro && + s1->ifdef_stack_ptr == file->ifdef_stack_ptr) { + file->ifndef_macro_saved = file->ifndef_macro; + /* need to set to zero to avoid false matches if another + #ifndef at middle of file */ + file->ifndef_macro = 0; + while (tok != TOK_LINEFEED) + next_nomacro(); + tok_flags |= TOK_FLAG_ENDIF; + goto the_end; + } + break; + case TOK_PPNUM: + n = strtoul((char*)tokc.str.data, &q, 10); + goto _line_num; + case TOK_LINE: + next(); + if (tok != TOK_CINT) + _line_err: + tcc_error("wrong #line format"); + n = tokc.i; + _line_num: + next(); + if (tok != TOK_LINEFEED) { + if (tok == TOK_STR) { + if (file->true_filename == file->filename) + file->true_filename = tcc_strdup(file->filename); + q = (char *)tokc.str.data; + buf[0] = 0; + if (!IS_ABSPATH(q)) { + /* prepend directory from real file */ + pstrcpy(buf, sizeof buf, file->true_filename); + *tcc_basename(buf) = 0; + } + pstrcat(buf, sizeof buf, q); + tcc_debug_putfile(s1, buf); + } else if (parse_flags & PARSE_FLAG_ASM_FILE) + break; + else + goto _line_err; + --n; + } + if (file->fd > 0) + total_lines += file->line_num - n; + file->line_num = n; + break; + case TOK_ERROR: + case TOK_WARNING: + q = buf; + c = skip_spaces(); + while (c != '\n' && c != CH_EOF) { + if ((q - buf) < sizeof(buf) - 1) + *q++ = c; + c = ninp(); + } + *q = '\0'; + if (tok == TOK_ERROR) + tcc_error("#error %s", buf); + else + tcc_warning("#warning %s", buf); + break; + case TOK_PRAGMA: + pragma_parse(s1); + break; + case TOK_LINEFEED: + goto the_end; + default: + /* ignore gas line comment in an 'S' file. */ + if (saved_parse_flags & PARSE_FLAG_ASM_FILE) + goto ignore; + if (tok == '!' && is_bof) + /* '!' is ignored at beginning to allow C scripts. */ + goto ignore; + tcc_warning("Ignoring unknown preprocessing directive #%s", get_tok_str(tok, &tokc)); + ignore: + file->buf_ptr = parse_line_comment(file->buf_ptr - 1); + goto the_end; + } + /* ignore other preprocess commands or #! for C scripts */ + while (tok != TOK_LINEFEED) + next_nomacro(); + the_end: + parse_flags = saved_parse_flags; +} + +/* evaluate escape codes in a string. */ +static void parse_escape_string(CString *outstr, const uint8_t *buf, int is_long) +{ + int c, n, i; + const uint8_t *p; + + p = buf; + for(;;) { + c = *p; + if (c == '\0') + break; + if (c == '\\') { + p++; + /* escape */ + c = *p; + switch(c) { + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + /* at most three octal digits */ + n = c - '0'; + p++; + c = *p; + if (isoct(c)) { + n = n * 8 + c - '0'; + p++; + c = *p; + if (isoct(c)) { + n = n * 8 + c - '0'; + p++; + } + } + c = n; + goto add_char_nonext; + case 'x': i = 0; goto parse_hex_or_ucn; + case 'u': i = 4; goto parse_hex_or_ucn; + case 'U': i = 8; goto parse_hex_or_ucn; + parse_hex_or_ucn: + p++; + n = 0; + do { + c = *p; + if (c >= 'a' && c <= 'f') + c = c - 'a' + 10; + else if (c >= 'A' && c <= 'F') + c = c - 'A' + 10; + else if (isnum(c)) + c = c - '0'; + else if (i > 0) + expect("more hex digits in universal-character-name"); + else + goto add_hex_or_ucn; + n = n * 16 + c; + p++; + } while (--i); + if (is_long) { + add_hex_or_ucn: + c = n; + goto add_char_nonext; + } + cstr_u8cat(outstr, n); + continue; + case 'a': + c = '\a'; + break; + case 'b': + c = '\b'; + break; + case 'f': + c = '\f'; + break; + case 'n': + c = '\n'; + break; + case 'r': + c = '\r'; + break; + case 't': + c = '\t'; + break; + case 'v': + c = '\v'; + break; + case 'e': + if (!gnu_ext) + goto invalid_escape; + c = 27; + break; + case '\'': + case '\"': + case '\\': + case '?': + break; + default: + invalid_escape: + if (c >= '!' && c <= '~') + tcc_warning("unknown escape sequence: \'\\%c\'", c); + else + tcc_warning("unknown escape sequence: \'\\x%x\'", c); + break; + } + } else if (is_long && c >= 0x80) { + /* assume we are processing UTF-8 sequence */ + /* reference: The Unicode Standard, Version 10.0, ch3.9 */ + + int cont; /* count of continuation bytes */ + int skip; /* how many bytes should skip when error occurred */ + int i; + + /* decode leading byte */ + if (c < 0xC2) { + skip = 1; goto invalid_utf8_sequence; + } else if (c <= 0xDF) { + cont = 1; n = c & 0x1f; + } else if (c <= 0xEF) { + cont = 2; n = c & 0xf; + } else if (c <= 0xF4) { + cont = 3; n = c & 0x7; + } else { + skip = 1; goto invalid_utf8_sequence; + } + + /* decode continuation bytes */ + for (i = 1; i <= cont; i++) { + int l = 0x80, h = 0xBF; + + /* adjust limit for second byte */ + if (i == 1) { + switch (c) { + case 0xE0: l = 0xA0; break; + case 0xED: h = 0x9F; break; + case 0xF0: l = 0x90; break; + case 0xF4: h = 0x8F; break; + } + } + + if (p[i] < l || p[i] > h) { + skip = i; goto invalid_utf8_sequence; + } + + n = (n << 6) | (p[i] & 0x3f); + } + + /* advance pointer */ + p += 1 + cont; + c = n; + goto add_char_nonext; + + /* error handling */ + invalid_utf8_sequence: + tcc_warning("ill-formed UTF-8 subsequence starting with: \'\\x%x\'", c); + c = 0xFFFD; + p += skip; + goto add_char_nonext; + + } + p++; + add_char_nonext: + if (!is_long) + cstr_ccat(outstr, c); + else { +#ifdef TCC_TARGET_PE + /* store as UTF-16 */ + if (c < 0x10000) { + cstr_wccat(outstr, c); + } else { + c -= 0x10000; + cstr_wccat(outstr, (c >> 10) + 0xD800); + cstr_wccat(outstr, (c & 0x3FF) + 0xDC00); + } +#else + cstr_wccat(outstr, c); +#endif + } + } + /* add a trailing '\0' */ + if (!is_long) + cstr_ccat(outstr, '\0'); + else + cstr_wccat(outstr, '\0'); +} + +static void parse_string(const char *s, int len) +{ + uint8_t buf[1000], *p = buf; + int is_long, sep; + + if ((is_long = *s == 'L')) + ++s, --len; + sep = *s++; + len -= 2; + if (len >= sizeof buf) + p = tcc_malloc(len + 1); + memcpy(p, s, len); + p[len] = 0; + + cstr_reset(&tokcstr); + parse_escape_string(&tokcstr, p, is_long); + if (p != buf) + tcc_free(p); + + if (sep == '\'') { + int char_size, i, n, c; + /* XXX: make it portable */ + if (!is_long) + tok = TOK_CCHAR, char_size = 1; + else + tok = TOK_LCHAR, char_size = sizeof(nwchar_t); + n = tokcstr.size / char_size - 1; + if (n < 1) + tcc_error("empty character constant"); + if (n > 1) + tcc_warning_c(warn_all)("multi-character character constant"); + for (c = i = 0; i < n; ++i) { + if (is_long) + c = ((nwchar_t *)tokcstr.data)[i]; + else + c = (c << 8) | ((char *)tokcstr.data)[i]; + } + tokc.i = c; + } else { + tokc.str.size = tokcstr.size; + tokc.str.data = tokcstr.data; + if (!is_long) + tok = TOK_STR; + else + tok = TOK_LSTR; + } +} + +/* we use 64 bit numbers */ +#define BN_SIZE 2 + +/* bn = (bn << shift) | or_val */ +static void bn_lshift(unsigned int *bn, int shift, int or_val) +{ + int i; + unsigned int v; + for(i=0;i<BN_SIZE;i++) { + v = bn[i]; + bn[i] = (v << shift) | or_val; + or_val = v >> (32 - shift); + } +} + +static void bn_zero(unsigned int *bn) +{ + int i; + for(i=0;i<BN_SIZE;i++) { + bn[i] = 0; + } +} + +/* parse number in null terminated string 'p' and return it in the + current token */ +static void parse_number(const char *p) +{ + int b, t, shift, frac_bits, s, exp_val, ch; + char *q; + unsigned int bn[BN_SIZE]; + double d; + + /* number */ + q = token_buf; + ch = *p++; + t = ch; + ch = *p++; + *q++ = t; + b = 10; + if (t == '.') { + goto float_frac_parse; + } else if (t == '0') { + if (ch == 'x' || ch == 'X') { + q--; + ch = *p++; + b = 16; + } else if (tcc_state->tcc_ext && (ch == 'b' || ch == 'B')) { + q--; + ch = *p++; + b = 2; + } + } + /* parse all digits. cannot check octal numbers at this stage + because of floating point constants */ + while (1) { + if (ch >= 'a' && ch <= 'f') + t = ch - 'a' + 10; + else if (ch >= 'A' && ch <= 'F') + t = ch - 'A' + 10; + else if (isnum(ch)) + t = ch - '0'; + else + break; + if (t >= b) + break; + if (q >= token_buf + STRING_MAX_SIZE) { + num_too_long: + tcc_error("number too long"); + } + *q++ = ch; + ch = *p++; + } + if (ch == '.' || + ((ch == 'e' || ch == 'E') && b == 10) || + ((ch == 'p' || ch == 'P') && (b == 16 || b == 2))) { + if (b != 10) { + /* NOTE: strtox should support that for hexa numbers, but + non ISOC99 libcs do not support it, so we prefer to do + it by hand */ + /* hexadecimal or binary floats */ + /* XXX: handle overflows */ + *q = '\0'; + if (b == 16) + shift = 4; + else + shift = 1; + bn_zero(bn); + q = token_buf; + while (1) { + t = *q++; + if (t == '\0') { + break; + } else if (t >= 'a') { + t = t - 'a' + 10; + } else if (t >= 'A') { + t = t - 'A' + 10; + } else { + t = t - '0'; + } + bn_lshift(bn, shift, t); + } + frac_bits = 0; + if (ch == '.') { + ch = *p++; + while (1) { + t = ch; + if (t >= 'a' && t <= 'f') { + t = t - 'a' + 10; + } else if (t >= 'A' && t <= 'F') { + t = t - 'A' + 10; + } else if (t >= '0' && t <= '9') { + t = t - '0'; + } else { + break; + } + if (t >= b) + tcc_error("invalid digit"); + bn_lshift(bn, shift, t); + frac_bits += shift; + ch = *p++; + } + } + if (ch != 'p' && ch != 'P') + expect("exponent"); + ch = *p++; + s = 1; + exp_val = 0; + if (ch == '+') { + ch = *p++; + } else if (ch == '-') { + s = -1; + ch = *p++; + } + if (ch < '0' || ch > '9') + expect("exponent digits"); + while (ch >= '0' && ch <= '9') { + exp_val = exp_val * 10 + ch - '0'; + ch = *p++; + } + exp_val = exp_val * s; + + /* now we can generate the number */ + /* XXX: should patch directly float number */ + d = (double)bn[1] * 4294967296.0 + (double)bn[0]; + d = ldexp(d, exp_val - frac_bits); + t = toup(ch); + if (t == 'F') { + ch = *p++; + tok = TOK_CFLOAT; + /* float : should handle overflow */ + tokc.f = (float)d; + } else if (t == 'L') { + ch = *p++; + tok = TOK_CLDOUBLE; +#ifdef TCC_USING_DOUBLE_FOR_LDOUBLE + tokc.d = d; +#else + /* XXX: not large enough */ + tokc.ld = (long double)d; +#endif + } else { + tok = TOK_CDOUBLE; + tokc.d = d; + } + } else { + /* decimal floats */ + if (ch == '.') { + if (q >= token_buf + STRING_MAX_SIZE) + goto num_too_long; + *q++ = ch; + ch = *p++; + float_frac_parse: + while (ch >= '0' && ch <= '9') { + if (q >= token_buf + STRING_MAX_SIZE) + goto num_too_long; + *q++ = ch; + ch = *p++; + } + } + if (ch == 'e' || ch == 'E') { + if (q >= token_buf + STRING_MAX_SIZE) + goto num_too_long; + *q++ = ch; + ch = *p++; + if (ch == '-' || ch == '+') { + if (q >= token_buf + STRING_MAX_SIZE) + goto num_too_long; + *q++ = ch; + ch = *p++; + } + if (ch < '0' || ch > '9') + expect("exponent digits"); + while (ch >= '0' && ch <= '9') { + if (q >= token_buf + STRING_MAX_SIZE) + goto num_too_long; + *q++ = ch; + ch = *p++; + } + } + *q = '\0'; + t = toup(ch); + errno = 0; + if (t == 'F') { + ch = *p++; + tok = TOK_CFLOAT; + tokc.f = strtof(token_buf, NULL); + } else if (t == 'L') { + ch = *p++; + tok = TOK_CLDOUBLE; +#ifdef TCC_USING_DOUBLE_FOR_LDOUBLE + tokc.d = strtod(token_buf, NULL); +#else + tokc.ld = strtold(token_buf, NULL); +#endif + } else { + tok = TOK_CDOUBLE; + tokc.d = strtod(token_buf, NULL); + } + } + } else { + unsigned long long n, n1; + int lcount, ucount, ov = 0; + const char *p1; + + /* integer number */ + *q = '\0'; + q = token_buf; + if (b == 10 && *q == '0') { + b = 8; + q++; + } + n = 0; + while(1) { + t = *q++; + /* no need for checks except for base 10 / 8 errors */ + if (t == '\0') + break; + else if (t >= 'a') + t = t - 'a' + 10; + else if (t >= 'A') + t = t - 'A' + 10; + else + t = t - '0'; + if (t >= b) + tcc_error("invalid digit"); + n1 = n; + n = n * b + t; + /* detect overflow */ + if (n1 >= 0x1000000000000000ULL && n / b != n1) + ov = 1; + } + + /* Determine the characteristics (unsigned and/or 64bit) the type of + the constant must have according to the constant suffix(es) */ + lcount = ucount = 0; + p1 = p; + for(;;) { + t = toup(ch); + if (t == 'L') { + if (lcount >= 2) + tcc_error("three 'l's in integer constant"); + if (lcount && *(p - 1) != ch) + tcc_error("incorrect integer suffix: %s", p1); + lcount++; + ch = *p++; + } else if (t == 'U') { + if (ucount >= 1) + tcc_error("two 'u's in integer constant"); + ucount++; + ch = *p++; + } else { + break; + } + } + + /* Determine if it needs 64 bits and/or unsigned in order to fit */ + if (ucount == 0 && b == 10) { + if (lcount <= (LONG_SIZE == 4)) { + if (n >= 0x80000000U) + lcount = (LONG_SIZE == 4) + 1; + } + if (n >= 0x8000000000000000ULL) + ov = 1, ucount = 1; + } else { + if (lcount <= (LONG_SIZE == 4)) { + if (n >= 0x100000000ULL) + lcount = (LONG_SIZE == 4) + 1; + else if (n >= 0x80000000U) + ucount = 1; + } + if (n >= 0x8000000000000000ULL) + ucount = 1; + } + + if (ov) + tcc_warning("integer constant overflow"); + + tok = TOK_CINT; + if (lcount) { + tok = TOK_CLONG; + if (lcount == 2) + tok = TOK_CLLONG; + } + if (ucount) + ++tok; /* TOK_CU... */ + tokc.i = n; + } + if (ch) + tcc_error("invalid number"); +} + + +#define PARSE2(c1, tok1, c2, tok2) \ + case c1: \ + PEEKC(c, p); \ + if (c == c2) { \ + p++; \ + tok = tok2; \ + } else { \ + tok = tok1; \ + } \ + break; + +/* return next token without macro substitution */ +static inline void next_nomacro1(void) +{ + int t, c, is_long, len; + TokenSym *ts; + uint8_t *p, *p1; + unsigned int h; + + p = file->buf_ptr; + redo_no_start: + c = *p; + switch(c) { + case ' ': + case '\t': + tok = c; + p++; + maybe_space: + if (parse_flags & PARSE_FLAG_SPACES) + goto keep_tok_flags; + while (isidnum_table[*p - CH_EOF] & IS_SPC) + ++p; + goto redo_no_start; + case '\f': + case '\v': + case '\r': + p++; + goto redo_no_start; + case '\\': + /* first look if it is in fact an end of buffer */ + c = handle_stray(&p); + if (c == '\\') + goto parse_simple; + if (c == CH_EOF) { + TCCState *s1 = tcc_state; + if ((parse_flags & PARSE_FLAG_LINEFEED) + && !(tok_flags & TOK_FLAG_EOF)) { + tok_flags |= TOK_FLAG_EOF; + tok = TOK_LINEFEED; + goto keep_tok_flags; + } else if (!(parse_flags & PARSE_FLAG_PREPROCESS)) { + tok = TOK_EOF; + } else if (s1->ifdef_stack_ptr != file->ifdef_stack_ptr) { + tcc_error("missing #endif"); + } else if (s1->include_stack_ptr == s1->include_stack) { + /* no include left : end of file. */ + tok = TOK_EOF; + } else { + tok_flags &= ~TOK_FLAG_EOF; + /* pop include file */ + + /* test if previous '#endif' was after a #ifdef at + start of file */ + if (tok_flags & TOK_FLAG_ENDIF) { +#ifdef INC_DEBUG + printf("#endif %s\n", get_tok_str(file->ifndef_macro_saved, NULL)); +#endif + search_cached_include(s1, file->filename, 1) + ->ifndef_macro = file->ifndef_macro_saved; + tok_flags &= ~TOK_FLAG_ENDIF; + } + + /* add end of include file debug info */ + tcc_debug_eincl(tcc_state); + /* pop include stack */ + tcc_close(); + s1->include_stack_ptr--; + p = file->buf_ptr; + if (p == file->buffer) + tok_flags = TOK_FLAG_BOF; + tok_flags |= TOK_FLAG_BOL; + goto redo_no_start; + } + } else { + goto redo_no_start; + } + break; + + case '\n': + file->line_num++; + tok_flags |= TOK_FLAG_BOL; + p++; +maybe_newline: + if (0 == (parse_flags & PARSE_FLAG_LINEFEED)) + goto redo_no_start; + tok = TOK_LINEFEED; + goto keep_tok_flags; + + case '#': + /* XXX: simplify */ + PEEKC(c, p); + if ((tok_flags & TOK_FLAG_BOL) && + (parse_flags & PARSE_FLAG_PREPROCESS)) { + file->buf_ptr = p; + preprocess(tok_flags & TOK_FLAG_BOF); + p = file->buf_ptr; + goto maybe_newline; + } else { + if (c == '#') { + p++; + tok = TOK_TWOSHARPS; + } else { +#if !defined(TCC_TARGET_ARM) + if (parse_flags & PARSE_FLAG_ASM_FILE) { + p = parse_line_comment(p - 1); + goto redo_no_start; + } else +#endif + { + tok = '#'; + } + } + } + break; + + /* dollar is allowed to start identifiers when not parsing asm */ + case '$': + if (!(isidnum_table[c - CH_EOF] & IS_ID) + || (parse_flags & PARSE_FLAG_ASM_FILE)) + goto parse_simple; + + case 'a': case 'b': case 'c': case 'd': + case 'e': case 'f': case 'g': case 'h': + case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': + case 'q': case 'r': case 's': case 't': + case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case 'A': case 'B': case 'C': case 'D': + case 'E': case 'F': case 'G': case 'H': + case 'I': case 'J': case 'K': + case 'M': case 'N': case 'O': case 'P': + case 'Q': case 'R': case 'S': case 'T': + case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case '_': + parse_ident_fast: + p1 = p; + h = TOK_HASH_INIT; + h = TOK_HASH_FUNC(h, c); + while (c = *++p, isidnum_table[c - CH_EOF] & (IS_ID|IS_NUM)) + h = TOK_HASH_FUNC(h, c); + len = p - p1; + if (c != '\\') { + TokenSym **pts; + + /* fast case : no stray found, so we have the full token + and we have already hashed it */ + h &= (TOK_HASH_SIZE - 1); + pts = &hash_ident[h]; + for(;;) { + ts = *pts; + if (!ts) + break; + if (ts->len == len && !memcmp(ts->str, p1, len)) + goto token_found; + pts = &(ts->hash_next); + } + ts = tok_alloc_new(pts, (char *) p1, len); + token_found: ; + } else { + /* slower case */ + cstr_reset(&tokcstr); + cstr_cat(&tokcstr, (char *) p1, len); + p--; + PEEKC(c, p); + parse_ident_slow: + while (isidnum_table[c - CH_EOF] & (IS_ID|IS_NUM)) + { + cstr_ccat(&tokcstr, c); + PEEKC(c, p); + } + ts = tok_alloc(tokcstr.data, tokcstr.size); + } + tok = ts->tok; + break; + case 'L': + t = p[1]; + if (t != '\\' && t != '\'' && t != '\"') { + /* fast case */ + goto parse_ident_fast; + } else { + PEEKC(c, p); + if (c == '\'' || c == '\"') { + is_long = 1; + goto str_const; + } else { + cstr_reset(&tokcstr); + cstr_ccat(&tokcstr, 'L'); + goto parse_ident_slow; + } + } + break; + + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + case '8': case '9': + t = c; + PEEKC(c, p); + /* after the first digit, accept digits, alpha, '.' or sign if + prefixed by 'eEpP' */ + parse_num: + cstr_reset(&tokcstr); + for(;;) { + cstr_ccat(&tokcstr, t); + if (!((isidnum_table[c - CH_EOF] & (IS_ID|IS_NUM)) + || c == '.' + || ((c == '+' || c == '-') + && (((t == 'e' || t == 'E') + && !(parse_flags & PARSE_FLAG_ASM_FILE + /* 0xe+1 is 3 tokens in asm */ + && ((char*)tokcstr.data)[0] == '0' + && toup(((char*)tokcstr.data)[1]) == 'X')) + || t == 'p' || t == 'P')))) + break; + t = c; + PEEKC(c, p); + } + /* We add a trailing '\0' to ease parsing */ + cstr_ccat(&tokcstr, '\0'); + tokc.str.size = tokcstr.size; + tokc.str.data = tokcstr.data; + tok = TOK_PPNUM; + break; + + case '.': + /* special dot handling because it can also start a number */ + PEEKC(c, p); + if (isnum(c)) { + t = '.'; + goto parse_num; + } else if ((isidnum_table['.' - CH_EOF] & IS_ID) + && (isidnum_table[c - CH_EOF] & (IS_ID|IS_NUM))) { + *--p = c = '.'; + goto parse_ident_fast; + } else if (c == '.') { + PEEKC(c, p); + if (c == '.') { + p++; + tok = TOK_DOTS; + } else { + *--p = '.'; /* may underflow into file->unget[] */ + tok = '.'; + } + } else { + tok = '.'; + } + break; + case '\'': + case '\"': + is_long = 0; + str_const: + cstr_reset(&tokcstr); + if (is_long) + cstr_ccat(&tokcstr, 'L'); + cstr_ccat(&tokcstr, c); + p = parse_pp_string(p, c, &tokcstr); + cstr_ccat(&tokcstr, c); + cstr_ccat(&tokcstr, '\0'); + tokc.str.size = tokcstr.size; + tokc.str.data = tokcstr.data; + tok = TOK_PPSTR; + break; + + case '<': + PEEKC(c, p); + if (c == '=') { + p++; + tok = TOK_LE; + } else if (c == '<') { + PEEKC(c, p); + if (c == '=') { + p++; + tok = TOK_A_SHL; + } else { + tok = TOK_SHL; + } + } else { + tok = TOK_LT; + } + break; + case '>': + PEEKC(c, p); + if (c == '=') { + p++; + tok = TOK_GE; + } else if (c == '>') { + PEEKC(c, p); + if (c == '=') { + p++; + tok = TOK_A_SAR; + } else { + tok = TOK_SAR; + } + } else { + tok = TOK_GT; + } + break; + + case '&': + PEEKC(c, p); + if (c == '&') { + p++; + tok = TOK_LAND; + } else if (c == '=') { + p++; + tok = TOK_A_AND; + } else { + tok = '&'; + } + break; + + case '|': + PEEKC(c, p); + if (c == '|') { + p++; + tok = TOK_LOR; + } else if (c == '=') { + p++; + tok = TOK_A_OR; + } else { + tok = '|'; + } + break; + + case '+': + PEEKC(c, p); + if (c == '+') { + p++; + tok = TOK_INC; + } else if (c == '=') { + p++; + tok = TOK_A_ADD; + } else { + tok = '+'; + } + break; + + case '-': + PEEKC(c, p); + if (c == '-') { + p++; + tok = TOK_DEC; + } else if (c == '=') { + p++; + tok = TOK_A_SUB; + } else if (c == '>') { + p++; + tok = TOK_ARROW; + } else { + tok = '-'; + } + break; + + PARSE2('!', '!', '=', TOK_NE) + PARSE2('=', '=', '=', TOK_EQ) + PARSE2('*', '*', '=', TOK_A_MUL) + PARSE2('%', '%', '=', TOK_A_MOD) + PARSE2('^', '^', '=', TOK_A_XOR) + + /* comments or operator */ + case '/': + PEEKC(c, p); + if (c == '*') { + p = parse_comment(p); + /* comments replaced by a blank */ + tok = ' '; + goto maybe_space; + } else if (c == '/') { + p = parse_line_comment(p); + tok = ' '; + goto maybe_space; + } else if (c == '=') { + p++; + tok = TOK_A_DIV; + } else { + tok = '/'; + } + break; + + /* simple tokens */ + case '(': + case ')': + case '[': + case ']': + case '{': + case '}': + case ',': + case ';': + case ':': + case '?': + case '~': + case '@': /* only used in assembler */ + parse_simple: + tok = c; + p++; + break; + default: + if (c >= 0x80 && c <= 0xFF) /* utf8 identifiers */ + goto parse_ident_fast; + if (parse_flags & PARSE_FLAG_ASM_FILE) + goto parse_simple; + tcc_error("unrecognized character \\x%02x", c); + break; + } + tok_flags = 0; +keep_tok_flags: + file->buf_ptr = p; +#if defined(PARSE_DEBUG) + printf("token = %d %s\n", tok, get_tok_str(tok, &tokc)); +#endif +} + +static void macro_subst( + TokenString *tok_str, + Sym **nested_list, + const int *macro_str + ); + +/* substitute arguments in replacement lists in macro_str by the values in + args (field d) and return allocated string */ +static int *macro_arg_subst(Sym **nested_list, const int *macro_str, Sym *args) +{ + int t, t0, t1, spc; + const int *st; + Sym *s; + CValue cval; + TokenString str; + + tok_str_new(&str); + t0 = t1 = 0; + while(1) { + TOK_GET(&t, ¯o_str, &cval); + if (!t) + break; + if (t == '#') { + /* stringize */ + TOK_GET(&t, ¯o_str, &cval); + if (!t) + goto bad_stringy; + s = sym_find2(args, t); + if (s) { + cstr_reset(&tokcstr); + cstr_ccat(&tokcstr, '\"'); + st = s->d; + spc = 0; + while (*st >= 0) { + TOK_GET(&t, &st, &cval); + if (t != TOK_PLCHLDR + && t != TOK_NOSUBST + && 0 == check_space(t, &spc)) { + const char *s = get_tok_str(t, &cval); + while (*s) { + if (t == TOK_PPSTR && *s != '\'') + add_char(&tokcstr, *s); + else + cstr_ccat(&tokcstr, *s); + ++s; + } + } + } + tokcstr.size -= spc; + cstr_ccat(&tokcstr, '\"'); + cstr_ccat(&tokcstr, '\0'); +#ifdef PP_DEBUG + printf("\nstringize: <%s>\n", (char *)tokcstr.data); +#endif + /* add string */ + cval.str.size = tokcstr.size; + cval.str.data = tokcstr.data; + tok_str_add2(&str, TOK_PPSTR, &cval); + } else { + bad_stringy: + expect("macro parameter after '#'"); + } + } else if (t >= TOK_IDENT) { + s = sym_find2(args, t); + if (s) { + st = s->d; + /* if '##' is present before or after, no arg substitution */ + if (*macro_str == TOK_PPJOIN || t1 == TOK_PPJOIN) { + /* special case for var arg macros : ## eats the ',' + if empty VA_ARGS variable. */ + if (t1 == TOK_PPJOIN && t0 == ',' && gnu_ext && s->type.t) { + if (*st <= 0) { + /* suppress ',' '##' */ + str.len -= 2; + } else { + /* suppress '##' and add variable */ + str.len--; + goto add_var; + } + } + } else { + add_var: + if (!s->next) { + /* Expand arguments tokens and store them. In most + cases we could also re-expand each argument if + used multiple times, but not if the argument + contains the __COUNTER__ macro. */ + TokenString str2; + sym_push2(&s->next, s->v, s->type.t, 0); + tok_str_new(&str2); + macro_subst(&str2, nested_list, st); + tok_str_add(&str2, 0); + s->next->d = str2.str; + } + st = s->next->d; + } + if (*st <= 0) { + /* expanded to empty string */ + tok_str_add(&str, TOK_PLCHLDR); + } else for (;;) { + int t2; + TOK_GET(&t2, &st, &cval); + if (t2 <= 0) + break; + tok_str_add2(&str, t2, &cval); + } + } else { + tok_str_add(&str, t); + } + } else { + tok_str_add2(&str, t, &cval); + } + t0 = t1, t1 = t; + } + tok_str_add(&str, 0); + return str.str; +} + +static char const ab_month_name[12][4] = +{ + "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" +}; + +static int paste_tokens(int t1, CValue *v1, int t2, CValue *v2) +{ + int n, ret = 1; + + cstr_reset(&tokcstr); + if (t1 != TOK_PLCHLDR) + cstr_cat(&tokcstr, get_tok_str(t1, v1), -1); + n = tokcstr.size; + if (t2 != TOK_PLCHLDR) + cstr_cat(&tokcstr, get_tok_str(t2, v2), -1); + cstr_ccat(&tokcstr, '\0'); + //printf("paste <%s>\n", (char*)tokcstr.data); + + tcc_open_bf(tcc_state, ":paste:", tokcstr.size); + memcpy(file->buffer, tokcstr.data, tokcstr.size); + tok_flags = 0; + for (;;) { + next_nomacro1(); + if (0 == *file->buf_ptr) + break; + if (is_space(tok)) + continue; + tcc_warning("pasting \"%.*s\" and \"%s\" does not give a valid" + " preprocessing token", n, file->buffer, file->buffer + n); + ret = 0; + break; + } + tcc_close(); + return ret; +} + +/* handle the '##' operator. Return NULL if no '##' seen. Otherwise + return the resulting string (which must be freed). */ +static inline int *macro_twosharps(const int *ptr0) +{ + int t; + CValue cval; + TokenString macro_str1; + int start_of_nosubsts = -1; + const int *ptr; + + /* we search the first '##' */ + for (ptr = ptr0;;) { + TOK_GET(&t, &ptr, &cval); + if (t == TOK_PPJOIN) + break; + if (t == 0) + return NULL; + } + + tok_str_new(¯o_str1); + + //tok_print(" $$$", ptr0); + for (ptr = ptr0;;) { + TOK_GET(&t, &ptr, &cval); + if (t == 0) + break; + if (t == TOK_PPJOIN) + continue; + while (*ptr == TOK_PPJOIN) { + int t1; CValue cv1; + /* given 'a##b', remove nosubsts preceding 'a' */ + if (start_of_nosubsts >= 0) + macro_str1.len = start_of_nosubsts; + /* given 'a##b', remove nosubsts preceding 'b' */ + while ((t1 = *++ptr) == TOK_NOSUBST) + ; + if (t1 && t1 != TOK_PPJOIN) { + TOK_GET(&t1, &ptr, &cv1); + if (t != TOK_PLCHLDR || t1 != TOK_PLCHLDR) { + if (paste_tokens(t, &cval, t1, &cv1)) { + t = tok, cval = tokc; + } else { + tok_str_add2(¯o_str1, t, &cval); + t = t1, cval = cv1; + } + } + } + } + if (t == TOK_NOSUBST) { + if (start_of_nosubsts < 0) + start_of_nosubsts = macro_str1.len; + } else { + start_of_nosubsts = -1; + } + tok_str_add2(¯o_str1, t, &cval); + } + tok_str_add(¯o_str1, 0); + //tok_print(" ###", macro_str1.str); + return macro_str1.str; +} + +/* peek or read [ws_str == NULL] next token from function macro call, + walking up macro levels up to the file if necessary */ +static int next_argstream(Sym **nested_list, TokenString *ws_str) +{ + int t; + const int *p; + Sym *sa; + + for (;;) { + if (macro_ptr) { + p = macro_ptr, t = *p; + if (ws_str) { + while (is_space(t) || TOK_LINEFEED == t || TOK_PLCHLDR == t) + tok_str_add(ws_str, t), t = *++p; + } + if (t == 0) { + end_macro(); + /* also, end of scope for nested defined symbol */ + sa = *nested_list; + while (sa && sa->v == 0) + sa = sa->prev; + if (sa) + sa->v = 0; + continue; + } + } else { + uint8_t *p = file->buf_ptr; + int ch = handle_bs(&p); + if (ws_str) { + while (is_space(ch) || ch == '\n' || ch == '/') { + if (ch == '/') { + int c; + PEEKC(c, p); + if (c == '*') { + p = parse_comment(p) - 1; + } else if (c == '/') { + p = parse_line_comment(p) - 1; + } else { + *--p = ch; + break; + } + ch = ' '; + } + if (ch == '\n') + file->line_num++; + if (!(ch == '\f' || ch == '\v' || ch == '\r')) + tok_str_add(ws_str, ch); + PEEKC(ch, p); + } + } + file->buf_ptr = p; + t = ch; + } + + if (ws_str) + return t; + next_nomacro(); + return tok; + } +} + +/* do macro substitution of current token with macro 's' and add + result to (tok_str,tok_len). 'nested_list' is the list of all + macros we got inside to avoid recursing. Return non zero if no + substitution needs to be done */ +static int macro_subst_tok( + TokenString *tok_str, + Sym **nested_list, + Sym *s) +{ + Sym *args, *sa, *sa1; + int parlevel, t, t1, spc; + TokenString str; + char *cstrval; + CValue cval; + char buf[32]; + + /* if symbol is a macro, prepare substitution */ + /* special macros */ + if (tok == TOK___LINE__ || tok == TOK___COUNTER__) { + t = tok == TOK___LINE__ ? file->line_num : pp_counter++; + snprintf(buf, sizeof(buf), "%d", t); + cstrval = buf; + t1 = TOK_PPNUM; + goto add_cstr1; + } else if (tok == TOK___FILE__) { + cstrval = file->filename; + goto add_cstr; + } else if (tok == TOK___DATE__ || tok == TOK___TIME__) { + time_t ti; + struct tm *tm; + + time(&ti); + tm = localtime(&ti); + if (tok == TOK___DATE__) { + snprintf(buf, sizeof(buf), "%s %2d %d", + ab_month_name[tm->tm_mon], tm->tm_mday, tm->tm_year + 1900); + } else { + snprintf(buf, sizeof(buf), "%02d:%02d:%02d", + tm->tm_hour, tm->tm_min, tm->tm_sec); + } + cstrval = buf; + add_cstr: + t1 = TOK_STR; + add_cstr1: + cstr_reset(&tokcstr); + cstr_cat(&tokcstr, cstrval, 0); + cval.str.size = tokcstr.size; + cval.str.data = tokcstr.data; + tok_str_add2(tok_str, t1, &cval); + } else if (s->d) { + int saved_parse_flags = parse_flags; + int *joined_str = NULL; + int *mstr = s->d; + + if (s->type.t == MACRO_FUNC) { + /* whitespace between macro name and argument list */ + TokenString ws_str; + tok_str_new(&ws_str); + + spc = 0; + parse_flags |= PARSE_FLAG_SPACES | PARSE_FLAG_LINEFEED + | PARSE_FLAG_ACCEPT_STRAYS; + + /* get next token from argument stream */ + t = next_argstream(nested_list, &ws_str); + if (t != '(') { + /* not a macro substitution after all, restore the + * macro token plus all whitespace we've read. + * whitespace is intentionally not merged to preserve + * newlines. */ + parse_flags = saved_parse_flags; + tok_str_add(tok_str, tok); + if (parse_flags & PARSE_FLAG_SPACES) { + int i; + for (i = 0; i < ws_str.len; i++) + tok_str_add(tok_str, ws_str.str[i]); + } + if (ws_str.len && ws_str.str[ws_str.len - 1] == '\n') + tok_flags |= TOK_FLAG_BOL; + tok_str_free_str(ws_str.str); + return 0; + } else { + tok_str_free_str(ws_str.str); + } + do { + next_nomacro(); /* eat '(' */ + } while (tok == TOK_PLCHLDR || is_space(tok)); + + /* argument macro */ + args = NULL; + sa = s->next; + /* NOTE: empty args are allowed, except if no args */ + for(;;) { + do { + next_argstream(nested_list, NULL); + } while (tok == TOK_PLCHLDR || is_space(tok) || + TOK_LINEFEED == tok); + empty_arg: + /* handle '()' case */ + if (!args && !sa && tok == ')') + break; + if (!sa) + tcc_error("macro '%s' used with too many args", + get_tok_str(s->v, 0)); + tok_str_new(&str); + parlevel = spc = 0; + /* NOTE: non zero sa->t indicates VA_ARGS */ + while ((parlevel > 0 || + (tok != ')' && + (tok != ',' || sa->type.t)))) { + if (tok == TOK_EOF || tok == 0) + break; + if (tok == '(') + parlevel++; + else if (tok == ')') + parlevel--; + if (tok == TOK_LINEFEED) + tok = ' '; + if (!check_space(tok, &spc)) + tok_str_add2(&str, tok, &tokc); + next_argstream(nested_list, NULL); + } + if (parlevel) + expect(")"); + str.len -= spc; + tok_str_add(&str, -1); + tok_str_add(&str, 0); + sa1 = sym_push2(&args, sa->v & ~SYM_FIELD, sa->type.t, 0); + sa1->d = str.str; + sa = sa->next; + if (tok == ')') { + /* special case for gcc var args: add an empty + var arg argument if it is omitted */ + if (sa && sa->type.t && gnu_ext) + goto empty_arg; + break; + } + if (tok != ',') + expect(","); + } + if (sa) { + tcc_error("macro '%s' used with too few args", + get_tok_str(s->v, 0)); + } + + /* now subst each arg */ + mstr = macro_arg_subst(nested_list, mstr, args); + /* free memory */ + sa = args; + while (sa) { + sa1 = sa->prev; + tok_str_free_str(sa->d); + if (sa->next) { + tok_str_free_str(sa->next->d); + sym_free(sa->next); + } + sym_free(sa); + sa = sa1; + } + parse_flags = saved_parse_flags; + } + + sym_push2(nested_list, s->v, 0, 0); + parse_flags = saved_parse_flags; + joined_str = macro_twosharps(mstr); + macro_subst(tok_str, nested_list, joined_str ? joined_str : mstr); + + /* pop nested defined symbol */ + sa1 = *nested_list; + *nested_list = sa1->prev; + sym_free(sa1); + if (joined_str) + tok_str_free_str(joined_str); + if (mstr != s->d) + tok_str_free_str(mstr); + } + return 0; +} + +/* do macro substitution of macro_str and add result to + (tok_str,tok_len). 'nested_list' is the list of all macros we got + inside to avoid recursing. */ +static void macro_subst( + TokenString *tok_str, + Sym **nested_list, + const int *macro_str + ) +{ + Sym *s; + int t, spc, nosubst; + CValue cval; + + spc = nosubst = 0; + + while (1) { + TOK_GET(&t, ¯o_str, &cval); + if (t <= 0) + break; + + if (t >= TOK_IDENT && 0 == nosubst) { + s = define_find(t); + if (s == NULL) + goto no_subst; + + /* if nested substitution, do nothing */ + if (sym_find2(*nested_list, t)) { + /* and mark it as TOK_NOSUBST, so it doesn't get subst'd again */ + tok_str_add2(tok_str, TOK_NOSUBST, NULL); + goto no_subst; + } + + { + TokenString *str = tok_str_alloc(); + str->str = (int*)macro_str; + begin_macro(str, 2); + + tok = t; + macro_subst_tok(tok_str, nested_list, s); + + if (macro_stack != str) { + /* already finished by reading function macro arguments */ + break; + } + + macro_str = macro_ptr; + end_macro (); + } + if (tok_str->len) + spc = is_space(t = tok_str->str[tok_str->lastlen]); + } else { +no_subst: + if (!check_space(t, &spc)) + tok_str_add2(tok_str, t, &cval); + + if (nosubst) { + if (nosubst > 1 && (spc || (++nosubst == 3 && t == '('))) + continue; + nosubst = 0; + } + if (t == TOK_NOSUBST) + nosubst = 1; + } + /* GCC supports 'defined' as result of a macro substitution */ + if (t == TOK_DEFINED && pp_expr) + nosubst = 2; + } +} + +/* return next token without macro substitution. Can read input from + macro_ptr buffer */ +static void next_nomacro(void) +{ + int t; + if (macro_ptr) { + redo: + t = *macro_ptr; + if (TOK_HAS_VALUE(t)) { + tok_get(&tok, ¯o_ptr, &tokc); + if (t == TOK_LINENUM) { + file->line_num = tokc.i; + goto redo; + } + } else { + macro_ptr++; + if (t < TOK_IDENT) { + if (!(parse_flags & PARSE_FLAG_SPACES) + && (isidnum_table[t - CH_EOF] & IS_SPC)) + goto redo; + } + tok = t; + } + } else { + next_nomacro1(); + } +} + +/* return next token with macro substitution */ +ST_FUNC void next(void) +{ + int t; + redo: + next_nomacro(); + t = tok; + if (macro_ptr) { + if (!TOK_HAS_VALUE(t)) { + if (t == TOK_NOSUBST || t == TOK_PLCHLDR) { + /* discard preprocessor markers */ + goto redo; + } else if (t == 0) { + /* end of macro or unget token string */ + end_macro(); + goto redo; + } else if (t == '\\') { + if (!(parse_flags & PARSE_FLAG_ACCEPT_STRAYS)) + tcc_error("stray '\\' in program"); + } + return; + } + } else if (t >= TOK_IDENT && (parse_flags & PARSE_FLAG_PREPROCESS)) { + /* if reading from file, try to substitute macros */ + Sym *s = define_find(t); + if (s) { + Sym *nested_list = NULL; + tokstr_buf.len = 0; + macro_subst_tok(&tokstr_buf, &nested_list, s); + tok_str_add(&tokstr_buf, 0); + begin_macro(&tokstr_buf, 0); + goto redo; + } + return; + } + /* convert preprocessor tokens into C tokens */ + if (t == TOK_PPNUM) { + if (parse_flags & PARSE_FLAG_TOK_NUM) + parse_number((char *)tokc.str.data); + } else if (t == TOK_PPSTR) { + if (parse_flags & PARSE_FLAG_TOK_STR) + parse_string((char *)tokc.str.data, tokc.str.size - 1); + } +} + +/* push back current token and set current token to 'last_tok'. Only + identifier case handled for labels. */ +ST_INLN void unget_tok(int last_tok) +{ + + TokenString *str = tok_str_alloc(); + tok_str_add2(str, tok, &tokc); + tok_str_add(str, 0); + begin_macro(str, 1); + tok = last_tok; +} + +/* ------------------------------------------------------------------------- */ +/* init preprocessor */ + +static const char * const target_os_defs = +#ifdef TCC_TARGET_PE + "_WIN32\0" +# if PTR_SIZE == 8 + "_WIN64\0" +# endif +#else +# if defined TCC_TARGET_MACHO + "__APPLE__\0" +# elif TARGETOS_FreeBSD + "__FreeBSD__ 12\0" +# elif TARGETOS_FreeBSD_kernel + "__FreeBSD_kernel__\0" +# elif TARGETOS_NetBSD + "__NetBSD__\0" +# elif TARGETOS_OpenBSD + "__OpenBSD__\0" +# else + "__linux__\0" + "__linux\0" +# if TARGETOS_ANDROID + "__ANDROID__\0" +# endif +# endif + "__unix__\0" + "__unix\0" +#endif + ; + +static void putdef(CString *cs, const char *p) +{ + cstr_printf(cs, "#define %s%s\n", p, &" 1"[!!strchr(p, ' ')*2]); +} + +static void putdefs(CString *cs, const char *p) +{ + while (*p) + putdef(cs, p), p = strchr(p, 0) + 1; +} + +static void tcc_predefs(TCCState *s1, CString *cs, int is_asm) +{ + int a, b, c; + + sscanf(TCC_VERSION, "%d.%d.%d", &a, &b, &c); + cstr_printf(cs, "#define __TINYC__ %d\n", a*10000 + b*100 + c); + + putdefs(cs, target_machine_defs); + putdefs(cs, target_os_defs); + +#ifdef TCC_TARGET_ARM + if (s1->float_abi == ARM_HARD_FLOAT) + putdef(cs, "__ARM_PCS_VFP"); +#endif + if (is_asm) + putdef(cs, "__ASSEMBLER__"); + if (s1->output_type == TCC_OUTPUT_PREPROCESS) + putdef(cs, "__TCC_PP__"); + if (s1->output_type == TCC_OUTPUT_MEMORY) + putdef(cs, "__TCC_RUN__"); +#ifdef CONFIG_TCC_BACKTRACE + if (s1->do_backtrace) + putdef(cs, "__TCC_BACKTRACE__"); +#endif +#ifdef CONFIG_TCC_BCHECK + if (s1->do_bounds_check) + putdef(cs, "__TCC_BCHECK__"); +#endif + if (s1->char_is_unsigned) + putdef(cs, "__CHAR_UNSIGNED__"); + if (s1->optimize > 0) + putdef(cs, "__OPTIMIZE__"); + if (s1->option_pthread) + putdef(cs, "_REENTRANT"); + if (s1->leading_underscore) + putdef(cs, "__leading_underscore"); + cstr_printf(cs, "#define __SIZEOF_POINTER__ %d\n", PTR_SIZE); + cstr_printf(cs, "#define __SIZEOF_LONG__ %d\n", LONG_SIZE); + if (!is_asm) { + putdef(cs, "__STDC__"); + cstr_printf(cs, "#define __STDC_VERSION__ %dL\n", s1->cversion); + cstr_cat(cs, + /* load more predefs and __builtins */ +#if CONFIG_TCC_PREDEFS + #include "tccdefs_.h" /* include as strings */ +#else + "#include <tccdefs.h>\n" /* load at runtime */ +#endif + , -1); + } + cstr_printf(cs, "#define __BASE_FILE__ \"%s\"\n", file->filename); +} + +ST_FUNC void preprocess_start(TCCState *s1, int filetype) +{ + int is_asm = !!(filetype & (AFF_TYPE_ASM|AFF_TYPE_ASMPP)); + + tccpp_new(s1); + + s1->include_stack_ptr = s1->include_stack; + s1->ifdef_stack_ptr = s1->ifdef_stack; + file->ifdef_stack_ptr = s1->ifdef_stack_ptr; + pp_expr = 0; + pp_counter = 0; + pp_debug_tok = pp_debug_symv = 0; + pp_once++; + s1->pack_stack[0] = 0; + s1->pack_stack_ptr = s1->pack_stack; + + set_idnum('$', !is_asm && s1->dollars_in_identifiers ? IS_ID : 0); + set_idnum('.', is_asm ? IS_ID : 0); + + if (!(filetype & AFF_TYPE_ASM)) { + CString cstr; + cstr_new(&cstr); + tcc_predefs(s1, &cstr, is_asm); + if (s1->cmdline_defs.size) + cstr_cat(&cstr, s1->cmdline_defs.data, s1->cmdline_defs.size); + if (s1->cmdline_incl.size) + cstr_cat(&cstr, s1->cmdline_incl.data, s1->cmdline_incl.size); + //printf("%s\n", (char*)cstr.data); + *s1->include_stack_ptr++ = file; + tcc_open_bf(s1, "<command line>", cstr.size); + memcpy(file->buffer, cstr.data, cstr.size); + cstr_free(&cstr); + } + + parse_flags = is_asm ? PARSE_FLAG_ASM_FILE : 0; + tok_flags = TOK_FLAG_BOL | TOK_FLAG_BOF; +} + +/* cleanup from error/setjmp */ +ST_FUNC void preprocess_end(TCCState *s1) +{ + while (macro_stack) + end_macro(); + macro_ptr = NULL; + while (file) + tcc_close(); + tccpp_delete(s1); +} + +ST_FUNC int set_idnum(int c, int val) +{ + int prev = isidnum_table[c - CH_EOF]; + isidnum_table[c - CH_EOF] = val; + return prev; +} + +ST_FUNC void tccpp_new(TCCState *s) +{ + int i, c; + const char *p, *r; + + /* init isid table */ + for(i = CH_EOF; i<128; i++) + set_idnum(i, + is_space(i) ? IS_SPC + : isid(i) ? IS_ID + : isnum(i) ? IS_NUM + : 0); + + for(i = 128; i<256; i++) + set_idnum(i, IS_ID); + + /* init allocators */ + tal_new(&toksym_alloc, TOKSYM_TAL_LIMIT, TOKSYM_TAL_SIZE); + tal_new(&tokstr_alloc, TOKSTR_TAL_LIMIT, TOKSTR_TAL_SIZE); + + memset(hash_ident, 0, TOK_HASH_SIZE * sizeof(TokenSym *)); + memset(s->cached_includes_hash, 0, sizeof s->cached_includes_hash); + + cstr_new(&tokcstr); + cstr_new(&cstr_buf); + cstr_realloc(&cstr_buf, STRING_MAX_SIZE); + tok_str_new(&tokstr_buf); + tok_str_realloc(&tokstr_buf, TOKSTR_MAX_SIZE); + + tok_ident = TOK_IDENT; + p = tcc_keywords; + while (*p) { + r = p; + for(;;) { + c = *r++; + if (c == '\0') + break; + } + tok_alloc(p, r - p - 1); + p = r; + } + + /* we add dummy defines for some special macros to speed up tests + and to have working defined() */ + define_push(TOK___LINE__, MACRO_OBJ, NULL, NULL); + define_push(TOK___FILE__, MACRO_OBJ, NULL, NULL); + define_push(TOK___DATE__, MACRO_OBJ, NULL, NULL); + define_push(TOK___TIME__, MACRO_OBJ, NULL, NULL); + define_push(TOK___COUNTER__, MACRO_OBJ, NULL, NULL); +} + +ST_FUNC void tccpp_delete(TCCState *s) +{ + int i, n; + + dynarray_reset(&s->cached_includes, &s->nb_cached_includes); + + /* free tokens */ + n = tok_ident - TOK_IDENT; + if (n > total_idents) + total_idents = n; + for(i = 0; i < n; i++) + tal_free(toksym_alloc, table_ident[i]); + tcc_free(table_ident); + table_ident = NULL; + + /* free static buffers */ + cstr_free(&tokcstr); + cstr_free(&cstr_buf); + tok_str_free_str(tokstr_buf.str); + + /* free allocators */ + tal_delete(toksym_alloc); + toksym_alloc = NULL; + tal_delete(tokstr_alloc); + tokstr_alloc = NULL; +} + +/* ------------------------------------------------------------------------- */ +/* tcc -E [-P[1]] [-dD} support */ + +static void tok_print(const char *msg, const int *str) +{ + FILE *fp; + int t, s = 0; + CValue cval; + + fp = tcc_state->ppfp; + fprintf(fp, "%s", msg); + while (str) { + TOK_GET(&t, &str, &cval); + if (!t) + break; + fprintf(fp, &" %s"[s], get_tok_str(t, &cval)), s = 1; + } + fprintf(fp, "\n"); +} + +static void pp_line(TCCState *s1, BufferedFile *f, int level) +{ + int d = f->line_num - f->line_ref; + + if (s1->dflag & 4) + return; + + if (s1->Pflag == LINE_MACRO_OUTPUT_FORMAT_NONE) { + ; + } else if (level == 0 && f->line_ref && d < 8) { + while (d > 0) + fputs("\n", s1->ppfp), --d; + } else if (s1->Pflag == LINE_MACRO_OUTPUT_FORMAT_STD) { + fprintf(s1->ppfp, "#line %d \"%s\"\n", f->line_num, f->filename); + } else { + fprintf(s1->ppfp, "# %d \"%s\"%s\n", f->line_num, f->filename, + level > 0 ? " 1" : level < 0 ? " 2" : ""); + } + f->line_ref = f->line_num; +} + +static void define_print(TCCState *s1, int v) +{ + FILE *fp; + Sym *s; + + s = define_find(v); + if (NULL == s || NULL == s->d) + return; + + fp = s1->ppfp; + fprintf(fp, "#define %s", get_tok_str(v, NULL)); + if (s->type.t == MACRO_FUNC) { + Sym *a = s->next; + fprintf(fp,"("); + if (a) + for (;;) { + fprintf(fp,"%s", get_tok_str(a->v & ~SYM_FIELD, NULL)); + if (!(a = a->next)) + break; + fprintf(fp,","); + } + fprintf(fp,")"); + } + tok_print("", s->d); +} + +static void pp_debug_defines(TCCState *s1) +{ + int v, t; + const char *vs; + FILE *fp; + + t = pp_debug_tok; + if (t == 0) + return; + + file->line_num--; + pp_line(s1, file, 0); + file->line_ref = ++file->line_num; + + fp = s1->ppfp; + v = pp_debug_symv; + vs = get_tok_str(v, NULL); + if (t == TOK_DEFINE) { + define_print(s1, v); + } else if (t == TOK_UNDEF) { + fprintf(fp, "#undef %s\n", vs); + } else if (t == TOK_push_macro) { + fprintf(fp, "#pragma push_macro(\"%s\")\n", vs); + } else if (t == TOK_pop_macro) { + fprintf(fp, "#pragma pop_macro(\"%s\")\n", vs); + } + pp_debug_tok = 0; +} + +static void pp_debug_builtins(TCCState *s1) +{ + int v; + for (v = TOK_IDENT; v < tok_ident; ++v) + define_print(s1, v); +} + +/* Add a space between tokens a and b to avoid unwanted textual pasting */ +static int pp_need_space(int a, int b) +{ + return 'E' == a ? '+' == b || '-' == b + : '+' == a ? TOK_INC == b || '+' == b + : '-' == a ? TOK_DEC == b || '-' == b + : a >= TOK_IDENT ? b >= TOK_IDENT + : a == TOK_PPNUM ? b >= TOK_IDENT + : 0; +} + +/* maybe hex like 0x1e */ +static int pp_check_he0xE(int t, const char *p) +{ + if (t == TOK_PPNUM && toup(strchr(p, 0)[-1]) == 'E') + return 'E'; + return t; +} + +/* Preprocess the current file */ +ST_FUNC int tcc_preprocess(TCCState *s1) +{ + BufferedFile **iptr; + int token_seen, spcs, level; + const char *p; + char white[400]; + + parse_flags = PARSE_FLAG_PREPROCESS + | (parse_flags & PARSE_FLAG_ASM_FILE) + | PARSE_FLAG_LINEFEED + | PARSE_FLAG_SPACES + | PARSE_FLAG_ACCEPT_STRAYS + ; + /* Credits to Fabrice Bellard's initial revision to demonstrate its + capability to compile and run itself, provided all numbers are + given as decimals. tcc -E -P10 will do. */ + if (s1->Pflag == LINE_MACRO_OUTPUT_FORMAT_P10) + parse_flags |= PARSE_FLAG_TOK_NUM, s1->Pflag = 1; + + if (s1->do_bench) { + /* for PP benchmarks */ + do next(); while (tok != TOK_EOF); + return 0; + } + + if (s1->dflag & 1) { + pp_debug_builtins(s1); + s1->dflag &= ~1; + } + + token_seen = TOK_LINEFEED, spcs = 0, level = 0; + if (file->prev) + pp_line(s1, file->prev, level++); + pp_line(s1, file, level); + for (;;) { + iptr = s1->include_stack_ptr; + next(); + if (tok == TOK_EOF) + break; + + level = s1->include_stack_ptr - iptr; + if (level) { + if (level > 0) + pp_line(s1, *iptr, 0); + pp_line(s1, file, level); + } + if (s1->dflag & 7) { + pp_debug_defines(s1); + if (s1->dflag & 4) + continue; + } + + if (is_space(tok)) { + if (spcs < sizeof white - 1) + white[spcs++] = tok; + continue; + } else if (tok == TOK_LINEFEED) { + spcs = 0; + if (token_seen == TOK_LINEFEED) + continue; + ++file->line_ref; + } else if (token_seen == TOK_LINEFEED) { + pp_line(s1, file, 0); + } else if (spcs == 0 && pp_need_space(token_seen, tok)) { + white[spcs++] = ' '; + } + + white[spcs] = 0, fputs(white, s1->ppfp), spcs = 0; + fputs(p = get_tok_str(tok, &tokc), s1->ppfp); + token_seen = pp_check_he0xE(tok, p); + } + return 0; +} + +/* ------------------------------------------------------------------------- */ |
