#include #include #include #include #include "common.h" #include "lexer.h" #ifdef emit_error #warn "Remember to remove the global emit_error #undef emit_error #endif /* ifdef emit_error */ #define emit_error(...) fprintf(stderr, "Error (%zd,%zd): ", line_number, 1 + column_number);\ fprintf(stderr, __VA_ARGS__) #define BUFFER_SIZE 1024 struct keyword { char *s; /* human-readable name of the token, as written by a user */ enum TOKEN_TYPE t; }; /** Static defs **************************************************************/ static struct keyword keywords[] = { {.s = "module", .t = TOK_MODULE }, {.s = "input" , .t = TOK_INPUT }, {.s = "expr" , .t = TOK_EXPR }, {.s = "or" , .t = TOK_OR }, {.s = "and" , .t = TOK_AND }, {.s = "xor" , .t = TOK_XOR }, {.s = "not" , .t = TOK_NOT }, {.s = NULL } }; static struct keyword token_descriptors[] = { {.s = "module declaration" , .t = TOK_MODULE }, {.s = "input declaration" , .t = TOK_INPUT }, {.s = "expression start" , .t = TOK_EXPR }, {.s = "colon" , .t = TOK_COLON }, {.s = "end of line" , .t = TOK_EOL }, {.s = "binary OR expression" , .t = TOK_OR }, {.s = "binary AND expression", .t = TOK_AND }, {.s = "binary XOR expression", .t = TOK_XOR }, {.s = "unary NOT expression" , .t = TOK_NOT }, {.s = "identifier" , .t = TOK_IDENT }, {.s = NULL } }; static char buf[BUFFER_SIZE]; static FILE* fd; static size_t line_number = 0; static size_t column_number = 0; static size_t leading_whitespace_len = 0; static struct token *tok_start = NULL; static struct token *tok_cursor = NULL; /** Helpers ******************************************************************/ static struct location get_current_loc(void) { struct location l; l.line = line_number; l.column = column_number + 1; l.leading_whitespace_len = leading_whitespace_len == -1 ? 0 : leading_whitespace_len; return l; } static int expect(const char c) { if (buf[column_number] != c) { emit_error("Expected '%c', got '%c'\n", c, buf[column_number]); return 1; } column_number++; return 0; } static void eat_whitespace(void) { while (column_number < BUFFER_SIZE && ( buf[column_number] == ' ' || buf[column_number] == '\t')) { column_number++; } } static void add_token(struct token t) { struct token *last = tok_cursor; if (leading_whitespace_len == -1) { leading_whitespace_len = 0; } tok_cursor = malloc(sizeof(*tok_cursor)); if (tok_cursor == NULL) { emit_error("Internal error: malloc failed:"); perror("malloc"); /* FIXME return falsey and propagate error up */ return; } *tok_cursor = t; /* tok_start is NULL on first token only */ if (tok_start == NULL) { tok_start = tok_cursor; } else { last->next = tok_cursor; } } const char * get_token_description(enum TOKEN_TYPE t) { size_t i = 0; for (i = 0; i < sizeof(token_descriptors) / sizeof(token_descriptors[0]) && token_descriptors[i].s; i++) { if (t == token_descriptors[i].t) { return token_descriptors[i].s; } } return "(internal error: unknown token)"; } /** Beans ********************************************************************/ static struct token lex_alphanum(void) { struct token t; size_t i = 0; t.loc = get_current_loc(); i = 0; while (i < MAX_IDENT_LENGTH - 1 && isalnum(buf[column_number + i])) { t.value[i] = buf[column_number + i]; i++; } t.value[i] = '\0'; column_number += i; t.span = i; if (i == 0) { emit_error("Expected alphanumeric, got '%c'\n", buf[i]); } /* default to identifier, see below for keyword */ t.type = TOK_IDENT; /* figure out if it's a keyword or not */ for (i = 0; i < sizeof(keywords) / sizeof(struct keyword) && keywords[i].s; i++) { if (strcmp(t.value, keywords[i].s) == 0) { t.type = keywords[i].t; break; } } return t; } static struct token lex_eol(void) { struct token t; t.type = TOK_EOL; t.loc = get_current_loc(); t.span = 1; expect('\n'); return t; } static struct token lex_colon(void) { struct token t; t.type = TOK_COLON; t.loc = get_current_loc(); t.span = 1; expect(':'); return t; } static int lex_line(void) { size_t length = strlen(buf); leading_whitespace_len = -1; while (column_number < length && column_number < strlen(buf)) { switch (buf[column_number]) { case ':': add_token(lex_colon()); break; case ' ': case '\t': eat_whitespace(); if (leading_whitespace_len == -1) { leading_whitespace_len = column_number; } break; case '\r': case '\n': add_token(lex_eol()); break; default: /* perform more broad checks */ if (isalnum(buf[column_number])) { add_token(lex_alphanum()); } else { /* nope, still no dice */ emit_error("Unexpected '%c'\n", buf[column_number]); return 1; } break; } } return 0; } struct token * lex_file(FILE *fd_local) { fd = fd_local; line_number = 1; tok_cursor = tok_start = NULL; while (NULL != fgets(buf, sizeof(buf), fd)) { column_number = 0; if (lex_line()) { return NULL; } line_number++; } /* Terminate linked list */ if (tok_cursor) { tok_cursor->next = NULL; } return tok_start; }