diff options
author | David Phillips <david@sighup.nz> | 2018-08-01 01:00:54 +1200 |
---|---|---|
committer | David Phillips <david@sighup.nz> | 2018-08-01 01:00:54 +1200 |
commit | 645acc193a739ab771fb46e8ec2cf6cd87909597 (patch) | |
tree | 482c5d59220af7ae2cc8f34a860174f4b1affd01 | |
parent | 96776c831e9d2fcbd69bc1e0faa16026c5ed25b0 (diff) | |
download | hence-645acc193a739ab771fb46e8ec2cf6cd87909597.tar.xz |
WIP: Implement more robust, separated lexer+parser passes
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | lexer.c | 234 | ||||
-rw-r--r-- | lexer.h | 33 | ||||
-rw-r--r-- | parser.c | 320 | ||||
-rw-r--r-- | parser.h | 2 | ||||
-rw-r--r-- | simulator.c | 25 |
6 files changed, 315 insertions, 301 deletions
@@ -2,7 +2,7 @@ CFLAGS += -std=c99 -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200809L -Wall -Wextra all: simulator -simulator: simulator.o gate.o logic.o parser.o +simulator: simulator.o gate.o logic.o lexer.o parser.o .PHONY: test test: all @@ -0,0 +1,234 @@ +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "common.h" +#include "lexer.h" + +#ifdef emit_error +#warning "Remember to remove the global emit_error" +#undef emit_error +#endif /* ifdef emit_error */ + +#define emit_error(...) do { fprintf(stderr, "Error (%zu,%zu): ", line_number, 1 + column_number);\ + fprintf(stderr, __VA_ARGS__); } while (0) + +#define BUFFER_SIZE 1024 + +struct keyword { + const char *s; /* keyword as written by a user, or a human-readable description of the token */ + enum TOKEN_TYPE t; +}; + + +/** Static defs **************************************************************/ + +static struct keyword keywords[] = { + {.s = "module", .t = TOK_MODULE }, + {.s = "input" , .t = TOK_INPUT }, + {.s = "expr" , .t = TOK_EXPR }, + {.s = "or" , .t = TOK_OR }, + {.s = "and" , .t = TOK_AND }, + {.s = "xor" , .t = TOK_XOR }, + {.s = "not" , .t = TOK_NOT }, + {.s = NULL } +}; + +static struct keyword token_descriptors[] = { + {.s = "module declaration" , .t = TOK_MODULE }, + {.s = "input declaration" , .t = TOK_INPUT }, + {.s = "expression start" , .t = TOK_EXPR }, + {.s = "colon" , .t = TOK_COLON }, + {.s = "end of line" , .t = TOK_EOL }, + {.s = "binary OR expression" , .t = TOK_OR }, + {.s = "binary AND expression", .t = TOK_AND }, + {.s = "binary XOR expression", .t = TOK_XOR }, + {.s = "unary NOT expression" , .t = TOK_NOT }, + {.s = "identifier" , .t = TOK_IDENT }, + {.s = NULL } +}; + +static char buf[BUFFER_SIZE]; +static FILE* fd; +static size_t line_number = 0; +static size_t column_number = 0; +static struct token *tok_start = NULL; +static struct token *tok_cursor = NULL; + + +/** Helpers ******************************************************************/ + +static struct location
+get_current_loc(void) { + struct location l; + l.line = line_number; + l.column = column_number + 1; + return l; +} + +static int +expect(const char c) { + if (buf[column_number] != c) { + emit_error("Expected '%c', got '%c'\n", c, buf[column_number]); + return 1; + } + column_number++; + return 0; +} + +static void +eat_whitespace(void) { + while (column_number < BUFFER_SIZE && ( + buf[column_number] == ' ' || + buf[column_number] == '\t')) { + column_number++; + } +} + +static void +add_token(struct token t) { + struct token *last = tok_cursor; + + tok_cursor = malloc(sizeof(*tok_cursor)); + if (tok_cursor == NULL) { + emit_error("Internal error: malloc failed:"); + perror("malloc"); + tok_cursor = last; /* keep the list tail valid; FIXME return falsey and propagate error up */ + return; + } + + *tok_cursor = t; + tok_cursor->next = NULL; /* the lex_* helpers leave t.next unset; keep the list terminated */ + /* tok_start is NULL on first token only */ + if (tok_start == NULL) { + tok_start = tok_cursor; + } else { + last->next = tok_cursor; + } +} + +const char * +get_token_description(enum TOKEN_TYPE t) { + size_t i = 0; + + for (i = 0; i < sizeof(token_descriptors) / sizeof(token_descriptors[0]) && token_descriptors[i].s; i++) { + if (t == token_descriptors[i].t) { + return token_descriptors[i].s; + } + } + + return "(internal error: unknown token)"; +} + +/** Beans ********************************************************************/ + +static struct token +lex_alphanum(void) { + struct token t; + size_t i = 0; + + t.loc = get_current_loc(); + + i = 0; + while (i < MAX_IDENT_LENGTH - 1 && isalnum((unsigned char)buf[column_number + i])) { + t.value[i] = buf[column_number + i]; + i++; + } + t.value[i] = '\0'; + + column_number += i; + + if (i == 0) { + emit_error("Expected alphanumeric, got '%c'\n", buf[column_number]); + } + + /* default to identifier, see below for keyword */ + t.type = TOK_IDENT; + + /* figure out if it's a keyword or not */ + for (i = 0; i < sizeof(keywords) / sizeof(struct keyword) && keywords[i].s; i++) { + if (strcmp(t.value, keywords[i].s) == 0) { + t.type = keywords[i].t; + break; + } + } + + return t; +} +
+static struct token +lex_eol(void) { + struct token t; + + t.type = TOK_EOL; + t.loc = get_current_loc(); + if (buf[column_number] == '\r') { column_number++; } /* accept CRLF; without this a '\r' is never consumed */ + expect('\n'); + + return t; +} + +static struct token +lex_colon(void) { + struct token t; + + t.type = TOK_COLON; + t.loc = get_current_loc(); + + expect(':'); + + return t; +} + +static int +lex_line(void) { + size_t length = strlen(buf); + while (column_number < length) { + switch (buf[column_number]) { + case ':': + add_token(lex_colon()); + break; + case ' ': + case '\t': + eat_whitespace(); + break; + case '\r': + case '\n': + add_token(lex_eol()); + break; + default: + /* perform more broad checks */ + if (isalnum((unsigned char)buf[column_number])) { + add_token(lex_alphanum()); + } else { + /* nope, still no dice */ + emit_error("Unexpected '%c'\n", buf[column_number]); + return 1; + } + break; + } + } + return 0; +} + +struct token * +lex_file(FILE *fd_local) { + fd = fd_local; + + line_number = 1; + tok_cursor = tok_start = NULL; + + while (NULL != fgets(buf, sizeof(buf), fd)) { + column_number = 0; + if (lex_line()) { + return NULL; + } + line_number++; + } + + /* Terminate linked list; tok_cursor is still NULL when the input had no tokens */ + if (tok_cursor != NULL) { tok_cursor->next = NULL; } + + return tok_start; +} @@ -0,0 +1,33 @@ +#define MAX_IDENT_LENGTH 128 + +enum TOKEN_TYPE { + /* Keywords */ + TOK_MODULE, + TOK_INPUT, + TOK_EXPR, + TOK_COLON, + TOK_EOL, + TOK_OR, + TOK_AND, + TOK_XOR, + TOK_NOT, + + /* Etc */ + TOK_IDENT +}; + +struct location { + size_t line; + size_t column; +}; + +struct token { + enum TOKEN_TYPE type; + struct location loc; + char value[MAX_IDENT_LENGTH]; + struct token *next; +}; + +struct token* lex_file(FILE*); +const char *get_token_description(enum TOKEN_TYPE); + @@ -3,309 +3,61 @@ #include <stdlib.h> #include <string.h> +#include "lexer.h" #include "common.h" #include "error.h" #include "logic.h" #include "gate.h" -int parse_input(char *); -int parse_module(char *); -int parse_expr(char *); +static struct token *cursor; -struct tok_lookup { - char *str; - int
(*handler)(char*); -}; +#ifdef emit_error +#warning "Remember to remove the global emit_error" +#undef emit_error +#endif /* ifdef emit_error */ -struct op_lookup { - char *str; - enum BINARY (*handler)(enum BINARY, enum BINARY); -}; +#define emit_error(...) do { fprintf(stderr, "Error (%zu,%zu): ", cursor->loc.line, cursor->loc.column);\ + fprintf(stderr, __VA_ARGS__); } while (0) -static struct op_lookup uop_handlers[] = { - {.str = "not", .handler = logic_nand}, -}; -static struct op_lookup bop_handlers[] = { - {.str = "and", .handler = logic_and}, - {.str = "or", .handler = logic_or}, - {.str = "nand", .handler = logic_nand}, - {.str = "nor", .handler = logic_nor}, - {.str = "xor", .handler = logic_xor} -}; +//static struct op_lookup uop_handlers[] = { +// {.str = "not", .handler = logic_nand}, +//}; +// +//static struct op_lookup bop_handlers[] = { +// {.str = "and", .handler = logic_and}, +// {.str = "or", .handler = logic_or}, +// {.str = "nand", .handler = logic_nand}, +// {.str = "nor", .handler = logic_nor}, +// {.str = "xor", .handler = logic_xor} +//}; +// +//static struct tok_lookup tok_handlers[] = { +// {.str = "input", .handler = parse_input}, +// {.str = "module", .handler = parse_module}, +// {.str = "expr", .handler = parse_expr} +//}; -static struct tok_lookup tok_handlers[] = { - {.str = "input", .handler = parse_input}, - {.str = "module", .handler = parse_module}, - {.str = "expr", .handler = parse_expr} -}; +static int +expect(enum TOKEN_TYPE e) { + const char *expected_desc = "(internal error)"; + const char *observed_desc = "(internal error)"; -int -rtrim(char *string) { - char *old_end = string + strlen(string); - char *w = NULL; - - string[strcspn(string, "\r\n")] = '\0'; - - /* find first member of right whitespace */ - for (w = old_end; w >= string; w--) { - if (strchr("\t ", *w) == NULL) { - break; - } - *w = '\0'; - } - - return old_end - w; -} - -char * -eat_whitespace(char *string) { - while (*string && isspace(*string)) { - string++; - } - return string; -} -
-char * -get_token_special(char *string, char **rest, char *delims) { - size_t tok_len = 0; - char *ret = NULL; - string = eat_whitespace(string); - tok_len = strcspn(string, delims); - if (tok_len == 0) { - emit_error("unexpected end of string"); - return NULL; - } - if (NULL == (ret = strndup(string, tok_len))) { - emit_error("get_token: "); - perror("strndup"); - return NULL; - } - *rest = string + tok_len; - return ret; -} - -char * -get_token(char *string, char **rest) { - return get_token_special(string, rest, " \t"); -} - -int -expect(const char *expect, char *actual) { - int min_len = 0; - - min_len = MIN(strlen(expect), strlen(actual)); - - if (strncmp(expect, actual, min_len) != 0) { - emit_error("Expected '%s' at start of '%s'\n", expect, actual); + if (cursor->type != e) { + expected_desc = get_token_description(e); + observed_desc = get_token_description(cursor->type); + emit_error("Expected %s, got %s\n", expected_desc, observed_desc); return 1; } - return 0; -} - -int parse_uop(char *str, char *name, enum BINARY (*handler)(enum BINARY, enum BINARY)) { - char *source_name = NULL; - char *next = NULL; - - if (NULL == (source_name = get_token(str, &next))) { - return 1; - } - - /* FIXME allow input from other gates, not just inputs */ - struct gate *in = gate_get_input_by_name(source_name); - free(source_name); - return gate_add(name, handler, in, in); - - -} - -int parse_bop(char *str, char *name, enum BINARY (*handler)(enum BINARY, enum BINARY)) { - char *source_name1 = NULL; - char *source_name2 = NULL; - char *next = NULL; - struct gate *in1 = NULL; - struct gate *in2 = NULL; - - if (NULL == (source_name1 = get_token(str, &next))) { - return 1; - } - - if (NULL == (source_name2 = get_token(next, &next))) { - free(source_name1); - return 1; - } - - if (*next != '\0') { - emit_error("superfluous text \"%s\"\n", next); - free(source_name1); - free(source_name2); - return 1; - } - - /* FIXME allow input from other gates, not just inputs */ - in1 = 
gate_get_input_by_name(source_name1); - in2 = gate_get_input_by_name(source_name2); - - free(source_name1); - free(source_name2); - - if (in1 == NULL || in2 == NULL) { - emit_error("Undeclared identifier in source(s) for expression '%s'\n", - name); - return 1; - } - - return gate_add(name, handler, in1, in2); -} - -int -parse_op(char *str, char *name) { - char *tok = NULL; - char *next = NULL; - int match = 0; - size_t i = 0; - - if (NULL == (tok = get_token(str, &next))) { - return 1; - } - - match = 0; - for (i = 0; i < sizeof(bop_handlers)/sizeof(bop_handlers[0]); i++) { - if (strcmp(bop_handlers[i].str, tok) == 0) { - match = 1; - if (parse_bop(next, name, bop_handlers[i].handler)) { - free(tok); - return 1; - } - } - } - for (i = 0; i < sizeof(uop_handlers)/sizeof(uop_handlers[0]); i++) { - if (strcmp(uop_handlers[i].str, tok) == 0) { - match = 1; - if (parse_uop(next, name, uop_handlers[i].handler)) { - free(tok); - return 1; - } - } - } - if (match == 0) { - emit_error("Invalid operator \"%s\"\n", tok); - free(tok); - return 1; - } - free(tok); return 0; } -int -parse_expr(char *str) { - int ret = 0; - const char *expr_sep = ": "; - char *ident = NULL; - char *next = NULL; - char *op = NULL; - - if (NULL == (ident = get_token_special(str, &next, ": \t"))) { - return 1; - } - - if (expect(expr_sep, next)) { - free(ident); - return 1; - } - - op = next + strlen(expr_sep); - - ret = parse_op(op, ident); - free(ident); - return ret; -} - -int -parse_input(char *str) { - char *ident = NULL; - char *next = NULL; - int ret = 0; - - if (NULL == (ident = get_token(str, &next))) { - return 1; - } - - if (strlen(ident) == 0) { - emit_error("input label must not be empty"); - free(ident); - return 1; - } - - if (*next != '\0') { - emit_error("superfluous text after \"%s\" identifier: \"%s\"\n", - ident, next); - free(ident); - return 1; - } - - emit_info("Add input '%s'\n", ident); - ret = gate_input_add(ident); - free(ident); - return ret; -} - -int -parse_module(char 
*str) { - char *mod_name = NULL; - char *next = NULL; - if (NULL == (mod_name = get_token(str, &next))) { - return 1; - } - - if (*next != '\0') { - emit_error("superfluous text following module name"); - free(mod_name); - return 1; - } - - printf("FIXME module name is \"%s\" but modules are not implemented\n", mod_name); - free(mod_name); - return 0; -} int -parse_line(char *line) { - size_t i = 0; - char *next = NULL; - char *tok = NULL; - int match = 0; - - rtrim(line); - - if ( strlen(line) == 0 - || (strlen(line) == 1 && isspace(line[0]))) { - return 0; - } - - if (NULL == (tok = get_token(line, &next))) { - return 1; - } - - match = 0; - for (i = 0; i < sizeof(tok_handlers)/sizeof(tok_handlers[0]); i++) { - if (strcmp(tok_handlers[i].str, tok) == 0) { - match = 1; - if ((tok_handlers[i].handler)(next)) { - free(tok); - return 1; - } - } - } - - if (match == 0) { - emit_error("invalid token \"%s\"", tok); - free(tok); - return 1; - } - free(tok); +parse(struct token *t) { + cursor = t; + if (cursor == NULL || expect(TOK_MODULE)) { return 1; } /* NULL: lex_file failed or produced no tokens */ return 0; } - @@ -1 +1 @@ -int parse_line(char *line); +int parse(struct token *); diff --git a/simulator.c b/simulator.c index fb5ce8f..4f99c4f 100644 --- a/simulator.c +++ b/simulator.c @@ -1,29 +1,24 @@ #include <stdio.h> #include <string.h> -#include "gate.h" -#include "logic.h" +//#include "gate.h" +//#include "logic.h" +#include "lexer.h" #include "parser.h" int main(int argc, char **argv) { - char buf[4096]; - FILE *fd = stdin; - (void)argc; (void)argv; - gate_init(); +// gate_init(); - while (NULL != fgets(buf, sizeof(buf), fd)) { - if (parse_line(buf)) { - return 1; - } - } + struct token *tok = lex_file(stdin); + if (parse(tok)) { return 1; } /* report lex/parse failure through the exit status, as the old loop did */ - gate_set_input("a", LOGIC_LOW); - gate_set_input("b", LOGIC_LOW); - gate_update(); - gate_dump(); +// gate_set_input("a", LOGIC_LOW); +// gate_set_input("b", LOGIC_LOW); +// gate_update(); +// gate_dump(); return 0; } |