summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Phillips <david@sighup.nz>2018-08-01 01:00:54 +1200
committerDavid Phillips <david@sighup.nz>2018-08-01 01:00:54 +1200
commit645acc193a739ab771fb46e8ec2cf6cd87909597 (patch)
tree482c5d59220af7ae2cc8f34a860174f4b1affd01
parent96776c831e9d2fcbd69bc1e0faa16026c5ed25b0 (diff)
downloadhence-645acc193a739ab771fb46e8ec2cf6cd87909597.tar.xz
WIP: Implement more robust, separated lexer+parser passes
-rw-r--r--Makefile2
-rw-r--r--lexer.c234
-rw-r--r--lexer.h33
-rw-r--r--parser.c320
-rw-r--r--parser.h2
-rw-r--r--simulator.c25
6 files changed, 315 insertions, 301 deletions
diff --git a/Makefile b/Makefile
index 1b28030..bdfcc72 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@ CFLAGS += -std=c99 -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200809L -Wall -Wextra
all: simulator
-simulator: simulator.o gate.o logic.o parser.o
+simulator: simulator.o gate.o logic.o lexer.o parser.o
.PHONY: test
test: all
diff --git a/lexer.c b/lexer.c
new file mode 100644
index 0000000..251afed
--- /dev/null
+++ b/lexer.c
@@ -0,0 +1,234 @@
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "common.h"
+#include "lexer.h"
+
+#ifdef emit_error
+#warning "Remember to remove the global emit_error"
+#undef emit_error
+#endif /* ifdef emit_error */
+/* Print "Error (line,column): " plus a printf-style message to stderr; usable as one statement */
+#define emit_error(...) do { fprintf(stderr, "Error (%zu,%zu): ", line_number, 1 + column_number);\
+	fprintf(stderr, __VA_ARGS__); } while (0)
+
+#define BUFFER_SIZE 1024
+
+struct keyword { /* one row of a string <-> token type lookup table */
+	const char *s; /* keyword as written by a user (keywords[]) or readable description (token_descriptors[]) */
+	enum TOKEN_TYPE t;
+};
+
+
+/** Static defs **************************************************************/
+
+static struct keyword keywords[] = { /* reserved words; a NULL .s ends the table; first match wins, so spellings must be unique */
+	{.s = "module", .t = TOK_MODULE },
+	{.s = "input" , .t = TOK_INPUT  },
+	{.s = "expr"  , .t = TOK_EXPR   },
+	{.s = "or"    , .t = TOK_OR     },
+	{.s = "and"   , .t = TOK_AND    },
+	{.s = "xor"   , .t = TOK_XOR    },
+	{.s = "not"   , .t = TOK_NOT    },
+	{.s = NULL }
+};
+
+static struct keyword token_descriptors[] = { /* token type -> description for diagnostics; get_token_description() scans up to the NULL .s entry */
+ {.s = "module declaration" , .t = TOK_MODULE },
+ {.s = "input declaration" , .t = TOK_INPUT },
+ {.s = "expression start" , .t = TOK_EXPR },
+ {.s = "colon" , .t = TOK_COLON },
+ {.s = "end of line" , .t = TOK_EOL },
+ {.s = "binary OR expression" , .t = TOK_OR },
+ {.s = "binary AND expression", .t = TOK_AND },
+ {.s = "binary XOR expression", .t = TOK_XOR },
+ {.s = "unary NOT expression" , .t = TOK_NOT },
+ {.s = "identifier" , .t = TOK_IDENT },
+ {.s = NULL }
+};
+
+static char buf[BUFFER_SIZE]; /* the line currently being lexed, filled by fgets() in lex_file() */
+static FILE* fd; /* input stream handed to lex_file() */
+static size_t line_number = 0; /* line being lexed; lex_file() restarts it at 1 */
+static size_t column_number = 0; /* 0-based index of the cursor within buf */
+static struct token *tok_start = NULL; /* head of the token list */
+static struct token *tok_cursor = NULL; /* most recently added token (tail of the list) */
+
+
+/** Helpers ******************************************************************/
+
+static struct location
+get_current_loc(void) {
+	/* column_number is a 0-based index into buf; locations carry it 1-based */
+	return (struct location){
+		.line = line_number, .column = column_number + 1
+	};
+}
+
+static int
+expect(const char c) { /* consume c at the cursor: 0 if it matched, else report and return 1 */
+	if (buf[column_number] == c) {
+		column_number++;
+		return 0;
+	}
+	emit_error("Expected '%c', got '%c'\n", c, buf[column_number]);
+	return 1;
+}
+
+static void
+eat_whitespace(void) {
+	/* Step the cursor over spaces and tabs; never reads past the end of buf */
+	for (; column_number < BUFFER_SIZE; column_number++) {
+		const char c = buf[column_number];
+		if (c != ' ' && c != '\t') { break; }
+	}
+}
+
+static void
+add_token(struct token t) {
+	/* Append a heap-allocated copy of t to the token list (tok_start .. tok_cursor) */
+	struct token *node = malloc(sizeof(*node));
+	if (node == NULL) {
+		emit_error("Internal error: malloc failed:");
+		perror("malloc");
+		/* FIXME return falsey and propagate error up */
+		return; /* tok_cursor is untouched, so the list built so far stays usable */
+	}
+	*node = t;
+	node->next = NULL; /* keep the list NULL-terminated at all times, whatever t.next held */
+
+	/* tok_start is NULL on first token only */
+	if (tok_start == NULL) {
+		tok_start = node;
+	} else {
+		tok_cursor->next = node;
+	}
+	tok_cursor = node;
+}
+
+const char *
+get_token_description(enum TOKEN_TYPE t) {
+	/* Look t up in token_descriptors; unknown types get a fixed placeholder string */
+	const size_t count = sizeof(token_descriptors) / sizeof(token_descriptors[0]);
+	const struct keyword *d = token_descriptors;
+
+	for (; d < token_descriptors + count && d->s != NULL; d++) {
+		if (d->t == t) { return d->s; }
+	}
+
+	return "(internal error: unknown token)";
+}
+
+/** Beans ********************************************************************/
+
+static struct token
+lex_alphanum(void) {
+	/* Lex the run of alphanumerics at the cursor into a keyword token or TOK_IDENT */
+	struct token t;
+	size_t i = 0;
+
+	t.loc = get_current_loc();
+	/* read from the cursor (not the start of the line); at most MAX_IDENT_LENGTH - 1 chars fit */
+	while (i < MAX_IDENT_LENGTH - 1 && isalnum((unsigned char)buf[column_number + i])) {
+		t.value[i] = buf[column_number + i];
+		i++;
+	}
+	t.value[i] = '\0';
+
+	column_number += i;
+
+	if (i == 0) {
+		emit_error("Expected alphanumeric, got '%c'\n", buf[column_number]);
+	}
+
+	/* default to identifier, see below for keyword */
+	t.type = TOK_IDENT;
+
+	/* figure out if it's a keyword or not */
+	for (i = 0; i < sizeof(keywords) / sizeof(struct keyword) && keywords[i].s; i++) {
+		if (strcmp(t.value, keywords[i].s) == 0) {
+			t.type = keywords[i].t;
+			break;
+		}
+	}
+
+	return t;
+}
+
+static struct token
+lex_eol(void) {
+	/* Lex a line ending ("\n" or "\r\n") at the cursor into a TOK_EOL token */
+	struct token t;
+	t.type = TOK_EOL;
+	t.loc = get_current_loc();
+	/* step over the '\r' first so the caller always makes progress; a lone '\r' is still reported below */
+	if (buf[column_number] == '\r') { column_number++; }
+	expect('\n');
+	return t;
+}
+
+static struct token
+lex_colon(void) {
+	/* Build a TOK_COLON token located at the cursor, then consume the ':' */
+	struct token colon;
+
+	colon.loc = get_current_loc();
+	colon.type = TOK_COLON;
+
+	(void)expect(':');
+	return colon;
+}
+
+static int
+lex_line(void) {
+	/* Tokenise the line in buf from column_number on; 0 on success, 1 on an unexpected character */
+	const size_t length = strlen(buf); /* buf is not written while lexing, so measure it once */
+	while (column_number < length) {
+		switch (buf[column_number]) {
+		case ':':
+			add_token(lex_colon());
+			break;
+		case ' ':
+		case '\t':
+			eat_whitespace();
+			break;
+		case '\r':
+		case '\n':
+			add_token(lex_eol());
+			break;
+		default:
+			/* ctype functions need an unsigned char value; plain char may be negative */
+			if (isalnum((unsigned char)buf[column_number])) {
+				add_token(lex_alphanum());
+			} else {
+				emit_error("Unexpected '%c'\n", buf[column_number]);
+				return 1;
+			}
+			break;
+		}
+	}
+	return 0;
+}
+
+struct token *
+lex_file(FILE *fd_local) {
+	/* Lex all of fd_local; returns the token list head, or NULL on a lex error or when there are no tokens */
+	fd = fd_local;
+	line_number = 1;
+	tok_cursor = tok_start = NULL;
+
+	while (NULL != fgets(buf, sizeof(buf), fd)) {
+		column_number = 0;
+		if (lex_line()) {
+			return NULL;
+		}
+		line_number++;
+	}
+	/* Terminate linked list; tok_cursor stays NULL when no token was produced */
+	if (tok_cursor != NULL) {
+		tok_cursor->next = NULL;
+	}
+	return tok_start;
+}
diff --git a/lexer.h b/lexer.h
new file mode 100644
index 0000000..542dd2c
--- /dev/null
+++ b/lexer.h
@@ -0,0 +1,33 @@
+#ifndef LEXER_H
+#define LEXER_H
+#include <stdio.h> /* FILE and size_t, so this header can be included on its own */
+
+#define MAX_IDENT_LENGTH 128 /* capacity of token.value, terminating NUL included */
+enum TOKEN_TYPE {
+	/* Keywords */
+	TOK_MODULE,
+	TOK_INPUT,
+	TOK_EXPR,
+	TOK_COLON,
+	TOK_EOL,
+	TOK_OR,
+	TOK_AND,
+	TOK_XOR,
+	TOK_NOT,
+	/* Etc */
+	TOK_IDENT
+};
+struct location { /* position of a token in the input */
+	size_t line;
+	size_t column;
+};
+struct token { /* node of the singly linked list returned by lex_file() */
+	enum TOKEN_TYPE type;
+	struct location loc;
+	char value[MAX_IDENT_LENGTH];
+	struct token *next;
+};
+
+struct token* lex_file(FILE*);
+const char *get_token_description(enum TOKEN_TYPE);
+#endif /* LEXER_H */
diff --git a/parser.c b/parser.c
index 2c4a7d6..c577105 100644
--- a/parser.c
+++ b/parser.c
@@ -3,309 +3,61 @@
#include <stdlib.h>
#include <string.h>
+#include "lexer.h"
#include "common.h"
#include "error.h"
#include "logic.h"
#include "gate.h"
-int parse_input(char *);
-int parse_module(char *);
-int parse_expr(char *);
+static struct token *cursor;
-struct tok_lookup {
- char *str;
- int (*handler)(char*);
-};
+#ifdef emit_error
+#warning "Remember to remove the global emit_error"
+#undef emit_error
+#endif /* ifdef emit_error */
-struct op_lookup {
- char *str;
- enum BINARY (*handler)(enum BINARY, enum BINARY);
-};
+#define emit_error(...) fprintf(stderr, "Error (%zd,%zd): ", cursor->loc.line, cursor->loc.column);\
+ fprintf(stderr, __VA_ARGS__)
-static struct op_lookup uop_handlers[] = {
- {.str = "not", .handler = logic_nand},
-};
-static struct op_lookup bop_handlers[] = {
- {.str = "and", .handler = logic_and},
- {.str = "or", .handler = logic_or},
- {.str = "nand", .handler = logic_nand},
- {.str = "nor", .handler = logic_nor},
- {.str = "xor", .handler = logic_xor}
-};
+//static struct op_lookup uop_handlers[] = {
+// {.str = "not", .handler = logic_nand},
+//};
+//
+//static struct op_lookup bop_handlers[] = {
+// {.str = "and", .handler = logic_and},
+// {.str = "or", .handler = logic_or},
+// {.str = "nand", .handler = logic_nand},
+// {.str = "nor", .handler = logic_nor},
+// {.str = "xor", .handler = logic_xor}
+//};
+//
+//static struct tok_lookup tok_handlers[] = {
+// {.str = "input", .handler = parse_input},
+// {.str = "module", .handler = parse_module},
+// {.str = "expr", .handler = parse_expr}
+//};
-static struct tok_lookup tok_handlers[] = {
- {.str = "input", .handler = parse_input},
- {.str = "module", .handler = parse_module},
- {.str = "expr", .handler = parse_expr}
-};
+static int
+expect(enum TOKEN_TYPE e) {
+ char *expected_desc = "(internal error)";
+ char *observed_desc = "(internal error)";
-int
-rtrim(char *string) {
- char *old_end = string + strlen(string);
- char *w = NULL;
-
- string[strcspn(string, "\r\n")] = '\0';
-
- /* find first member of right whitespace */
- for (w = old_end; w >= string; w--) {
- if (strchr("\t ", *w) == NULL) {
- break;
- }
- *w = '\0';
- }
-
- return old_end - w;
-}
-
-char *
-eat_whitespace(char *string) {
- while (*string && isspace(*string)) {
- string++;
- }
- return string;
-}
-
-char *
-get_token_special(char *string, char **rest, char *delims) {
- size_t tok_len = 0;
- char *ret = NULL;
- string = eat_whitespace(string);
- tok_len = strcspn(string, delims);
- if (tok_len == 0) {
- emit_error("unexpected end of string");
- return NULL;
- }
- if (NULL == (ret = strndup(string, tok_len))) {
- emit_error("get_token: ");
- perror("strndup");
- return NULL;
- }
- *rest = string + tok_len;
- return ret;
-}
-
-char *
-get_token(char *string, char **rest) {
- return get_token_special(string, rest, " \t");
-}
-
-int
-expect(const char *expect, char *actual) {
- int min_len = 0;
-
- min_len = MIN(strlen(expect), strlen(actual));
-
- if (strncmp(expect, actual, min_len) != 0) {
- emit_error("Expected '%s' at start of '%s'\n", expect, actual);
+ if (cursor->type != e) {
+ expected_desc = get_token_description(e);
+ observed_desc = get_token_description(cursor->type);
+ emit_error("Expected %s, got %s\n", expected_desc, observed_desc);
return 1;
}
- return 0;
-}
-
-int parse_uop(char *str, char *name, enum BINARY (*handler)(enum BINARY, enum BINARY)) {
- char *source_name = NULL;
- char *next = NULL;
-
- if (NULL == (source_name = get_token(str, &next))) {
- return 1;
- }
-
- /* FIXME allow input from other gates, not just inputs */
- struct gate *in = gate_get_input_by_name(source_name);
- free(source_name);
- return gate_add(name, handler, in, in);
-
-
-}
-
-int parse_bop(char *str, char *name, enum BINARY (*handler)(enum BINARY, enum BINARY)) {
- char *source_name1 = NULL;
- char *source_name2 = NULL;
- char *next = NULL;
- struct gate *in1 = NULL;
- struct gate *in2 = NULL;
-
- if (NULL == (source_name1 = get_token(str, &next))) {
- return 1;
- }
-
- if (NULL == (source_name2 = get_token(next, &next))) {
- free(source_name1);
- return 1;
- }
-
- if (*next != '\0') {
- emit_error("superfluous text \"%s\"\n", next);
- free(source_name1);
- free(source_name2);
- return 1;
- }
-
- /* FIXME allow input from other gates, not just inputs */
- in1 = gate_get_input_by_name(source_name1);
- in2 = gate_get_input_by_name(source_name2);
-
- free(source_name1);
- free(source_name2);
-
- if (in1 == NULL || in2 == NULL) {
- emit_error("Undeclared identifier in source(s) for expression '%s'\n",
- name);
- return 1;
- }
-
- return gate_add(name, handler, in1, in2);
-}
-
-int
-parse_op(char *str, char *name) {
- char *tok = NULL;
- char *next = NULL;
- int match = 0;
- size_t i = 0;
-
- if (NULL == (tok = get_token(str, &next))) {
- return 1;
- }
-
- match = 0;
- for (i = 0; i < sizeof(bop_handlers)/sizeof(bop_handlers[0]); i++) {
- if (strcmp(bop_handlers[i].str, tok) == 0) {
- match = 1;
- if (parse_bop(next, name, bop_handlers[i].handler)) {
- free(tok);
- return 1;
- }
- }
- }
- for (i = 0; i < sizeof(uop_handlers)/sizeof(uop_handlers[0]); i++) {
- if (strcmp(uop_handlers[i].str, tok) == 0) {
- match = 1;
- if (parse_uop(next, name, uop_handlers[i].handler)) {
- free(tok);
- return 1;
- }
- }
- }
- if (match == 0) {
- emit_error("Invalid operator \"%s\"\n", tok);
- free(tok);
- return 1;
- }
- free(tok);
return 0;
}
-int
-parse_expr(char *str) {
- int ret = 0;
- const char *expr_sep = ": ";
- char *ident = NULL;
- char *next = NULL;
- char *op = NULL;
-
- if (NULL == (ident = get_token_special(str, &next, ": \t"))) {
- return 1;
- }
-
- if (expect(expr_sep, next)) {
- free(ident);
- return 1;
- }
-
- op = next + strlen(expr_sep);
-
- ret = parse_op(op, ident);
- free(ident);
- return ret;
-}
-
-int
-parse_input(char *str) {
- char *ident = NULL;
- char *next = NULL;
- int ret = 0;
-
- if (NULL == (ident = get_token(str, &next))) {
- return 1;
- }
-
- if (strlen(ident) == 0) {
- emit_error("input label must not be empty");
- free(ident);
- return 1;
- }
-
- if (*next != '\0') {
- emit_error("superfluous text after \"%s\" identifier: \"%s\"\n",
- ident, next);
- free(ident);
- return 1;
- }
-
- emit_info("Add input '%s'\n", ident);
- ret = gate_input_add(ident);
- free(ident);
- return ret;
-}
-
-int
-parse_module(char *str) {
- char *mod_name = NULL;
- char *next = NULL;
- if (NULL == (mod_name = get_token(str, &next))) {
- return 1;
- }
-
- if (*next != '\0') {
- emit_error("superfluous text following module name");
- free(mod_name);
- return 1;
- }
-
- printf("FIXME module name is \"%s\" but modules are not implemented\n", mod_name);
- free(mod_name);
- return 0;
-}
int
-parse_line(char *line) {
- size_t i = 0;
- char *next = NULL;
- char *tok = NULL;
- int match = 0;
-
- rtrim(line);
-
- if ( strlen(line) == 0
- || (strlen(line) == 1 && isspace(line[0]))) {
- return 0;
- }
-
- if (NULL == (tok = get_token(line, &next))) {
- return 1;
- }
-
- match = 0;
- for (i = 0; i < sizeof(tok_handlers)/sizeof(tok_handlers[0]); i++) {
- if (strcmp(tok_handlers[i].str, tok) == 0) {
- match = 1;
- if ((tok_handlers[i].handler)(next)) {
- free(tok);
- return 1;
- }
- }
- }
-
- if (match == 0) {
- emit_error("invalid token \"%s\"", tok);
- free(tok);
- return 1;
- }
- free(tok);
+parse(struct token *t) {
+ cursor = t;
+ expect(TOK_MODULE);
return 0;
}
-
diff --git a/parser.h b/parser.h
index 5c78b92..cf5c3b0 100644
--- a/parser.h
+++ b/parser.h
@@ -1 +1 @@
-int parse_line(char *line);
+int parse(struct token *);
diff --git a/simulator.c b/simulator.c
index fb5ce8f..4f99c4f 100644
--- a/simulator.c
+++ b/simulator.c
@@ -1,29 +1,24 @@
#include <stdio.h>
#include <string.h>
-#include "gate.h"
-#include "logic.h"
+//#include "gate.h"
+//#include "logic.h"
+#include "lexer.h"
#include "parser.h"
int main(int argc, char **argv) {
- char buf[4096];
- FILE *fd = stdin;
-
(void)argc;
(void)argv;
- gate_init();
+// gate_init();
- while (NULL != fgets(buf, sizeof(buf), fd)) {
- if (parse_line(buf)) {
- return 1;
- }
- }
+ struct token *tok = lex_file(stdin);
+ int p = parse(tok);
- gate_set_input("a", LOGIC_LOW);
- gate_set_input("b", LOGIC_LOW);
- gate_update();
- gate_dump();
+// gate_set_input("a", LOGIC_LOW);
+// gate_set_input("b", LOGIC_LOW);
+// gate_update();
+// gate_dump();
return 0;
}