From 0b7bf68ade7a646377acbc7eb535ba1133d18475 Mon Sep 17 00:00:00 2001 From: David Phillips Date: Sun, 14 Apr 2019 17:49:14 +1200 Subject: Factor out various lookup tables --- Makefile | 6 +- instruction.h | 42 ------------- parse.c | 174 +++++++++++++++++------------------------------------ tok_util.c | 78 ------------------------ tok_util.h | 9 --- util.c | 189 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ util.h | 9 +++ 7 files changed, 257 insertions(+), 250 deletions(-) delete mode 100644 tok_util.c delete mode 100644 tok_util.h create mode 100644 util.c create mode 100644 util.h diff --git a/Makefile b/Makefile index fa8f61b..34dfbdb 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -OBJECTS = lex.o parse.o output.o assembler.o tok_util.o +OBJECTS = lex.o parse.o output.o assembler.o util.o all: assembler @@ -6,11 +6,11 @@ assembler: $(OBJECTS) lex.o: lex.h -parse.o: lex.h parse.h instruction.h tok_util.h +parse.o: lex.h parse.h instruction.h util.h output.o: parse.h -tok_util.o: lex.h +util.o: lex.h instruction.h .PHONY: clean diff --git a/instruction.h b/instruction.h index 3ee18d9..7e1a490 100644 --- a/instruction.h +++ b/instruction.h @@ -40,17 +40,6 @@ enum OPER { #define OPER_SHAMT (11) #define MASK_OPER(x) ((x) << OPER_SHAMT) -static const char *oper_to_human[] = { - [OPER_ADD] = "add", - [OPER_SUB] = "sub", - [OPER_SHL] = "shl", - [OPER_SHR] = "shr", - [OPER_AND] = "and", - [OPER_OR ] = "or", - [OPER_XOR] = "xor", - [OPER_MUL] = "mul" -}; - /** * Masks for jump and branch conditions * J-type instructions (jump, branch) take these as follows: @@ -74,26 +63,6 @@ enum JCOND { #define MASK_JR (0x1 << 8) #define MASK_JUMP_REGISTER(x) ((x) << 5) -static const char *j_to_human[] = { - [JB_UNCOND] = "jmp", - [JB_NEVER] = "jn", - [JB_ZERO] = "jz", - [JB_NZERO] = "jnz", - [JB_CARRY] = "jc", - [JB_NCARRY] = "jnc", - [JB_CARRYZ] = "jcz", - [JB_NCARRYZ] = "jncz" -}; -static const char *b_to_human[] = { - [JB_UNCOND] = "bra", - [JB_NEVER] = "bn", - 
[JB_ZERO] = "bz", - [JB_NZERO] = "bnz", - [JB_CARRY] = "bc", - [JB_NCARRY] = "bnc", - [JB_CARRYZ] = "bcz", - [JB_NCARRYZ] = "bncz" -}; /** * Register numbers used in all manner of instructions in varying positions @@ -109,17 +78,6 @@ enum REG { REG_H = 7 }; -static const char *reg_to_human[] = { - [REG_0] = "$0", - [REG_1] = "$1", - [REG_2] = "$2", - [REG_3] = "$3", - [REG_4] = "$4", - [REG_5] = "$5", - [REG_6] = "$6", - [REG_H] = "$H", -}; - /** * Offset macro to turn REG_* into mask for register operands of R-type and * I-type instructions diff --git a/parse.c b/parse.c index 6be202e..6ce7f67 100644 --- a/parse.c +++ b/parse.c @@ -9,62 +9,7 @@ #include "lex.h" #include "parse.h" #include "instruction.h" -#include "tok_util.h" - -#if 0 -struct label { - char *name; - size_t byte_offset; -}; - -union immediate { - const char *label; - int16_t value; -}; - -struct r_type { - enum OPER oper; - enum REG dest; - enum REG left; - enum REG right; -}; - -struct i_type { - enum OPER oper; - enum REG dest; - enum REG left; - bool imm_is_ident; - union immediate imm; -}; - -struct jr_type { - enum JCOND cond; - enum REG reg; -}; - -struct ji_type { - enum JCOND cond; - bool imm_is_ident; - union immediate imm; -}; - -struct b_type { - enum JCOND cond; - bool imm_is_ident; - union immediate imm; -}; - -struct instruction { - enum INST_TYPE type; - union instruction_u { - struct r_type r; /* catch-all R-Type */ - struct i_type i; /* I-type on immediate literal */ - struct jr_type jr; /* jump to register */ - struct ji_type ji; /* jump to immediate */ - struct b_type b; /* branch to immediate literal */ - } inst; -}; -#endif +#include "util.h" static const char *filename; static FILE *fd; @@ -499,89 +444,82 @@ int parse_instruction(void) * parse it like normal */ enum OPER op; - for (op = 0; op < sizeof(oper_to_human)/sizeof(*oper_to_human); op++) { - if (strcmp(oper_to_human[op], cursor->s_val) == 0) { - kerchunk(); - if ( parse_reg(&reg) || parse_comma() - || parse_reg(&reg_left) 
|| parse_comma() - || parse_reg(&reg_right) - || parse_eol()) - return 1; - return parse_r_type(op, reg, reg_left, reg_right); - } + if (get_oper_from_asm(cursor->s_val, &op) == 0) { + kerchunk(); + if ( parse_reg(&reg) || parse_comma() + || parse_reg(&reg_left) || parse_comma() + || parse_reg(&reg_right) + || parse_eol()) + return 1; + return parse_r_type(op, reg, reg_left, reg_right); } if (cursor->s_val[strlen(cursor->s_val) - 1] == 'i') { /* temporarily remove 'i' from end */ cursor->s_val[strlen(cursor->s_val) - 1] = '\0'; - for (op = 0; op < sizeof(oper_to_human)/sizeof(*oper_to_human); op++) { - if (strcmp(oper_to_human[op], cursor->s_val) == 0) { - kerchunk(); - if ( parse_reg(&reg) || parse_comma() - || parse_reg(&reg_left) || parse_comma()) - return 1; - switch (cursor->type) { - case TOKEN_NUMERIC: - if (parse_imm(&imm) || parse_eol()) - return 1; - return parse_i_type(op, reg, reg_left, imm); - case TOKEN_IDENT: - if (parse_ident(&ident) || parse_eol()) - return 1; - return parse_i_ident_type(op, reg, reg_left, ident); - default: - emit("Error: Expected numeric literal or identifier, got %s\n", - get_token_description(cursor->type)); - return 1; - } - } - } - /* fallthrough: pop it back on, we might need it */ - cursor->s_val[strlen(cursor->s_val)] = 'i'; - } - - enum JCOND cond; - for (cond = 0; cond < sizeof(j_to_human)/sizeof(*j_to_human); cond++) { - if (strcmp(j_to_human[cond], cursor->s_val) == 0) { + if ((get_oper_from_asm(cursor->s_val, &op)) == 0) { kerchunk(); + if ( parse_reg(&reg) || parse_comma() + || parse_reg(&reg_left) || parse_comma()) + return 1; + switch (cursor->type) { - case TOKEN_REGISTER: - if (parse_reg(&reg) || parse_eol()) - return 1; - return parse_j_reg_type(cond, reg); case TOKEN_NUMERIC: if (parse_imm(&imm) || parse_eol()) return 1; - return parse_j_imm_type(cond, imm); + return parse_i_type(op, reg, reg_left, imm); case TOKEN_IDENT: if (parse_ident(&ident) || parse_eol()) return 1; - return parse_j_ident_type(cond, ident); + 
return 
parse_i_ident_type(op, reg, reg_left, ident); default: - emit("Error: Expected register, numeric literal, or identifier, got %s\n", + emit("Error: Expected numeric literal or identifier, got %s\n", get_token_description(cursor->type)); return 1; } } + /* fallthrough: pop it back on, we might need it */ + cursor->s_val[strlen(cursor->s_val)] = 'i'; } - for (cond = 0; cond < sizeof(b_to_human)/sizeof(*b_to_human); cond++) { - if (strcmp(b_to_human[cond], cursor->s_val) == 0) { - kerchunk(); - switch (cursor->type) { - case TOKEN_NUMERIC: - if (parse_imm(&imm) || parse_eol()) - return 1; - return parse_b_imm_type(cond, imm); - case TOKEN_IDENT: - if (parse_ident(&ident) || parse_eol()) - return 1; - return parse_b_ident_type(cond, ident); - default: - emit("Error: Expected numeric literal, or identifier, got %s\n", - get_token_description(cursor->type)); + enum JCOND cond; + if (get_j_from_asm(cursor->s_val, &cond) == 0) { + kerchunk(); + switch (cursor->type) { + case TOKEN_REGISTER: + if (parse_reg(&reg) || parse_eol()) return 1; - } + return parse_j_reg_type(cond, reg); + case TOKEN_NUMERIC: + if (parse_imm(&imm) || parse_eol()) + return 1; + return parse_j_imm_type(cond, imm); + case TOKEN_IDENT: + if (parse_ident(&ident) || parse_eol()) + return 1; + return parse_j_ident_type(cond, ident); + default: + emit("Error: Expected register, numeric literal, or identifier, got %s\n", + get_token_description(cursor->type)); + return 1; + } + } + + if (get_b_from_asm(cursor->s_val, &cond) == 0) { + kerchunk(); + switch (cursor->type) { + case TOKEN_NUMERIC: + if (parse_imm(&imm) || parse_eol()) + return 1; + return parse_b_imm_type(cond, imm); + case TOKEN_IDENT: + if (parse_ident(&ident) || parse_eol()) + return 1; + return parse_b_ident_type(cond, ident); + default: + emit("Error: Expected numeric literal, or identifier, got %s\n", + get_token_description(cursor->type)); + return 1; } } diff --git a/tok_util.c b/tok_util.c deleted file mode 100644 index c17ca6d..0000000 --- 
a/tok_util.c +++ /dev/null @@ -1,78 +0,0 @@ -#include -#include - -#include "lex.h" - -const char *tok_to_desc[] = { - [TOKEN_REGISTER] = "register", - [TOKEN_NUMERIC] = "numeric literal", - [TOKEN_KEYWORD] = "keyword", - [TOKEN_STRING] = "string literal", - [TOKEN_COMMA] = "comma", - [TOKEN_LABEL] = "label", - [TOKEN_IDENT] = "identifier", - [TOKEN_DOT] = "assembler directive", - [TOKEN_EOL] = "end of line", -}; - -const char * get_token_description(enum TOKEN_TYPE t) -{ - if (t < 0 || t >= sizeof(tok_to_desc)/sizeof(*tok_to_desc)) { - return "[internal error]"; - } else { - return tok_to_desc[t]; - } -} - -void indicate_file_area(FILE* fd, size_t line, size_t column, size_t span) -{ - size_t i = 0; - const char margin[] = " "; - - char buf[1024] = { '\0' }; - char *s = buf; - char c = '\0'; - - rewind(fd); - while (line && !feof(fd) && fgets(buf, sizeof(buf), fd)) { - s = buf; - while (*s) { - if (*(s++) == '\n') { - line--; - } - } - } - - /* trim leading whitespace */ - s = buf; - while (*s == '\t' || *s == ' ') { - s++; - } - - /* filter non-printables to spaces to keep alignment correct */ - for (i = 0; i < strlen(s); i++) { - if (!isprint(s[i]) && s[i] != '\n') { - s[i] = ' '; - } - } - - fputs(margin, stderr); - fputs(s, stderr); - - /* corner case (still needed?) - buf was just return */ - if (strlen(buf) == 1 && buf[0] == '\n') { - fputc('\n', stderr); - } - - fputs(margin, stderr); - column -= (s - buf); - for (column--; column; column--) { - fputc(' ', stderr); - } - - c = span == 1 ? 
'^' : '"'; - for (; span; span--) { - fputc(c, stderr); - } - fputc('\n', stderr); -} diff --git a/tok_util.h b/tok_util.h deleted file mode 100644 index 21d3d30..0000000 --- a/tok_util.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef TOK_UTIL -#define TOK_UTIL - -#include "lex.h" - -const char * get_token_description(enum TOKEN_TYPE t); -void indicate_file_area(FILE* fd, size_t line, size_t column, size_t span); - -#endif /* TOK_UTIL */ diff --git a/util.c b/util.c new file mode 100644 index 0000000..c9b0bc1 --- /dev/null +++ b/util.c @@ -0,0 +1,189 @@ +#include +#include + +#include "instruction.h" +#include "lex.h" + +/** + * Human-readable descriptions for tokens + */ +static struct { + enum TOKEN_TYPE look; + const char *str; +} token_to_desc[] = { + { .look = TOKEN_REGISTER, .str = "register" }, + { .look = TOKEN_NUMERIC , .str = "numeric literal" }, + { .look = TOKEN_KEYWORD , .str = "keyword" }, + { .look = TOKEN_STRING , .str = "string literal" }, + { .look = TOKEN_COMMA , .str = "comma" }, + { .look = TOKEN_LABEL , .str = "label" }, + { .look = TOKEN_IDENT , .str = "identifier" }, + { .look = TOKEN_DOT , .str = "assembler directive" }, + { .look = TOKEN_EOL , .str = "end of line" }, + { .str = NULL }, +}; + +/** + * ALU operation to assembly instruction + */ +static struct { + enum OPER look; + const char *str; +} oper_to_asm[] = { + { .look = OPER_ADD, .str = "add" }, + { .look = OPER_SUB, .str = "sub" }, + { .look = OPER_SHL, .str = "shl" }, + { .look = OPER_SHR, .str = "shr" }, + { .look = OPER_AND, .str = "and" }, + { .look = OPER_OR , .str = "or" }, + { .look = OPER_XOR, .str = "xor" }, + { .look = OPER_MUL, .str = "mul" }, + { .str = NULL }, +}; + +/** + * Jump condition to jump assembly instruction + */ +static struct { + enum JCOND look; + const char *str; +} j_to_asm[] = { + { .look = JB_UNCOND , .str = "jmp" }, + { .look = JB_NEVER , .str = "jn" }, + { .look = JB_ZERO , .str = "jz" }, + { .look = JB_NZERO , .str = "jnz" }, + { .look = JB_CARRY , .str = 
"jc" }, + { .look = JB_NCARRY , .str = "jnc" }, + { .look = JB_CARRYZ , .str = "jcz" }, + { .look = JB_NCARRYZ, .str = "jncz" }, + { .str = NULL }, +}; + +/** + * Jump condition to branch assembly instruction + */ +static struct { + enum JCOND look; + const char *str; +} b_to_asm[] = { + { .look = JB_UNCOND , .str = "bra" }, + { .look = JB_NEVER , .str = "bn" }, + { .look = JB_ZERO , .str = "bz" }, + { .look = JB_NZERO , .str = "bnz" }, + { .look = JB_CARRY , .str = "bc" }, + { .look = JB_NCARRY , .str = "bnc" }, + { .look = JB_CARRYZ , .str = "bcz" }, + { .look = JB_NCARRYZ, .str = "bncz" }, + { .str = NULL }, +}; + +/** + * Register number to assembly representation + */ +static struct { + enum REG look; + const char *str; +} reg_to_asm[] = { + { .look = REG_0, .str = "$0" }, + { .look = REG_1, .str = "$1" }, + { .look = REG_2, .str = "$2" }, + { .look = REG_3, .str = "$3" }, + { .look = REG_4, .str = "$4" }, + { .look = REG_5, .str = "$5" }, + { .look = REG_6, .str = "$6" }, + { .look = REG_H, .str = "$H" }, + { .str = NULL }, +}; + +/* Generates a function that takes an enum value from the given type and looks + * it up in the given lookup table, returning a string that matches it from + * the table, or NULL if no such string exists */ +#define GENERATE_STR_LOOKUP_FUNC(name, lookup, type) \ +const char* name(type x) { \ + size_t i = 0; \ + for (i = 0; lookup[i].str; i++) \ + if (lookup[i].look == x) \ + return lookup[i].str; \ + return NULL; \ +} + +/* Inverse of GENERATE_STR_LOOKUP_FUNC - this generates a function that takes + * a string and places in *res an enum value matching that string as entered + * in the given lookup table. 
+ * Returns zero on match + * Returns non-zero on no match */ +#define GENERATE_NUM_LOOKUP_FUNC(name, lookup, type) \ +int name(const char *x, type *res) { \ + size_t i = 0; \ + for (i = 0; lookup[i].str; i++) \ + if (strcmp(lookup[i].str, x) == 0) { \ + *res = lookup[i].look; \ + return 0; \ + } \ + return 1; \ +} + +GENERATE_STR_LOOKUP_FUNC(get_asm_from_oper, oper_to_asm, enum OPER); +GENERATE_STR_LOOKUP_FUNC(get_asm_from_j, j_to_asm, enum JCOND); +GENERATE_STR_LOOKUP_FUNC(get_asm_from_b, b_to_asm, enum JCOND); +GENERATE_STR_LOOKUP_FUNC(get_asm_from_reg, reg_to_asm, enum REG); +GENERATE_STR_LOOKUP_FUNC(get_token_description, token_to_desc, enum TOKEN_TYPE); + +GENERATE_NUM_LOOKUP_FUNC(get_oper_from_asm, oper_to_asm, enum OPER); +GENERATE_NUM_LOOKUP_FUNC(get_j_from_asm, j_to_asm, enum JCOND); +GENERATE_NUM_LOOKUP_FUNC(get_b_from_asm, b_to_asm, enum JCOND); +GENERATE_NUM_LOOKUP_FUNC(get_reg_from_asm, reg_to_asm, enum REG); + + +void indicate_file_area(FILE* fd, size_t line, size_t column, size_t span) +{ + size_t i = 0; + const char margin[] = " "; + + char buf[1024] = { '\0' }; + char *s = buf; + char c = '\0'; + + rewind(fd); + while (line && !feof(fd) && fgets(buf, sizeof(buf), fd)) { + s = buf; + while (*s) { + if (*(s++) == '\n') { + line--; + } + } + } + + /* trim leading whitespace */ + s = buf; + while (*s == '\t' || *s == ' ') { + s++; + } + + /* filter non-printables to spaces to keep alignment correct */ + for (i = 0; i < strlen(s); i++) { + if (!isprint(s[i]) && s[i] != '\n') { + s[i] = ' '; + } + } + + fputs(margin, stderr); + fputs(s, stderr); + + /* corner case (still needed?) - buf was just return */ + if (strlen(buf) == 1 && buf[0] == '\n') { + fputc('\n', stderr); + } + + fputs(margin, stderr); + column -= (s - buf); + for (column--; column; column--) { + fputc(' ', stderr); + } + + c = span == 1 ? 
'^' : '"'; + for (; span; span--) { + fputc(c, stderr); + } + fputc('\n', stderr); +} diff --git a/util.h b/util.h new file mode 100644 index 0000000..21d3d30 --- /dev/null +++ b/util.h @@ -0,0 +1,9 @@ +#ifndef TOK_UTIL +#define TOK_UTIL + +#include "lex.h" + +const char * get_token_description(enum TOKEN_TYPE t); +void indicate_file_area(FILE* fd, size_t line, size_t column, size_t span); + +#endif /* TOK_UTIL */ -- cgit v1.1