summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: David Phillips <david@sighup.nz> 2019-04-14 17:49:14 +1200
committer: David Phillips <david@sighup.nz> 2019-08-03 12:43:44 +1200
commit: 0b7bf68ade7a646377acbc7eb535ba1133d18475 (patch)
tree: 839737fbc628cae00a1fc2e56ff57fee5aa56d42
parent: 38e3922cf7f521d1e119cbeff8722f0d8ca4c66a (diff)
download: toy-cpu-assembler-0b7bf68ade7a646377acbc7eb535ba1133d18475.tar.xz
Factor out various lookup tables
-rw-r--r-- Makefile 6
-rw-r--r-- instruction.h 42
-rw-r--r-- parse.c 174
-rw-r--r-- tok_util.c 78
-rw-r--r-- util.c 189
-rw-r--r-- util.h (renamed from tok_util.h) 0
6 files changed, 248 insertions, 241 deletions
diff --git a/Makefile b/Makefile
index fa8f61b..34dfbdb 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-OBJECTS = lex.o parse.o output.o assembler.o tok_util.o
+OBJECTS = lex.o parse.o output.o assembler.o util.o
all: assembler
@@ -6,11 +6,11 @@ assembler: $(OBJECTS)
lex.o: lex.h
-parse.o: lex.h parse.h instruction.h tok_util.h
+parse.o: lex.h parse.h instruction.h util.h
output.o: parse.h
-tok_util.o: lex.h
+util.o: lex.h instruction.h
.PHONY: clean
diff --git a/instruction.h b/instruction.h
index 3ee18d9..7e1a490 100644
--- a/instruction.h
+++ b/instruction.h
@@ -40,17 +40,6 @@ enum OPER {
#define OPER_SHAMT (11)
#define MASK_OPER(x) ((x) << OPER_SHAMT)
-static const char *oper_to_human[] = {
- [OPER_ADD] = "add",
- [OPER_SUB] = "sub",
- [OPER_SHL] = "shl",
- [OPER_SHR] = "shr",
- [OPER_AND] = "and",
- [OPER_OR ] = "or",
- [OPER_XOR] = "xor",
- [OPER_MUL] = "mul"
-};
-
/**
* Masks for jump and branch conditions
* J-type instructions (jump, branch) take these as follows:
@@ -74,26 +63,6 @@ enum JCOND {
#define MASK_JR (0x1 << 8)
#define MASK_JUMP_REGISTER(x) ((x) << 5)
-static const char *j_to_human[] = {
- [JB_UNCOND] = "jmp",
- [JB_NEVER] = "jn",
- [JB_ZERO] = "jz",
- [JB_NZERO] = "jnz",
- [JB_CARRY] = "jc",
- [JB_NCARRY] = "jnc",
- [JB_CARRYZ] = "jcz",
- [JB_NCARRYZ] = "jncz"
-};
-static const char *b_to_human[] = {
- [JB_UNCOND] = "bra",
- [JB_NEVER] = "bn",
- [JB_ZERO] = "bz",
- [JB_NZERO] = "bnz",
- [JB_CARRY] = "bc",
- [JB_NCARRY] = "bnc",
- [JB_CARRYZ] = "bcz",
- [JB_NCARRYZ] = "bncz"
-};
/**
* Register numbers used in all manner of instructions in varying positions
@@ -109,17 +78,6 @@ enum REG {
REG_H = 7
};
-static const char *reg_to_human[] = {
- [REG_0] = "$0",
- [REG_1] = "$1",
- [REG_2] = "$2",
- [REG_3] = "$3",
- [REG_4] = "$4",
- [REG_5] = "$5",
- [REG_6] = "$6",
- [REG_H] = "$H",
-};
-
/**
* Offset macro to turn REG_* into mask for register operands of R-type and
* I-type instructions
diff --git a/parse.c b/parse.c
index 6be202e..6ce7f67 100644
--- a/parse.c
+++ b/parse.c
@@ -9,62 +9,7 @@
#include "lex.h"
#include "parse.h"
#include "instruction.h"
-#include "tok_util.h"
-
-#if 0
-struct label {
- char *name;
- size_t byte_offset;
-};
-
-union immediate {
- const char *label;
- int16_t value;
-};
-
-struct r_type {
- enum OPER oper;
- enum REG dest;
- enum REG left;
- enum REG right;
-};
-
-struct i_type {
- enum OPER oper;
- enum REG dest;
- enum REG left;
- bool imm_is_ident;
- union immediate imm;
-};
-
-struct jr_type {
- enum JCOND cond;
- enum REG reg;
-};
-
-struct ji_type {
- enum JCOND cond;
- bool imm_is_ident;
- union immediate imm;
-};
-
-struct b_type {
- enum JCOND cond;
- bool imm_is_ident;
- union immediate imm;
-};
-
-struct instruction {
- enum INST_TYPE type;
- union instruction_u {
- struct r_type r; /* catch-all R-Type */
- struct i_type i; /* I-type on immediate literal */
- struct jr_type jr; /* jump to register */
- struct ji_type ji; /* jump to immediate */
- struct b_type b; /* branch to immediate literal */
- } inst;
-};
-#endif
+#include "util.h"
static const char *filename;
static FILE *fd;
@@ -499,89 +444,82 @@ int parse_instruction(void)
* parse it like normal */
enum OPER op;
- for (op = 0; op < sizeof(oper_to_human)/sizeof(*oper_to_human); op++) {
- if (strcmp(oper_to_human[op], cursor->s_val) == 0) {
- kerchunk();
- if ( parse_reg(&reg) || parse_comma()
- || parse_reg(&reg_left) || parse_comma()
- || parse_reg(&reg_right)
- || parse_eol())
- return 1;
- return parse_r_type(op, reg, reg_left, reg_right);
- }
+ if (get_oper_from_asm(cursor->s_val, &op) == 0) {
+ kerchunk();
+ if ( parse_reg(&reg) || parse_comma()
+ || parse_reg(&reg_left) || parse_comma()
+ || parse_reg(&reg_right)
+ || parse_eol())
+ return 1;
+ return parse_r_type(op, reg, reg_left, reg_right);
}
if (cursor->s_val[strlen(cursor->s_val) - 1] == 'i') {
/* temporarily remove 'i' from end */
cursor->s_val[strlen(cursor->s_val) - 1] = '\0';
- for (op = 0; op < sizeof(oper_to_human)/sizeof(*oper_to_human); op++) {
- if (strcmp(oper_to_human[op], cursor->s_val) == 0) {
- kerchunk();
- if ( parse_reg(&reg) || parse_comma()
- || parse_reg(&reg_left) || parse_comma())
- return 1;
- switch (cursor->type) {
- case TOKEN_NUMERIC:
- if (parse_imm(&imm) || parse_eol())
- return 1;
- return parse_i_type(op, reg, reg_left, imm);
- case TOKEN_IDENT:
- if (parse_ident(&ident) || parse_eol())
- return 1;
- return parse_i_ident_type(op, reg, reg_left, ident);
- default:
- emit("Error: Expected numeric literal or identifier, got %s\n",
- get_token_description(cursor->type));
- return 1;
- }
- }
- }
- /* fallthrough: pop it back on, we might need it */
- cursor->s_val[strlen(cursor->s_val)] = 'i';
- }
-
- enum JCOND cond;
- for (cond = 0; cond < sizeof(j_to_human)/sizeof(*j_to_human); cond++) {
- if (strcmp(j_to_human[cond], cursor->s_val) == 0) {
+ if ((get_oper_from_asm(cursor->s_val, &op)) == 0) {
kerchunk();
+ if ( parse_reg(&reg) || parse_comma()
+ || parse_reg(&reg_left) || parse_comma())
+ return 1;
+
switch (cursor->type) {
- case TOKEN_REGISTER:
- if (parse_reg(&reg) || parse_eol())
- return 1;
- return parse_j_reg_type(cond, reg);
case TOKEN_NUMERIC:
if (parse_imm(&imm) || parse_eol())
return 1;
- return parse_j_imm_type(cond, imm);
+ return parse_i_type(op, reg, reg_left, imm);
case TOKEN_IDENT:
if (parse_ident(&ident) || parse_eol())
return 1;
- return parse_j_ident_type(cond, ident);
+ return parse_i_ident_type(op, reg, reg_left, ident);
default:
- emit("Error: Expected register, numeric literal, or identifier, got %s\n",
+ emit("Error: Expected numeric literal or identifier, got %s\n",
get_token_description(cursor->type));
return 1;
}
}
+ /* fallthrough: pop it back on, we might need it */
+ cursor->s_val[strlen(cursor->s_val)] = 'i';
}
- for (cond = 0; cond < sizeof(b_to_human)/sizeof(*b_to_human); cond++) {
- if (strcmp(b_to_human[cond], cursor->s_val) == 0) {
- kerchunk();
- switch (cursor->type) {
- case TOKEN_NUMERIC:
- if (parse_imm(&imm) || parse_eol())
- return 1;
- return parse_b_imm_type(cond, imm);
- case TOKEN_IDENT:
- if (parse_ident(&ident) || parse_eol())
- return 1;
- return parse_b_ident_type(cond, ident);
- default:
- emit("Error: Expected numeric literal, or identifier, got %s\n",
- get_token_description(cursor->type));
+ enum JCOND cond;
+ if (get_j_from_asm(cursor->s_val, &cond) == 0) {
+ kerchunk();
+ switch (cursor->type) {
+ case TOKEN_REGISTER:
+ if (parse_reg(&reg) || parse_eol())
return 1;
- }
+ return parse_j_reg_type(cond, reg);
+ case TOKEN_NUMERIC:
+ if (parse_imm(&imm) || parse_eol())
+ return 1;
+ return parse_j_imm_type(cond, imm);
+ case TOKEN_IDENT:
+ if (parse_ident(&ident) || parse_eol())
+ return 1;
+ return parse_j_ident_type(cond, ident);
+ default:
+ emit("Error: Expected register, numeric literal, or identifier, got %s\n",
+ get_token_description(cursor->type));
+ return 1;
+ }
+ }
+
+ if (get_b_from_asm(cursor->s_val, &cond) == 0) {
+ kerchunk();
+ switch (cursor->type) {
+ case TOKEN_NUMERIC:
+ if (parse_imm(&imm) || parse_eol())
+ return 1;
+ return parse_b_imm_type(cond, imm);
+ case TOKEN_IDENT:
+ if (parse_ident(&ident) || parse_eol())
+ return 1;
+ return parse_b_ident_type(cond, ident);
+ default:
+ emit("Error: Expected numeric literal, or identifier, got %s\n",
+ get_token_description(cursor->type));
+ return 1;
}
}
diff --git a/tok_util.c b/tok_util.c
deleted file mode 100644
index c17ca6d..0000000
--- a/tok_util.c
+++ /dev/null
@@ -1,78 +0,0 @@
-#include <string.h>
-#include <ctype.h>
-
-#include "lex.h"
-
-const char *tok_to_desc[] = {
- [TOKEN_REGISTER] = "register",
- [TOKEN_NUMERIC] = "numeric literal",
- [TOKEN_KEYWORD] = "keyword",
- [TOKEN_STRING] = "string literal",
- [TOKEN_COMMA] = "comma",
- [TOKEN_LABEL] = "label",
- [TOKEN_IDENT] = "identifier",
- [TOKEN_DOT] = "assembler directive",
- [TOKEN_EOL] = "end of line",
-};
-
-const char * get_token_description(enum TOKEN_TYPE t)
-{
- if (t < 0 || t >= sizeof(tok_to_desc)/sizeof(*tok_to_desc)) {
- return "[internal error]";
- } else {
- return tok_to_desc[t];
- }
-}
-
-void indicate_file_area(FILE* fd, size_t line, size_t column, size_t span)
-{
- size_t i = 0;
- const char margin[] = " ";
-
- char buf[1024] = { '\0' };
- char *s = buf;
- char c = '\0';
-
- rewind(fd);
- while (line && !feof(fd) && fgets(buf, sizeof(buf), fd)) {
- s = buf;
- while (*s) {
- if (*(s++) == '\n') {
- line--;
- }
- }
- }
-
- /* trim leading whitespace */
- s = buf;
- while (*s == '\t' || *s == ' ') {
- s++;
- }
-
- /* filter non-printables to spaces to keep alignment correct */
- for (i = 0; i < strlen(s); i++) {
- if (!isprint(s[i]) && s[i] != '\n') {
- s[i] = ' ';
- }
- }
-
- fputs(margin, stderr);
- fputs(s, stderr);
-
- /* corner case (still needed?) - buf was just return */
- if (strlen(buf) == 1 && buf[0] == '\n') {
- fputc('\n', stderr);
- }
-
- fputs(margin, stderr);
- column -= (s - buf);
- for (column--; column; column--) {
- fputc(' ', stderr);
- }
-
- c = span == 1 ? '^' : '"';
- for (; span; span--) {
- fputc(c, stderr);
- }
- fputc('\n', stderr);
-}
diff --git a/util.c b/util.c
new file mode 100644
index 0000000..c9b0bc1
--- /dev/null
+++ b/util.c
@@ -0,0 +1,189 @@
+#include <string.h>
+#include <ctype.h>
+
+#include "instruction.h"
+#include "lex.h"
+
+/**
+ * Human-readable descriptions for tokens
+ */
+static struct {
+ enum TOKEN_TYPE look;
+ const char *str;
+} token_to_desc[] = {
+ { .look = TOKEN_REGISTER, .str = "register" },
+ { .look = TOKEN_NUMERIC , .str = "numeric literal" },
+ { .look = TOKEN_KEYWORD , .str = "keyword" },
+ { .look = TOKEN_STRING , .str = "string literal" },
+ { .look = TOKEN_COMMA , .str = "comma" },
+ { .look = TOKEN_LABEL , .str = "label" },
+ { .look = TOKEN_IDENT , .str = "identifier" },
+ { .look = TOKEN_DOT , .str = "assembler directive" },
+ { .look = TOKEN_EOL , .str = "end of line" },
+ { .str = NULL },
+};
+
+/**
+ * ALU operation to assembly instruction
+ */
+static struct {
+ enum OPER look;
+ const char *str;
+} oper_to_asm[] = {
+ { .look = OPER_ADD, .str = "add" },
+ { .look = OPER_SUB, .str = "sub" },
+ { .look = OPER_SHL, .str = "shl" },
+ { .look = OPER_SHR, .str = "shr" },
+ { .look = OPER_AND, .str = "and" },
+ { .look = OPER_OR , .str = "or" },
+ { .look = OPER_XOR, .str = "xor" },
+ { .look = OPER_MUL, .str = "mul" },
+ { .str = NULL },
+};
+
+/**
+ * Jump condition to jump assembly instruction
+ */
+static struct {
+ enum JCOND look;
+ const char *str;
+} j_to_asm[] = {
+ { .look = JB_UNCOND , .str = "jmp" },
+ { .look = JB_NEVER , .str = "jn" },
+ { .look = JB_ZERO , .str = "jz" },
+ { .look = JB_NZERO , .str = "jnz" },
+ { .look = JB_CARRY , .str = "jc" },
+ { .look = JB_NCARRY , .str = "jnc" },
+ { .look = JB_CARRYZ , .str = "jcz" },
+ { .look = JB_NCARRYZ, .str = "jncz" },
+ { .str = NULL },
+};
+
+/**
+ * Jump condition to branch assembly instruction
+ */
+static struct {
+ enum JCOND look;
+ const char *str;
+} b_to_asm[] = {
+ { .look = JB_UNCOND , .str = "bra" },
+ { .look = JB_NEVER , .str = "bn" },
+ { .look = JB_ZERO , .str = "bz" },
+ { .look = JB_NZERO , .str = "bnz" },
+ { .look = JB_CARRY , .str = "bc" },
+ { .look = JB_NCARRY , .str = "bnc" },
+ { .look = JB_CARRYZ , .str = "bcz" },
+ { .look = JB_NCARRYZ, .str = "bncz" },
+ { .str = NULL },
+};
+
+/**
+ * Register number to assembly representation
+ */
+static struct {
+ enum REG look;
+ const char *str;
+} reg_to_asm[] = {
+ { .look = REG_0, .str = "$0" },
+ { .look = REG_1, .str = "$1" },
+ { .look = REG_2, .str = "$2" },
+ { .look = REG_3, .str = "$3" },
+ { .look = REG_4, .str = "$4" },
+ { .look = REG_5, .str = "$5" },
+ { .look = REG_6, .str = "$6" },
+ { .look = REG_H, .str = "$H" },
+ { .str = NULL },
+};
+
+/* Generates a function that takes an enum value from the given type and looks
+ * it up in the given lookup table, returning a string that matches it from
+ * the table, or NULL if no such string exists */
+#define GENERATE_STR_LOOKUP_FUNC(name, lookup, type) \
+const char* name(type x) { \
+ size_t i = 0; \
+ for (i = 0; lookup[i].str; i++) \
+ if (lookup[i].look == x) \
+ return lookup[i].str; \
+ return NULL; \
+}
+
+/* Inverse of GENERATE_STR_LOOKUP_FUNC - this generates a function that takes
+ * a string and places in *res an enum value matching that string as entered
+ * in the given lookup table.
+ * Returns zero on match
+ * Returns non-zero on no match */
+#define GENERATE_NUM_LOOKUP_FUNC(name, lookup, type) \
+int name(const char *x, type *res) { \
+ size_t i = 0; \
+ for (i = 0; lookup[i].str; i++) \
+ if (strcmp(lookup[i].str, x) == 0) { \
+ *res = lookup[i].look; \
+ return 0; \
+ } \
+ return 1; \
+}
+
+GENERATE_STR_LOOKUP_FUNC(get_asm_from_oper, oper_to_asm, enum OPER);
+GENERATE_STR_LOOKUP_FUNC(get_asm_from_j, j_to_asm, enum JCOND);
+GENERATE_STR_LOOKUP_FUNC(get_asm_from_b, b_to_asm, enum JCOND);
+GENERATE_STR_LOOKUP_FUNC(get_asm_from_reg, reg_to_asm, enum REG);
+GENERATE_STR_LOOKUP_FUNC(get_token_description, token_to_desc, enum TOKEN_TYPE);
+
+GENERATE_NUM_LOOKUP_FUNC(get_oper_from_asm, oper_to_asm, enum OPER);
+GENERATE_NUM_LOOKUP_FUNC(get_j_from_asm, j_to_asm, enum JCOND);
+GENERATE_NUM_LOOKUP_FUNC(get_b_from_asm, b_to_asm, enum JCOND);
+GENERATE_NUM_LOOKUP_FUNC(get_reg_from_asm, reg_to_asm, enum REG);
+
+
+void indicate_file_area(FILE* fd, size_t line, size_t column, size_t span)
+{
+ size_t i = 0;
+ const char margin[] = " ";
+
+ char buf[1024] = { '\0' };
+ char *s = buf;
+ char c = '\0';
+
+ rewind(fd);
+ while (line && !feof(fd) && fgets(buf, sizeof(buf), fd)) {
+ s = buf;
+ while (*s) {
+ if (*(s++) == '\n') {
+ line--;
+ }
+ }
+ }
+
+ /* trim leading whitespace */
+ s = buf;
+ while (*s == '\t' || *s == ' ') {
+ s++;
+ }
+
+ /* filter non-printables to spaces to keep alignment correct */
+ for (i = 0; i < strlen(s); i++) {
+ if (!isprint(s[i]) && s[i] != '\n') {
+ s[i] = ' ';
+ }
+ }
+
+ fputs(margin, stderr);
+ fputs(s, stderr);
+
+ /* corner case (still needed?) - buf was just return */
+ if (strlen(buf) == 1 && buf[0] == '\n') {
+ fputc('\n', stderr);
+ }
+
+ fputs(margin, stderr);
+ column -= (s - buf);
+ for (column--; column; column--) {
+ fputc(' ', stderr);
+ }
+
+ c = span == 1 ? '^' : '"';
+ for (; span; span--) {
+ fputc(c, stderr);
+ }
+ fputc('\n', stderr);
+}
diff --git a/tok_util.h b/util.h
index 21d3d30..21d3d30 100644
--- a/tok_util.h
+++ b/util.h