From 0b7bf68ade7a646377acbc7eb535ba1133d18475 Mon Sep 17 00:00:00 2001
From: David Phillips <david@sighup.nz>
Date: Sun, 14 Apr 2019 17:49:14 +1200
Subject: Factor out various lookup tables

---
 Makefile      |   6 +-
 instruction.h |  42 -------------
 parse.c       | 174 +++++++++++++++++------------------------------------
 tok_util.c    |  78 ------------------------
 tok_util.h    |   9 ---
 util.c        | 189 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 util.h        |   9 +++
 7 files changed, 257 insertions(+), 250 deletions(-)
 delete mode 100644 tok_util.c
 delete mode 100644 tok_util.h
 create mode 100644 util.c
 create mode 100644 util.h

diff --git a/Makefile b/Makefile
index fa8f61b..34dfbdb 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-OBJECTS = lex.o parse.o output.o assembler.o tok_util.o
+OBJECTS = lex.o parse.o output.o assembler.o util.o
 
 all: assembler
 
@@ -6,11 +6,11 @@ assembler: $(OBJECTS)
 
 lex.o: lex.h
 
-parse.o: lex.h parse.h instruction.h tok_util.h
+parse.o: lex.h parse.h instruction.h util.h
 
 output.o: parse.h
 
-tok_util.o: lex.h
+util.o: lex.h instruction.h
 
 
 .PHONY: clean
diff --git a/instruction.h b/instruction.h
index 3ee18d9..7e1a490 100644
--- a/instruction.h
+++ b/instruction.h
@@ -40,17 +40,6 @@ enum OPER {
 #define OPER_SHAMT (11)
 #define MASK_OPER(x) ((x) << OPER_SHAMT)
 
-static const char *oper_to_human[] = {
-	[OPER_ADD] = "add",
-	[OPER_SUB] = "sub",
-	[OPER_SHL] = "shl",
-	[OPER_SHR] = "shr",
-	[OPER_AND] = "and",
-	[OPER_OR ] = "or",
-	[OPER_XOR] = "xor",
-	[OPER_MUL] = "mul"
-};
-
 /**
  * Masks for jump and branch conditions
  * J-type instructions (jump, branch) take these as follows:
@@ -74,26 +63,6 @@ enum JCOND {
 #define MASK_JR (0x1 << 8)
 #define MASK_JUMP_REGISTER(x) ((x) << 5)
 
-static const char *j_to_human[] = {
-	[JB_UNCOND]  = "jmp",
-	[JB_NEVER]   = "jn",
-	[JB_ZERO]    = "jz",
-	[JB_NZERO]   = "jnz",
-	[JB_CARRY]   = "jc",
-	[JB_NCARRY]  = "jnc",
-	[JB_CARRYZ]  = "jcz",
-	[JB_NCARRYZ] = "jncz"
-};
-static const char *b_to_human[] = {
-	[JB_UNCOND]  = "bra",
-	[JB_NEVER]   = "bn",
-	[JB_ZERO]    = "bz",
-	[JB_NZERO]   = "bnz",
-	[JB_CARRY]   = "bc",
-	[JB_NCARRY]  = "bnc",
-	[JB_CARRYZ]  = "bcz",
-	[JB_NCARRYZ] = "bncz"
-};
 
 /**
  * Register numbers used in all manner of instructions in varying positions
@@ -109,17 +78,6 @@ enum REG {
 	REG_H = 7
 };
 
-static const char *reg_to_human[] = {
-	[REG_0] = "$0",
-	[REG_1] = "$1",
-	[REG_2] = "$2",
-	[REG_3] = "$3",
-	[REG_4] = "$4",
-	[REG_5] = "$5",
-	[REG_6] = "$6",
-	[REG_H] = "$H",
-};
-
 /**
  * Offset macro to turn REG_* into mask for register operands of R-type and
  * I-type instructions
diff --git a/parse.c b/parse.c
index 6be202e..6ce7f67 100644
--- a/parse.c
+++ b/parse.c
@@ -9,62 +9,7 @@
 #include "lex.h"
 #include "parse.h"
 #include "instruction.h"
-#include "tok_util.h"
-
-#if 0
-struct label {
-	char *name;
-	size_t byte_offset;
-};
-
-union immediate {
-	const char *label;
-	int16_t value;
-};
-
-struct r_type {
-	enum OPER oper;
-	enum REG dest;
-	enum REG left;
-	enum REG right;
-};
-
-struct i_type {
-	enum OPER oper;
-	enum REG dest;
-	enum REG left;
-	bool imm_is_ident;
-	union immediate imm;
-};
-
-struct jr_type {
-	enum JCOND cond;
-	enum REG reg;
-};
-
-struct ji_type {
-	enum JCOND cond;
-	bool imm_is_ident;
-	union immediate imm;
-};
-
-struct b_type {
-	enum JCOND cond;
-	bool imm_is_ident;
-	union immediate imm;
-};
-
-struct instruction {
-	enum INST_TYPE type;
-	union instruction_u {
-		struct r_type r;   /* catch-all R-Type */
-		struct i_type i;   /* I-type on immediate literal */
-		struct jr_type jr; /* jump to register */
-		struct ji_type ji; /* jump to immediate */
-		struct b_type b;   /* branch to immediate literal */
-	} inst;
-};
-#endif
+#include "util.h"
 
 static const char *filename;
 static FILE *fd;
@@ -499,89 +444,82 @@ int parse_instruction(void)
 	 * parse it like normal */
 
 	enum OPER op;
-	for (op = 0; op < sizeof(oper_to_human)/sizeof(*oper_to_human); op++) {
-		if (strcmp(oper_to_human[op], cursor->s_val) == 0) {
-			kerchunk();
-			if (   parse_reg(&reg) || parse_comma()
-			    || parse_reg(&reg_left) || parse_comma()
-			    || parse_reg(&reg_right)
-			    || parse_eol())
-				return 1;
-			return parse_r_type(op, reg, reg_left, reg_right);
-		}
+	if (get_oper_from_asm(cursor->s_val, &op) == 0) {
+		kerchunk();
+		if (   parse_reg(&reg) || parse_comma()
+			|| parse_reg(&reg_left) || parse_comma()
+			|| parse_reg(&reg_right)
+			|| parse_eol())
+			return 1;
+		return parse_r_type(op, reg, reg_left, reg_right);
 	}
 	if (cursor->s_val[strlen(cursor->s_val) - 1] == 'i') {
 		/* temporarily remove 'i' from end */
 		cursor->s_val[strlen(cursor->s_val) - 1] = '\0';
-		for (op = 0; op < sizeof(oper_to_human)/sizeof(*oper_to_human); op++) {
-			if (strcmp(oper_to_human[op], cursor->s_val) == 0) {
-				kerchunk();
-				if (   parse_reg(&reg) || parse_comma()
-					|| parse_reg(&reg_left) || parse_comma())
-					return 1;
 
-				switch (cursor->type) {
-					case TOKEN_NUMERIC:
-						if (parse_imm(&imm) || parse_eol())
-							return 1;
-						return parse_i_type(op, reg, reg_left, imm);
-					case TOKEN_IDENT:
-						if (parse_ident(&ident) || parse_eol())
-							return 1;
-						return parse_i_ident_type(op, reg, reg_left, ident);
-					default:
-						emit("Error: Expected numeric literal or identifier, got %s\n",
-							get_token_description(cursor->type));
-						return 1;
-				}
-			}
-		}
-		/* fallthrough: pop it back on, we might need it */
-		cursor->s_val[strlen(cursor->s_val)] = 'i';
-	}
-
-	enum JCOND cond;
-	for (cond = 0; cond < sizeof(j_to_human)/sizeof(*j_to_human); cond++) {
-		if (strcmp(j_to_human[cond], cursor->s_val) == 0) {
+		if ((get_oper_from_asm(cursor->s_val, &op)) == 0) {
 			kerchunk();
+			if (   parse_reg(&reg) || parse_comma()
+				|| parse_reg(&reg_left) || parse_comma())
+				return 1;
+
 			switch (cursor->type) {
-				case TOKEN_REGISTER:
-					if (parse_reg(&reg) || parse_eol())
-						return 1;
-					return parse_j_reg_type(cond, reg);
 				case TOKEN_NUMERIC:
 					if (parse_imm(&imm) || parse_eol())
 						return 1;
-					return parse_j_imm_type(cond, imm);
+					return parse_i_type(op, reg, reg_left, imm);
 				case TOKEN_IDENT:
 					if (parse_ident(&ident) || parse_eol())
 						return 1;
-					return parse_j_ident_type(cond, ident);
+					return parse_i_ident_type(op, reg, reg_left, ident);
 				default:
-					emit("Error: Expected register, numeric literal, or identifier, got %s\n",
+					emit("Error: Expected numeric literal or identifier, got %s\n",
 						get_token_description(cursor->type));
 					return 1;
 			}
 		}
+		/* fallthrough: pop it back on, we might need it */
+		cursor->s_val[strlen(cursor->s_val)] = 'i';
 	}
 
-	for (cond = 0; cond < sizeof(b_to_human)/sizeof(*b_to_human); cond++) {
-		if (strcmp(b_to_human[cond], cursor->s_val) == 0) {
-			kerchunk();
-			switch (cursor->type) {
-				case TOKEN_NUMERIC:
-					if (parse_imm(&imm) || parse_eol())
-						return 1;
-					return parse_b_imm_type(cond, imm);
-				case TOKEN_IDENT:
-					if (parse_ident(&ident) || parse_eol())
-						return 1;
-					return parse_b_ident_type(cond, ident);
-				default:
-					emit("Error: Expected numeric literal, or identifier, got %s\n",
-						get_token_description(cursor->type));
+	enum JCOND cond;
+	if (get_j_from_asm(cursor->s_val, &cond) == 0) {
+		kerchunk();
+		switch (cursor->type) {
+			case TOKEN_REGISTER:
+				if (parse_reg(&reg) || parse_eol())
 					return 1;
-			}
+				return parse_j_reg_type(cond, reg);
+			case TOKEN_NUMERIC:
+				if (parse_imm(&imm) || parse_eol())
+					return 1;
+				return parse_j_imm_type(cond, imm);
+			case TOKEN_IDENT:
+				if (parse_ident(&ident) || parse_eol())
+					return 1;
+				return parse_j_ident_type(cond, ident);
+			default:
+				emit("Error: Expected register, numeric literal, or identifier, got %s\n",
+					get_token_description(cursor->type));
+				return 1;
+		}
+	}
+
+	if (get_b_from_asm(cursor->s_val, &cond) == 0) {
+		kerchunk();
+		switch (cursor->type) {
+			case TOKEN_NUMERIC:
+				if (parse_imm(&imm) || parse_eol())
+					return 1;
+				return parse_b_imm_type(cond, imm);
+			case TOKEN_IDENT:
+				if (parse_ident(&ident) || parse_eol())
+					return 1;
+				return parse_b_ident_type(cond, ident);
+			default:
+				emit("Error: Expected numeric literal, or identifier, got %s\n",
+					get_token_description(cursor->type));
+				return 1;
 		}
 	}
 
diff --git a/tok_util.c b/tok_util.c
deleted file mode 100644
index c17ca6d..0000000
--- a/tok_util.c
+++ /dev/null
@@ -1,78 +0,0 @@
-#include <string.h>
-#include <ctype.h>
-
-#include "lex.h"
-
-const char *tok_to_desc[] = {
-	[TOKEN_REGISTER] = "register",
-	[TOKEN_NUMERIC] = "numeric literal",
-	[TOKEN_KEYWORD] = "keyword",
-	[TOKEN_STRING] = "string literal",
-	[TOKEN_COMMA] = "comma",
-	[TOKEN_LABEL] = "label",
-	[TOKEN_IDENT] = "identifier",
-	[TOKEN_DOT] = "assembler directive",
-	[TOKEN_EOL] = "end of line",
-};
-
-const char * get_token_description(enum TOKEN_TYPE t)
-{
-	if (t < 0 || t >= sizeof(tok_to_desc)/sizeof(*tok_to_desc)) {
-		return "[internal error]";
-	} else {
-		return tok_to_desc[t];
-	}
-}
-
-void indicate_file_area(FILE* fd, size_t line, size_t column, size_t span)
-{
-	size_t i = 0;
-	const char margin[] = "  ";
-
-	char buf[1024] = { '\0' };
-	char *s = buf;
-	char c = '\0';
-
-	rewind(fd);
-	while (line && !feof(fd) && fgets(buf, sizeof(buf), fd)) {
-		s = buf;
-		while (*s) {
-			if (*(s++) == '\n') {
-				line--;
-			}
-		}
-	}
-
-	/* trim leading whitespace */
-	s = buf;
-	while (*s == '\t' || *s == ' ') {
-		s++;
-	}
-
-	/* filter non-printables to spaces to keep alignment correct */
-	for (i = 0; i < strlen(s); i++) {
-		if (!isprint(s[i]) && s[i] != '\n') {
-			s[i] = ' ';
-		}
-	}
-
-	fputs(margin, stderr);
-	fputs(s, stderr);
-
-	/* corner case (still needed?) - buf was just return */
-	if (strlen(buf) == 1 && buf[0] == '\n') {
-		fputc('\n', stderr);
-	}
-
-	fputs(margin, stderr);
-	column -= (s - buf);
-	for (column--; column; column--) {
-		fputc(' ', stderr);
-	}
-
-	c = span == 1 ? '^' : '"';
-	for (; span; span--) {
-		fputc(c, stderr);
-	}
-	fputc('\n', stderr);
-}
diff --git a/tok_util.h b/tok_util.h
deleted file mode 100644
index 21d3d30..0000000
--- a/tok_util.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef TOK_UTIL
-#define TOK_UTIL
-
-#include "lex.h"
-
-const char * get_token_description(enum TOKEN_TYPE t);
-void indicate_file_area(FILE* fd, size_t line, size_t column, size_t span);
-
-#endif /* TOK_UTIL */
diff --git a/util.c b/util.c
new file mode 100644
index 0000000..c9b0bc1
--- /dev/null
+++ b/util.c
@@ -0,0 +1,189 @@
+#include <string.h>
+#include <ctype.h>
+
+#include "instruction.h"
+#include "lex.h"
+
+/**
+ * Human-readable descriptions for tokens (a NULL .str ends the table)
+ */
+static const struct {
+	enum TOKEN_TYPE look;
+	const char *str;
+} token_to_desc[] = {
+	{ .look = TOKEN_REGISTER, .str = "register"            },
+	{ .look = TOKEN_NUMERIC , .str = "numeric literal"     },
+	{ .look = TOKEN_KEYWORD , .str = "keyword"             },
+	{ .look = TOKEN_STRING  , .str = "string literal"      },
+	{ .look = TOKEN_COMMA   , .str = "comma"               },
+	{ .look = TOKEN_LABEL   , .str = "label"               },
+	{ .look = TOKEN_IDENT   , .str = "identifier"          },
+	{ .look = TOKEN_DOT     , .str = "assembler directive" },
+	{ .look = TOKEN_EOL     , .str = "end of line"         },
+	{ .str = NULL },
+};
+
+/**
+ * ALU operation to assembly mnemonic (a NULL .str ends the table)
+ */
+static const struct {
+	enum OPER look;
+	const char *str;
+} oper_to_asm[] = {
+	{ .look = OPER_ADD, .str = "add" },
+	{ .look = OPER_SUB, .str = "sub" },
+	{ .look = OPER_SHL, .str = "shl" },
+	{ .look = OPER_SHR, .str = "shr" },
+	{ .look = OPER_AND, .str = "and" },
+	{ .look = OPER_OR , .str = "or"  },
+	{ .look = OPER_XOR, .str = "xor" },
+	{ .look = OPER_MUL, .str = "mul" },
+	{ .str = NULL },
+};
+
+/**
+ * Jump condition to jump mnemonic (a NULL .str ends the table)
+ */
+static const struct {
+	enum JCOND look;
+	const char *str;
+} j_to_asm[] = {
+	{ .look = JB_UNCOND , .str = "jmp"  },
+	{ .look = JB_NEVER  , .str = "jn"   },
+	{ .look = JB_ZERO   , .str = "jz"   },
+	{ .look = JB_NZERO  , .str = "jnz"  },
+	{ .look = JB_CARRY  , .str = "jc"   },
+	{ .look = JB_NCARRY , .str = "jnc"  },
+	{ .look = JB_CARRYZ , .str = "jcz"  },
+	{ .look = JB_NCARRYZ, .str = "jncz" },
+	{ .str = NULL },
+};
+
+/**
+ * Jump condition to branch mnemonic (a NULL .str ends the table)
+ */
+static const struct {
+	enum JCOND look;
+	const char *str;
+} b_to_asm[] = {
+	{ .look = JB_UNCOND , .str = "bra"  },
+	{ .look = JB_NEVER  , .str = "bn"   },
+	{ .look = JB_ZERO   , .str = "bz"   },
+	{ .look = JB_NZERO  , .str = "bnz"  },
+	{ .look = JB_CARRY  , .str = "bc"   },
+	{ .look = JB_NCARRY , .str = "bnc"  },
+	{ .look = JB_CARRYZ , .str = "bcz"  },
+	{ .look = JB_NCARRYZ, .str = "bncz" },
+	{ .str = NULL },
+};
+
+/**
+ * Register number to assembly representation (a NULL .str ends the table)
+ */
+static const struct {
+	enum REG look;
+	const char *str;
+} reg_to_asm[] = {
+	{ .look = REG_0, .str = "$0" },
+	{ .look = REG_1, .str = "$1" },
+	{ .look = REG_2, .str = "$2" },
+	{ .look = REG_3, .str = "$3" },
+	{ .look = REG_4, .str = "$4" },
+	{ .look = REG_5, .str = "$5" },
+	{ .look = REG_6, .str = "$6" },
+	{ .look = REG_H, .str = "$H" },
+	{ .str = NULL },
+};
+
+/* Generates a function that takes an enum value from the given type and looks
+ * it up in the given lookup table, returning the matching string from the
+ * table, or the given fallback (which may be NULL) if no such string exists */
+#define GENERATE_STR_LOOKUP_FUNC(name, lookup, type, fallback) \
+const char* name(type x) { \
+	for (size_t i = 0; lookup[i].str; i++) { \
+		if (lookup[i].look == x) \
+			return lookup[i].str; \
+	} \
+	return (fallback); \
+}
+
+/* Inverse of GENERATE_STR_LOOKUP_FUNC - this generates a function that takes
+ * a string and places in *res an enum value matching that string as entered
+ * in the given lookup table.
+ * Returns zero on match, non-zero (leaving *res untouched) on no match */
+#define GENERATE_NUM_LOOKUP_FUNC(name, lookup, type) \
+int name(const char *x, type *res) { \
+	for (size_t i = 0; lookup[i].str; i++) { \
+		if (strcmp(lookup[i].str, x) == 0) { \
+			*res = lookup[i].look; \
+			return 0; \
+		} \
+	} \
+	return 1; \
+}
+/* No trailing ';' below: each macro expands to a complete function definition */
+GENERATE_STR_LOOKUP_FUNC(get_asm_from_oper, oper_to_asm, enum OPER, NULL)
+GENERATE_STR_LOOKUP_FUNC(get_asm_from_j, j_to_asm, enum JCOND, NULL)
+GENERATE_STR_LOOKUP_FUNC(get_asm_from_b, b_to_asm, enum JCOND, NULL)
+GENERATE_STR_LOOKUP_FUNC(get_asm_from_reg, reg_to_asm, enum REG, NULL)
+/* never NULL: parse.c passes the description straight to a "%s" format */
+GENERATE_STR_LOOKUP_FUNC(get_token_description, token_to_desc, enum TOKEN_TYPE, "[internal error]")
+
+GENERATE_NUM_LOOKUP_FUNC(get_oper_from_asm, oper_to_asm, enum OPER)
+GENERATE_NUM_LOOKUP_FUNC(get_j_from_asm, j_to_asm, enum JCOND)
+GENERATE_NUM_LOOKUP_FUNC(get_b_from_asm, b_to_asm, enum JCOND)
+GENERATE_NUM_LOOKUP_FUNC(get_reg_from_asm, reg_to_asm, enum REG)
+
+
+/* Print source line `line` (counted from 1) of fd to stderr with leading
+ * blanks trimmed, then a row of `span` markers starting under column `column`.
+ * Rewinds and re-reads fd, so its current read position is lost. */
+void indicate_file_area(FILE* fd, size_t line, size_t column, size_t span)
+{
+	const char margin[] = "  ";
+	char buf[1024] = { '\0' };
+	char *s = buf;
+
+	rewind(fd);
+	while (line && !feof(fd) && fgets(buf, sizeof(buf), fd)) {
+		if (strchr(buf, '\n')) {
+			line--;
+		}
+	}
+
+	/* trim leading whitespace */
+	while (*s == '\t' || *s == ' ') {
+		s++;
+	}
+	const size_t trimmed = (size_t)(s - buf);
+
+	/* filter non-printables to spaces to keep alignment correct; the cast
+	 * keeps isprint() well-defined for bytes >= 0x80 when char is signed */
+	for (char *p = s; *p; p++) {
+		if (!isprint((unsigned char)*p) && *p != '\n') {
+			*p = ' ';
+		}
+	}
+
+	fputs(margin, stderr);
+	fputs(s, stderr);
+
+	/* corner case (still needed?) - buf was just return */
+	if (strlen(buf) == 1 && buf[0] == '\n') {
+		fputc('\n', stderr);
+	}
+
+	/* column counts the trimmed blanks too; clamp so that a column of 0 or
+	 * one inside the trimmed blanks cannot wrap around to a huge pad */
+	fputs(margin, stderr);
+	size_t pad = column > trimmed ? column - trimmed - 1 : 0;
+	for (; pad; pad--) {
+		fputc(' ', stderr);
+	}
+
+	const char c = span == 1 ? '^' : '"';
+	for (; span; span--) {
+		fputc(c, stderr);
+	}
+	fputc('\n', stderr);
+}
diff --git a/util.h b/util.h
new file mode 100644
index 0000000..21d3d30
--- /dev/null
+++ b/util.h
@@ -0,0 +1,28 @@
+#ifndef UTIL_H
+#define UTIL_H
+
+#include <stdio.h>
+
+#include "instruction.h"
+#include "lex.h"
+
+/* Enum value -> string lookups, defined in util.c.
+ * The get_asm_from_* functions return NULL when the value is not in their
+ * table. */
+const char *get_asm_from_oper(enum OPER x);
+const char *get_asm_from_j(enum JCOND x);
+const char *get_asm_from_b(enum JCOND x);
+const char *get_asm_from_reg(enum REG x);
+const char *get_token_description(enum TOKEN_TYPE t);
+
+/* String -> enum lookups, defined in util.c.
+ * Return zero and store the match in *res, or non-zero if x is not found. */
+int get_oper_from_asm(const char *x, enum OPER *res);
+int get_j_from_asm(const char *x, enum JCOND *res);
+int get_b_from_asm(const char *x, enum JCOND *res);
+int get_reg_from_asm(const char *x, enum REG *res);
+
+/* Prints a line of fd to stderr with a marker under the given column/span */
+void indicate_file_area(FILE *fd, size_t line, size_t column, size_t span);
+
+#endif /* UTIL_H */
-- 
cgit v1.1