First dump of working prototype

author: David Phillips <david@sighup.nz> 2019-04-14 16:10:18 +1200
committer: David Phillips <david@sighup.nz> 2019-08-03 12:42:57 +1200
commit: ac8150b7601d9611818bb8b265a125a347a67004 (patch)
tree: aa1440c18551fa415af53daedde76536ac2d000d
download: toy-cpu-assembler-ac8150b7601d9611818bb8b265a125a347a67004.tar.xz
11 files changed, 1674 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..71e5da6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+*.o
+*.bin
+assembler
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..fa8f61b
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,18 @@
+OBJECTS = lex.o parse.o output.o assembler.o tok_util.o
+# Objects and the final link come from make's built-in rules; the rules below only add header dependencies.
+all: assembler
+
+assembler: $(OBJECTS)
+
+lex.o: lex.h
+
+parse.o: lex.h parse.h instruction.h tok_util.h
+
+output.o: parse.h
+
+tok_util.o: lex.h
+
+assembler.o: lex.h parse.h instruction.h
+.PHONY: all clean
+clean:
+	- rm -f assembler $(OBJECTS)
diff --git a/assembler.c b/assembler.c
new file mode 100644
index 0000000..eaf4d38
--- /dev/null
+++ b/assembler.c
@@ -0,0 +1,97 @@
+#include <stdio.h>
+#include <stdint.h>
+
+#include "lex.h"
+#include "parse.h"
+#include "instruction.h"
+
+#if 0
+/**
+ * Types for intermediate storage of instructions
+ */
+struct r_type {
+	enum OPER operation;
+	enum REG dest;
+	enum REG left;
+	enum REG right;
+};
+
+struct i_type { /* covers WI and NI */
+	enum OPER operation;
+	enum REG dest;
+	enum REG left;
+	int16_t immediate;
+};
+
+struct jr_type {
+	enum JCOND condition;
+	enum REG reg;
+};
+
+struct ji_type {
+	enum JCOND condition;
+	uint16_t immediate;
+};
+
+struct b_type { /* FIXME merge with ji_type? */
+	enum JCOND condition;
+	uint16_t immediate; /* capped to 10 bits by IS */
+};
+
+/* Union for bringing above together */
+union instruction_union {
+	struct r_type r;
+	struct i_type i;
+	struct jr_type jr;
+	struct ji_type ji;
+	struct b_type b;
+};
+
+struct instruction {
+	enum INST_TYPE type;
+	union instruction_union i;
+};
+/**/
+#endif
+
+/* Assemble argv[1] into argv[2]. Returns 0 on success, 1 on usage or I/O
+ * errors, 2 if lexing fails, else the non-zero status of parse()/output(). */
+int main(int argc, char **argv)
+{
+	int ret = 1;
+	FILE *fin = NULL, *fout = NULL;
+	struct token *tokens = NULL;
+	struct instruction *insts = NULL;
+	struct label *labels = NULL;
+	size_t tok_count = 0, insts_count = 0, labels_count = 0;
+	if (argc < 3) {
+		fprintf(stderr, "Syntax: %s <in.asm> <out.bin>\n", argv[0]);
+		return 1;
+	}
+	if ((fin = fopen(argv[1], "r")) == NULL) {
+		fprintf(stderr, "Error opening %s: ", argv[1]);
+		perror("fopen");
+		goto out;
+	}
+	if ((fout = fopen(argv[2], "wb")) == NULL) {
+		fprintf(stderr, "Error opening %s: ", argv[2]);
+		perror("fopen");
+		goto out;
+	}
+	if ((tokens = lex(argv[1], fin, &tok_count)) == NULL) {
+		ret = 2;
+		goto out;
+	}
+	ret = parse(argv[1], fin, &labels, &labels_count, tokens, tok_count, &insts, &insts_count);
+	if (ret == 0)
+		ret = output(fout, labels, labels_count, insts, insts_count);
+out:
+	/* closing flushes the output, so a failure here is a write error */
+	if (fout && fclose(fout) != 0 && ret == 0) {
+		perror("fclose");
+		ret = 1;
+	}
+	if (fin)
+		fclose(fin);
+	return ret;
+}
diff --git a/instruction.h b/instruction.h
new file mode 100644
index 0000000..3ee18d9
--- /dev/null
+++ b/instruction.h
@@ -0,0 +1,145 @@
+#ifndef INSTRUCTION_H
+#define INSTRUCTION_H
+
+/**
+ * Values used for software-only identification of instruction types. Values
+ * are not tied to machine language. Guaranteed unique.
+ */
+enum INST_TYPE {
+	INST_TYPE_R,
+	INST_TYPE_NI,
+	INST_TYPE_WI,
+	INST_TYPE_JR,
+	INST_TYPE_JI,
+	INST_TYPE_B
+};
+
+/**
+ * Masks for all four instruction types. Not guaranteed unique
+ */
+#define MASK_INST_RTYPE  (0x0000)
+#define MASK_INST_NITYPE (0x4000)
+#define MASK_INST_WITYPE (0x8000)
+#define MASK_INST_JTYPE  (0xC000)
+
+/**
+ * ALU operation types
+ * R-type and I-type take 3-bit ALU oper as bits:
+ * xx___xxx xxxxxxxx
+ */
+enum OPER {
+	OPER_ADD = 0,
+	OPER_SUB = 1,
+	OPER_SHL = 2,
+	OPER_SHR = 3,
+	OPER_AND = 4,
+	OPER_OR  = 5,
+	OPER_XOR = 6,
+	OPER_MUL = 7,
+};
+#define OPER_SHAMT (11)
+#define MASK_OPER(x) ((x) << OPER_SHAMT)
+
+static const char *oper_to_human[] = {
+	[OPER_ADD] = "add",
+	[OPER_SUB] = "sub",
+	[OPER_SHL] = "shl",
+	[OPER_SHR] = "shr",
+	[OPER_AND] = "and",
+	[OPER_OR ] = "or",
+	[OPER_XOR] = "xor",
+	[OPER_MUL] = "mul"
+};
+
+/**
+ * Masks for jump and branch conditions
+ * J-type instructions (jump, branch) take these as follows:
+ * xxx___xx xxxxxxxx
+ */
+enum JCOND {
+	JB_UNCOND  = 0x0,
+	JB_NEVER   = 0x1,
+	JB_ZERO    = 0x2,
+	JB_NZERO   = 0x3,
+	JB_CARRY   = 0x4,
+	JB_NCARRY  = 0x5,
+	JB_CARRYZ  = 0x6,
+	JB_NCARRYZ = 0x7
+};
+#define JB_SHAMT   (10)
+#define MASK_JB_COND(x) ((x) << JB_SHAMT)
+#define MASK_IS_JUMP   (0 << 13)
+#define MASK_IS_BRANCH (1 << 13)
+#define MASK_JI (0x0 << 8)
+#define MASK_JR (0x1 << 8)
+#define MASK_JUMP_REGISTER(x) ((x) << 5)
+
+static const char *j_to_human[] = {
+	[JB_UNCOND]  = "jmp",
+	[JB_NEVER]   = "jn",
+	[JB_ZERO]    = "jz",
+	[JB_NZERO]   = "jnz",
+	[JB_CARRY]   = "jc",
+	[JB_NCARRY]  = "jnc",
+	[JB_CARRYZ]  = "jcz",
+	[JB_NCARRYZ] = "jncz"
+};
+static const char *b_to_human[] = {
+	[JB_UNCOND]  = "bra",
+	[JB_NEVER]   = "bn",
+	[JB_ZERO]    = "bz",
+	[JB_NZERO]   = "bnz",
+	[JB_CARRY]   = "bc",
+	[JB_NCARRY]  = "bnc",
+	[JB_CARRYZ]  = "bcz",
+	[JB_NCARRYZ] = "bncz"
+};
+
+/**
+ * Register numbers used in all manner of instructions in varying positions
+ */
+enum REG {
+	REG_0 = 0,
+	REG_1 = 1,
+	REG_2 = 2,
+	REG_3 = 3,
+	REG_4 = 4,
+	REG_5 = 5,
+	REG_6 = 6,
+	REG_H = 7
+};
+
+static const char *reg_to_human[] = {
+	[REG_0] = "$0",
+	[REG_1] = "$1",
+	[REG_2] = "$2",
+	[REG_3] = "$3",
+	[REG_4] = "$4",
+	[REG_5] = "$5",
+	[REG_6] = "$6",
+	[REG_H] = "$H",
+};
+
+/**
+ * Offset macro to turn REG_* into mask for register operands of R-type and
+ * I-type instructions
+ */
+/* destination reg: xxxxx___ xxxxxxxx */
+#define REG_DEST_OFFSET (8)
+#define MASK_REG_DEST(x) ((x) << REG_DEST_OFFSET)
+
+/* left reg: xxxxxxxx ___xxxxx */
+#define REG_LEFT_OFFSET (5)
+#define MASK_REG_LEFT(x) ((x) << REG_LEFT_OFFSET)
+
+/* right reg (R-type only): xxxxxxxx xxx___xx */
+#define REG_RIGHT_OFFSET (2)
+#define MASK_REG_RIGHT(x) ((x) << REG_RIGHT_OFFSET)
+
+/* five LSb are narrow immediate value */
+#define MASK_NI_IMM(x) ((x) & 0x1F)
+
+/* 10 LSb is branch offset */
+#define MASK_B_OFFSET(x) ((x) & 0x3FF)
+
+#endif /* INSTRUCTION_H */
diff --git a/lex.c b/lex.c
new file mode 100644
index 0000000..6c32c97
--- /dev/null
+++ b/lex.c
@@ -0,0 +1,373 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "lex.h"
+/* Report an error at the current lexer position (1-based line and column). */
+#define emit_error(...) do { fprintf(stderr, "%s at (%zu,%zu): ", filename, 1 + line, 1 + column);\
+                        fprintf(stderr, __VA_ARGS__); } while (0)
+
+static const char *keywords[] = {
+	"declare",
+	"byte",
+	"bytes",
+	"word",
+	"words",
+	"base",
+};
+
+static const char *filename = NULL;
+static size_t line;
+static size_t column;
+static struct token* tokens;
+static size_t tokens_count;
+static char buffer[1024]; /* XXX limitation: sources must have lines < 1024 bytes */
+
+static int expect(const char c) {
+	if (buffer[column] != c) {
+		emit_error("Expected '%c', got '%c'\n", c, buffer[column]);
+		return 1;
+	}
+	column++;
+	return 0;
+}
+
+static void store_location(struct token *t) {
+	t->column = column + 1;
+	t->line = line + 1;
+}
+
+static void eat_whitespace(void) {
+	size_t len = strlen(buffer);
+	while (column < len && strchr(" \t", buffer[column])) {
+		column++;
+	}
+}
+
+static int add_token(struct token t) {
+	struct token *old_tok = tokens;
+
+	tokens_count++;
+	tokens = realloc(tokens, sizeof(struct token) * tokens_count);
+
+	if (!tokens) {
+		perror("realloc");
+		free(old_tok);
+		return 1;
+	}
+
+	tokens[tokens_count - 1] = t;
+//	printf("Adding token from (%d,%d ~%d), str %s int %d\n", t.line, t.column, t.span, t.s_val, t.i_val);
+	return 0;
+}
+
+static int lex_comma(struct token *t) {
+	if (expect(','))
+		return 1;
+
+	t->span = 1;
+	t->type = TOKEN_COMMA;
+	return 0;
+}
+
+static int lex_dot(struct token *t) {
+	if (expect('.'))
+		return 1;
+
+	t->span = 1;
+	t->type = TOKEN_DOT;
+	return 0;
+}
+
+static int lex_register(struct token *t) {
+	int i = 0;
+	if (expect('$'))
+		return 1;
+
+	for (i = column; isalnum(buffer[i]); i++) {
+		;
+	}
+
+	t->s_val = strndup(&buffer[column], i - column);
+	if (!t->s_val) {
+		perror("strndup");
+		return 1;
+	}
+
+	t->span = i - column + 1;
+	t->type = TOKEN_REGISTER;
+	column = i;
+	return 0;
+}
+
+static int lex_string(struct token *t) {
+	int i = 0;
+	if (expect('"'))
+		return 1;
+
+	for (i = column; buffer[i] != '\0' && buffer[i] != '"'; i++) {
+		;
+	}
+
+	t->s_val = strndup(&buffer[column], i - column);
+	if (!t->s_val) {
+		perror("strndup");
+		return 1;
+	}
+
+	t->span = i - column + 2; /* +2 to include "" */
+	t->type = TOKEN_STRING;
+	column = i;
+	if (expect('"'))
+		return 1;
+
+	return 0;
+}
+
+/* Lex the escape sequence of a character literal, starting at its backslash.
+ * Stores the escaped character's value in t; returns 0 on success, 1 on error. */
+static int lex_char_escaped(struct token *t) {
+	if (expect('\\'))
+		return 1;
+	switch (buffer[column]) {
+		case 'a': t->i_val = '\a'; break;
+		case 'b': t->i_val = '\b'; break;
+		case 'f': t->i_val = '\f'; break;
+		case 'n': t->i_val = '\n'; break;
+		case 'r': t->i_val = '\r'; break;
+		case 't': t->i_val = '\t'; break;
+		case 'v': t->i_val = '\v'; break;
+		case '\\': t->i_val = '\\'; break;
+		case '\'': t->i_val = '\''; break;
+		default:
+			emit_error("Unknown escape sequence '\\%c'\n", buffer[column]);
+			return 1; /* fail instead of yielding a token with value 0 */
+	}
+	column++;
+	t->type = TOKEN_NUMERIC;
+	t->span = 4; /* len '\x' == 4 */
+	return 0;
+}
+
+/* Lex a character literal ('x' or an escape like '\n') as TOKEN_NUMERIC; returns 0 on success, 1 on error. */
+static int lex_char(struct token *t) {
+	if (expect('\''))
+		return 1;
+	if (buffer[column] == '\\') {
+		if (lex_char_escaped(t))
+			return 1;
+	} else {
+		t->type = TOKEN_NUMERIC;
+		t->span = 3; /* len 'x' == 3 */
+		t->i_val = buffer[column];
+		/* consume the character so the closing quote is next; never step past the terminator */
+		if (buffer[column] != '\0')
+			column++;
+	}
+	return expect('\'');
+}
+
+static int lex_num(struct token *t)
+{
+	char *num_s = NULL;
+	char *end = NULL;
+	size_t span = 0;
+	size_t prefix_span = 0;
+	int value = 0;
+	int base = 0;
+	int neg = 0;
+
+	/* shave off a leading '-' now to make handling easier */
+	if (buffer[column] == '-') {
+		neg = 1;
+		if (expect('-'))
+			return 1;
+		prefix_span++;
+	}
+
+	if (!isdigit(buffer[column])) {
+		emit_error("Error: '%c' cannot start a numerical literal\n", buffer[column]);
+		return 1;
+	}
+
+	/* check if hex */
+	if (   column <= strlen(buffer) - 2
+	    && buffer[column] == '0'
+	    && buffer[column + 1] == 'x') {
+		base = 16;
+	}
+
+	span = strcspn(&buffer[column], " \n\t,");
+	if (span == 0) {
+		emit_error("Error: malformed numerical literal\n");
+		return 1;
+	}
+	num_s = strndup(&buffer[column], span);
+	if (!num_s) {
+		perror("malloc");
+		return 1;
+	}
+
+	/* if base still unknown, determine if from the last char of constant */
+	char *suffix = &num_s[span - 1];
+	if (base == 0) {
+		switch (*suffix) {
+			case 'h': base = 16; break;
+			case 'd': base = 10; break;
+			case 'o': base = 8;  break;
+			case 'b': base = 2;  break;
+			default:
+				if (!isdigit(*suffix)) {
+					emit_error("Error: '%c' is an invalid base suffix in numerical literal\n", *suffix);
+					free(num_s);
+					return 1;
+				}
+				break;
+		}
+		if (!isdigit(*suffix)) {
+			*suffix = '\0';
+		}
+	}
+
+	value = strtol(num_s, &end, base);
+	if (*end != '\0') {
+		emit_error("Error: malformed numerical literal\n", *end, base);
+		free(num_s);
+		return 1;
+	}
+	free(num_s);
+
+	column += span;
+
+	t->type = TOKEN_NUMERIC;
+	t->span = prefix_span + span;
+	t->i_val = (neg ? -value : value);
+	return 0;
+}
+
+/* Lex an identifier, keyword or label declaration ("name:") starting at column.
+ * The name is copied into t->s_val. Returns 0 on success, 1 on error. */
+static int lex_misc(struct token *t) {
+	size_t i = 0;
+	size_t j = 0;
+
+	if (!isalpha((unsigned char)buffer[column])) {
+		emit_error("Error: '%c' cannot start an identifier\n", buffer[column]);
+		return 1;
+	}
+
+	for (i = column; isalnum((unsigned char)buffer[i]); i++) {
+		;
+	}
+
+	t->type = (buffer[i] == ':') ? TOKEN_LABEL : TOKEN_IDENT;
+
+	t->s_val = strndup(&buffer[column], i - column);
+	if (!t->s_val) {
+		perror("strndup");
+		return 1;
+	}
+
+	for (j = 0; j < sizeof(keywords)/sizeof(*keywords); j++)
+		if (strcmp(t->s_val, keywords[j]) == 0)
+			t->type = TOKEN_KEYWORD;
+
+	t->span = i - column;
+	column = i;
+	/* skip over the colon, which is not part of the label name */
+	if (t->type == TOKEN_LABEL) {
+		column++;
+	}
+	return 0;
+}
+
+static int lex_eol(struct token *t) {
+	column++;
+	t->type = TOKEN_EOL;
+	t->span = 1;
+	return 0;
+}
+
+int lex_line(void) {
+	int ret = 0;
+	size_t len = strlen(buffer);
+	struct token tok;
+
+	while (column < len) {
+		memset(&tok, 0, sizeof(tok));
+		store_location(&tok);
+		switch (buffer[column]) {
+			case ';':
+			case '#':
+			case '!':
+			case '\n':
+				ret = lex_eol(&tok);
+				return add_token(tok);
+			case ' ':
+			case '\t':
+				eat_whitespace();
+				continue;
+			/*
+			case '/':
+				FIXME look ahead * or /
+				eat_block_comment();
+				break;
+				*/
+			case ',':
+				ret = lex_comma(&tok);
+				break;
+			case '.':
+				ret = lex_dot(&tok);
+				break;
+			case '$':
+				ret = lex_register(&tok);
+				break;
+			case '"':
+				ret = lex_string(&tok);
+				break;
+			case '\'':
+				ret = lex_char(&tok);
+				break;
+			case '-':
+				ret = lex_num(&tok);
+				break;
+			/* FIXME add support for expressions like `addi $0, $0, (1+2*3) */
+			default:
+				if (isdigit(buffer[column])) {
+					ret = lex_num(&tok);
+				} else {
+					ret = lex_misc(&tok);
+				}
+				break;
+		}
+		if (ret)
+			return ret;
+
+		if (add_token(tok))
+			return 1;
+	}
+	return 0;
+}
+
+struct token* lex(const char *filename_local, FILE *fin, size_t *len)
+{
+	filename = filename_local;
+	line = 0;
+	tokens = NULL;
+	tokens_count = 0;
+
+	while (fgets(buffer, sizeof(buffer), fin)) {
+		column = 0;
+		if (lex_line()) {
+			return NULL;
+		}
+		line++;
+	}
+	if (!feof(fin)) {
+		perror("fgets");
+		return NULL;
+	}
+
+	*len = tokens_count;
+	return tokens;
+}
diff --git a/lex.h b/lex.h
new file mode 100644
index 0000000..a14528f
--- /dev/null
+++ b/lex.h
@@ -0,0 +1,30 @@
+#ifndef LEX_H
+#define LEX_H
+
+#include <stdio.h>
+
+enum TOKEN_TYPE {
+	TOKEN_COMMA = 1,
+	TOKEN_DOT, /* starts an assembler directive */
+	TOKEN_LABEL, /* label declaration */
+	TOKEN_IDENT, /* identifier (not label decl) or instruction */
+	TOKEN_KEYWORD, /* keyword used to tell the assembler special information */
+	TOKEN_STRING, /* string literal */
+	TOKEN_NUMERIC, /* numeric literal, incl literal chars */
+	TOKEN_REGISTER, /* $0, $H, $1 */
+	TOKEN_EOL /* end of line */
+};
+
+struct token {
+	enum TOKEN_TYPE type;
+	/* line and column of the source file this token occurs at. 1-based. */
+	size_t line;
+	size_t column;
+	size_t span;
+	char *s_val;
+	int i_val;
+};
+
+struct token* lex(const char *filename_local, FILE *fin, size_t *len);
+
+#endif /* LEX_H */
diff --git a/output.c b/output.c
new file mode 100644
index 0000000..ff22956
--- /dev/null
+++ b/output.c
@@ -0,0 +1,203 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "parse.h"
+
+static size_t cur_byte;
+
+int generate_single_r_type(uint32_t *dest, struct r_type inst)
+{
+	uint32_t i = 0;
+
+	i |= MASK_INST_RTYPE;
+	i |= MASK_OPER(inst.oper);
+	i |= MASK_REG_DEST(inst.dest);
+	i |= MASK_REG_LEFT(inst.left);
+	i |= MASK_REG_RIGHT(inst.right);
+
+	*dest = i;
+	return 1;
+}
+int generate_single_ni_type(uint32_t *dest, struct i_type inst)
+{
+	uint32_t i = 0;
+
+	i |= MASK_INST_NITYPE;
+	i |= MASK_OPER(inst.oper);
+	i |= MASK_REG_DEST(inst.dest);
+	i |= MASK_REG_LEFT(inst.left);
+	i |= MASK_NI_IMM(inst.imm.value);
+
+	*dest = i;
+	return 1;
+}
+
+/* Encode a wide-immediate I-type instruction: opcode word in the high 16 bits
+ * of *dest, immediate in the low 16. Returns the length in words (2). */
+int generate_single_wi_type(uint32_t *dest, struct i_type inst)
+{
+	uint32_t i = 0;
+
+	i |= MASK_INST_WITYPE;
+	i |= MASK_OPER(inst.oper);
+	i |= MASK_REG_DEST(inst.dest);
+	i |= MASK_REG_LEFT(inst.left);
+	/* two-word instruction - make room for the immediate */
+	i <<= 16;
+	/* keep 16 bits only: a sign-extended negative value would clobber the opcode word */
+	i |= (uint32_t)inst.imm.value & 0xFFFF;
+	*dest = i;
+	return 2;
+}
+
+/* Encode a jump-to-immediate instruction: opcode word in the high 16 bits of
+ * *dest, target immediate in the low 16. Returns the length in words (2). */
+int generate_single_ji_type(uint32_t *dest, struct ji_type inst)
+{
+	uint32_t i = 0;
+
+	i |= MASK_INST_JTYPE;
+	i |= MASK_IS_JUMP;
+	i |= MASK_JB_COND(inst.cond);
+	i |= MASK_JI;
+	/* two-word instruction - make room for the immediate */
+	i <<= 16;
+	/* keep 16 bits only: a sign-extended negative value would clobber the opcode word */
+	i |= (uint32_t)inst.imm.value & 0xFFFF;
+	*dest = i;
+	return 2;
+}
+
+int generate_single_jr_type(uint32_t *dest, struct jr_type inst)
+{
+	uint32_t i = 0;
+
+	i |= MASK_INST_JTYPE;
+	i |= MASK_IS_JUMP;
+	i |= MASK_JB_COND(inst.cond);
+	i |= MASK_JR;
+	i |= MASK_JUMP_REGISTER(inst.reg);
+
+	*dest = i;
+	return 1;
+}
+
+int generate_single_b_type(uint32_t *dest, struct b_type inst)
+{
+	uint32_t i = 0;
+
+	i |= MASK_INST_JTYPE;
+	i |= MASK_IS_BRANCH;
+	i |= MASK_JB_COND(inst.cond);
+	i |= MASK_B_OFFSET(inst.imm.value);
+
+	*dest = i;
+	return 1;
+}
+
+
+int look_up_label(struct label *labels, size_t labels_count, uint16_t *val, const char *label)
+{
+	size_t i = 0;
+
+	for (i = 0; i < labels_count; i++) {
+		if (strcmp(labels[i].name, label) == 0) {
+			*val = labels[i].byte_offset;
+			return 0;
+		}
+	}
+
+	/* FIXME emit */
+	fprintf(stderr, "Reference to undefined label `%s'\n", label);
+	return 1;
+}
+
+/* Encode one instruction (resolving label immediates first) and write its word(s) to f; returns 0 on success, 1 on error. */
+int output_single(FILE *f, struct label *labels, size_t labels_count, struct instruction inst)
+{
+	int len = 0;
+	uint32_t i = 0;
+
+	switch (inst.type) {
+		case INST_TYPE_R:
+			len = generate_single_r_type(&i, inst.inst.r);
+			break;
+		case INST_TYPE_NI:
+			if (   inst.inst.i.imm_is_ident
+			    && look_up_label(labels, labels_count, &inst.inst.i.imm.value, inst.inst.i.imm.label))
+				return 1;
+
+			len = generate_single_ni_type(&i, inst.inst.i);
+			break;
+		case INST_TYPE_WI:
+			if (   inst.inst.i.imm_is_ident
+			    && look_up_label(labels, labels_count, &inst.inst.i.imm.value, inst.inst.i.imm.label))
+				return 1;
+
+			len = generate_single_wi_type(&i, inst.inst.i);
+			break;
+		case INST_TYPE_JR:
+			len = generate_single_jr_type(&i, inst.inst.jr);
+			break;
+		case INST_TYPE_JI:
+			if (   inst.inst.ji.imm_is_ident
+			    && look_up_label(labels, labels_count, &inst.inst.ji.imm.value, inst.inst.ji.imm.label))
+				return 1;
+
+			len = generate_single_ji_type(&i, inst.inst.ji);
+			break;
+		case INST_TYPE_B:
+			if (   inst.inst.b.imm_is_ident
+			    && look_up_label(labels, labels_count, &inst.inst.b.imm.value, inst.inst.b.imm.label))
+				return 1;
+			inst.inst.b.imm.value -= cur_byte;
+			if (inst.inst.b.imm.value % 2 != 0) {
+				fprintf(stderr, "Internal error: branch offset %d not a multiple of 2\n", inst.inst.b.imm.value);
+			}
+			inst.inst.b.imm.value /= 2;
+
+			len = generate_single_b_type(&i, inst.inst.b);
+			break;
+		default:
+			fprintf(stderr, "Internal error: unhandled instruction type\n");
+			return 1; /* do not emit a bogus word for an unknown type */
+	}
+
+	if (len == 2) {
+//#define RAW
+#ifdef RAW
+		fputc(0xFF & (i >> 24), f);
+		fputc(0xFF & (i >> 16), f);
+#else
+		fprintf(f, "%04x ", i >> 16);
+#endif
+	}
+#ifdef RAW
+	fputc(0xFF & (i >> 8), f);
+	fputc(0xFF & (i >> 0), f);
+#else
+	fprintf(f, "%04x ", 0xFFFF & i);
+#endif
+
+	cur_byte += 2 * len;
+	return 0;
+}
+
+/* Write every instruction to fout, resetting the byte counter first.
+ * Returns 0 on success, 1 as soon as an instruction fails to emit. */
+int output(FILE *fout, struct label *labels, size_t label_count, struct instruction *insts, size_t insts_count)
+{
+	size_t i = 0;
+	cur_byte = 0;
+
+#ifndef RAW
+	fprintf(fout, "v2.0 raw\n");
+#endif
+	/* insts_count is a size_t, so it needs %zu rather than %d */
+	printf("output: have %zu instructions\n", insts_count);
+	for (i = 0; i < insts_count; i++)
+		if (output_single(fout, labels, label_count, insts[i]))
+			return 1;
+	return 0;
+}
diff --git a/parse.c b/parse.c
new file mode 100644
index 0000000..f5caf4d
--- /dev/null
+++ b/parse.c
@@ -0,0 +1,653 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "lex.h"
+#include "parse.h"
+#include "instruction.h"
+#include "tok_util.h"
+#if 0
+struct label {
+	char *name;
+	size_t byte_offset;
+};
+
+union immediate {
+	const char *label;
+	int16_t value;
+};
+
+struct r_type {
+	enum OPER oper;
+	enum REG dest;
+	enum REG left;
+	enum REG right;
+};
+
+struct i_type {
+	enum OPER oper;
+	enum REG dest;
+	enum REG left;
+	bool imm_is_ident;
+	union immediate imm;
+};
+
+struct jr_type {
+	enum JCOND cond;
+	enum REG reg;
+};
+
+struct ji_type {
+	enum JCOND cond;
+	bool imm_is_ident;
+	union immediate imm;
+};
+
+struct b_type {
+	enum JCOND cond;
+	bool imm_is_ident;
+	union immediate imm;
+};
+
+struct instruction {
+	enum INST_TYPE type;
+	union instruction_u {
+		struct r_type r;   /* catch-all R-Type */
+		struct i_type i;   /* I-type on immediate literal */
+		struct jr_type jr; /* jump to register */
+		struct ji_type ji; /* jump to immediate */
+		struct b_type b;   /* branch to immediate literal */
+	} inst;
+};
+#endif
+
+static const char *filename;
+static FILE *fd;
+static struct token *cursor;
+static struct token *tokens;
+static size_t tokens_pos;
+static size_t tokens_count;
+static struct label *labels;
+static size_t labels_count;
+static struct instruction *insts;
+static size_t insts_count;
+static size_t byte_offset;
+
+/* printf-style diagnostic to stderr, prefixed with the file name and, when a token is current, its position. */
+void emit(const char *fmt, ...)
+{
+	va_list args;
+	va_start(args, fmt);
+	if (cursor)
+		fprintf(stderr, "%s at (%zu,%zu): ", filename, cursor->line, cursor->column);
+	else
+		fprintf(stderr, "%s: ", filename);
+	vfprintf(stderr, fmt, args);
+	va_end(args);
+	if (cursor)
+		indicate_file_area(fd, cursor->line, cursor->column, cursor->span);
+}
+
+#define EXPECT_AND_DISCARD_CRITICAL(type)\
+	do {                                 \
+		EXPECT_CRITICAL(type)            \
+		kerchunk();                      \
+	} while (0);
+
+#define EXPECT_CRITICAL(type)\
+	if (expect(type)) {  \
+		return 1;        \
+	}
+
+static int expect(enum TOKEN_TYPE e)
+{
+	const char *expected_desc = "(internal error)";
+	const char *observed_desc = "(internal error)";
+
+	if (!cursor || cursor->type != e) {
+		expected_desc = get_token_description(e);
+		if (cursor) {
+			observed_desc = get_token_description(cursor->type);
+		} else {
+			observed_desc = "end of file";
+		}
+		emit("Error: Expected %s, got %s\n", expected_desc, observed_desc);
+		return 1;
+	}
+
+	return 0;
+}
+
+/* Advance cursor to the next token; NULL once past the last one. */
+void kerchunk()
+{
+	if (tokens_pos + 1 < tokens_count) /* no underflow when count is 0 */
+		cursor = &tokens[++tokens_pos];
+	else
+		cursor = NULL;
+}
+
+int parse_eol(void)
+{
+	EXPECT_AND_DISCARD_CRITICAL(TOKEN_EOL);
+	return 0;
+}
+
+int parse_comma(void)
+{
+	EXPECT_AND_DISCARD_CRITICAL(TOKEN_COMMA);
+	return 0;
+}
+
+int parse_imm(uint16_t *imm)
+{
+	EXPECT_CRITICAL(TOKEN_NUMERIC);
+	/* FIXME allow identifiers? or is that job of parent */
+	*imm = cursor->i_val;
+	kerchunk();
+	return 0;
+}
+
+int parse_ident(char **ident)
+{
+	EXPECT_CRITICAL(TOKEN_IDENT);
+	*ident = cursor->s_val;
+	kerchunk();
+	return 0;
+}
+
+/**
+ * FIXME move */
+
+int add_instruction(struct instruction inst)
+{
+	struct instruction *old_insts = insts;
+	insts = realloc(insts, (insts_count + 1) * sizeof(struct instruction));
+	if (!insts) {
+		free(old_insts);
+		perror("realloc");
+		return 1;
+	}
+
+	insts[insts_count] = inst;
+
+	insts_count++;
+	return 0;
+}
+
+int new_label(struct label *dest, const char *name)
+{
+	char *name_clone = strdup(name);
+
+	if (!name_clone) {
+		perror("strdup");
+		return 1;
+	}
+
+	dest->name = name_clone;
+	dest->byte_offset = byte_offset;
+
+	return 0;
+}
+
+void destroy_label(struct label *l)
+{
+	free(l->name);
+}
+/**/
+
+int parse_label()
+{
+	size_t i = 0;
+	struct label l;
+	struct label *old_labels = labels;
+
+	EXPECT_CRITICAL(TOKEN_LABEL);
+
+	for (i = 0; i < labels_count; i++) {
+		if (strcmp(labels[i].name, cursor->s_val) == 0) {
+			emit("Error: duplicate label\n");
+			return 1;
+		}
+	}
+
+	labels = realloc(labels, (labels_count + 1) * sizeof(struct label));
+	if (!labels) {
+		perror("realloc");
+		free(old_labels);
+		return 1;
+	}
+
+	if (new_label(&l, cursor->s_val))
+		return 1;
+
+	labels[labels_count] = l;
+
+	labels_count++;
+	kerchunk();
+	return 0;
+}
+
+/* Parse a register token into *reg and advance the cursor.
+ * Returns 0 on success, 1 if the token is not a known register. */
+int parse_reg(enum REG *reg)
+{
+	EXPECT_CRITICAL(TOKEN_REGISTER);
+	/* valid registers are: $0, $1, $2, $3, $4, $5, $6, $7, $Z, $H
+	 * the latter two are aliases for $0 and $7 respectively */
+	if (strlen(cursor->s_val) != 1) {
+		emit("Error: incorrect register name length (%zu)\n", strlen(cursor->s_val));
+		return 1;
+	}
+	switch (cursor->s_val[0])
+	{
+		case 'Z': /* fallthrough */
+		case 'z': /* fallthrough */
+		case '0': *reg = REG_0; break;
+		case '1': *reg = REG_1; break;
+		case '2': *reg = REG_2; break;
+		case '3': *reg = REG_3; break;
+		case '4': *reg = REG_4; break;
+		case '5': *reg = REG_5; break;
+		case '6': *reg = REG_6; break;
+		case 'h': /* fallthrough */
+		case 'H': /* fallthrough */
+		case '7': *reg = REG_H; break;
+		default:
+			emit("Error: unknown register '%c'\n", cursor->s_val[0]);
+			return 1;
+	}
+	kerchunk();
+	return 0;
+}
+
+int parse_i_type(enum OPER oper, enum REG dest, enum REG left, uint16_t imm)
+{
+//	fprintf(stderr, "<DEBUG>: ITYPE %s <%s> <%s> <%d>\n",
+//		oper_to_human[oper],
+//		reg_to_human[dest],
+//		reg_to_human[left],
+//		imm);
+	struct instruction i;
+	i.type = INST_TYPE_NI;
+	i.inst.i.oper = oper;
+	i.inst.i.dest = dest;
+	i.inst.i.left = left;
+	i.inst.i.imm_is_ident = false;
+	i.inst.i.imm.value = imm;
+
+	if (add_instruction(i))
+		return 1;
+
+	/* FIXME detect narrow/wide */
+	byte_offset += 2;
+	return 0;
+}
+
+int parse_i_ident_type(enum OPER oper, enum REG dest, enum REG left, char *ident)
+{
+	struct instruction i;
+	i.type = INST_TYPE_NI;
+	i.inst.i.oper = oper;
+	i.inst.i.dest = dest;
+	i.inst.i.left = left;
+	i.inst.i.imm_is_ident = true;
+	i.inst.i.imm.label = ident;
+
+	if (add_instruction(i))
+		return 1;
+
+	/* FIXME detect narrow/wide */
+	byte_offset += 2;
+	return 0;
+}
+
+int parse_r_type(enum OPER oper, enum REG dest, enum REG left, enum REG right)
+{
+//	fprintf(stderr, "<DEBUG>: RTYPE %s <%s> <%s> <%s>\n",
+//		oper_to_human[oper],
+//		reg_to_human[dest],
+//		reg_to_human[left],
+//		reg_to_human[right]);
+
+	struct instruction i;
+	i.type = INST_TYPE_R;
+	i.inst.r.oper = oper;
+	i.inst.r.dest = dest;
+	i.inst.r.left = left;
+	i.inst.r.right = right;
+
+	if (add_instruction(i))
+		return 1;
+
+	/* FIXME #define */
+	byte_offset += 2;
+	return 0;
+}
+
+int parse_j_reg_type(enum JCOND cond, enum REG reg)
+{
+//	fprintf(stderr, "<DEBUG>: JRTYPE %s <%s>\n",
+//		j_to_human[cond],
+//		reg_to_human[reg]);
+
+	struct instruction i;
+	i.type = INST_TYPE_JR;
+	i.inst.jr.cond = cond;
+	i.inst.jr.reg = reg;
+
+	if (add_instruction(i))
+		return 1;
+
+	/* FIXME #define */
+	byte_offset += 2;
+	return 0;
+}
+
+int parse_j_imm_type(enum JCOND cond, uint16_t imm)
+{
+//	fprintf(stderr, "<DEBUG>: JITYPE %s <0x%04x>\n",
+//		j_to_human[cond],
+//		imm);
+
+	struct instruction i;
+
+	i.type = INST_TYPE_JI;
+	i.inst.ji.cond = cond;
+	i.inst.ji.imm_is_ident = false;
+	i.inst.ji.imm.value = imm;
+
+	if (add_instruction(i))
+		return 1;
+
+	/* FIXME #define */
+	byte_offset += 4;
+	return 0;
+}
+
+int parse_j_ident_type(enum JCOND cond, char *ident)
+{
+//	fprintf(stderr, "<DEBUG>: JTYPE %s <%s>\n",
+//		b_to_human[cond],
+//		ident);
+	struct instruction i;
+
+	i.type = INST_TYPE_JI;
+	i.inst.ji.cond = cond;
+	i.inst.ji.imm_is_ident = true;
+	i.inst.ji.imm.label = ident;
+
+	if (add_instruction(i))
author	David Phillips <david@sighup.nz>	2019-04-14 16:10:18 +1200
committer	David Phillips <david@sighup.nz>	2019-08-03 12:42:57 +1200
commit	ac8150b7601d9611818bb8b265a125a347a67004 (patch)
tree	aa1440c18551fa415af53daedde76536ac2d000d
download	toy-cpu-assembler-ac8150b7601d9611818bb8b265a125a347a67004.tar.xz