summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Phillips <david@sighup.nz>2019-04-14 16:10:18 +1200
committerDavid Phillips <david@sighup.nz>2019-08-03 12:42:57 +1200
commitac8150b7601d9611818bb8b265a125a347a67004 (patch)
treeaa1440c18551fa415af53daedde76536ac2d000d
downloadtoy-cpu-assembler-ac8150b7601d9611818bb8b265a125a347a67004.tar.xz
First dump of working prototype
-rw-r--r--.gitignore3
-rw-r--r--Makefile18
-rw-r--r--assembler.c97
-rw-r--r--instruction.h145
-rw-r--r--lex.c373
-rw-r--r--lex.h30
-rw-r--r--output.c203
-rw-r--r--parse.c653
-rw-r--r--parse.h65
-rw-r--r--tok_util.c78
-rw-r--r--tok_util.h9
11 files changed, 1674 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..71e5da6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+*.o
+*.bin
+assembler
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..fa8f61b
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,18 @@
+OBJECTS = lex.o parse.o output.o assembler.o tok_util.o
+
+all: assembler
+
+assembler: $(OBJECTS)
+
+lex.o: lex.h
+
+parse.o: lex.h parse.h instruction.h tok_util.h
+
+output.o: parse.h
+
+tok_util.o: lex.h
+
+
+.PHONY: clean
+clean:
+ - rm -f assembler $(OBJECTS)
diff --git a/assembler.c b/assembler.c
new file mode 100644
index 0000000..eaf4d38
--- /dev/null
+++ b/assembler.c
@@ -0,0 +1,97 @@
+#include <stdio.h>
+#include <stdint.h>
+
+#include "lex.h"
+#include "parse.h"
+#include "instruction.h"
+
+#if 0
+/**
+ * Types for intermediate storage of instructions
+ */
+struct r_type {
+ enum OPER operation;
+ enum REG dest;
+ enum REG left;
+ enum REG right;
+};
+
+struct i_type { /* covers WI and NI */
+ enum OPER operation;
+ enum REG dest;
+ enum REG left;
+ int16_t immediate;
+};
+
+struct jr_type {
+ enum JCOND condition;
+ enum REG reg;
+};
+
+struct ji_type {
+ enum JCOND condition;
+ uint16_t immediate;
+};
+
+struct b_type { /* FIXME merge with ji_type? */
+ enum JCOND condition;
+ uint16_t immediate; /* capped to 10 bits by IS */
+};
+
+/* Union for bringing above together */
+union instruction_union {
+ struct r_type r;
+ struct i_type i;
+ struct jr_type jr;
+ struct ji_type ji;
+ struct b_type b;
+};
+
+struct instruction {
+ enum INST_TYPE type;
+ union instruction_union i;
+};
+/**/
+#endif
+
+/**
+ * Entry point: assemble <in.asm> into <out.bin>.
+ *
+ * Runs the three stages in order - lex(), parse(), output() - and stops at
+ * the first one that fails. Both streams are closed on every path once they
+ * have been opened.
+ *
+ * Returns 0 on success, 1 on usage, file-open or file-close errors, 2 if
+ * lex() returned no tokens, otherwise the non-zero status of parse() or
+ * output().
+ */
+int main(int argc, char **argv)
+{
+    int ret = 0;
+    FILE *fin = NULL;
+    FILE *fout = NULL;
+    struct token *tokens = NULL;
+    size_t tok_count = 0;
+    struct instruction *insts = NULL;
+    size_t insts_count = 0;
+    struct label *labels = NULL;
+    size_t labels_count = 0;
+
+    if (argc < 3) {
+        fprintf(stderr, "Syntax: %s <in.asm> <out.bin>\n", argv[0]);
+        return 1;
+    }
+
+    if ((fin = fopen(argv[1], "r")) == NULL) {
+        fprintf(stderr, "Error opening %s: ", argv[1]);
+        perror("fopen");
+        return 1;
+    }
+
+    if ((fout = fopen(argv[2], "wb")) == NULL) {
+        fprintf(stderr, "Error opening %s: ", argv[2]);
+        perror("fopen");
+        ret = 1;
+        goto out_close_in;
+    }
+
+    if ((tokens = lex(argv[1], fin, &tok_count)) == NULL) {
+        ret = 2;
+        goto out_close_all;
+    }
+
+    /* parse() also receives the input stream and file name for diagnostics */
+    ret = parse(argv[1], fin, &labels, &labels_count, tokens, tok_count, &insts, &insts_count);
+    if (ret != 0)
+        goto out_close_all;
+
+    ret = output(fout, labels, labels_count, insts, insts_count);
+
+out_close_all:
+    /* fclose() flushes buffered output; a failure means the file is incomplete */
+    if (fclose(fout) != 0) {
+        perror("fclose");
+        if (ret == 0)
+            ret = 1;
+    }
+out_close_in:
+    fclose(fin);
+    return ret;
+}
diff --git a/instruction.h b/instruction.h
new file mode 100644
index 0000000..3ee18d9
--- /dev/null
+++ b/instruction.h
@@ -0,0 +1,145 @@
+#ifndef INSTRUCTION_H
+#define INSTRUCTION_H
+
+/**
+ * Values used for software-only identification of instruction types. These
+ * values are not tied to the machine language. Guaranteed unique.
+ */
+enum INST_TYPE {
+ INST_TYPE_R,
+ INST_TYPE_NI,
+ INST_TYPE_WI,
+ INST_TYPE_JR,
+ INST_TYPE_JI,
+ INST_TYPE_B
+};
+
+/**
+ * Masks for all four instruction types. Not guaranteed unique
+ */
+#define MASK_INST_RTYPE (0x0000)
+#define MASK_INST_NITYPE (0x4000)
+#define MASK_INST_WITYPE (0x8000)
+#define MASK_INST_JTYPE (0xC000)
+
+/**
+ * ALU operation types
+ * R-type and I-type take 3-bit ALU oper as bits:
+ * xx___xxx xxxxxxxx
+ */
+enum OPER {
+ OPER_ADD = 0,
+ OPER_SUB = 1,
+ OPER_SHL = 2,
+ OPER_SHR = 3,
+ OPER_AND = 4,
+ OPER_OR = 5,
+ OPER_XOR = 6,
+ OPER_MUL = 7,
+};
+#define OPER_SHAMT (11)
+#define MASK_OPER(x) ((x) << OPER_SHAMT)
+
+static const char *oper_to_human[] = {
+ [OPER_ADD] = "add",
+ [OPER_SUB] = "sub",
+ [OPER_SHL] = "shl",
+ [OPER_SHR] = "shr",
+ [OPER_AND] = "and",
+ [OPER_OR ] = "or",
+ [OPER_XOR] = "xor",
+ [OPER_MUL] = "mul"
+};
+
+/**
+ * Masks for jump and branch conditions
+ * J-type instructions (jump, branch) take these as follows:
+ * xxx___xx xxxxxxxx
+ */
+enum JCOND {
+ JB_UNCOND = 0x0,
+ JB_NEVER = 0x1,
+ JB_ZERO = 0x2,
+ JB_NZERO = 0x3,
+ JB_CARRY = 0x4,
+ JB_NCARRY = 0x5,
+ JB_CARRYZ = 0x6,
+ JB_NCARRYZ = 0x7
+};
+#define JB_SHAMT (10)
+#define MASK_JB_COND(x) ((x) << JB_SHAMT)
+#define MASK_IS_JUMP (0 << 13)
+#define MASK_IS_BRANCH (1 << 13)
+#define MASK_JI (0x0 << 8)
+#define MASK_JR (0x1 << 8)
+#define MASK_JUMP_REGISTER(x) ((x) << 5)
+
+static const char *j_to_human[] = {
+ [JB_UNCOND] = "jmp",
+ [JB_NEVER] = "jn",
+ [JB_ZERO] = "jz",
+ [JB_NZERO] = "jnz",
+ [JB_CARRY] = "jc",
+ [JB_NCARRY] = "jnc",
+ [JB_CARRYZ] = "jcz",
+ [JB_NCARRYZ] = "jncz"
+};
+static const char *b_to_human[] = {
+ [JB_UNCOND] = "bra",
+ [JB_NEVER] = "bn",
+ [JB_ZERO] = "bz",
+ [JB_NZERO] = "bnz",
+ [JB_CARRY] = "bc",
+ [JB_NCARRY] = "bnc",
+ [JB_CARRYZ] = "bcz",
+ [JB_NCARRYZ] = "bncz"
+};
+
+/**
+ * Register numbers used in all manner of instructions in varying positions
+ */
+enum REG {
+ REG_0 = 0,
+ REG_1 = 1,
+ REG_2 = 2,
+ REG_3 = 3,
+ REG_4 = 4,
+ REG_5 = 5,
+ REG_6 = 6,
+ REG_H = 7
+};
+
+static const char *reg_to_human[] = {
+ [REG_0] = "$0",
+ [REG_1] = "$1",
+ [REG_2] = "$2",
+ [REG_3] = "$3",
+ [REG_4] = "$4",
+ [REG_5] = "$5",
+ [REG_6] = "$6",
+ [REG_H] = "$H",
+};
+
+/**
+ * Offset macro to turn REG_* into mask for register operands of R-type and
+ * I-type instructions
+ */
+/* destination reg: xxxxx___ xxxxxxxx */
+#define REG_DEST_OFFSET (8)
+#define MASK_REG_DEST(x) ((x) << REG_DEST_OFFSET)
+
+/* left reg: xxxxxxxx ___xxxxx */
+#define REG_LEFT_OFFSET (5)
+#define MASK_REG_LEFT(x) ((x) << REG_LEFT_OFFSET)
+
+/* right reg (R-type only): xxxxxxxx xxx___xx */
+#define REG_RIGHT_OFFSET (2)
+#define MASK_REG_RIGHT(x) ((x) << REG_RIGHT_OFFSET)
+
+/* five LSb are narrow immediate value */
+#define MASK_NI_IMM(x) ((x) & 0x1F)
+
+/* 10 LSb is branch offset */
+#define MASK_B_OFFSET(x) ((x) & 0x3FF)
+
+#endif /* INSTRUCTION_H */
diff --git a/lex.c b/lex.c
new file mode 100644
index 0000000..6c32c97
--- /dev/null
+++ b/lex.c
@@ -0,0 +1,373 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "lex.h"
+
+/*
+ * Print a diagnostic to stderr, prefixed with the file name and the 1-based
+ * (line,column) of the current lexer position. The arguments are a printf
+ * format string followed by its values.
+ *
+ * Wrapped in do/while(0) so the macro expands to exactly one statement and
+ * stays correct inside an unbraced if/else.
+ */
+#define emit_error(...) \
+    do { \
+        fprintf(stderr, "%s at (%zu,%zu): ", filename, 1 + line, 1 + column); \
+        fprintf(stderr, __VA_ARGS__); \
+    } while (0)
+
+static const char *keywords[] = {
+ "declare",
+ "byte",
+ "bytes",
+ "word",
+ "words",
+ "base",
+};
+
+static const char *filename = NULL;
+static size_t line;
+static size_t column;
+static struct token* tokens;
+static size_t tokens_count;
+static char buffer[1024]; /* XXX limitation: sources must have lines < 1024 bytes */
+
+/*
+ * Consume the character `c` at the current column of the line buffer.
+ * Returns 0 and advances one column on a match; otherwise reports what was
+ * found instead and returns 1 without moving.
+ */
+static int expect(const char c) {
+    if (buffer[column] == c) {
+        column++;
+        return 0;
+    }
+
+    emit_error("Expected '%c', got '%c'\n", c, buffer[column]);
+    return 1;
+}
+
+/* Stamp `t` with the current lexer position, converted to 1-based values. */
+static void store_location(struct token *t) {
+    t->line = 1 + line;
+    t->column = 1 + column;
+}
+
+/* Advance the column past any run of spaces and tabs in the line buffer. */
+static void eat_whitespace(void) {
+    const size_t end = strlen(buffer);
+
+    for (; column < end; column++) {
+        if (strchr(" \t", buffer[column]) == NULL)
+            break;
+    }
+}
+
+/*
+ * Append a copy of `t` to the growing token array.
+ *
+ * Returns 0 on success. On allocation failure prints the reason, releases
+ * the whole array, resets the count so both stay consistent, and returns 1.
+ */
+static int add_token(struct token t) {
+    struct token *grown = realloc(tokens, sizeof(*tokens) * (tokens_count + 1));
+
+    if (!grown) {
+        perror("realloc");
+        /* every caller abandons lexing on failure, so drop the list */
+        free(tokens);
+        tokens = NULL;
+        tokens_count = 0;
+        return 1;
+    }
+
+    tokens = grown;
+    tokens[tokens_count++] = t;
+    return 0;
+}
+
+/* Lex a single ',' into a TOKEN_COMMA of span 1. Returns 1 if it is absent. */
+static int lex_comma(struct token *t) {
+    if (expect(',') != 0)
+        return 1;
+
+    t->type = TOKEN_COMMA;
+    t->span = 1;
+    return 0;
+}
+
+/* Lex a single '.' into a TOKEN_DOT of span 1. Returns 1 if it is absent. */
+static int lex_dot(struct token *t) {
+    if (expect('.') != 0)
+        return 1;
+
+    t->type = TOKEN_DOT;
+    t->span = 1;
+    return 0;
+}
+
+/*
+ * Lex a register reference: '$' followed by a run of alphanumerics.
+ *
+ * On success t->s_val holds a heap copy of the name without the '$', and
+ * t->span covers the '$' as well. Returns 1 if the '$' is missing or the
+ * copy cannot be allocated.
+ */
+static int lex_register(struct token *t) {
+    size_t end = 0;
+
+    if (expect('$'))
+        return 1;
+
+    /* cast: passing a negative char to a <ctype.h> function is undefined */
+    for (end = column; isalnum((unsigned char)buffer[end]); end++) {
+        ;
+    }
+
+    t->s_val = strndup(&buffer[column], end - column);
+    if (!t->s_val) {
+        perror("strndup");
+        return 1;
+    }
+
+    t->span = end - column + 1; /* +1 to include the '$' */
+    t->type = TOKEN_REGISTER;
+    column = end;
+    return 0;
+}
+
+/*
+ * Lex a double-quoted string literal. No escape sequences are interpreted.
+ *
+ * On success t->s_val holds a heap copy of the text between the quotes and
+ * t->span covers both quotes. Returns 1 if either quote is missing or the
+ * copy cannot be allocated.
+ */
+static int lex_string(struct token *t) {
+    size_t start = 0;
+    size_t end = 0;
+
+    if (expect('"'))
+        return 1;
+
+    start = column;
+    for (end = start; buffer[end] != '\0' && buffer[end] != '"'; end++) {
+        ;
+    }
+
+    /* check the closing quote before allocating so the error path cannot leak */
+    column = end;
+    if (expect('"'))
+        return 1;
+
+    t->s_val = strndup(&buffer[start], end - start);
+    if (!t->s_val) {
+        perror("strndup");
+        return 1;
+    }
+
+    t->span = end - start + 2; /* +2 to include "" */
+    t->type = TOKEN_STRING;
+    return 0;
+}
+
+/*
+ * Lex the backslash escape inside a character literal. The cursor must be on
+ * the backslash; the surrounding quotes are handled by the caller.
+ *
+ * On success stores the escaped character's value in t->i_val and marks the
+ * token TOKEN_NUMERIC. Returns 1 if the backslash is missing or the escape
+ * letter is not recognised.
+ */
+static int lex_char_escaped(struct token *t) {
+    if (expect('\\'))
+        return 1;
+
+    switch (buffer[column]) {
+    case 'a': t->i_val = '\a'; break;
+    case 'b': t->i_val = '\b'; break;
+    case 'f': t->i_val = '\f'; break;
+    case 'n': t->i_val = '\n'; break;
+    case 'r': t->i_val = '\r'; break;
+    case 't': t->i_val = '\t'; break;
+    case 'v': t->i_val = '\v'; break;
+
+    case '\\': t->i_val = '\\'; break;
+    case '\'': t->i_val = '\''; break;
+    default:
+        emit_error("Unknown escape sequence '\\%c'\n", buffer[column]);
+        /* fail instead of silently yielding a token with value 0 */
+        return 1;
+    }
+    column++;
+    t->type = TOKEN_NUMERIC;
+    t->span = 4; /* len '\x' == 4 */
+    return 0;
+}
+
+/*
+ * Lex a single-quoted character literal ('x' or '\n') into a TOKEN_NUMERIC
+ * whose i_val is the character's value.
+ *
+ * Returns 1 on a missing quote, a bad escape, or a literal cut short by the
+ * end of the buffer.
+ */
+static int lex_char(struct token *t) {
+    if (expect('\''))
+        return 1;
+
+    if (buffer[column] == '\\') {
+        if (lex_char_escaped(t))
+            return 1;
+    } else if (buffer[column] == '\0') {
+        emit_error("Error: unterminated character literal\n");
+        return 1;
+    } else {
+        t->type = TOKEN_NUMERIC;
+        t->span = 3; /* len 'x' == 3 */
+        t->i_val = buffer[column];
+        /* step over the character so the closing quote is checked next */
+        column++;
+    }
+    if (expect('\''))
+        return 1;
+
+    return 0;
+}
+
+/*
+ * Lex a numeric literal into a TOKEN_NUMERIC.
+ *
+ * Accepted forms: an optional leading '-', then either a "0x" prefixed hex
+ * number or digits with an optional one-letter base suffix (h = 16, d = 10,
+ * o = 8, b = 2). The literal extends to the next space, tab, newline or
+ * comma. With neither prefix nor suffix the base is left to strtol(), which
+ * reads a leading 0 as octal.
+ *
+ * Returns 0 on success, 1 after reporting a malformed literal or an
+ * allocation failure.
+ */
+static int lex_num(struct token *t)
+{
+    char *num_s = NULL;
+    char *end = NULL;
+    char *suffix = NULL;
+    size_t span = 0;
+    size_t prefix_span = 0;
+    long value = 0;
+    int base = 0;
+    int neg = 0;
+
+    /* shave off a leading '-' now to make handling easier */
+    if (buffer[column] == '-') {
+        neg = 1;
+        if (expect('-'))
+            return 1;
+        prefix_span++;
+    }
+
+    /* cast: passing a negative char to a <ctype.h> function is undefined */
+    if (!isdigit((unsigned char)buffer[column])) {
+        emit_error("Error: '%c' cannot start a numerical literal\n", buffer[column]);
+        return 1;
+    }
+
+    /*
+     * check if hex. buffer[column] is a digit here, so column + 1 is at worst
+     * the terminating NUL and needs no separate length check.
+     */
+    if (buffer[column] == '0' && buffer[column + 1] == 'x') {
+        base = 16;
+    }
+
+    span = strcspn(&buffer[column], " \n\t,");
+    if (span == 0) {
+        emit_error("Error: malformed numerical literal\n");
+        return 1;
+    }
+    num_s = strndup(&buffer[column], span);
+    if (!num_s) {
+        perror("strndup");
+        return 1;
+    }
+
+    /* if base still unknown, determine it from the last char of the constant */
+    suffix = &num_s[span - 1];
+    if (base == 0 && !isdigit((unsigned char)*suffix)) {
+        switch (*suffix) {
+        case 'h': base = 16; break;
+        case 'd': base = 10; break;
+        case 'o': base = 8; break;
+        case 'b': base = 2; break;
+        default:
+            emit_error("Error: '%c' is an invalid base suffix in numerical literal\n", *suffix);
+            free(num_s);
+            return 1;
+        }
+        /* strip the suffix letter so strtol() only sees digits */
+        *suffix = '\0';
+    }
+
+    value = strtol(num_s, &end, base);
+    if (*end != '\0') {
+        emit_error("Error: malformed numerical literal\n");
+        free(num_s);
+        return 1;
+    }
+    free(num_s);
+
+    column += span;
+
+    t->type = TOKEN_NUMERIC;
+    t->span = prefix_span + span;
+    /* negate in long, then narrow to the token's int field */
+    t->i_val = (int)(neg ? -value : value);
+    return 0;
+}
+
+/*
+ * Lex an identifier-like word: a letter followed by alphanumerics.
+ *
+ * The token becomes TOKEN_LABEL when the word is directly followed by ':',
+ * otherwise TOKEN_IDENT; a word found in the keywords table is retyped as
+ * TOKEN_KEYWORD. t->s_val holds a heap copy of the word without any colon.
+ *
+ * Returns 1 if the current character cannot start an identifier or the copy
+ * cannot be allocated.
+ */
+static int lex_misc(struct token *t) {
+    size_t end = 0;
+    size_t k = 0;
+
+    /* cast: passing a negative char to a <ctype.h> function is undefined */
+    if (!isalpha((unsigned char)buffer[column])) {
+        emit_error("Error: '%c' cannot start an identifier\n", buffer[column]);
+        return 1;
+    }
+
+    for (end = column; isalnum((unsigned char)buffer[end]); end++) {
+        ;
+    }
+
+    t->type = (buffer[end] == ':') ? TOKEN_LABEL : TOKEN_IDENT;
+
+    t->s_val = strndup(&buffer[column], end - column);
+    if (!t->s_val) {
+        perror("strndup");
+        return 1;
+    }
+
+    for (k = 0; k < sizeof(keywords) / sizeof(*keywords); k++) {
+        if (strcmp(t->s_val, keywords[k]) == 0)
+            t->type = TOKEN_KEYWORD;
+    }
+
+    t->span = end - column;
+    column = end;
+    /* skip over the colon; it is not part of the stored name */
+    if (t->type == TOKEN_LABEL) {
+        column++;
+    }
+    return 0;
+}
+
+/* Turn the current character into a one-column TOKEN_EOL. Always returns 0. */
+static int lex_eol(struct token *t) {
+    t->span = 1;
+    t->type = TOKEN_EOL;
+    column++;
+    return 0;
+}
+
+/*
+ * Lex the line currently held in the line buffer, appending one token per
+ * lexeme to the token array.
+ *
+ * A newline or a comment character (';', '#', '!') ends the line with a
+ * TOKEN_EOL; anything after it is ignored. A final line that lacks a
+ * trailing newline is given a TOKEN_EOL too, so every line is terminated
+ * the same way.
+ *
+ * Returns 0 on success, non-zero once a sub-lexer or add_token() fails.
+ */
+int lex_line(void) {
+    int ret = 0;
+    size_t len = strlen(buffer);
+    struct token tok;
+
+    while (column < len) {
+        memset(&tok, 0, sizeof(tok));
+        store_location(&tok);
+        switch (buffer[column]) {
+        case ';':
+        case '#':
+        case '!':
+        case '\n':
+            (void)lex_eol(&tok); /* cannot fail */
+            return add_token(tok);
+        case ' ':
+        case '\t':
+            eat_whitespace();
+            continue;
+        /*
+        case '/':
+            FIXME look ahead * or /
+            eat_block_comment();
+            break;
+        */
+        case ',':
+            ret = lex_comma(&tok);
+            break;
+        case '.':
+            ret = lex_dot(&tok);
+            break;
+        case '$':
+            ret = lex_register(&tok);
+            break;
+        case '"':
+            ret = lex_string(&tok);
+            break;
+        case '\'':
+            ret = lex_char(&tok);
+            break;
+        case '-':
+            ret = lex_num(&tok);
+            break;
+        /* FIXME add support for expressions like `addi $0, $0, (1+2*3) */
+        default:
+            if (isdigit((unsigned char)buffer[column])) {
+                ret = lex_num(&tok);
+            } else {
+                ret = lex_misc(&tok);
+            }
+            break;
+        }
+        if (ret)
+            return ret;
+
+        if (add_token(tok))
+            return 1;
+    }
+
+    /*
+     * The buffer ended without a newline or comment. If fgets() did not fill
+     * it, this is the last line of a file with no trailing newline, so
+     * terminate it with a zero-width TOKEN_EOL. A completely full buffer
+     * means an over-long line was split; that case is left alone.
+     */
+    if (len > 0 && len < sizeof(buffer) - 1) {
+        memset(&tok, 0, sizeof(tok));
+        store_location(&tok);
+        tok.type = TOKEN_EOL;
+        tok.span = 0;
+        return add_token(tok);
+    }
+    return 0;
+}
+
+/*
+ * Lex the whole of `fin`, one fgets() line at a time.
+ *
+ * filename_local is kept for use in diagnostics. On success the token count
+ * is stored in *len and the token array is returned. Returns NULL if any
+ * line fails to lex or if reading stops for a reason other than end of file.
+ * Note that input with no lines also returns NULL (with *len set to 0),
+ * because no token array was ever allocated.
+ */
+struct token* lex(const char *filename_local, FILE *fin, size_t *len)
+{
+ /* reset the file-level lexer state for this run */
+ filename = filename_local;
+ line = 0;
+ tokens = NULL;
+ tokens_count = 0;
+
+ while (fgets(buffer, sizeof(buffer), fin)) {
+ column = 0;
+ if (lex_line()) {
+ return NULL;
+ }
+ line++;
+ }
+ /* fgets() returned NULL: only end of file is an acceptable reason */
+ if (!feof(fin)) {
+ perror("fgets");
+ return NULL;
+ }
+
+ *len = tokens_count;
+ return tokens;
+}
diff --git a/lex.h b/lex.h
new file mode 100644
index 0000000..a14528f
--- /dev/null
+++ b/lex.h
@@ -0,0 +1,30 @@
+#ifndef LEX_H
+#define LEX_H
+
+#include <stdio.h>
+
+enum TOKEN_TYPE {
+ TOKEN_COMMA = 1,
+ TOKEN_DOT, /* starts an assembler directive */
+ TOKEN_LABEL, /* label declaration */
+ TOKEN_IDENT, /* identifier (not label decl) or instruction */
+ TOKEN_KEYWORD, /* keyword used to tell the assembler special information */
+ TOKEN_STRING, /* string literal */
+ TOKEN_NUMERIC, /* numeric literal, incl literal chars */
+ TOKEN_REGISTER, /* $0, $H, $1 */
+ TOKEN_EOL /* end of line */
+};
+
+struct token {
+ enum TOKEN_TYPE type;
+ /* line and column of the source file this token occurs at. 1-based. */
+ size_t line;
+ size_t column;
+ size_t span;
+ char *s_val;
+ int i_val;
+};
+
+struct token* lex(const char *filename_local, FILE *fin, size_t *len);
+
+#endif /* LEX_H */
diff --git a/output.c b/output.c
new file mode 100644
index 0000000..ff22956
--- /dev/null
+++ b/output.c
@@ -0,0 +1,203 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "parse.h"
+
+static size_t cur_byte;
+
+/*
+ * Encode an R-type instruction (operation, destination, left and right
+ * registers) into *dest. Returns the instruction length in 16-bit words (1).
+ */
+int generate_single_r_type(uint32_t *dest, struct r_type inst)
+{
+    const uint32_t word = (uint32_t)MASK_INST_RTYPE
+                        | MASK_OPER(inst.oper)
+                        | MASK_REG_DEST(inst.dest)
+                        | MASK_REG_LEFT(inst.left)
+                        | MASK_REG_RIGHT(inst.right);
+
+    *dest = word;
+    return 1;
+}
+/*
+ * Encode a narrow-immediate I-type instruction into *dest. The immediate is
+ * truncated to its five low bits by MASK_NI_IMM. Returns the instruction
+ * length in 16-bit words (1).
+ */
+int generate_single_ni_type(uint32_t *dest, struct i_type inst)
+{
+    const uint32_t word = (uint32_t)MASK_INST_NITYPE
+                        | MASK_OPER(inst.oper)
+                        | MASK_REG_DEST(inst.dest)
+                        | MASK_REG_LEFT(inst.left)
+                        | MASK_NI_IMM(inst.imm.value);
+
+    *dest = word;
+    return 1;
+}
+
+/*
+ * Encode a wide-immediate I-type instruction into *dest: the opcode word
+ * goes in the upper 16 bits and the immediate in the lower 16. Returns the
+ * instruction length in 16-bit words (2).
+ */
+int generate_single_wi_type(uint32_t *dest, struct i_type inst)
+{
+    uint32_t i = 0;
+
+    i |= MASK_INST_WITYPE;
+    i |= MASK_OPER(inst.oper);
+    i |= MASK_REG_DEST(inst.dest);
+    i |= MASK_REG_LEFT(inst.left);
+
+    /* two-word instruction - make room for the immediate */
+    i <<= 16;
+
+    /*
+     * Keep only 16 bits: if the immediate is ever a signed type, a negative
+     * value would otherwise sign-extend into the opcode word.
+     */
+    i |= (uint32_t)inst.imm.value & 0xFFFFu;
+
+    *dest = i;
+    return 2;
+}
+
+/*
+ * Encode a jump-to-immediate instruction into *dest: the opcode word goes in
+ * the upper 16 bits and the target in the lower 16. Returns the instruction
+ * length in 16-bit words (2).
+ */
+int generate_single_ji_type(uint32_t *dest, struct ji_type inst)
+{
+    uint32_t i = 0;
+
+    i |= MASK_INST_JTYPE;
+    i |= MASK_IS_JUMP;
+    i |= MASK_JB_COND(inst.cond);
+    i |= MASK_JI;
+
+    /* two-word instruction - make room for the immediate */
+    i <<= 16;
+
+    /*
+     * Keep only 16 bits: if the immediate is ever a signed type, a negative
+     * value would otherwise sign-extend into the opcode word.
+     */
+    i |= (uint32_t)inst.imm.value & 0xFFFFu;
+
+    *dest = i;
+    return 2;
+}
+
+/*
+ * Encode a jump-to-register instruction (condition plus target register)
+ * into *dest. Returns the instruction length in 16-bit words (1).
+ */
+int generate_single_jr_type(uint32_t *dest, struct jr_type inst)
+{
+    const uint32_t word = (uint32_t)MASK_INST_JTYPE
+                        | MASK_IS_JUMP
+                        | MASK_JB_COND(inst.cond)
+                        | MASK_JR
+                        | MASK_JUMP_REGISTER(inst.reg);
+
+    *dest = word;
+    return 1;
+}
+
+/*
+ * Encode a branch instruction into *dest. The offset is truncated to its ten
+ * low bits by MASK_B_OFFSET. Returns the instruction length in 16-bit
+ * words (1).
+ */
+int generate_single_b_type(uint32_t *dest, struct b_type inst)
+{
+    const uint32_t word = (uint32_t)MASK_INST_JTYPE
+                        | MASK_IS_BRANCH
+                        | MASK_JB_COND(inst.cond)
+                        | MASK_B_OFFSET(inst.imm.value);
+
+    *dest = word;
+    return 1;
+}
+
+
+/*
+ * Find `label` by name among the first labels_count entries of `labels`.
+ * On a match stores its byte offset in *val and returns 0; otherwise
+ * reports the undefined label on stderr and returns 1.
+ */
+int look_up_label(struct label *labels, size_t labels_count, uint16_t *val, const char *label)
+{
+    size_t idx = 0;
+
+    while (idx < labels_count) {
+        if (strcmp(labels[idx].name, label) == 0) {
+            *val = labels[idx].byte_offset;
+            return 0;
+        }
+        idx++;
+    }
+
+    /* FIXME emit */
+    fprintf(stderr, "Reference to undefined label `%s'\n", label);
+    return 1;
+}
+
+/*
+ * Encode one instruction and write it to `f`.
+ *
+ * Immediates given as identifiers are first resolved through
+ * look_up_label(). For branches the immediate is then made relative to
+ * cur_byte (the byte position of this instruction) and halved before
+ * encoding. The result is written as one or two 16-bit words: hex text by
+ * default, raw big-endian bytes when RAW is defined. cur_byte advances by
+ * the encoded length.
+ *
+ * Returns 0 on success, 1 if a label is undefined or the instruction type
+ * is not recognised (nothing is written in either case).
+ */
+int output_single(FILE *f, struct label *labels, size_t labels_count, struct instruction inst)
+{
+    int len = 0;
+    uint32_t i = 0;
+
+    switch (inst.type) {
+    case INST_TYPE_R:
+        len = generate_single_r_type(&i, inst.inst.r);
+        break;
+    case INST_TYPE_NI:
+        if (   inst.inst.i.imm_is_ident
+            && look_up_label(labels, labels_count, &inst.inst.i.imm.value, inst.inst.i.imm.label))
+            return 1;
+
+        len = generate_single_ni_type(&i, inst.inst.i);
+        break;
+    case INST_TYPE_WI:
+        if (   inst.inst.i.imm_is_ident
+            && look_up_label(labels, labels_count, &inst.inst.i.imm.value, inst.inst.i.imm.label))
+            return 1;
+
+        len = generate_single_wi_type(&i, inst.inst.i);
+        break;
+    case INST_TYPE_JR:
+        len = generate_single_jr_type(&i, inst.inst.jr);
+        break;
+    case INST_TYPE_JI:
+        if (   inst.inst.ji.imm_is_ident
+            && look_up_label(labels, labels_count, &inst.inst.ji.imm.value, inst.inst.ji.imm.label))
+            return 1;
+
+        len = generate_single_ji_type(&i, inst.inst.ji);
+        break;
+    case INST_TYPE_B:
+        if (   inst.inst.b.imm_is_ident
+            && look_up_label(labels, labels_count, &inst.inst.b.imm.value, inst.inst.b.imm.label))
+            return 1;
+        /* turn the target into a distance from this instruction... */
+        inst.inst.b.imm.value -= cur_byte;
+        if (inst.inst.b.imm.value % 2 != 0) {
+            fprintf(stderr, "Internal error: branch offset %d not a multiple of 2\n", inst.inst.b.imm.value);
+        }
+        /* ...and from bytes into 16-bit words */
+        inst.inst.b.imm.value /= 2;
+
+        len = generate_single_b_type(&i, inst.inst.b);
+        break;
+    default:
+        fprintf(stderr, "Internal error: unhandled instruction type\n");
+        /* do not emit a bogus all-zero word for something we cannot encode */
+        return 1;
+    }
+
+    if (len == 2) {
+//#define RAW
+#ifdef RAW
+        fputc(0xFF & (i >> 24), f);
+        fputc(0xFF & (i >> 16), f);
+#else
+        fprintf(f, "%04x ", i >> 16);
+#endif
+    }
+#ifdef RAW
+    fputc(0xFF & (i >> 8), f);
+    fputc(0xFF & (i >> 0), f);
+#else
+    fprintf(f, "%04x ", 0xFFFF & i);
+#endif
+
+    cur_byte += 2 * len;
+    return 0;
+}
+
+/*
+ * Write every instruction in `insts` to `fout`, in order.
+ *
+ * Unless RAW is defined, the stream starts with a "v2.0 raw" header line.
+ * The instruction count is also reported on stdout. Returns 0 on success,
+ * 1 as soon as output_single() fails.
+ */
+int output(FILE *fout, struct label *labels, size_t label_count, struct instruction *insts, size_t insts_count)
+{
+    size_t i = 0;
+    cur_byte = 0;
+
+#ifndef RAW
+    fprintf(fout, "v2.0 raw\n");
+#endif
+
+    /* %zu: insts_count is a size_t, for which %d is undefined behaviour */
+    printf("output: have %zu instructions\n", insts_count);
+
+    for (i = 0; i < insts_count; i++) {
+        if (output_single(fout, labels, label_count, insts[i]))
+            return 1;
+    }
+
+    return 0;
+}
diff --git a/parse.c b/parse.c
new file mode 100644
index 0000000..f5caf4d
--- /dev/null
+++ b/parse.c
@@ -0,0 +1,653 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "lex.h"
+#include "parse.h"
+#include "instruction.h"
+#include "tok_util.h"
+#if 0
+struct label {
+ char *name;
+ size_t byte_offset;
+};
+
+union immediate {
+ const char *label;
+ int16_t value;
+};
+
+struct r_type {
+ enum OPER oper;
+ enum REG dest;
+ enum REG left;
+ enum REG right;
+};
+
+struct i_type {
+ enum OPER oper;
+ enum REG dest;
+ enum REG left;
+ bool imm_is_ident;
+ union immediate imm;
+};
+
+struct jr_type {
+ enum JCOND cond;
+ enum REG reg;
+};
+
+struct ji_type {
+ enum JCOND cond;
+ bool imm_is_ident;
+ union immediate imm;
+};
+
+struct b_type {
+ enum JCOND cond;
+ bool imm_is_ident;
+ union immediate imm;
+};
+
+struct instruction {
+ enum INST_TYPE type;
+ union instruction_u {
+ struct r_type r; /* catch-all R-Type */
+ struct i_type i; /* I-type on immediate literal */
+ struct jr_type jr; /* jump to register */
+ struct ji_type ji; /* jump to immediate */
+ struct b_type b; /* branch to immediate literal */
+ } inst;
+};
+#endif
+
+static const char *filename;
+static FILE *fd;
+static struct token *cursor;
+static struct token *tokens;
+static size_t tokens_pos;
+static size_t tokens_count;
+static struct label *labels;
+static size_t labels_count;
+static struct instruction *insts;
+static size_t insts_count;
+static size_t byte_offset;
+
+/*
+ * Print a printf-style diagnostic to stderr.
+ *
+ * With a current token, the message is prefixed by the file name and the
+ * token's (line,column), and indicate_file_area() is then called with the
+ * token's position and span. Without one (cursor is NULL) only the file
+ * name prefixes the message.
+ */
+void emit(const char *fmt, ...)
+{
+    va_list args;
+
+    va_start(args, fmt);
+    if (cursor) {
+        /* %zu: line and column are size_t */
+        fprintf(stderr, "%s at (%zu,%zu): ", filename, cursor->line, cursor->column);
+        vfprintf(stderr, fmt, args);
+        indicate_file_area(fd, cursor->line, cursor->column, cursor->span);
+    } else {
+        fprintf(stderr, "%s: ", filename);
+        vfprintf(stderr, fmt, args);
+    }
+    va_end(args);
+}
+
+/*
+ * Return 1 from the enclosing function unless the cursor is on a token of
+ * the given type. Expands to a single statement; write it with a trailing
+ * semicolon.
+ */
+#define EXPECT_CRITICAL(type) \
+    do { \
+        if (expect(type)) { \
+            return 1; \
+        } \
+    } while (0)
+
+/*
+ * As EXPECT_CRITICAL, then advance the cursor past the matched token.
+ * Expands to a single statement; write it with a trailing semicolon.
+ */
+#define EXPECT_AND_DISCARD_CRITICAL(type) \
+    do { \
+        EXPECT_CRITICAL(type); \
+        kerchunk(); \
+    } while (0)
+
+/*
+ * Check that the cursor is on a token of type `e`.
+ * Returns 0 when it is. Otherwise reports the expected and observed token
+ * kinds ("end of file" when there is no current token) and returns 1. The
+ * cursor is never moved.
+ */
+static int expect(enum TOKEN_TYPE e)
+{
+    const char *want = NULL;
+    const char *got = NULL;
+
+    if (cursor && cursor->type == e)
+        return 0;
+
+    want = get_token_description(e);
+    got = cursor ? get_token_description(cursor->type) : "end of file";
+
+    emit("Error: Expected %s, got %s\n", want, got);
+    return 1;
+}
+
+/*
+ * Advance the cursor to the next token, or set it to NULL once the last
+ * token has been consumed.
+ */
+void kerchunk(void)
+{
+    /*
+     * Written as pos + 1 < count rather than pos < count - 1 so that an
+     * empty token list (count == 0) cannot wrap around to a huge bound.
+     */
+    if (tokens_pos + 1 < tokens_count) {
+        cursor = &tokens[++tokens_pos];
+    } else {
+        cursor = NULL;
+    }
+}
+
+/* Consume a TOKEN_EOL. Returns 1 (after reporting) if the cursor is elsewhere. */
+int parse_eol(void)
+{
+    if (expect(TOKEN_EOL))
+        return 1;
+
+    kerchunk();
+    return 0;
+}
+
+/* Consume a TOKEN_COMMA. Returns 1 (after reporting) if the cursor is elsewhere. */
+int parse_comma(void)
+{
+    if (expect(TOKEN_COMMA))
+        return 1;
+
+    kerchunk();
+    return 0;
+}
+
+/*
+ * Consume a TOKEN_NUMERIC and store its value in *imm.
+ * Returns 1 (after reporting) if the cursor is not on a numeric token.
+ */
+int parse_imm(uint16_t *imm)
+{
+    if (expect(TOKEN_NUMERIC))
+        return 1;
+
+    /* FIXME allow identifiers? or is that job of parent */
+    *imm = cursor->i_val;
+    kerchunk();
+    return 0;
+}
+
+/*
+ * Consume a TOKEN_IDENT and store a pointer to its text in *ident. The
+ * pointer is the token's own s_val, not a copy.
+ * Returns 1 (after reporting) if the cursor is not on an identifier.
+ */
+int parse_ident(char **ident)
+{
+    if (expect(TOKEN_IDENT))
+        return 1;
+
+    *ident = cursor->s_val;
+    kerchunk();
+    return 0;
+}
+
+/**
+ * FIXME move */
+
+/*
+ * Append a copy of `inst` to the instruction array.
+ * Returns 0 on success. On allocation failure the previous array is freed,
+ * the reason is printed, and 1 is returned.
+ */
+int add_instruction(struct instruction inst)
+{
+    struct instruction *previous = insts;
+
+    insts = realloc(insts, sizeof(struct instruction) * (insts_count + 1));
+    if (insts == NULL) {
+        free(previous);
+        perror("realloc");
+        return 1;
+    }
+
+    insts[insts_count++] = inst;
+    return 0;
+}
+
+/*
+ * Fill in `dest` with a heap copy of `name` and the current byte offset.
+ * Returns 0 on success, 1 (after printing the reason) if the copy cannot
+ * be allocated; `dest` is left untouched in that case.
+ */
+int new_label(struct label *dest, const char *name)
+{
+    char *copy = strdup(name);
+
+    if (copy == NULL) {
+        perror("strdup");
+        return 1;
+    }
+
+    dest->byte_offset = byte_offset;
+    dest->name = copy;
+    return 0;
+}
+
+/* Release the name string held by `l`. The struct itself is not freed. */
+void destroy_label(struct label *l)
+{
+ free(l->name);
+}
+/**/
+
+/*
+ * Consume a TOKEN_LABEL and record it in the label table at the current
+ * byte offset.
+ * Returns 0 on success, 1 if the cursor is not on a label, the name is
+ * already defined, or memory cannot be allocated.
+ */
+int parse_label()
+{
+    struct label *previous = labels;
+    struct label fresh;
+    size_t idx;
+
+    if (expect(TOKEN_LABEL))
+        return 1;
+
+    /* reject a name that is already in the table */
+    idx = 0;
+    while (idx < labels_count) {
+        if (strcmp(labels[idx].name, cursor->s_val) == 0) {
+            emit("Error: duplicate label\n");
+            return 1;
+        }
+        idx++;
+    }
+
+    labels = realloc(labels, sizeof(struct label) * (labels_count + 1));
+    if (labels == NULL) {
+        perror("realloc");
+        free(previous);
+        return 1;
+    }
+
+    if (new_label(&fresh, cursor->s_val) != 0)
+        return 1;
+
+    labels[labels_count++] = fresh;
+    kerchunk();
+    return 0;
+}
+
+/*
+ * Consume a TOKEN_REGISTER and store the register it names in *reg.
+ * Returns 0 on success, 1 (after reporting) if the cursor is not on a
+ * register token or the name is not a known single-character register.
+ */
+int parse_reg(enum REG *reg)
+{
+    EXPECT_CRITICAL(TOKEN_REGISTER);
+    /* valid registers are: $0, $1, $2, $3, $4, $5, $6, $7, $Z, $H
+     * the latter two are aliases for $0 and $7 respectively
+     */
+    if (strlen(cursor->s_val) != 1) {
+        /* %zu: strlen() returns size_t, for which %d is undefined behaviour */
+        emit("Error: incorrect register name length (%zu)\n", strlen(cursor->s_val));
+        return 1;
+    }
+
+    switch (cursor->s_val[0])
+    {
+    case 'Z': /* fallthrough */
+    case 'z': /* fallthrough */
+    case '0': *reg = REG_0; break;
+    case '1': *reg = REG_1; break;
+    case '2': *reg = REG_2; break;
+    case '3': *reg = REG_3; break;
+    case '4': *reg = REG_4; break;
+    case '5': *reg = REG_5; break;
+    case '6': *reg = REG_6; break;
+    case 'h': /* fallthrough */
+    case 'H': /* fallthrough */
+    case '7': *reg = REG_H; break;
+    default:
+        emit("Error: unknown register '%c'\n", cursor->s_val[0]);
+        return 1;
+    }
+    kerchunk();
+    return 0;
+}
+
+/*
+ * Record an I-type instruction whose immediate is a literal value. It is
+ * always stored as the narrow form and counted as 2 bytes.
+ * Returns 1 if the instruction cannot be stored.
+ */
+int parse_i_type(enum OPER oper, enum REG dest, enum REG left, uint16_t imm)
+{
+    struct instruction built;
+
+    built.type = INST_TYPE_NI;
+    built.inst.i.imm_is_ident = false;
+    built.inst.i.imm.value = imm;
+    built.inst.i.oper = oper;
+    built.inst.i.dest = dest;
+    built.inst.i.left = left;
+
+    if (add_instruction(built) != 0)
+        return 1;
+
+    /* FIXME detect narrow/wide */
+    byte_offset += 2;
+    return 0;
+}
+
+/*
+ * Record an I-type instruction whose immediate is an identifier, stored by
+ * pointer and not copied. It is always stored as the narrow form and counted
+ * as 2 bytes.
+ * Returns 1 if the instruction cannot be stored.
+ */
+int parse_i_ident_type(enum OPER oper, enum REG dest, enum REG left, char *ident)
+{
+    struct instruction built;
+
+    built.type = INST_TYPE_NI;
+    built.inst.i.imm_is_ident = true;
+    built.inst.i.imm.label = ident;
+    built.inst.i.oper = oper;
+    built.inst.i.dest = dest;
+    built.inst.i.left = left;
+
+    if (add_instruction(built) != 0)
+        return 1;
+
+    /* FIXME detect narrow/wide */
+    byte_offset += 2;
+    return 0;
+}
+
+/*
+ * Record an R-type instruction (operation plus three registers), counted
+ * as 2 bytes.
+ * Returns 1 if the instruction cannot be stored.
+ */
+int parse_r_type(enum OPER oper, enum REG dest, enum REG left, enum REG right)
+{
+    struct instruction built;
+
+    built.type = INST_TYPE_R;
+    built.inst.r.right = right;
+    built.inst.r.left = left;
+    built.inst.r.dest = dest;
+    built.inst.r.oper = oper;
+
+    if (add_instruction(built) != 0)
+        return 1;
+
+    /* FIXME #define */
+    byte_offset += 2;
+    return 0;
+}
+
+/*
+ * Record a jump-to-register instruction (condition plus register), counted
+ * as 2 bytes.
+ * Returns 1 if the instruction cannot be stored.
+ */
+int parse_j_reg_type(enum JCOND cond, enum REG reg)
+{
+    struct instruction built;
+
+    built.type = INST_TYPE_JR;
+    built.inst.jr.reg = reg;
+    built.inst.jr.cond = cond;
+
+    if (add_instruction(built) != 0)
+        return 1;
+
+    /* FIXME #define */
+    byte_offset += 2;
+    return 0;
+}
+
+/*
+ * Record a jump-to-immediate instruction whose target is a literal value,
+ * counted as 4 bytes.
+ * Returns 1 if the instruction cannot be stored.
+ */
+int parse_j_imm_type(enum JCOND cond, uint16_t imm)
+{
+    struct instruction built;
+
+    built.type = INST_TYPE_JI;
+    built.inst.ji.imm_is_ident = false;
+    built.inst.ji.imm.value = imm;
+    built.inst.ji.cond = cond;
+
+    if (add_instruction(built) != 0)
+        return 1;
+
+    /* FIXME #define */
+    byte_offset += 4;
+    return 0;
+}
+
+int parse_j_ident_type(enum JCOND cond, char *ident)
+{
+// fprintf(stderr, "<DEBUG>: JTYPE %s <%s>\n",
+// b_to_human[cond],
+// ident);
+ struct instruction i;
+
+ i.type = INST_TYPE_JI;
+ i.inst.ji.cond = cond;
+ i.inst.ji.imm_is_ident = true;
+ i.inst.ji.imm.label = ident;
+
+ if (add_instruction(i))