diff options
author | David Phillips <david@sighup.nz> | 2019-04-14 16:10:18 +1200 |
---|---|---|
committer | David Phillips <david@sighup.nz> | 2019-08-03 12:42:57 +1200 |
commit | ac8150b7601d9611818bb8b265a125a347a67004 (patch) | |
tree | aa1440c18551fa415af53daedde76536ac2d000d | |
download | toy-cpu-assembler-ac8150b7601d9611818bb8b265a125a347a67004.tar.xz |
First dump of working prototype
-rw-r--r-- | .gitignore | 3 | ||||
-rw-r--r-- | Makefile | 18 | ||||
-rw-r--r-- | assembler.c | 97 | ||||
-rw-r--r-- | instruction.h | 145 | ||||
-rw-r--r-- | lex.c | 373 | ||||
-rw-r--r-- | lex.h | 30 | ||||
-rw-r--r-- | output.c | 203 | ||||
-rw-r--r-- | parse.c | 653 | ||||
-rw-r--r-- | parse.h | 65 | ||||
-rw-r--r-- | tok_util.c | 78 | ||||
-rw-r--r-- | tok_util.h | 9 |
11 files changed, 1674 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..71e5da6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +*.o +*.bin +assembler diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..fa8f61b --- /dev/null +++ b/Makefile @@ -0,0 +1,18 @@ +OBJECTS = lex.o parse.o output.o assembler.o tok_util.o + +all: assembler + +assembler: $(OBJECTS) + +lex.o: lex.h + +parse.o: lex.h parse.h instruction.h tok_util.h + +output.o: parse.h + +tok_util.o: lex.h + + +.PHONY: clean +clean: + - rm -f assembler $(OBJECTS) diff --git a/assembler.c b/assembler.c new file mode 100644 index 0000000..eaf4d38 --- /dev/null +++ b/assembler.c @@ -0,0 +1,97 @@ +#include <stdio.h> +#include <stdint.h> + +#include "lex.h" +#include "parse.h" +#include "instruction.h" + +#if 0 +/** + * Types for intermediate storage of instructions + */ +struct r_type { + enum OPER operation; + enum REG dest; + enum REG left; + enum REG right; +}; + +struct i_type { /* covers WI and NI */ + enum OPER operation; + enum REG dest; + enum REG left; + int16_t immediate; +}; + +struct jr_type { + enum JCOND condition; + enum REG reg; +}; + +struct ji_type { + enum JCOND condition; + uint16_t immediate; +}; + +struct b_type { /* FIXME merge with ji_type? 
*/ + enum JCOND condition; + uint16_t immediate; /* capped to 10 bits by IS */ +}; + +/* Union for bringing above together */ +union instruction_union { + struct r_type r; + struct i_type i; + struct jr_type jr; + struct ji_type ji; + struct b_type b; +}; + +struct instruction { + enum INST_TYPE type; + union instruction_union i; +}; +/**/ +#endif + +int main(int argc, char **argv) +{ + int ret = 0; + FILE *fin = NULL; + FILE *fout = NULL; + + if (argc < 3) { + fprintf(stderr, "Syntax: %s <in.asm> <out.bin>\n", argv[0]); + return 1; + } + + if ((fin = fopen(argv[1], "r")) == NULL) { + fprintf(stderr, "Error opening %s: ", argv[1]); + perror("fopen"); + return 1; + } + + if ((fout = fopen(argv[2], "wb")) == NULL) { + fprintf(stderr, "Error opening %s: ", argv[2]); + perror("fopen"); + return 1; + } +/****/ + struct token *tokens = NULL; + size_t tok_count = 0; + + if ((tokens = lex(argv[1], fin, &tok_count)) == NULL) + return 2; + + struct instruction *insts; + size_t insts_count; + struct label *labels; + size_t labels_count; + if (ret = parse(argv[1], fin, &labels, &labels_count, tokens, tok_count, &insts, &insts_count)) + return ret; + + if (ret = output(fout, labels, labels_count, insts, insts_count)) + return ret; + + return 0; +} diff --git a/instruction.h b/instruction.h new file mode 100644 index 0000000..3ee18d9 --- /dev/null +++ b/instruction.h @@ -0,0 +1,145 @@ +#ifndef INSTRUCTION_H +#define INSTRUCTION_H + +/** + * Values used for software-only identification instruction types. Values not + * tied to machine language. Guaranteed unique. + */ +enum INST_TYPE { + INST_TYPE_R, + INST_TYPE_NI, + INST_TYPE_WI, + INST_TYPE_JR, + INST_TYPE_JI, + INST_TYPE_B +}; + +/** + * Masks for all four instruction types. 
Not guaranteed unique + */ +#define MASK_INST_RTYPE (0x0000) +#define MASK_INST_NITYPE (0x4000) +#define MASK_INST_WITYPE (0x8000) +#define MASK_INST_JTYPE (0xC000) + +/** + * ALU operation types + * R-type and I-type take 3-bit ALU oper as bits: + * xx___xxx xxxxxxxx + */ +enum OPER { + OPER_ADD = 0, + OPER_SUB = 1, + OPER_SHL = 2, + OPER_SHR = 3, + OPER_AND = 4, + OPER_OR = 5, + OPER_XOR = 6, + OPER_MUL = 7, +}; +#define OPER_SHAMT (11) +#define MASK_OPER(x) ((x) << OPER_SHAMT) + +static const char *oper_to_human[] = { + [OPER_ADD] = "add", + [OPER_SUB] = "sub", + [OPER_SHL] = "shl", + [OPER_SHR] = "shr", + [OPER_AND] = "and", + [OPER_OR ] = "or", + [OPER_XOR] = "xor", + [OPER_MUL] = "mul" +}; + +/** + * Masks for jump and branch conditions + * J-type instructions (jump, branch) take these as follows: + * xxx___xx xxxxxxxx + */ +enum JCOND { + JB_UNCOND = 0x0, + JB_NEVER = 0x1, + JB_ZERO = 0x2, + JB_NZERO = 0x3, + JB_CARRY = 0x4, + JB_NCARRY = 0x5, + JB_CARRYZ = 0x6, + JB_NCARRYZ = 0x7 +}; +#define JB_SHAMT (10) +#define MASK_JB_COND(x) ((x) << JB_SHAMT) +#define MASK_IS_JUMP (0 << 13) +#define MASK_IS_BRANCH (1 << 13) +#define MASK_JI (0x0 << 8) +#define MASK_JR (0x1 << 8) +#define MASK_JUMP_REGISTER(x) ((x) << 5) + +static const char *j_to_human[] = { + [JB_UNCOND] = "jmp", + [JB_NEVER] = "jn", + [JB_ZERO] = "jz", + [JB_NZERO] = "jnz", + [JB_CARRY] = "jc", + [JB_NCARRY] = "jnc", + [JB_CARRYZ] = "jcz", + [JB_NCARRYZ] = "jncz" +}; +static const char *b_to_human[] = { + [JB_UNCOND] = "bra", + [JB_NEVER] = "bn", + [JB_ZERO] = "bz", + [JB_NZERO] = "bnz", + [JB_CARRY] = "bc", + [JB_NCARRY] = "bnc", + [JB_CARRYZ] = "bcz", + [JB_NCARRYZ] = "bncz" +}; + +/** + * Register numbers used in all manner of instructions in varying positions + */ +enum REG { + REG_0 = 0, + REG_1 = 1, + REG_2 = 2, + REG_3 = 3, + REG_4 = 4, + REG_5 = 5, + REG_6 = 6, + REG_H = 7 +}; + +static const char *reg_to_human[] = { + [REG_0] = "$0", + [REG_1] = "$1", + [REG_2] = "$2", + [REG_3] = "$3", + 
[REG_4] = "$4", + [REG_5] = "$5", + [REG_6] = "$6", + [REG_H] = "$H", +}; + +/** + * Offset macro to turn REG_* into mask for register operands of R-type and + * I-type instructions + */ +/* destination reg: xxxxx___ xxxxxxxx */ +#define REG_DEST_OFFSET (8) +#define MASK_REG_DEST(x) ((x) << REG_DEST_OFFSET) + +/* left reg: xxxxxxxx ___xxxxx */ +#define REG_LEFT_OFFSET (5) +#define MASK_REG_LEFT(x) ((x) << REG_LEFT_OFFSET) + +/* right reg (R-type only): xxxxxxxx xxx___xx */ +#define REG_RIGHT_OFFSET (2) +#define MASK_REG_RIGHT(x) ((x) << REG_RIGHT_OFFSET) + +/* five LSb are narrow immediate value */ +#define MASK_NI_IMM(x) ((x) & 0x1F) + +/* 10 LSb is branch offset */ +#define MASK_B_OFFSET(x) ((x) & 0x3FF) + +#endif /* INSTRUCTION_H */ @@ -0,0 +1,373 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> + +#include "lex.h" + +#define emit_error(...) fprintf(stderr, "%s at (%zd,%zd): ", filename, 1 + line, 1 + column);\ + fprintf(stderr, __VA_ARGS__) + +static const char *keywords[] = { + "declare", + "byte", + "bytes", + "word", + "words", + "base", +}; + +static const char *filename = NULL; +static size_t line; +static size_t column; +static struct token* tokens; +static size_t tokens_count; +static char buffer[1024]; /* XXX limitation: sources must have lines < 1024 bytes */ + +static int expect(const char c) { + if (buffer[column] != c) { + emit_error("Expected '%c', got '%c'\n", c, buffer[column]); + return 1; + } + column++; + return 0; +} + +static void store_location(struct token *t) { + t->column = column + 1; + t->line = line + 1; +} + +static void eat_whitespace(void) { + size_t len = strlen(buffer); + while (column < len && strchr(" \t", buffer[column])) { + column++; + } +} + +static int add_token(struct token t) { + struct token *old_tok = tokens; + + tokens_count++; + tokens = realloc(tokens, sizeof(struct token) * tokens_count); + + if (!tokens) { + perror("realloc"); + free(old_tok); + return 1; + } + + 
tokens[tokens_count - 1] = t; +// printf("Adding token from (%d,%d ~%d), str %s int %d\n", t.line, t.column, t.span, t.s_val, t.i_val); + return 0; +} + +static int lex_comma(struct token *t) { + if (expect(',')) + return 1; + + t->span = 1; + t->type = TOKEN_COMMA; + return 0; +} + +static int lex_dot(struct token *t) { + if (expect('.')) + return 1; + + t->span = 1; + t->type = TOKEN_DOT; + return 0; +} + +static int lex_register(struct token *t) { + int i = 0; + if (expect('$')) + return 1; + + for (i = column; isalnum(buffer[i]); i++) { + ; + } + + t->s_val = strndup(&buffer[column], i - column); + if (!t->s_val) { + perror("strndup"); + return 1; + } + + t->span = i - column + 1; + t->type = TOKEN_REGISTER; + column = i; + return 0; +} + +static int lex_string(struct token *t) { + int i = 0; + if (expect('"')) + return 1; + + for (i = column; buffer[i] != '\0' && buffer[i] != '"'; i++) { + ; + } + + t->s_val = strndup(&buffer[column], i - column); + if (!t->s_val) { + perror("strndup"); + return 1; + } + + t->span = i - column + 2; /* +2 to include "" */ + t->type = TOKEN_STRING; + column = i; + if (expect('"')) + return 1; + + return 0; +} + +static int lex_char_escaped(struct token *t) { + if (expect('\\')) + return 1; + + switch (buffer[column]) { + case 'a': t->i_val = '\a'; break; + case 'b': t->i_val = '\b'; break; + case 'f': t->i_val = '\f'; break; + case 'n': t->i_val = '\n'; break; + case 'r': t->i_val = '\r'; break; + case 't': t->i_val = '\t'; break; + case 'v': t->i_val = '\v'; break; + + case '\\': t->i_val = '\\'; break; + case '\'': t->i_val = '\''; break; + default: + emit_error("Unknown escape sequence '\\%c'\n", buffer[column]); + break; + } + column++; + t->type = TOKEN_NUMERIC; + t->span = 4; /* len '\x' == 4 */ + return 0; +} + +static int lex_char(struct token *t) { + if (expect('\'')) + return 1; + + if (buffer[column] == '\\') { + lex_char_escaped(t); + } else { + t->type = TOKEN_NUMERIC; + t->span = 3; /* len 'x' == 3 */ + t->i_val = 
buffer[column]; + } + if (expect('\'')) + return 1; + + return 0; +} + +static int lex_num(struct token *t) +{ + char *num_s = NULL; + char *end = NULL; + size_t span = 0; + size_t prefix_span = 0; + int value = 0; + int base = 0; + int neg = 0; + + /* shave off a leading '-' now to make handling easier */ + if (buffer[column] == '-') { + neg = 1; + if (expect('-')) + return 1; + prefix_span++; + } + + if (!isdigit(buffer[column])) { + emit_error("Error: '%c' cannot start a numerical literal\n", buffer[column]); + return 1; + } + + /* check if hex */ + if ( column <= strlen(buffer) - 2 + && buffer[column] == '0' + && buffer[column + 1] == 'x') { + base = 16; + } + + span = strcspn(&buffer[column], " \n\t,"); + if (span == 0) { + emit_error("Error: malformed numerical literal\n"); + return 1; + } + num_s = strndup(&buffer[column], span); + if (!num_s) { + perror("malloc"); + return 1; + } + + /* if base still unknown, determine if from the last char of constant */ + char *suffix = &num_s[span - 1]; + if (base == 0) { + switch (*suffix) { + case 'h': base = 16; break; + case 'd': base = 10; break; + case 'o': base = 8; break; + case 'b': base = 2; break; + default: + if (!isdigit(*suffix)) { + emit_error("Error: '%c' is an invalid base suffix in numerical literal\n", *suffix); + free(num_s); + return 1; + } + break; + } + if (!isdigit(*suffix)) { + *suffix = '\0'; + } + } + + value = strtol(num_s, &end, base); + if (*end != '\0') { + emit_error("Error: malformed numerical literal\n", *end, base); + free(num_s); + return 1; + } + free(num_s); + + column += span; + + t->type = TOKEN_NUMERIC; + t->span = prefix_span + span; + t->i_val = (neg ? 
-value : value); + return 0; +} + +static int lex_misc(struct token *t) { + int i = 0; + int j = 0; + + if (!isalpha(buffer[column])) { + emit_error("Error: '%c' cannot start an identifier\n", buffer[column]); + return 1; + } + + for (i = column; isalnum(buffer[i]); i++) { + ; + } + + if (buffer[i] == ':') { + t->type = TOKEN_LABEL; + } else { + t->type = TOKEN_IDENT; + } + + t->s_val = strndup(&buffer[column], i - column); + if (!t->s_val) + return 1; + + for (j = 0; j < sizeof(keywords)/sizeof(*keywords); j++) + if (strcmp(t->s_val, keywords[j]) == 0) + t->type = TOKEN_KEYWORD; + + t->span = i - column; + column = i; + /* skip over colon, but don't have included it in the name */ + if (t->type == TOKEN_LABEL) { + column++; + } + return 0; +} + +static int lex_eol(struct token *t) { + column++; + t->type = TOKEN_EOL; + t->span = 1; + return 0; +} + +int lex_line(void) { + int ret = 0; + size_t len = strlen(buffer); + struct token tok; + + while (column < len) { + memset(&tok, 0, sizeof(tok)); + store_location(&tok); + switch (buffer[column]) { + case ';': + case '#': + case '!': + case '\n': + ret = lex_eol(&tok); + return add_token(tok); + case ' ': + case '\t': + eat_whitespace(); + continue; + /* + case '/': + FIXME look ahead * or / + eat_block_comment(); + break; + */ + case ',': + ret = lex_comma(&tok); + break; + case '.': + ret = lex_dot(&tok); + break; + case '$': + ret = lex_register(&tok); + break; + case '"': + ret = lex_string(&tok); + break; + case '\'': + ret = lex_char(&tok); + break; + case '-': + ret = lex_num(&tok); + break; + /* FIXME add support for expressions like `addi $0, $0, (1+2*3) */ + default: + if (isdigit(buffer[column])) { + ret = lex_num(&tok); + } else { + ret = lex_misc(&tok); + } + break; + } + if (ret) + return ret; + + if (add_token(tok)) + return 1; + } + return 0; +} + +struct token* lex(const char *filename_local, FILE *fin, size_t *len) +{ + filename = filename_local; + line = 0; + tokens = NULL; + tokens_count = 0; + + 
while (fgets(buffer, sizeof(buffer), fin)) { + column = 0; + if (lex_line()) { + return NULL; + } + line++; + } + if (!feof(fin)) { + perror("fgets"); + return NULL; + } + + *len = tokens_count; + return tokens; +} @@ -0,0 +1,30 @@ +#ifndef LEX_H +#define LEX_H + +#include <stdio.h> + +enum TOKEN_TYPE { + TOKEN_COMMA = 1, + TOKEN_DOT, /* starts an assembler directive */ + TOKEN_LABEL, /* label declaration */ + TOKEN_IDENT, /* identifier (not label decl) or instruction */ + TOKEN_KEYWORD, /* keyword used to tell the assembler special information */ + TOKEN_STRING, /* string literal */ + TOKEN_NUMERIC, /* numeric literal, incl literal chars */ + TOKEN_REGISTER, /* $0, $H, $1 */ + TOKEN_EOL /* end of line */ +}; + +struct token { + enum TOKEN_TYPE type; + /* line and column of the source file this token occurs at. 1-based. */ + size_t line; + size_t column; + size_t span; + char *s_val; + int i_val; +}; + +struct token* lex(const char *filename_local, FILE *fin, size_t *len); + +#endif /* LEX_H */ diff --git a/output.c b/output.c new file mode 100644 index 0000000..ff22956 --- /dev/null +++ b/output.c @@ -0,0 +1,203 @@ +#include <stdio.h> +#include <stdint.h> +#include <string.h> + +#include "parse.h" + +static size_t cur_byte; + +int generate_single_r_type(uint32_t *dest, struct r_type inst) +{ + uint32_t i = 0; + + i |= MASK_INST_RTYPE; + i |= MASK_OPER(inst.oper); + i |= MASK_REG_DEST(inst.dest); + i |= MASK_REG_LEFT(inst.left); + i |= MASK_REG_RIGHT(inst.right); + + *dest = i; + return 1; +} +int generate_single_ni_type(uint32_t *dest, struct i_type inst) +{ + uint32_t i = 0; + + i |= MASK_INST_NITYPE; + i |= MASK_OPER(inst.oper); + i |= MASK_REG_DEST(inst.dest); + i |= MASK_REG_LEFT(inst.left); + i |= MASK_NI_IMM(inst.imm.value); + + *dest = i; + return 1; +} + +int generate_single_wi_type(uint32_t *dest, struct i_type inst) +{ + uint32_t i = 0; + + i |= MASK_INST_WITYPE; + i |= MASK_OPER(inst.oper); + i |= MASK_REG_DEST(inst.dest); + i |= 
MASK_REG_LEFT(inst.left); + + /* two-word instruction - make room for the immediate */ + i <<= 16; + + i |= inst.imm.value; + + *dest = i; + return 2; +} + +int generate_single_ji_type(uint32_t *dest, struct ji_type inst) +{ + uint32_t i = 0; + + i |= MASK_INST_JTYPE; + i |= MASK_IS_JUMP; + i |= MASK_JB_COND(inst.cond); + i |= MASK_JI; + + /* two-word instruction - make room for the immediate */ + i <<= 16; + + i |= inst.imm.value; + + *dest = i; + return 2; +} + +int generate_single_jr_type(uint32_t *dest, struct jr_type inst) +{ + uint32_t i = 0; + + i |= MASK_INST_JTYPE; + i |= MASK_IS_JUMP; + i |= MASK_JB_COND(inst.cond); + i |= MASK_JR; + i |= MASK_JUMP_REGISTER(inst.reg); + + *dest = i; + return 1; +} + +int generate_single_b_type(uint32_t *dest, struct b_type inst) +{ + uint32_t i = 0; + + i |= MASK_INST_JTYPE; + i |= MASK_IS_BRANCH; + i |= MASK_JB_COND(inst.cond); + i |= MASK_B_OFFSET(inst.imm.value); + + *dest = i; + return 1; +} + + +int look_up_label(struct label *labels, size_t labels_count, uint16_t *val, const char *label) +{ + size_t i = 0; + + for (i = 0; i < labels_count; i++) { + if (strcmp(labels[i].name, label) == 0) { + *val = labels[i].byte_offset; + return 0; + } + } + + /* FIXME emit */ + fprintf(stderr, "Reference to undefined label `%s'\n", label); + return 1; +} + +int output_single(FILE *f, struct label *labels, size_t labels_count, struct instruction inst) +{ + int len = 0; + uint32_t i = 0; + uint16_t imm = 0; + + switch (inst.type) { + case INST_TYPE_R: + len = generate_single_r_type(&i, inst.inst.r); + break; + case INST_TYPE_NI: + if ( inst.inst.i.imm_is_ident + && look_up_label(labels, labels_count, &inst.inst.i.imm.value, inst.inst.i.imm.label)) + return 1; + + len = generate_single_ni_type(&i, inst.inst.i); + break; + case INST_TYPE_WI: + if ( inst.inst.i.imm_is_ident + && look_up_label(labels, labels_count, &inst.inst.i.imm.value, inst.inst.i.imm.label)) + return 1; + + len = generate_single_wi_type(&i, inst.inst.i); + break; + 
case INST_TYPE_JR: + len = generate_single_jr_type(&i, inst.inst.jr); + break; + case INST_TYPE_JI: + if ( inst.inst.ji.imm_is_ident + && look_up_label(labels, labels_count, &inst.inst.ji.imm.value, inst.inst.ji.imm.label)) + return 1; + + len = generate_single_ji_type(&i, inst.inst.ji); + break; + case INST_TYPE_B: + if ( inst.inst.b.imm_is_ident + && look_up_label(labels, labels_count, &inst.inst.b.imm.value, inst.inst.b.imm.label)) + return 1; + inst.inst.b.imm.value -= cur_byte; + if (inst.inst.b.imm.value % 2 != 0) { + fprintf(stderr, "Internal error: branch offset %d not a multiple of 2\n", inst.inst.b.imm.value); + } + inst.inst.b.imm.value /= 2; + + len = generate_single_b_type(&i, inst.inst.b); + break; + default: + fprintf(stderr, "Internal error: unhandled instruction type\n"); + break; + } + + if (len == 2) { +//#define RAW +#ifdef RAW + fputc(0xFF & (i >> 24), f); + fputc(0xFF & (i >> 16), f); +#else + fprintf(f, "%04x ", i >> 16); +#endif + } +#ifdef RAW + fputc(0xFF & (i >> 8), f); + fputc(0xFF & (i >> 0), f); +#else + fprintf(f, "%04x ", 0xFFFF & i); +#endif + + cur_byte += 2 * len; + return 0; +} + +int output(FILE *fout, struct label *labels, size_t label_count, struct instruction *insts, size_t insts_count) +{ + size_t i = 0; + cur_byte = 0; + +#ifndef RAW + fprintf(fout, "v2.0 raw\n"); +#endif + + printf("output: have %d instructions\n", insts_count); + + for (i = 0; i < insts_count; i++) + if (output_single(fout, labels, label_count, insts[i])) + return 1; + + return 0; +} @@ -0,0 +1,653 @@ +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <stdarg.h> +#include <string.h> +#include <stdint.h> +#include <stdbool.h> + +#include "lex.h" +#include "parse.h" +#include "instruction.h" +#include "tok_util.h" +#if 0 +struct label { + char *name; + size_t byte_offset; +}; + +union immediate { + const char *label; + int16_t value; +}; + +struct r_type { + enum OPER oper; + enum REG dest; + enum REG left; + enum REG right; +}; + 
+struct i_type { + enum OPER oper; + enum REG dest; + enum REG left; + bool imm_is_ident; + union immediate imm; +}; + +struct jr_type { + enum JCOND cond; + enum REG reg; +}; + +struct ji_type { + enum JCOND cond; + bool imm_is_ident; + union immediate imm; +}; + +struct b_type { + enum JCOND cond; + bool imm_is_ident; + union immediate imm; +}; + +struct instruction { + enum INST_TYPE type; + union instruction_u { + struct r_type r; /* catch-all R-Type */ + struct i_type i; /* I-type on immediate literal */ + struct jr_type jr; /* jump to register */ + struct ji_type ji; /* jump to immediate */ + struct b_type b; /* branch to immediate literal */ + } inst; +}; +#endif + +static const char *filename; +static FILE *fd; +static struct token *cursor; +static struct token *tokens; +static size_t tokens_pos; +static size_t tokens_count; +static struct label *labels; +static size_t labels_count; +static struct instruction *insts; +static size_t insts_count; +static size_t byte_offset; + +void emit(const char *fmt, ...) 
+{ + va_list args; + va_start(args, fmt); + if (cursor) { + fprintf(stderr, "%s at (%zd,%zd): ", filename, cursor->line, cursor->column); + vfprintf(stderr, fmt, args); + indicate_file_area(fd, cursor->line, cursor->column, cursor->span); + } else { + fprintf(stderr, "%s: ", filename); + vfprintf(stderr, fmt, args); + } + va_end(args); +} + +#define EXPECT_AND_DISCARD_CRITICAL(type)\ + do { \ + EXPECT_CRITICAL(type) \ + kerchunk(); \ + } while (0); + +#define EXPECT_CRITICAL(type)\ + if (expect(type)) { \ + return 1; \ + } + +static int expect(enum TOKEN_TYPE e) +{ + const char *expected_desc = "(internal error)"; + const char *observed_desc = "(internal error)"; + + if (!cursor || cursor->type != e) { + expected_desc = get_token_description(e); + if (cursor) { + observed_desc = get_token_description(cursor->type); + } else { + observed_desc = "end of file"; + } + emit("Error: Expected %s, got %s\n", expected_desc, observed_desc); + return 1; + } + + return 0; +} + +void kerchunk() +{ + if (tokens_pos < tokens_count - 1) { + cursor = &tokens[++tokens_pos]; + } else { + cursor = NULL; + } +} + +int parse_eol(void) +{ + EXPECT_AND_DISCARD_CRITICAL(TOKEN_EOL); + return 0; +} + +int parse_comma(void) +{ + EXPECT_AND_DISCARD_CRITICAL(TOKEN_COMMA); + return 0; +} + +int parse_imm(uint16_t *imm) +{ + EXPECT_CRITICAL(TOKEN_NUMERIC); + /* FIXME allow identifiers? 
or is that job of parent */ + *imm = cursor->i_val; + kerchunk(); + return 0; +} + +int parse_ident(char **ident) +{ + EXPECT_CRITICAL(TOKEN_IDENT); + *ident = cursor->s_val; + kerchunk(); + return 0; +} + +/** + * FIXME move */ + +int add_instruction(struct instruction inst) +{ + struct instruction *old_insts = insts; + insts = realloc(insts, (insts_count + 1) * sizeof(struct instruction)); + if (!insts) { + free(old_insts); + perror("realloc"); + return 1; + } + + insts[insts_count] = inst; + + insts_count++; + return 0; +} + +int new_label(struct label *dest, const char *name) +{ + char *name_clone = strdup(name); + + if (!name_clone) { + perror("strdup"); + return 1; + } + + dest->name = name_clone; + dest->byte_offset = byte_offset; + + return 0; +} + +void destroy_label(struct label *l) +{ + free(l->name); +} +/**/ + +int parse_label() +{ + size_t i = 0; + struct label l; + struct label *old_labels = labels; + + EXPECT_CRITICAL(TOKEN_LABEL); + + for (i = 0; i < labels_count; i++) { + if (strcmp(labels[i].name, cursor->s_val) == 0) { + emit("Error: duplicate label\n"); + return 1; + } + } + + labels = realloc(labels, (labels_count + 1) * sizeof(struct label)); + if (!labels) { + perror("realloc"); + free(old_labels); + return 1; + } + + if (new_label(&l, cursor->s_val)) + return 1; + + labels[labels_count] = l; + + labels_count++; + kerchunk(); + return 0; +} + +int parse_reg(enum REG *reg) +{ + EXPECT_CRITICAL(TOKEN_REGISTER); + /* valid registers are: $0, $1, $2, $3, $4, $5, $6, $7, $Z, $H + * the latter two are aliases for $0 and $7 respectively + */ + if (strlen(cursor->s_val) != 1) { + emit("Error: incorrect register name length (%d)\n", strlen(cursor->s_val)); + return 1; + } + + switch (cursor->s_val[0]) + { + case 'Z': /* fallthrough */ + case 'z': /* fallthrough */ + case '0': *reg = REG_0; break; + case '1': *reg = REG_1; break; + case '2': *reg = REG_2; break; + case '3': *reg = REG_3; break; + case '4': *reg = REG_4; break; + case '5': *reg = 
REG_5; break; + case '6': *reg = REG_6; break; + case 'h': /* fallthrough */ + case 'H': /* fallthrough */ + case '7': *reg = REG_H; break; + default: + emit("Error: unknown register '%c'\n", cursor->s_val[0]); + return 1; + } + kerchunk(); + return 0; +} + +int parse_i_type(enum OPER oper, enum REG dest, enum REG left, uint16_t imm) +{ +// fprintf(stderr, "<DEBUG>: ITYPE %s <%s> <%s> <%d>\n", +// oper_to_human[oper], +// reg_to_human[dest], +// reg_to_human[left], +// imm); + struct instruction i; + i.type = INST_TYPE_NI; + i.inst.i.oper = oper; + i.inst.i.dest = dest; + i.inst.i.left = left; + i.inst.i.imm_is_ident = false; + i.inst.i.imm.value = imm; + + if (add_instruction(i)) + return 1; + + /* FIXME detect narrow/wide */ + byte_offset += 2; + return 0; +} + +int parse_i_ident_type(enum OPER oper, enum REG dest, enum REG left, char *ident) +{ + struct instruction i; + i.type = INST_TYPE_NI; + i.inst.i.oper = oper; + i.inst.i.dest = dest; + i.inst.i.left = left; + i.inst.i.imm_is_ident = true; + i.inst.i.imm.label = ident; + + if (add_instruction(i)) + return 1; + + /* FIXME detect narrow/wide */ + byte_offset += 2; + return 0; +} + +int parse_r_type(enum OPER oper, enum REG dest, enum REG left, enum REG right) +{ +// fprintf(stderr, "<DEBUG>: RTYPE %s <%s> <%s> <%s>\n", +// oper_to_human[oper], +// reg_to_human[dest], +// reg_to_human[left], +// reg_to_human[right]); + + struct instruction i; + i.type = INST_TYPE_R; + i.inst.r.oper = oper; + i.inst.r.dest = dest; + i.inst.r.left = left; + i.inst.r.right = right; + + if (add_instruction(i)) + return 1; + + /* FIXME #define */ + byte_offset += 2; + return 0; +} + +int parse_j_reg_type(enum JCOND cond, enum REG reg) +{ +// fprintf(stderr, "<DEBUG>: JRTYPE %s <%s>\n", +// j_to_human[cond], +// reg_to_human[reg]); + + struct instruction i; + i.type = INST_TYPE_JR; + i.inst.jr.cond = cond; + i.inst.jr.reg = reg; + + if (add_instruction(i)) + return 1; + + /* FIXME #define */ + byte_offset += 2; + return 0; +} + +int 
parse_j_imm_type(enum JCOND cond, uint16_t imm) +{ +// fprintf(stderr, "<DEBUG>: JITYPE %s <0x%04x>\n", +// j_to_human[cond], +// imm); + + struct instruction i; + + i.type = INST_TYPE_JI; + i.inst.ji.cond = cond; + i.inst.ji.imm_is_ident = false; + i.inst.ji.imm.value = imm; + + if (add_instruction(i)) + return 1; + + /* FIXME #define */ + byte_offset += 4; + return 0; +} + +int parse_j_ident_type(enum JCOND cond, char *ident) +{ +// fprintf(stderr, "<DEBUG>: JTYPE %s <%s>\n", +// b_to_human[cond], +// ident); + struct instruction i; + + i.type = INST_TYPE_JI; + i.inst.ji.cond = cond; + i.inst.ji.imm_is_ident = true; + i.inst.ji.imm.label = ident; + + if (add_instruction(i)) |