summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Phillips <david@sighup.nz>2019-08-01 23:12:34 +1200
committerDavid Phillips <david@sighup.nz>2019-08-03 12:44:12 +1200
commit28d6a88c02f10b75fb4c5cb46178d2ef71629494 (patch)
treeecea67672dfc7cb638e9c8bd474850094ca5064b
parent15b556038033821aa85392f4013d68999f14e231 (diff)
downloadtoy-cpu-assembler-28d6a88c02f10b75fb4c5cb46178d2ef71629494.tar.xz
Refactor disassembler to move data through instruction list
This refactors the disassembler into two stages with the list of struct instruction (currently also output by the parse stage) as an "intermediate language" between disassembly and assembler output. This should make these units, especially the "machine code => IL" section, more reusable for future soft emulation work.
-rw-r--r--Makefile4
-rw-r--r--disassembler.c202
-rw-r--r--input_bin.c173
-rw-r--r--input_bin.h6
-rw-r--r--output_asm.c134
-rw-r--r--output_asm.h6
-rwxr-xr-xtest/full-pipeline/run-full-pipeline.sh2
7 files changed, 369 insertions, 158 deletions
diff --git a/Makefile b/Makefile
index 6edd47d..843ccca 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
ASM_OBJECTS = lex.o parse.o output.o assembler.o util.o
-DISASM_OBJECTS = util.o
+DISASM_OBJECTS = disassembler.o util.o input_bin.o output_asm.o
all: assembler disassembler
@@ -19,5 +19,5 @@ util.o: lex.h instruction.h
clean:
- rm -f assembler disasm $(ASM_OBJECTS)
-test:
+test: all
make -C test test
diff --git a/disassembler.c b/disassembler.c
index ea370ba..2ceee3f 100644
--- a/disassembler.c
+++ b/disassembler.c
@@ -1,179 +1,71 @@
#include <stdio.h>
-#include <stdlib.h>
#include <stdint.h>
-#include <errno.h>
+#include <string.h>
-#include "util.h"
+#include "instruction.h"
+#include "parse.h"
+#include "input_bin.h"
+#include "output_asm.h"
+/*
+ * Print a one-line usage summary to stderr.
+ * argv0 is the program name substituted into the message.
+ */
void print_help(const char *argv0)
{
- fprintf(stderr, "Syntax: %s <in.bin>\0 <out.asm>\n", argv0);
+	/* main() accepts an optional leading "-q", so the usage line shows it */
+	fprintf(stderr, "Syntax: %s [-q] <in.bin> <out.asm>\n", argv0);
}
-int disasm_rtype(uint16_t i, uint16_t unused)
-{
- const char *oper = get_asm_from_oper(GET_OPER(i));
- const char *dest = get_asm_from_reg(GET_REG_DEST(i));
- const char *left = get_asm_from_reg(GET_REG_LEFT(i));
- const char *right = get_asm_from_reg(GET_REG_RIGHT(i));
-
- /* FIXME add special cases:
- * - nop
- * - neg
- * - mv
- */
- printf("%s %s, %s, %s\n", oper, dest, left, right);
-
- return 0;
-}
-
-int disasm_nitype(uint16_t i, uint16_t unused)
-{
- const char *oper = get_asm_from_oper(GET_OPER(i));
- const char *dest = get_asm_from_reg(GET_REG_DEST(i));
- const char *left = get_asm_from_reg(GET_REG_LEFT(i));
- uint16_t imm = GET_NI_IMM(i);
- /** FIXME add special cases:
- * - ldi
- */
- /* FIXME review sign around immediate value */
- printf("%si %s, %s, 0x%x\n", oper, dest, left, imm);
- return 0;
-}
-
-int disasm_witype(uint16_t i, uint16_t unused)
-{
- const char *oper = get_asm_from_oper(GET_OPER(i));
- const char *dest = get_asm_from_reg(GET_REG_DEST(i));
- const char *left = get_asm_from_reg(GET_REG_LEFT(i));
- uint16_t imm = GET_NI_IMM(i);
- /* FIXME handle wide imm value */
- printf("%si, %s, %s, 0x%x\n", "oper", dest, left, "?");
- return 0;
-}
-
-int disasm_jreg(uint16_t i, uint16_t unused)
-{
- const char *inst = get_asm_from_j(GET_JB_COND(i));
- const char *reg = get_asm_from_reg(GET_JUMP_REG(i));
- printf("%s %s\n", inst, reg);
- return 0;
-}
-
-int disasm_bimm(uint16_t i, uint16_t pc)
-{
- const char *inst = get_asm_from_b(GET_JB_COND(i));
- struct {signed int s:10;} sign;
- int offset = sign.s = GET_B_OFFSET(i);
- printf("%s 0x%x\n", inst, pc + 2 * offset);
- return 0;
-}
-
-int disasm_jimm(uint16_t i, uint16_t imm)
-{
- const char *inst = get_asm_from_j(GET_JB_COND(i));
- printf("%s 0x%x\n", inst, imm);
- return 0;
-}
-
-int disasm(FILE *f)
+/*
+ * Entry point: decode the machine code in <in.bin> into an instruction list
+ * (disasm) and write it as assembly text to <out.asm> (output_asm).
+ *
+ * An optional leading "-q" clears error_ret, so every failure after argument
+ * parsing exits with status 0 instead of 1; with "-q" a wrong argument count
+ * also exits 0. Returns 0 on success.
+ */
+int main(int argc, char **argv)
{
+	int error_ret = 1;
int ret = 0;
- size_t offs = 0;
- size_t extra_read = 0;
- uint16_t inst = 0;
- uint16_t extra_arg = 0;
- uint8_t c[2] = { 0 };
- int (*disasm_inst)(uint16_t, uint16_t);
+	const char *path_in = NULL;
+	const char *path_out = NULL;
+	FILE *fin = NULL;
+	FILE *fout = NULL;
- while (!feof(f) && fread(c, sizeof(c), 1, f) == 1) {
- extra_read = 0;
- inst = c[0] << 8 | c[1];
- switch (GET_INST_TYPE(inst)) {
- case INST_TYPE_RTYPE:
- disasm_inst = disasm_rtype;
- break;
- case INST_TYPE_NITYPE:
- disasm_inst = disasm_nitype;
- break;
- case INST_TYPE_WITYPE:
- if (fread(c, sizeof(c), 1, f) != 1) {
- ret = -errno;
- break;
- }
- extra_read = sizeof(c);
- extra_arg = c[0] << 16 | c[1];
- disasm_inst = disasm_witype;
- break;
- case INST_TYPE_JTYPE:
- /* J Type can be split into three further subtypes:
- * - branch (always immediate, 2 bytes)
- * - jump reg (2 bytes)
- * - jump immediate (4 bytes)
- */
- if (inst & MASK_IS_BRANCH) {
- disasm_inst = disasm_bimm;
- extra_arg = offs;
- } else {
- if (inst & MASK_JR) {
- disasm_inst = disasm_jreg;
- } else {
- if (fread(c, sizeof(c), 1, f) != 1) {
- ret = -errno;
- break;
- }
- extra_arg = c[0] << 16 | c[1];
- extra_read = sizeof(c);
- disasm_inst = disasm_jimm;
- }
- }
- break;
- default:
- printf("Unhandled instruction at byte %zd\n", offs);
- ret = -1;
- break;
- }
- /* Fall out of loop if picking a handler errored */
- if (ret) {
- break;
- }
- printf("%04x:\t", offs);
- ret = disasm_inst(inst, extra_arg);
- if (ret < 0) {
- fprintf(stderr, "Error handling instruction at byte %zd\n", offs);
- break;
- }
- offs += sizeof(c) + extra_read;
+	if (argc < 3) {
+		print_help(argv[0]);
+		return 1;
}
- if (!feof(f)) {
- perror("fread");
- ret = errno;
+	if (strcmp(argv[1], "-q") == 0) {
+		if (argc != 4) {
+			print_help(argv[0]);
+			return 0;
+		}
+		error_ret = 0;
+		path_in = argv[2];
+		path_out = argv[3];
} else {
- /* print out final, empty label */
- printf("%04x:\n", offs);
+		path_in = argv[1];
+		path_out = argv[2];
}
- return ret;
-}
-int main(int argc, char **argv)
-{
- int ret = 0;
- FILE *fin = NULL;
-
- if (argc < 2) {
- print_help(argv[0]);
- return 1;
+	/* The input is raw machine code: open it in binary mode, as before */
+	if ((fin = fopen(path_in, "rb")) == NULL) {
+		fprintf(stderr, "Error opening %s: ", path_in);
+		perror("fopen");
+		return error_ret;
}
- if (!(fin = fopen(argv[1], "rb"))) {
+	if ((fout = fopen(path_out, "w")) == NULL) {
+		fprintf(stderr, "Error opening %s: ", path_out);
perror("fopen");
- return 1;
+		/* Do not leak the already-open input stream */
+		fclose(fin);
+		return error_ret;
}
- ret = disasm(fin);
+/****/
+	/* FIXME package these things into `tok_result`, `parse_result` etc */
+	struct instruction *insts = NULL;
+	size_t insts_count = 0;
+	struct label *labels = NULL;
+	size_t labels_count = 0;
+
+	/* Stage 1: machine code => instruction list */
+	ret = disasm(fin, &insts, &insts_count);
- fclose(fin);
- return ret;
+	/* Stage 2: instruction list => assembly text (skipped if stage 1 failed) */
+	if (ret == 0)
+		ret = output_asm(fout, labels, labels_count, insts, insts_count);
+
+	fclose(fin);
+	/* fclose flushes buffered output; a failure here means the .asm is incomplete */
+	if (fclose(fout) != 0) {
+		fprintf(stderr, "Error writing %s: ", path_out);
+		perror("fclose");
+		ret = 1;
+	}
+
+	/* Collapse any non-zero status to 1, or to 0 when -q was given */
+	return error_ret && ret;
}
diff --git a/input_bin.c b/input_bin.c
new file mode 100644
index 0000000..54a99f3
--- /dev/null
+++ b/input_bin.c
@@ -0,0 +1,173 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include "parse.h"
+
+/*
+ * Decode a register-type instruction word.
+ *
+ * i      - the 16-bit instruction word
+ * unused - ignored; present so all decoders share one signature
+ * inst   - output: tagged INST_TYPE_R with operation and three registers
+ */
+static void disasm_rtype(uint16_t i, uint16_t unused, struct instruction *inst)
+{
+	struct r_type *r = &inst->inst.r;
+
+	inst->type = INST_TYPE_R;
+
+	r->oper = GET_OPER(i);
+	r->dest = GET_REG_DEST(i);
+	r->left = GET_REG_LEFT(i);
+	r->right = GET_REG_RIGHT(i);
+}
+
+/*
+ * Decode a narrow-immediate instruction, whose immediate is taken from the
+ * instruction word itself via GET_NI_IMM.
+ *
+ * i      - the 16-bit instruction word
+ * unused - ignored; present so all decoders share one signature
+ * inst   - output: tagged INST_TYPE_NI with a numeric (non-label) immediate
+ */
+static void disasm_nitype(uint16_t i, uint16_t unused, struct instruction *inst)
+{
+	struct i_type *ni = &inst->inst.i;
+
+	inst->type = INST_TYPE_NI;
+
+	ni->oper = GET_OPER(i);
+	ni->dest = GET_REG_DEST(i);
+	ni->left = GET_REG_LEFT(i);
+	ni->imm.value = GET_NI_IMM(i);
+	/* Decoded machine code never refers to a label by name */
+	ni->imm_is_ident = 0;
+}
+
+/*
+ * Decode a wide-immediate instruction.
+ *
+ * i    - the 16-bit instruction word
+ * imm  - the immediate value, supplied by the caller
+ * inst - output: tagged INST_TYPE_WI with a numeric (non-label) immediate
+ */
+static void disasm_witype(uint16_t i, uint16_t imm, struct instruction *inst)
+{
+	struct i_type *wi = &inst->inst.i;
+
+	inst->type = INST_TYPE_WI;
+
+	wi->oper = GET_OPER(i);
+	wi->dest = GET_REG_DEST(i);
+	wi->left = GET_REG_LEFT(i);
+	wi->imm.value = imm;
+	/* Decoded machine code never refers to a label by name */
+	wi->imm_is_ident = 0;
+}
+
+/*
+ * Decode a jump-to-register instruction.
+ *
+ * i      - the 16-bit instruction word
+ * unused - ignored; present so all decoders share one signature
+ * inst   - output: tagged INST_TYPE_JR with condition and target register
+ */
+static void disasm_jreg(uint16_t i, uint16_t unused, struct instruction *inst)
+{
+	struct jr_type *jr = &inst->inst.jr;
+
+	inst->type = INST_TYPE_JR;
+
+	jr->cond = GET_JB_COND(i);
+	jr->reg = GET_JUMP_REG(i);
+}
+
+/*
+ * Decode a jump-to-immediate instruction.
+ *
+ * i    - the 16-bit instruction word (supplies the condition)
+ * imm  - the jump target, supplied by the caller
+ * inst - output: tagged INST_TYPE_JI with a numeric (non-label) target
+ */
+static void disasm_jimm(uint16_t i, uint16_t imm, struct instruction *inst)
+{
+	struct ji_type *ji = &inst->inst.ji;
+
+	inst->type = INST_TYPE_JI;
+
+	ji->cond = GET_JB_COND(i);
+	ji->imm.value = imm;
+	/* Decoded machine code never refers to a label by name */
+	ji->imm_is_ident = 0;
+}
+
+/*
+ * Decode a branch instruction and resolve its target address.
+ *
+ * i    - the 16-bit instruction word
+ * pc   - byte offset of this instruction, supplied by the caller
+ * inst - output: tagged INST_TYPE_B; the immediate is pc + 2 * offset
+ */
+static void disasm_bimm(uint16_t i, uint16_t pc, struct instruction *inst)
+{
+	/* Storing into a 10-bit signed bit-field sign-extends the raw offset */
+	struct { signed int s:10; } ext;
+	struct b_type *b = &inst->inst.b;
+	int rel;
+
+	ext.s = GET_B_OFFSET(i);
+	rel = ext.s;
+
+	inst->type = INST_TYPE_B;
+
+	b->cond = GET_JB_COND(i);
+	b->imm.value = pc + 2 * rel;
+	/* Decoded machine code never refers to a label by name */
+	b->imm_is_ident = 0;
+}
+
+
+/**
+ * FIXME move and factor out with parse.c
+ *
+ * File-scope list of decoded instructions: insts points at insts_count
+ * elements, or is NULL when the list is empty.
+ */
+static struct instruction *insts = NULL;
+static size_t insts_count = 0;
+
+/**
+ * Append a copy of inst to the end of the instruction list.
+ *
+ * Returns 0 on success. On allocation failure the whole list is released,
+ * pointer and count are both reset so they stay consistent, and 1 is
+ * returned.
+ */
+static int add_instruction(struct instruction inst)
+{
+	struct instruction *grown;
+
+	grown = realloc(insts, (insts_count + 1) * sizeof(*insts));
+	if (!grown) {
+		/* Report before free() so errno still describes the realloc failure */
+		perror("realloc");
+		free(insts);
+		insts = NULL;
+		/* Never leave a non-zero count paired with a NULL array */
+		insts_count = 0;
+		return 1;
+	}
+
+	insts = grown;
+	insts[insts_count] = inst;
+	insts_count++;
+
+	return 0;
+}
+
+
+
+/* FIXME needs whatsit arguments. f, tok, tok length */
+/*
+ * Decode every instruction readable from f and append each one to the
+ * file-scope instruction list via add_instruction().
+ *
+ * Instructions are read as 16-bit words, first byte most significant.
+ * Wide-immediate and jump-immediate instructions carry a second 16-bit word
+ * holding the immediate. A trailing partial word at end of file is ignored.
+ *
+ * Returns 0 on success, 1 if add_instruction() fails, or a negative value on
+ * a read error, an unhandled instruction type, or an instruction whose
+ * second word is missing.
+ */
+static int disasm_file(FILE *f)
+{
+	size_t offs = 0;
+	uint8_t c[2] = { 0 };
+
+	while (fread(c, sizeof(c), 1, f) == 1) {
+		const uint16_t inst = (uint16_t)(c[0] << 8 | c[1]);
+		struct instruction i = { 0 };
+		void (*disasm_inst)(uint16_t, uint16_t, struct instruction*) = NULL;
+		uint16_t extra_arg = 0;
+		size_t extra_read = 0;
+		int has_imm_word = 0;
+
+		switch (GET_INST_TYPE(inst)) {
+		case INST_TYPE_RTYPE:
+			disasm_inst = disasm_rtype;
+			break;
+		case INST_TYPE_NITYPE:
+			disasm_inst = disasm_nitype;
+			break;
+		case INST_TYPE_WITYPE:
+			disasm_inst = disasm_witype;
+			has_imm_word = 1;
+			break;
+		case INST_TYPE_JTYPE:
+			/* J Type can be split into three further subtypes:
+			 *  - branch (always immediate, 2 bytes)
+			 *  - jump reg (2 bytes)
+			 *  - jump immediate (4 bytes)
+			 */
+			if (inst & MASK_IS_BRANCH) {
+				disasm_inst = disasm_bimm;
+				/* Branch targets are resolved relative to this offset */
+				extra_arg = (uint16_t)offs;
+			} else if (inst & MASK_JR) {
+				disasm_inst = disasm_jreg;
+			} else {
+				disasm_inst = disasm_jimm;
+				has_imm_word = 1;
+			}
+			break;
+		default:
+			fprintf(stderr, "Unhandled instruction at byte %zu\n", offs);
+			return -1;
+		}
+
+		if (has_imm_word) {
+			if (fread(c, sizeof(c), 1, f) != 1) {
+				/* A real I/O error is reported once, after the loop */
+				if (ferror(f))
+					break;
+				/* Plain EOF: errno is not meaningful, so fail explicitly */
+				fprintf(stderr, "Truncated instruction at byte %zu\n", offs);
+				return -1;
+			}
+			/* Same byte order as the instruction word: high byte first */
+			extra_arg = (uint16_t)(c[0] << 8 | c[1]);
+			extra_read = sizeof(c);
+		}
+
+		disasm_inst(inst, extra_arg, &i);
+
+		if (add_instruction(i))
+			return 1;
+
+		offs += sizeof(c) + extra_read;
+	}
+
+	if (ferror(f)) {
+		/* Save errno: perror() itself may change it */
+		const int err = errno;
+
+		perror("fread");
+		return err ? -err : -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Public entry point: decode the machine code in f into an instruction list.
+ *
+ * f       - stream to read machine code from
+ * i       - output: start of the decoded instruction array
+ * i_count - output: number of entries in that array
+ *
+ * Both outputs are written whether or not decoding succeeded. Returns the
+ * status of disasm_file() unchanged.
+ */
+int disasm(FILE *f, struct instruction **i, size_t *i_count)
+{
+	const int status = disasm_file(f);
+
+	/* Hand out the file-scope list built up by disasm_file() */
+	*i_count = insts_count;
+	*i = insts;
+
+	return status;
+}
diff --git a/input_bin.h b/input_bin.h
new file mode 100644
index 0000000..7683835
--- /dev/null
+++ b/input_bin.h
@@ -0,0 +1,6 @@
+#ifndef INPUT_BIN_H
+#define INPUT_BIN_H
+
+/* Pull in what the prototype needs so the header compiles on its own */
+#include <stddef.h> /* size_t */
+#include <stdio.h>  /* FILE */
+
+struct instruction;
+
+/*
+ * Decode the machine code read from f into a list of instructions.
+ *
+ * f       - stream to read machine code from
+ * i       - output: start of the decoded instruction array
+ * i_count - output: number of entries in that array
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+int disasm(FILE *f, struct instruction **i, size_t *i_count);
+
+#endif /* INPUT_BIN_H */
diff --git a/output_asm.c b/output_asm.c
new file mode 100644
index 0000000..b1ccfc9
--- /dev/null
+++ b/output_asm.c
@@ -0,0 +1,134 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "parse.h"
+#include "util.h"
+
+/*
+ * Size in bytes of each instruction type, indexed by INST_TYPE_*.
+ * Read-only lookup table, hence const.
+ */
+static const size_t inst_sizes[] = {
+	[INST_TYPE_R]  = 2,
+	[INST_TYPE_NI] = 2,
+	[INST_TYPE_WI] = 4,
+	[INST_TYPE_JR] = 2,
+	[INST_TYPE_JI] = 4,
+	[INST_TYPE_B]  = 2,
+};
+
+/*
+ * Write one register-type instruction to f as
+ * "<oper> <dest>, <left>, <right>" followed by a newline.
+ */
+void emit_single_r_type(FILE *f, struct r_type inst)
+{
+	const char *mnemonic;
+	const char *rd;
+	const char *rl;
+	const char *rr;
+
+	mnemonic = get_asm_from_oper(inst.oper);
+	rd = get_asm_from_reg(inst.dest);
+	rl = get_asm_from_reg(inst.left);
+	rr = get_asm_from_reg(inst.right);
+
+	fprintf(f, "%s %s, %s, %s\n", mnemonic, rd, rl, rr);
+}
+
+/*
+ * Write one immediate-type instruction to f as
+ * "<oper>i <dest>, <left>, 0x<imm>" followed by a newline.
+ * The immediate is always printed as its numeric value in hex.
+ */
+void emit_single_i_type(FILE *f, struct i_type inst)
+{
+	const char *mnemonic;
+	const char *rd;
+	const char *rl;
+
+	mnemonic = get_asm_from_oper(inst.oper);
+	rd = get_asm_from_reg(inst.dest);
+	rl = get_asm_from_reg(inst.left);
+
+	/* The literal 'i' after %s turns the base mnemonic into its immediate form */
+	fprintf(f, "%si %s, %s, 0x%x\n", mnemonic, rd, rl, inst.imm.value);
+}
+
+/*void emit_single_wi_type(FILE *f, struct i_type inst)
+{
+ const char *oper = get_asm_from_oper(inst.oper);
+ const char *dest = get_asm_from_oper(inst.dest);
+ const char *left = get_asm_from_oper(inst.left);
+
+ fprintf(f, "%si %s, %s, 0x%s\n", oper, dest, left, inst.imm.value);
+}*/
+
+/*
+ * Write one jump-to-immediate instruction to f as "<cond> 0x<target>"
+ * followed by a newline.
+ */
+void emit_single_ji_type(FILE *f, struct ji_type inst)
+{
+	const char *cond = get_asm_from_j(inst.cond);
+
+	/*
+	 * imm.value is an integer, so it must be formatted with %x; passing it
+	 * to %s is undefined behaviour. Same form as emit_single_b_type().
+	 */
+	fprintf(f, "%s 0x%x\n", cond, (unsigned int)inst.imm.value);
+}
+
+/*
+ * Write one jump-to-register instruction to f as "<cond> <reg>" followed by
+ * a newline.
+ */
+void emit_single_jr_type(FILE *f, struct jr_type inst)
+{
+	const char *mnemonic;
+	const char *target;
+
+	mnemonic = get_asm_from_j(inst.cond);
+	target = get_asm_from_reg(inst.reg);
+
+	fprintf(f, "%s %s\n", mnemonic, target);
+}
+
+/*
+ * Write one branch instruction to f as "<cond> 0x<target>" followed by a
+ * newline.
+ */
+void emit_single_b_type(FILE *f, struct b_type inst)
+{
+	const char *mnemonic;
+
+	mnemonic = get_asm_from_b(inst.cond);
+
+	fprintf(f, "%s 0x%x\n", mnemonic, inst.imm.value);
+}
+
+
+/*
+ * Find the label called `label` among the first labels_count entries of
+ * labels, searching front to back.
+ *
+ * On a match its byte_offset is stored in *val and 0 is returned. If nothing
+ * matches, a message is printed to stderr, *val is left untouched and 1 is
+ * returned.
+ */
+int look_up_label(struct label *labels, size_t labels_count, uint16_t *val, const char *label)
+{
+	size_t idx = 0;
+
+	while (idx < labels_count) {
+		if (strcmp(labels[idx].name, label) == 0) {
+			*val = labels[idx].byte_offset;
+			return 0;
+		}
+		idx++;
+	}
+
+	/* FIXME emit */
+	fprintf(stderr, "Reference to undefined label `%s'\n", label);
+	return 1;
+}
+
+/*
+ * Write one instruction to f as a line of assembly and advance *cur_byte by
+ * that instruction's size.
+ *
+ * If an immediate is marked as an identifier (imm_is_ident), it is first
+ * replaced by the label's byte offset from labels/labels_count.
+ *
+ * Returns 0 on success, 1 if a label cannot be resolved or the instruction
+ * type is not recognised. Nothing is added to *cur_byte on failure.
+ */
+int output_single(FILE *f, size_t *cur_byte, struct label *labels, size_t labels_count, struct instruction inst)
+{
+	switch (inst.type) {
+	case INST_TYPE_R:
+		emit_single_r_type(f, inst.inst.r);
+		break;
+	case INST_TYPE_NI:
+	case INST_TYPE_WI:
+		if (   inst.inst.i.imm_is_ident
+		    && look_up_label(labels, labels_count, &inst.inst.i.imm.value, inst.inst.i.imm.label))
+			return 1;
+
+		emit_single_i_type(f, inst.inst.i);
+		break;
+	case INST_TYPE_JR:
+		emit_single_jr_type(f, inst.inst.jr);
+		break;
+	case INST_TYPE_JI:
+		if (   inst.inst.ji.imm_is_ident
+		    && look_up_label(labels, labels_count, &inst.inst.ji.imm.value, inst.inst.ji.imm.label))
+			return 1;
+
+		emit_single_ji_type(f, inst.inst.ji);
+		break;
+	case INST_TYPE_B:
+		if (   inst.inst.b.imm_is_ident
+		    && look_up_label(labels, labels_count, &inst.inst.b.imm.value, inst.inst.b.imm.label))
+			return 1;
+
+		emit_single_b_type(f, inst.inst.b);
+		break;
+	default:
+		/*
+		 * Fail here: an unknown type must not be used to index
+		 * inst_sizes below, and must not be reported as success.
+		 */
+		fprintf(stderr, "Internal error: unhandled instruction type\n");
+		return 1;
+	}
+
+	*cur_byte += inst_sizes[inst.type];
+
+	return 0;
+}
+
+/*
+ * Write insts_count instructions from insts to fout as assembly text, in
+ * order, one per output_single() call.
+ *
+ * labels/label_count are passed through for resolving identifier operands.
+ * Stops at the first instruction that fails and returns 1; returns 0 once
+ * every instruction has been written.
+ */
+int output_asm(FILE *fout, struct label *labels, size_t label_count, struct instruction *insts, size_t insts_count)
+{
+	size_t cur_byte = 0;
+	size_t idx = 0;
+
+	while (idx < insts_count) {
+		if (output_single(fout, &cur_byte, labels, label_count, insts[idx]))
+			return 1;
+		idx++;
+	}
+
+	return 0;
+}
diff --git a/output_asm.h b/output_asm.h
new file mode 100644
index 0000000..b4afd2a
--- /dev/null
+++ b/output_asm.h
@@ -0,0 +1,6 @@
+#ifndef OUTPUT_ASM_H
+#define OUTPUT_ASM_H
+
+/* Pull in what the prototype needs so the header compiles on its own */
+#include <stddef.h> /* size_t */
+#include <stdio.h>  /* FILE */
+
+struct label;
+struct instruction;
+
+/*
+ * Write insts_count instructions from insts to fout as assembly text.
+ *
+ * labels/label_count are used to resolve operands that name a label.
+ * Returns 0 on success and 1 if any instruction could not be written.
+ */
+int output_asm(FILE *fout, struct label *labels, size_t label_count, struct instruction *insts, size_t insts_count);
+
+#endif /* OUTPUT_ASM_H */
diff --git a/test/full-pipeline/run-full-pipeline.sh b/test/full-pipeline/run-full-pipeline.sh
index 8598a8d..aeb7c4f 100755
--- a/test/full-pipeline/run-full-pipeline.sh
+++ b/test/full-pipeline/run-full-pipeline.sh
@@ -45,7 +45,7 @@ for first_stage_asm in *.asm ; do
fi
# Disassemble test code and re-assemble that disassembly
- if ! "$DISASM" "$first_stage_bin" > "$second_stage_asm" ; then
+ if ! "$DISASM" "$first_stage_bin" "$second_stage_asm" ; then
fail "$first_stage_asm" "first stage disassembly failed"
continue
fi