diff options
-rw-r--r-- | Makefile | 4 | ||||
-rw-r--r-- | disassembler.c | 202 | ||||
-rw-r--r-- | input_bin.c | 173 | ||||
-rw-r--r-- | input_bin.h | 6 | ||||
-rw-r--r-- | output_asm.c | 134 | ||||
-rw-r--r-- | output_asm.h | 6 | ||||
-rwxr-xr-x | test/full-pipeline/run-full-pipeline.sh | 2 |
7 files changed, 369 insertions, 158 deletions
@@ -1,5 +1,5 @@ ASM_OBJECTS = lex.o parse.o output.o assembler.o util.o -DISASM_OBJECTS = util.o +DISASM_OBJECTS = disassembler.o util.o input_bin.o output_asm.o all: assembler disassembler @@ -19,5 +19,5 @@ util.o: lex.h instruction.h clean: - rm -f assembler disasm $(ASM_OBJECTS) -test: +test: all make -C test test diff --git a/disassembler.c b/disassembler.c index ea370ba..2ceee3f 100644 --- a/disassembler.c +++ b/disassembler.c @@ -1,179 +1,71 @@ #include <stdio.h> -#include <stdlib.h> #include <stdint.h> -#include <errno.h> +#include <string.h> -#include "util.h" +#include "instruction.h" +#include "parse.h" +#include "input_bin.h" +#include "output_asm.h" void print_help(const char *argv0) { - fprintf(stderr, "Syntax: %s <in.bin>\0 <out.asm>\n", argv0); + fprintf(stderr, "Syntax: %s <in.bin> <out.asm>\n", argv0); } -int disasm_rtype(uint16_t i, uint16_t unused) -{ - const char *oper = get_asm_from_oper(GET_OPER(i)); - const char *dest = get_asm_from_reg(GET_REG_DEST(i)); - const char *left = get_asm_from_reg(GET_REG_LEFT(i)); - const char *right = get_asm_from_reg(GET_REG_RIGHT(i)); - - /* FIXME add special cases: - * - nop - * - neg - * - mv - */ - printf("%s %s, %s, %s\n", oper, dest, left, right); - - return 0; -} - -int disasm_nitype(uint16_t i, uint16_t unused) -{ - const char *oper = get_asm_from_oper(GET_OPER(i)); - const char *dest = get_asm_from_reg(GET_REG_DEST(i)); - const char *left = get_asm_from_reg(GET_REG_LEFT(i)); - uint16_t imm = GET_NI_IMM(i); - /** FIXME add special cases: - * - ldi - */ - /* FIXME review sign around immediate value */ - printf("%si %s, %s, 0x%x\n", oper, dest, left, imm); - return 0; -} - -int disasm_witype(uint16_t i, uint16_t unused) -{ - const char *oper = get_asm_from_oper(GET_OPER(i)); - const char *dest = get_asm_from_reg(GET_REG_DEST(i)); - const char *left = get_asm_from_reg(GET_REG_LEFT(i)); - uint16_t imm = GET_NI_IMM(i); - /* FIXME handle wide imm value */ - printf("%si, %s, %s, 0x%x\n", "oper", dest, left, "?"); - return 0; -} - -int disasm_jreg(uint16_t i, uint16_t unused) -{ - const char *inst = get_asm_from_j(GET_JB_COND(i)); - const char *reg = get_asm_from_reg(GET_JUMP_REG(i)); - printf("%s %s\n", inst, reg); - return 0; -} - -int disasm_bimm(uint16_t i, uint16_t pc) -{ - const char *inst = get_asm_from_b(GET_JB_COND(i)); - struct {signed int s:10;} sign; - int offset = sign.s = GET_B_OFFSET(i); - printf("%s 0x%x\n", inst, pc + 2 * offset); - return 0; -} - -int disasm_jimm(uint16_t i, uint16_t imm) -{ - const char *inst = get_asm_from_j(GET_JB_COND(i)); - printf("%s 0x%x\n", inst, imm); - return 0; -} - -int disasm(FILE *f) +int main(int argc, char **argv) { + int error_ret = 1; int ret = 0; - size_t offs = 0; - size_t extra_read = 0; - uint16_t inst = 0; - uint16_t extra_arg = 0; - uint8_t c[2] = { 0 }; - int (*disasm_inst)(uint16_t, uint16_t); + const char *path_in = NULL; + const char *path_out = NULL; + FILE *fin = NULL; + FILE *fout = NULL; - while (!feof(f) && fread(c, sizeof(c), 1, f) == 1) { - extra_read = 0; - inst = c[0] << 8 | c[1]; - switch (GET_INST_TYPE(inst)) { - case INST_TYPE_RTYPE: - disasm_inst = disasm_rtype; - break; - case INST_TYPE_NITYPE: - disasm_inst = disasm_nitype; - break; - case INST_TYPE_WITYPE: - if (fread(c, sizeof(c), 1, f) != 1) { - ret = -errno; - break; - } - extra_read = sizeof(c); - extra_arg = c[0] << 16 | c[1]; - disasm_inst = disasm_witype; - break; - case INST_TYPE_JTYPE: - /* J Type can be split into three further subtypes: - * - branch (always immediate, 2 bytes) - * - jump reg (2 bytes) - * - jump immediate (4 bytes) - */ - if (inst & MASK_IS_BRANCH) { - disasm_inst = disasm_bimm; - extra_arg = offs; - } else { - if (inst & MASK_JR) { - disasm_inst = disasm_jreg; - } else { - if (fread(c, sizeof(c), 1, f) != 1) { - ret = -errno; - break; - } - extra_arg = c[0] << 16 | c[1]; - extra_read = sizeof(c); - disasm_inst = disasm_jimm; - } - } - break; - default: - printf("Unhandled instruction at byte %zd\n", offs); - ret = -1; - break; - } - /* Fall out of loop if picking a handler errored */ - if (ret) { - break; - } - printf("%04x:\t", offs); - ret = disasm_inst(inst, extra_arg); - if (ret < 0) { - fprintf(stderr, "Error handling instruction at byte %zd\n", offs); - break; - } - offs += sizeof(c) + extra_read; + if (argc < 3) { + print_help(argv[0]); + return 1; } - if (!feof(f)) { - perror("fread"); - ret = errno; + if (strcmp(argv[1], "-q") == 0) { + if (argc != 4) { + print_help(argv[0]); + return 0; + } + error_ret = 0; + path_in = argv[2]; + path_out = argv[3]; } else { - /* print out final, empty label */ - printf("%04x:\n", offs); + path_in = argv[1]; + path_out = argv[2]; } - return ret; -} -int main(int argc, char **argv) -{ - int ret = 0; - FILE *fin = NULL; - - if (argc < 2) { - print_help(argv[0]); - return 1; + if ((fin = fopen(path_in, "r")) == NULL) { + fprintf(stderr, "Error opening %s: ", path_in); + perror("fopen"); + return error_ret; } - if (!(fin = fopen(argv[1], "rb"))) { + if ((fout = fopen(path_out, "w")) == NULL) { + fprintf(stderr, "Error opening %s: ", path_out); perror("fopen"); - return 1; + return error_ret; } - ret = disasm(fin); +/****/ + /* FIXME package these things into `tok_result`, parse_result` etc */ + struct instruction *insts; + size_t insts_count; + struct label *labels; + size_t labels_count; + labels = NULL; + labels_count = 0; + + if (ret = disasm(fin, &insts, &insts_count)) + return error_ret && ret; - fclose(fin); - return ret; + if (ret = output_asm(fout, labels, labels_count, insts, insts_count)) + return error_ret && ret; + + return 0; } diff --git a/input_bin.c b/input_bin.c new file mode 100644 index 0000000..54a99f3 --- /dev/null +++ b/input_bin.c @@ -0,0 +1,173 @@ +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <errno.h> + +#include "parse.h" + +static void disasm_rtype(uint16_t i, uint16_t unused, struct instruction *inst) +{ + inst->type = INST_TYPE_R; + inst->inst.r.oper = GET_OPER(i); + inst->inst.r.dest = GET_REG_DEST(i); + inst->inst.r.left = GET_REG_LEFT(i); + inst->inst.r.right = GET_REG_RIGHT(i); +} + +static void disasm_nitype(uint16_t i, uint16_t unused, struct instruction *inst) +{ + inst->type = INST_TYPE_NI; + inst->inst.i.oper = GET_OPER(i); + inst->inst.i.dest = GET_REG_DEST(i); + inst->inst.i.left = GET_REG_LEFT(i); + inst->inst.i.imm.value = GET_NI_IMM(i); + inst->inst.i.imm_is_ident = 0; +} + +static void disasm_witype(uint16_t i, uint16_t imm, struct instruction *inst) +{ + inst->type = INST_TYPE_WI; + inst->inst.i.oper = GET_OPER(i); + inst->inst.i.dest = GET_REG_DEST(i); + inst->inst.i.left = GET_REG_LEFT(i); + inst->inst.i.imm.value = imm; + inst->inst.i.imm_is_ident = 0; +} + +static void disasm_jreg(uint16_t i, uint16_t unused, struct instruction *inst) +{ + inst->type = INST_TYPE_JR; + inst->inst.jr.cond = GET_JB_COND(i); + inst->inst.jr.reg = GET_JUMP_REG(i); +} + +static void disasm_jimm(uint16_t i, uint16_t imm, struct instruction *inst) +{ + inst->type = INST_TYPE_JI; + inst->inst.ji.cond= GET_JB_COND(i); + inst->inst.ji.imm.value = imm; + inst->inst.ji.imm_is_ident = 0; +} + +static void disasm_bimm(uint16_t i, uint16_t pc, struct instruction *inst) +{ + struct { signed int s:10; } sign; + int offset = sign.s = GET_B_OFFSET(i); + inst->type = INST_TYPE_B; + inst->inst.b.cond = GET_JB_COND(i); + inst->inst.b.imm.value = pc + 2 * offset; + inst->inst.b.imm_is_ident = 0; +} + + +/** + * FIXME move and factor out with parse.c */ +static struct instruction *insts = NULL; +static size_t insts_count = 0; +static int add_instruction(struct instruction inst) +{ + struct instruction *old_insts = insts; + insts = realloc(insts, (insts_count + 1) * sizeof(struct instruction)); + if (!insts) { + free(old_insts); + perror("realloc"); + return 1; + } + + insts[insts_count] = inst; + + insts_count++; + return 0; +} + + + +/* FIXME needs whatsit arguments. f, tok, tok length */ +static int disasm_file(FILE *f) +{ + int ret = 0; + size_t offs = 0; + uint16_t inst = 0; + uint8_t c[2] = { 0 }; + size_t extra_read = 0; + uint16_t extra_arg = 0; + struct instruction i = { 0 }; + void (*disasm_inst)(uint16_t, uint16_t, struct instruction*); + + while (!feof(f) && fread(c, sizeof(c), 1, f) == 1) { + extra_read = 0; + inst = c[0] << 8 | c[1]; + switch (GET_INST_TYPE(inst)) { + case INST_TYPE_RTYPE: + disasm_inst = disasm_rtype; + break; + case INST_TYPE_NITYPE: + disasm_inst = disasm_nitype; + break; + case INST_TYPE_WITYPE: + if (fread(c, sizeof(c), 1, f) != 1) { + ret = -errno; + break; + } + extra_read = sizeof(c); + extra_arg = c[0] << 16 | c[1]; + disasm_inst = disasm_witype; + break; + case INST_TYPE_JTYPE: + /* J Type can be split into three further subtypes: + * - branch (always immediate, 2 bytes) + * - jump reg (2 bytes) + * - jump immediate (4 bytes) + */ + if (inst & MASK_IS_BRANCH) { + disasm_inst = disasm_bimm; + extra_arg = offs; + } else { + if (inst & MASK_JR) { + disasm_inst = disasm_jreg; + } else { + if (fread(c, sizeof(c), 1, f) != 1) { + ret = -errno; + break; + } + extra_arg = c[0] << 16 | c[1]; + extra_read = sizeof(c); + disasm_inst = disasm_jimm; + } + } + break; + default: + printf("Unhandled instruction at byte %zd\n", offs); + ret = -1; + break; + } + /* Fall out of loop if picking a handler errored */ + if (ret) { + break; + } + disasm_inst(inst, extra_arg, &i); + if (ret < 0) { + fprintf(stderr, "Error handling instruction at byte %zd\n", offs); + break; + } + if (add_instruction(i)) + return 1; + offs += sizeof(c) + extra_read; + } + if (!feof(f)) { + perror("fread"); + ret = -errno; + } + return ret; +} + +int disasm(FILE *f, struct instruction **i, size_t *i_count) +{ + int ret = 0; + + ret = disasm_file(f); + *i = insts; + *i_count = insts_count; + + return ret; +} diff --git a/input_bin.h b/input_bin.h new file mode 100644 index 0000000..7683835 --- /dev/null +++ b/input_bin.h @@ -0,0 +1,6 @@ +#ifndef INPUT_BIN_H +#define INPUT_BIN_H + +int disasm(FILE *f, struct instruction **i, size_t *i_count); + +#endif /* INPUT_BIN_H */ diff --git a/output_asm.c b/output_asm.c new file mode 100644 index 0000000..b1ccfc9 --- /dev/null +++ b/output_asm.c @@ -0,0 +1,134 @@ +#include <stdio.h> +#include <stdint.h> +#include <string.h> + +#include "parse.h" +#include "util.h" + +static size_t inst_sizes[] = { + [INST_TYPE_R] = 2, + [INST_TYPE_NI] = 2, + [INST_TYPE_WI] = 4, + [INST_TYPE_JR] = 2, + [INST_TYPE_JI] = 4, + [INST_TYPE_B] = 2, +}; + +void emit_single_r_type(FILE *f, struct r_type inst) +{ + const char *oper = get_asm_from_oper(inst.oper); + const char *dest = get_asm_from_reg(inst.dest); + const char *left = get_asm_from_reg(inst.left); + const char *right = get_asm_from_reg(inst.right); + + fprintf(f, "%s %s, %s, %s\n", oper, dest, left, right); +} + +void emit_single_i_type(FILE *f, struct i_type inst) +{ + const char *oper = get_asm_from_oper(inst.oper); + const char *dest = get_asm_from_reg(inst.dest); + const char *left = get_asm_from_reg(inst.left); + + fprintf(f, "%si %s, %s, 0x%x\n", oper, dest, left, inst.imm.value); +} + +/*void emit_single_wi_type(FILE *f, struct i_type inst) +{ + const char *oper = get_asm_from_oper(inst.oper); + const char *dest = get_asm_from_oper(inst.dest); + const char *left = get_asm_from_oper(inst.left); + + fprintf(f, "%si %s, %s, 0x%s\n", oper, dest, left, inst.imm.value); +}*/ + +void emit_single_ji_type(FILE *f, struct ji_type inst) +{ + const char *cond = get_asm_from_j(inst.cond); + + fprintf(f, "%s %s\n", cond, inst.imm.value); +} + +void emit_single_jr_type(FILE *f, struct jr_type inst) +{ + const char *cond = get_asm_from_j(inst.cond); + const char *reg = get_asm_from_reg(inst.reg); + + fprintf(f, "%s %s\n", cond, reg); +} + +void emit_single_b_type(FILE *f, struct b_type inst) +{ + const char *cond = get_asm_from_b(inst.cond); + + fprintf(f, "%s 0x%x\n", cond, inst.imm.value); +} + + +int look_up_label(struct label *labels, size_t labels_count, uint16_t *val, const char *label) +{ + size_t i = 0; + + for (i = 0; i < labels_count; i++) { + if (strcmp(labels[i].name, label) == 0) { + *val = labels[i].byte_offset; + return 0; + } + } + + /* FIXME emit */ + fprintf(stderr, "Reference to undefined label `%s'\n", label); + return 1; +} + +int output_single(FILE *f, size_t *cur_byte, struct label *labels, size_t labels_count, struct instruction inst) +{ + switch (inst.type) { + case INST_TYPE_R: + emit_single_r_type(f, inst.inst.r); + break; + case INST_TYPE_NI: + case INST_TYPE_WI: + if ( inst.inst.i.imm_is_ident + && look_up_label(labels, labels_count, &inst.inst.i.imm.value, inst.inst.i.imm.label)) + return 1; + + emit_single_i_type(f, inst.inst.i); + break; + case INST_TYPE_JR: + emit_single_jr_type(f, inst.inst.jr); + break; + case INST_TYPE_JI: + if ( inst.inst.ji.imm_is_ident + && look_up_label(labels, labels_count, &inst.inst.ji.imm.value, inst.inst.ji.imm.label)) + return 1; + + emit_single_ji_type(f, inst.inst.ji); + break; + case INST_TYPE_B: + if ( inst.inst.b.imm_is_ident + && look_up_label(labels, labels_count, &inst.inst.b.imm.value, inst.inst.b.imm.label)) + return 1; + emit_single_b_type(f, inst.inst.b); + break; + default: + fprintf(stderr, "Internal error: unhandled instruction type\n"); + break; + } + + *cur_byte += inst_sizes[inst.type]; + + return 0; +} + +int output_asm(FILE *fout, struct label *labels, size_t label_count, struct instruction *insts, size_t insts_count) +{ + size_t i = 0; + size_t cur_byte = 0; + + for (i = 0; i < insts_count; i++) + if (output_single(fout, &cur_byte, labels, label_count, insts[i])) + return 1; + + return 0; +} diff --git a/output_asm.h b/output_asm.h new file mode 100644 index 0000000..b4afd2a --- /dev/null +++ b/output_asm.h @@ -0,0 +1,6 @@ +#ifndef OUTPUT_ASM_H +#define OUTPUT_ASM_H + +int output_asm(FILE *fout, struct label *labels, size_t label_count, struct instruction *insts, size_t insts_count); + +#endif /* OUTPUT_ASM_H */ diff --git a/test/full-pipeline/run-full-pipeline.sh b/test/full-pipeline/run-full-pipeline.sh index 8598a8d..aeb7c4f 100755 --- a/test/full-pipeline/run-full-pipeline.sh +++ b/test/full-pipeline/run-full-pipeline.sh @@ -45,7 +45,7 @@ for first_stage_asm in *.asm ; do fi # Disassemble test code and re-assemble that disassembly - if ! "$DISASM" "$first_stage_bin" > "$second_stage_asm" ; then + if ! "$DISASM" "$first_stage_bin" "$second_stage_asm" ; then fail "$first_stage_asm" "first stage disassembly failed" continue fi |