From 5a22735d1c265a0a000c77ec9b5bd74688a87370 Mon Sep 17 00:00:00 2001
From: David Phillips
Date: Mon, 29 Jul 2019 21:18:09 +1200
Subject: Implement quick disassembler

---
 .gitignore     |   1 +
 Makefile       |  17 ++++--
 disassembler.c | 218 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 instruction.h  |  17 ++++++
 4 files changed, 247 insertions(+), 6 deletions(-)
 create mode 100644 disassembler.c

diff --git a/.gitignore b/.gitignore
index 71e5da6..7c146f7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 *.o
 *.bin
 assembler
+disassembler
diff --git a/Makefile b/Makefile
index 34dfbdb..6edd47d 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,11 @@
-OBJECTS = lex.o parse.o output.o assembler.o util.o
+ASM_OBJECTS = lex.o parse.o output.o assembler.o util.o
+DISASM_OBJECTS = disassembler.o util.o
 
-all: assembler
+all: assembler disassembler
 
-assembler: $(OBJECTS)
+assembler: $(ASM_OBJECTS)
+
+disassembler: $(DISASM_OBJECTS)
 
 lex.o: lex.h
 
@@ -12,7 +15,9 @@ output.o: parse.h
 
 util.o: lex.h instruction.h
 
-
-.PHONY: clean
+.PHONY: clean test
 clean:
-	- rm -f assembler $(OBJECTS)
+	- rm -f assembler disassembler $(ASM_OBJECTS) $(DISASM_OBJECTS)
+
+test:
+	$(MAKE) -C test test
diff --git a/disassembler.c b/disassembler.c
new file mode 100644
index 0000000..24bbc50
--- /dev/null
+++ b/disassembler.c
@@ -0,0 +1,218 @@
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "util.h"
+
+/* Print a one-line usage summary to stderr. */
+void print_help(const char *argv0)
+{
+	fprintf(stderr, "Syntax: %s <file>\n", argv0);
+}
+
+/*
+ * Instruction printers. Each takes the 16-bit instruction word plus the word
+ * that follows it; the second argument only carries data for the four-byte
+ * encodings (wide immediate, jump immediate). All of them return 0.
+ */
+
+/* R-type: "<oper> <dest>, <left>, <right>" */
+int disasm_rtype(uint16_t i, uint16_t unused)
+{
+	const char *oper = get_asm_from_oper(GET_OPER(i));
+	const char *dest = get_asm_from_reg(GET_REG_DEST(i));
+	const char *left = get_asm_from_reg(GET_REG_LEFT(i));
+	const char *right = get_asm_from_reg(GET_REG_RIGHT(i));
+
+	/* FIXME add special cases:
+	 *  - nop
+	 *  - neg
+	 *  - mv
+	 */
+	printf("%s %s, %s, %s\n", oper, dest, left, right);
+
+	return 0;
+}
+
+/* Narrow-immediate type: "<oper>i <dest>, <left>, <5-bit immediate>" */
+int disasm_nitype(uint16_t i, uint16_t unused)
+{
+	const char *oper = get_asm_from_oper(GET_OPER(i));
+	const char *dest = get_asm_from_reg(GET_REG_DEST(i));
+	const char *left = get_asm_from_reg(GET_REG_LEFT(i));
+	unsigned int imm = GET_NI_IMM(i);
+
+	/* FIXME add special cases:
+	 *  - ldi
+	 */
+	/* FIXME review sign around immediate value */
+	printf("%si %s, %s, 0x%x\n", oper, dest, left, imm);
+	return 0;
+}
+
+/*
+ * Wide-immediate type: "<oper>i <dest>, <left>, <16-bit immediate>". The
+ * immediate is the word after the instruction, which disasm() passes as imm.
+ */
+int disasm_witype(uint16_t i, uint16_t imm)
+{
+	const char *oper = get_asm_from_oper(GET_OPER(i));
+	const char *dest = get_asm_from_reg(GET_REG_DEST(i));
+	const char *left = get_asm_from_reg(GET_REG_LEFT(i));
+
+	/* FIXME review sign around immediate value */
+	printf("%si %s, %s, 0x%x\n", oper, dest, left, (unsigned int)imm);
+	return 0;
+}
+
+/* Jump to register: "<jump mnemonic> <reg>" */
+int disasm_jreg(uint16_t i, uint16_t unused)
+{
+	const char *inst = get_asm_from_j(GET_JB_COND(i));
+	const char *reg = get_asm_from_reg(GET_JUMP_REG(i));
+
+	printf("%s %s\n", inst, reg);
+	return 0;
+}
+
+/* Branch: "<branch mnemonic> <10-bit offset>" */
+int disasm_bimm(uint16_t i, uint16_t unused)
+{
+	const char *inst = get_asm_from_b(GET_JB_COND(i));
+
+	/* FIXME immediate value is meant to be signed */
+	printf("%s 0x%x\n", inst, (unsigned int)GET_B_OFFSET(i));
+	return 0;
+}
+
+/* Jump immediate: "<jump mnemonic> <imm>", imm being the following word */
+int disasm_jimm(uint16_t i, uint16_t imm)
+{
+	const char *inst = get_asm_from_j(GET_JB_COND(i));
+
+	printf("%s 0x%x\n", inst, (unsigned int)imm);
+	return 0;
+}
+
+/* Read one big-endian 16-bit word from f; 0 on success, -1 on EOF or error. */
+static int read_word(FILE *f, uint16_t *word)
+{
+	uint8_t c[2] = { 0 };
+
+	if (fread(c, sizeof(c), 1, f) != 1) {
+		return -1;
+	}
+	*word = (uint16_t)(c[0] << 8 | c[1]);
+	return 0;
+}
+
+/*
+ * Disassemble every instruction in f to stdout as "<offset>:\t<asm>" lines,
+ * followed by a final empty label at the end offset.
+ * Returns 0 on success, negative on a read or decode failure.
+ */
+int disasm(FILE *f)
+{
+	int ret = 0;
+	size_t offs = 0;
+	uint16_t inst = 0;
+
+	while (read_word(f, &inst) == 0) {
+		int (*disasm_inst)(uint16_t, uint16_t) = NULL;
+		size_t len = sizeof(inst);
+		uint16_t extra = 0;
+		int wide = 0;
+
+		switch (GET_INST_TYPE(inst)) {
+		case INST_TYPE_RTYPE:
+			disasm_inst = disasm_rtype;
+			break;
+		case INST_TYPE_NITYPE:
+			disasm_inst = disasm_nitype;
+			break;
+		case INST_TYPE_WITYPE:
+			disasm_inst = disasm_witype;
+			wide = 1;
+			break;
+		case INST_TYPE_JTYPE:
+			/* J Type can be split into three further subtypes:
+			 *  - branch (always immediate, 2 bytes)
+			 *  - jump reg (2 bytes)
+			 *  - jump immediate (4 bytes)
+			 */
+			if (inst & MASK_IS_BRANCH) {
+				disasm_inst = disasm_bimm;
+			} else if (inst & MASK_JR) {
+				disasm_inst = disasm_jreg;
+			} else {
+				disasm_inst = disasm_jimm;
+				wide = 1;
+			}
+			break;
+		default:
+			fprintf(stderr, "Unhandled instruction at byte %zu\n", offs);
+			return -1;
+		}
+
+		/* Four-byte encodings carry their operand in the next word.
+		 * A short read must stop decoding; at EOF errno is not set,
+		 * so it cannot serve as the failure code. */
+		if (wide) {
+			if (read_word(f, &extra) != 0) {
+				if (!ferror(f)) {
+					fprintf(stderr, "Truncated instruction at byte %zu\n", offs);
+				}
+				ret = -1;
+				break;
+			}
+			len += sizeof(extra);
+		}
+
+		printf("%04zx:\t", offs);
+		ret = disasm_inst(inst, extra);
+		if (ret < 0) {
+			fprintf(stderr, "Error handling instruction at byte %zu\n", offs);
+			break;
+		}
+		offs += len;
+	}
+
+	/* The loop also ends on a decode error, so ask the stream whether the
+	 * read itself failed instead of inferring it from !feof(). */
+	if (ferror(f)) {
+		int saved = errno;	/* perror() may clobber errno */
+
+		perror("fread");
+		return saved ? -saved : -1;
+	}
+
+	if (ret == 0) {
+		/* print out final, empty label */
+		printf("%04zx:\n", offs);
+	}
+
+	return ret;
+}
+
+int main(int argc, char **argv)
+{
+	int ret = 0;
+	FILE *fin = NULL;
+
+	if (argc < 2) {
+		print_help(argv[0]);
+		return EXIT_FAILURE;
+	}
+
+	fin = fopen(argv[1], "rb");
+	if (!fin) {
+		perror("fopen");
+		return EXIT_FAILURE;
+	}
+
+	ret = disasm(fin);
+
+	fclose(fin);
+	return ret ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/instruction.h b/instruction.h
index 323a66b..4ee68ed 100644
--- a/instruction.h
+++ b/instruction.h
@@ -22,6 +22,14 @@ enum INST_TYPE {
 #define MASK_INST_WITYPE (0x8000)
 #define MASK_INST_JTYPE (0xC000)
 
+#define INST_TYPE_RTYPE 0
+#define INST_TYPE_NITYPE 1
+#define INST_TYPE_WITYPE 2
+#define INST_TYPE_JTYPE 3
+#define INST_TYPE_SHAMT (14)
+#define MASK_INST_TYPE(x) ((x) << INST_TYPE_SHAMT)
+#define GET_INST_TYPE(x) (0x3 & ((x) >> INST_TYPE_SHAMT))
+
 #define RTYPE_SIZE_BYTES 2 /* instruction fits in 16 bits */
 #define NITYPE_SIZE_BYTES 2 /* instruction fits in 16 bits */
 #define BTYPE_SIZE_BYTES 2 /* instruction fits in 16 bits */
@@ -46,6 +54,7 @@ enum OPER {
 };
 #define OPER_SHAMT (11)
 #define MASK_OPER(x) ((x) << OPER_SHAMT)
+#define GET_OPER(x) (0x7 & ((x) >> OPER_SHAMT))
 
 /**
  * Masks for jump and branch conditions
@@ -64,11 +73,14 @@ enum JCOND {
 };
 #define JB_SHAMT (10)
 #define MASK_JB_COND(x) ((x) << JB_SHAMT)
+#define GET_JB_COND(x) (0x7 & ((x) >> JB_SHAMT))
+
 #define MASK_IS_JUMP (0 << 13)
 #define MASK_IS_BRANCH (1 << 13)
 #define MASK_JI (0x0 << 8)
 #define MASK_JR (0x1 << 8)
 #define MASK_JUMP_REGISTER(x) ((x) << 5)
+#define GET_JUMP_REG(x) (0x07 & ((x) >> 5))
 
 
 /**
@@ -92,19 +104,24 @@ enum REG {
 /* destination reg: xxxxx___ xxxxxxxx */
 #define REG_DEST_OFFSET (8)
 #define MASK_REG_DEST(x) ((x) << REG_DEST_OFFSET)
+#define GET_REG_DEST(x) (0x7 & ((x) >> REG_DEST_OFFSET))
 
 /* left reg: xxxxxxxx ___xxxxx */
 #define REG_LEFT_OFFSET (5)
 #define MASK_REG_LEFT(x) ((x) << REG_LEFT_OFFSET)
+#define GET_REG_LEFT(x) (0x7 & ((x) >> REG_LEFT_OFFSET))
 
 /* right reg (R-type only): xxxxxxxx xxx___xx */
 #define REG_RIGHT_OFFSET (2)
 #define MASK_REG_RIGHT(x) ((x) << REG_RIGHT_OFFSET)
+#define GET_REG_RIGHT(x) (0x7 & ((x) >> REG_RIGHT_OFFSET))
 
 /* five LSb are narrow immediate value */
 #define MASK_NI_IMM(x) ((x) & 0x1F)
+#define GET_NI_IMM(x) (0x1F & (x))
 
 /* 10 LSb is branch offset */
 #define MASK_B_OFFSET(x) ((x) & 0x3FF)
+#define GET_B_OFFSET(x) ((x) & 0x3FF)
 
 #endif /* INSTRUCTION_H */
-- 
cgit v1.1