Mercurial > hg > ede
changeset 22:c988e43bf2b5
committing to backup
author | james <jb302@eecs.qmul.ac.uk> |
---|---|
date | Mon, 24 Feb 2014 18:59:47 +0000 |
parents | 5953026ed47e |
children | 2efb577ac2d7 |
files | assembler/assembler.py assembler/language.py assembler/language.pyc tests/.full.asm.swp tests/full.asm tests/full.bin |
diffstat | 6 files changed, 169 insertions(+), 134 deletions(-) [+] |
line wrap: on
line diff
--- a/assembler/assembler.py Sun Feb 23 19:17:25 2014 +0000 +++ b/assembler/assembler.py Mon Feb 24 18:59:47 2014 +0000 @@ -24,60 +24,72 @@ pc = 0 # <line> ::= [<statement>] [";"<comment>] <EOL> - for line in source_code: - + for line in source_code: try: - # remove EOL + # remove trailing whitespace and comments line = line.strip() - - # remove comments for i in range(len(line)): if line[i] == ';': line = line[:i] break - line = line.lower() - # <statement> ::= [ <label> ":"] <mnemonic> [<arguments>] # | <label> ":" # | "EOF" statement = line.split() + if not statement: + continue + + # replace equated strings + # because this happens on the first pass + # equates must be assigned before they are used + n = 1 + for i in statement[1:]: + # replace any equates already stored + # remove prefixes and suffixes before attempting to replace + t = e = '' + if i[0] in TYPES: + t = i[0] + i = i[1:] + if i and (i[-1] in [',', ')']): + e = i[-1] + i = i[:-1] + # replace and put removed characters back + for j in equ_index.keys(): + if i == j: + statement[n] = t + equ_index[j] + e + n = n + 1 - if not statement: continue - # deal with org and db - elif statement[0] == 'org': + # deal with org on second pass + if statement[0].lower() == 'org': asm.append([statement[0], statement[1:], ('',), '']) pc = string_num(statement[1]) continue - elif statement[0] == 'db': - asm.append([statement[0], statement[1:], ('',), '']) - pc = pc + len(statement[1:]) - continue # store equates - if (len(statement) >= 3) and (statement[1] == 'equ'): - equ_index[statement[0]] = statement[2] + # these are case sensative + if (len(statement) >= 3) and (statement[1].lower() == 'equ'): + equ_index[statement[0]] = ' '.join(statement[2:]) continue # if needed update index and remove label - elif statement[0][-1:] == ':': + elif statement[0][-1] == ':': label_index[statement[0][:-1]] = pc; del statement[0] - if not statement: continue + if not statement: + continue # <statement> ::= <mnemonic> [<arguments>] 
- mnemonic = statement[0] + mnemonic = statement[0].lower() arguments = ''.join(statement[1:]).split(',') - # replace equated arguments - # because this happens on the first pass - # equates must be assigned before they are used - i = 0 - for a in arguments: - if a in equ_index: - arguments[i] = equ_index[a] - i = i + 1 - + # deal with db + if statement[0].lower() == 'db': + symbols, constants = tokenize(arguments) + asm.append([mnemonic, arguments, symbols, constants]) + pc = pc + len(constants) + continue + symbols, constants = tokenize(arguments) width = iset[mnemonic][symbols][1] @@ -109,7 +121,7 @@ elif mne == 'db': data = '' for a in args: - data = data + struct.pack('>B', string_num(a)) + data = data + tokenize(a)[1] f.write(data) pc = pc + len(data) continue @@ -117,8 +129,8 @@ # replace labels with addresses i = 0 for a in args: - - if not a: continue + if not a: + continue elif (sym[i] == 'label') or (sym[i] == '@label'): # labeled pointer uglyness if (a[0] == '@') and (a[1:] in label_index): @@ -135,13 +147,18 @@ else: args[i] = num_string(label_index[a]) const = const + tokenize([args[i]])[1] - else: - print '** label error **\nline:\n', line - raise + elif a not in label_index: + print '** label error **', line + raise KeyError + i = i + 1 # assemble to file op, width = iset[mne][sym] + # theres gotta be a better way do deal with paged addresses + if mne in ['pcall', 'pjmp']: + op = op | ((string_num(args[0]) & 0x7FF) >> 8) + const = const[-1] f.write(struct.pack('>B', op)) # pad if needed for i in range(width - len(const) - 1):
--- a/assembler/language.py Sun Feb 23 19:17:25 2014 +0000 +++ b/assembler/language.py Mon Feb 24 18:59:47 2014 +0000 @@ -7,13 +7,17 @@ BASE = 16 # prefix must be only 2 characters otherwise tokenize() will break PREFIX = '0x' +# data types: ints, pointers, ports, vectors +TYPES = ['#', '@', '$', '*'] + def num_string(num): return hex(num) def string_num(string): return int(string, BASE) -# valid arguments for this instruction set +# valid reserved arguments for this instruction set +# lower and uppercase versions vargs = ('', '@a+dptr', 'dptr', 'bs', '@a+pc', 'a', 'c','r0', 'r1', 'r2', 'r3', 'ie', 'sp', 'flags', 'dpl', 'dph', '@dptr', @@ -210,8 +214,8 @@ ('a', 'r2'): [138, 1], ('a', 'r3'): [139, 1]}, 'out': { ('port_addr', 'a'): [253, 2]}, - 'pcall': { ('addr11',): [207, 2], ('label',): [207, 2]}, - 'pjmp': { ('addr11',): [199, 2], ('label',): [199, 2]}, + 'pcall': { ('addr16',): [207, 2], ('label',): [200, 2]}, + 'pjmp': { ('addr16',): [199, 2], ('label',): [192, 2]}, 'pop': { ('a',): [246, 1], ('dph',): [244, 1], ('dpl',): [245, 1], @@ -272,92 +276,106 @@ def tokenize(args): sym = [] data = '' - - i = 0 + for a in args: + # tokenize reserved arguments immediatly (not case sensitive) + # determine arg type and remove identifier if needed + # unprefixed arguments are addresses so this is the default + arg_type = 'addr' + if a.lower() in vargs: + sym.append(a.lower()) + continue + elif a[0] in TYPES: + arg_type = a[0] + a = a[1:] - # things that can't be negative - is_neg = 0 - # reserved words - if a in vargs: - sym.append(a) + # evaluate inline calculations + if (a[0] == '(') and (a[-1] == ')'): + a = num_string(eval(a[1:-1])) + # evaluate strings + elif (a[0] == '\'') and (a[-1] == '\''): + if len(a) == 3: + a = num_string(struct.unpack('>B', a[1:-1])[0]) + elif len(a) == 4: + a = num_string(struct.unpack('>H', a[1:-1])[0]) + else: + data = a[1:-1] + continue + + # check if negative and remove sign if needed + if a[:2] == '-' + PREFIX: + is_neg = 1 + a = a[1:] + 
elif a[:2] == PREFIX: + is_neg = 0 + # anything else must be a label or a source code error + # labeled pointer uglyness + elif (arg_type == '@') & (a[:2] != PREFIX): + sym.append('@label') continue - # 16 bit addresses - elif (a[:2] == PREFIX) and (2 < len(a[2:]) <= 4): - sym.append('addr16') + else: + sym.append('label') + continue + + # addresses + if arg_type == 'addr': + # 8 bit relative addresses (always signed) + if len(a) <= 4: + sym.append('rel8') + if is_neg: + val = string_num('-' + a) + else: + val = string_num(a) + data = data + struct.pack('>b', val) + continue + # 16 bit absolute addresses (never signed) + elif len(a) <= 6: + sym.append('addr16') + val = string_num(a) + data = data + struct.pack('>H', val) + continue + # immediate ints (signed when negative) + elif arg_type == '#': + # 8 bit ints + if len(a) <= 4: + sym.append('#data8') + if is_neg: + val = string_num('-' + a) + fmt = '>b' + else: + val = string_num(a) + fmt = '>B' + data = data + struct.pack(fmt, val) + continue + # 16 bit ints + elif len(a) <= 6: + sym.append('#data16') + if is_neg: + val = string_num('-' + a) + fmt = '>h' + else: + val = string_num(a) + fmt = '>H' + data = data + struct.pack(fmt, val) + continue + # pointers + elif arg_type == '@': + sym.append('@addr16') val = string_num(a) data = data + struct.pack('>H', val) continue - # pointers - elif a[0] == '@': - if a[:3] == '@' + PREFIX: - sym.append('@addr16') - val = string_num(a[1:]) - data = data + struct.pack('>H', val) - continue - # labeled pointers make for ugly code considering - # only two instructions can use them - else: - sym.append('@label') - continue # ports - elif a[:3] == 'p' + PREFIX: + elif arg_type == '$': sym.append('port_addr') - val = string_num(a[1:]) + val = string_num(a) data = data + struct.pack('>B', val) continue # vectors - elif a[:3] == 'v' + PREFIX: + elif arg_type == '*': sym.append('vect8') - val = string_num(a[1:]) + val = string_num(a) data = data + struct.pack('>B', val) continue - # 
check if other values are negative and remove sign - elif a[0] == '-': - a = a[1:] - is_neg = 1 - - # things that can be negative - # immediate ints (signed when negative) - if (a[:3] == '#' + PREFIX): - # 8 bit ints - if len(a[3:]) <= 2: - sym.append('#data8') - if is_neg: - val = string_num('-' + a[1:]) - fmt = '>b' - else: - val = string_num(a[1:]) - fmt = '>B' - # signed big-endian byte - data = data + struct.pack(fmt, val) - continue - # 16 bit ints - elif len(a[3:]) <= 4: - sym.append('#data16') - if is_neg: - val = string_num('-' + a[1:]) - fmt = '>h' - else: - val = string_num(a[1:]) - fmt = '>H' - # signed big-endian short - data = data + struct.pack(fmt, val) - continue - # 8 bit relative addresses (always signed) - elif (a[:2] == PREFIX) and (len(a[2:]) <= 2): - sym.append('rel8') - if is_neg: - val = string_num('-' + a) - else: - val = string_num(a) - # signed - data = data + struct.pack('>b', val) - continue - # unknown strings are either labels or source code errors - else: - sym.append('label') - continue - + return tuple(sym), data
--- a/tests/full.asm Sun Feb 23 19:17:25 2014 +0000 +++ b/tests/full.asm Mon Feb 24 18:59:47 2014 +0000 @@ -1,14 +1,15 @@ -#data8 EQU #0x42 -#data16 EQU #0x5757 -port_addr EQU P0x50 -vect8 EQU V0x56 +db 'ABCDEF', 0x0, (0x1 * 2) +data8 EQU 0x42 +data16 EQU ((0x2BAB * 2) - data8 + 0x01 + data8) +port_addr EQU $0x50 +vect8 EQU *0x56 NOP SET C CLR C SET BS CLR BS -SET IE +SeT iE CLR IE CPL C CPL A @@ -17,8 +18,8 @@ LAF MOV DPTR, SP MOV SP, DPTR -MOV A, #data8 -MOV SP, #data16 +MOV A, #'B' +MOV SP, #'WW' MOV DPTR, #data16 MOV A, addr16 MOV addr16, A @@ -188,22 +189,22 @@ SUBB A, DPL SUBB A, #data8 SUBB A, @DPTR -PJMP addr11 -PJMP addr11 -PJMP addr11 -PJMP addr11 -PJMP addr11 -PJMP addr11 -PJMP addr11 -PJMP addr11 -PCALL addr11 -PCALL addr11 -PCALL addr11 -PCALL addr11 -PCALL addr11 -PCALL addr11 -PCALL addr11 -PCALL addr11 +PJMP addr16 +PJMP addr16 +PJMP addr16 +PJMP addr16 +PJMP addr16 +PJMP addr16 +PJMP addr16 +PJMP addr16 +PCALL addr16 +PCALL addr16 +PCALL addr16 +PCALL addr16 +PCALL addr16 +PCALL addr16 +PCALL addr16 +PCALL addr16 DJNZ R0, rel8 DJNZ R1, rel8 DJNZ R2, rel8 @@ -255,6 +256,5 @@ org 0x0151 rel8: -addr11: addr16: NOP