Mercurial > hg > ede
view assembler/assembler.py @ 23:2efb577ac2d7
finished?
author | james <jb302@eecs.qmul.ac.uk> |
---|---|
date | Tue, 25 Feb 2014 01:55:39 +0000 |
parents | c988e43bf2b5 |
children | 45340c2a38c5 |
line wrap: on
line source
#!/usr/bin/env python2
# assembler.py
"""Two-pass assembler.

The first pass preprocesses the source text into a list of instruction
records and collects label addresses; the second pass resolves labels and
writes the machine code to a binary file.

``TYPES``, ``tokenize``, ``iset``, ``num_string`` and ``string_num`` are not
defined in this file; they are expected to come from the wildcard import of
the ``language`` module.
"""
import struct
import sys
from language import *


def first_pass(f):
    """Take a source file and return preprocessed assembly code.

    For each non-empty line in the file:
      * remove comments from the source
      * store label definitions and remove them from the source
      * substitute previously defined equates and labels in the arguments
      * determine the format of the arguments (data type symbols and
        constant data, as returned by ``tokenize``)

    Args:
        f: readable, seekable file object holding the assembly source.

    Returns:
        A tuple ``(asm, label_index)``.  ``asm`` is a list of
        ``[mnemonic, arguments, symbols, constants]`` records, one per
        ``org``, ``db`` or instruction statement.  ``label_index`` maps each
        label name to the program counter value at its definition.

    Raises:
        Whatever a malformed line triggers (for example ``IndexError`` or
        ``KeyError``); the offending line is printed first and the original
        exception is re-raised unchanged.
    """
    # read file into list, remove blank lines
    f.seek(0)
    source_code = [raw for raw in f.readlines() if raw != '\n']

    asm = []
    label_index = {}
    equ_index = {}
    pc = 0

    # <line> ::= [<statement>] [";"<comment>] <EOL>
    for line in source_code:
        try:
            # remove surrounding whitespace, then everything from the first
            # ';' onwards (the comment)
            line = line.strip().partition(';')[0]

            # <statement> ::= [ <label> ":"] <mnemonic> [<arguments>]
            #               | <label> ":"
            #               | "EOF"
            statement = line.split()
            if not statement:
                continue

            # replace equated strings
            # because this happens on the first pass
            # equates must be assigned before they are used
            for n, token in enumerate(statement[1:], 1):
                # replace any equates already stored
                # remove prefixes and suffixes before attempting to replace
                prefix = suffix = ''
                if token[0] in TYPES:
                    prefix = token[0]
                    token = token[1:]
                if token and (token[-1] in [',', ')']):
                    suffix = token[-1]
                    token = token[:-1]
                # replace and put removed characters back
                if token in equ_index:
                    statement[n] = prefix + equ_index[token] + suffix
                # labels can be used in equates but they have
                # to be defined before they are used
                elif token in label_index:
                    statement[n] = (prefix + num_string(label_index[token])
                                    + suffix)

            # deal with org
            if statement[0].lower() == 'org':
                asm.append(['org', statement[1:], ('',), ''])
                pc = string_num(statement[1])
                continue
            # if needed update index and remove label
            elif statement[0][-1] == ':':
                label_index[statement[0][:-1]] = pc
                del statement[0]

            # store equates
            # these are case sensitive
            if (len(statement) >= 3) and (statement[1].lower() == 'equ'):
                equ_index[statement[0]] = ' '.join(statement[2:])
                continue

            # the line held nothing but a label definition
            if not statement:
                continue

            # <statement> ::= <mnemonic> [<arguments>]
            mnemonic = statement[0].lower()
            arguments = ''.join(statement[1:]).split(',')

            # deal with db: every argument is tokenized as if it carried a
            # '#' prefix and the resulting constants are concatenated
            if mnemonic == 'db':
                constants = ''.join(tokenize(['#' + a])[1] for a in arguments)
                asm.append([mnemonic, arguments, ('',), constants])
                pc = pc + len(constants)
                continue

            symbols, constants = tokenize(arguments)
            width = iset[mnemonic][symbols][1]

            asm.append([mnemonic, arguments, symbols, constants])
            pc = pc + width

        except Exception:
            # A single pre-formatted argument keeps this statement valid
            # under both Python 2 and Python 3 and prints the same text the
            # original Python 2 print statement produced.
            print(' ** first pass error **\nline:\n%s' % (line,))
            raise

    return asm, label_index


def second_pass(f, asm, label_index):
    """Take preprocessed asm and write machine code to a binary file.

    For each record of ``asm``:
      * handle ``org`` (move the program counter) and ``db`` (write the
        stored constants) directly
      * replace label arguments with their addresses
      * write the opcode, zero padding and constants at the current
        program counter

    Args:
        f: writable, seekable binary file object.
        asm: list of ``[mnemonic, arguments, symbols, constants]`` records
            as returned by ``first_pass``.  The ``arguments`` lists are
            modified in place when labels are resolved.
        label_index: mapping of label name to address.

    Returns:
        The file object ``f`` that was passed in.

    Raises:
        KeyError: an argument marked as a label is not in ``label_index``.
        Any other exception raised while assembling a record is re-raised
        after the record has been printed.
    """
    pc = 0

    for line in asm:
        f.seek(pc)
        mne, args, sym, const = line

        try:
            if mne == 'org':
                pc = string_num(args[0])
                continue
            elif mne == 'db':
                f.write(const)
                pc = pc + len(const)
                continue

            # replace labels with addresses
            # enumerate keeps the index tied to the argument position even
            # when an empty argument is skipped
            for i, a in enumerate(args):
                if not a:
                    continue
                if (sym[i] == 'label') or (sym[i] == '@label'):
                    # labeled pointer ugliness
                    if (a[0] == '@') and (a[1:] in label_index):
                        args[i] = '@' + num_string(label_index[a[1:]])
                        const = const + tokenize([args[i]])[1]
                    elif a in label_index:
                        # check if constant needs to be a relative address:
                        # swap this argument's symbol for 'rel8' and see
                        # whether the mnemonic has such a form
                        r = list(sym)
                        r[i] = 'rel8'
                        if tuple(r) in iset[mne]:
                            # relative addresses can be negative
                            args[i] = num_string(label_index[a] - pc)
                        else:
                            args[i] = num_string(label_index[a])
                        const = const + tokenize([args[i]])[1]
                    else:
                        print('** label error ** %s' % (line,))
                        raise KeyError(a)

            # assemble to file
            op, width = iset[mne][sym]
            # there's gotta be a better way to deal with paged addresses
            if mne in ['pcall', 'pjmp']:
                op = op | ((string_num(args[0]) & 0x7FF) >> 8)
                const = const[-1]
            f.write(struct.pack('>B', op))
            # pad if needed
            for _ in range(width - len(const) - 1):
                f.write(struct.pack('>B', 0))
            f.write(const)
            pc = pc + width

        except Exception:
            # see first_pass for why this is a single formatted argument
            print('** second pass error **\nline:\n%s' % (line,))
            raise

    return f


if __name__ == '__main__':
    # usage: assembler.py <source file> <output binary>
    # 'with' guarantees both files are closed even when a pass raises
    with open(sys.argv[1], 'r') as f:
        with open(sys.argv[2], 'wb') as b:
            asm, label_index = first_pass(f)
            second_pass(b, asm, label_index)