Mercurial > hg > ede
view assembler/assembler.py @ 25:45340c2a38c5
tidier and less buggier
author | james <jb302@eecs.qmul.ac.uk> |
---|---|
date | Fri, 28 Feb 2014 17:21:11 +0000 |
parents | 2efb577ac2d7 |
children | 84716cd835dd |
line wrap: on
line source
#!/usr/bin/env python2
# assembler.py
import struct
import sys
from language import *


def _strip_leading_zeros(data):
    """Return data without leading NUL bytes, keeping one NUL if all are NUL.

    Only *leading* zeros are dropped, so a value whose low byte is zero
    (e.g. '\\x01\\x00') keeps both bytes. Empty input stays empty.
    """
    stripped = data.lstrip('\x00')
    if data and not stripped:
        # zero is the only number: keep a single byte for it
        return '\x00'
    return stripped


def _substitute(token, equates, labels):
    """Return token with a known equate or label name replaced by its value.

    One leading character found in `ids` and/or a leading '(' and a
    trailing ')' and/or ',' are peeled off before the lookup and put back
    afterwards. The token is returned unchanged when the bare name is
    neither a stored equate nor a stored label.
    """
    name = token
    prefix = suffix = ''
    # prefixes
    if name[0] in ids:
        prefix = prefix + name[0]
        name = name[1:]
    # guard: the token may have consisted of the prefix character only
    if name and (name[0] == '('):
        prefix = prefix + name[0]
        name = name[1:]
    # suffixes
    if name and (name[-1] == ','):
        suffix = suffix + name[-1]
        name = name[:-1]
    if name and (name[-1] == ')'):
        suffix = name[-1] + suffix
        name = name[:-1]
    # replace and put removed characters back
    if name in equates:
        return prefix + equates[name] + suffix
    # labels can be used in equates but they have
    # to be assigned before they are used as well
    if name in labels:
        return prefix + str(labels[name]) + suffix
    return token


def first_pass(f):
    """Take a source file and return preprocessed assembly code.

    For each non-empty line in the file:
      - remove comments (everything from the first ';')
      - replace equated strings and already-defined labels
      - store label definitions and remove the label from the statement
      - store new equates (names are case sensitive)
      - tokenize the arguments into a symbol and constant data
      - save [mnemonic, arguments, symbol, data] to a list
    'org' and 'db' statements are stored with the symbol ('',) so that
    second_pass() can handle them specially.

    Because substitution happens in this pass, equates and labels must be
    defined before they are used in order to be substituted here.

    f -- readable file object; it is rewound with seek(0) first.

    Returns (asm, labels): the list described above and a dict mapping
    label name to the program counter value at its definition.

    Any exception is reported together with the offending line and then
    re-raised.
    """
    asm = []
    labels = {}
    equates = {}
    pc = 0

    f.seek(0)
    # <line> ::= [<statement>] [";"<comment>] <EOL>
    for line in f.readlines():
        try:
            # remove surrounding whitespace and comments
            line = line.strip().split(';', 1)[0]

            # <statement> ::= [ <label> ":"] <mnemonic> [<arguments>]
            #               | <label> ":"
            #               | "EOF"
            statement = line.split()
            if not statement:
                # blank or comment-only line
                continue

            # replace equated strings in everything after the first word
            for i, token in enumerate(statement[1:], 1):
                statement[i] = _substitute(token, equates, labels)

            # deal with org
            if statement[0].lower() == 'org':
                asm.append(['org', statement[1:], ('',), ''])
                pc = stoi(statement[1])
                continue
            # if needed record the label and remove it
            elif statement[0][-1] == ':':
                labels[statement[0][:-1]] = pc
                del statement[0]

            # store equates
            # these are case sensitive
            if (len(statement) >= 3) and (statement[1].lower() == 'equ'):
                equates[statement[0]] = ' '.join(statement[2:])
                continue

            # the line held nothing but a label
            if not statement:
                continue

            # <statement> ::= <mnemonic> [<arguments>]
            mne = statement[0].lower()
            args = ''.join(statement[1:]).split(',')

            # deal with db: each argument is tokenized as an immediate
            # ('#' prefix) and stored without its leading zero bytes
            if mne == 'db':
                const = ''.join(
                    _strip_leading_zeros(tokenize(mne, ['#' + a])[1])
                    for a in args)
                asm.append([mne, args, ('',), const])
                pc = pc + len(const)
                continue

            # tokenize
            sym, const = tokenize(mne, args)
            asm.append([mne, args, sym, const])
            # increase pc by the instruction width from the instruction set
            pc = pc + iset[mne][sym][1]

        except:
            # bare except is deliberate: nothing is swallowed, the line is
            # only reported before the exception propagates.
            # (single parenthesised expression: prints the same text as the
            # old two-item print statement and also parses on Python 3)
            print(' ** first pass error **\nline:\n%s' % (line,))
            raise

    return asm, labels


def second_pass(f, asm, labels):
    """Take preprocessed asm and write machine code to a binary file.

    For each entry of asm:
      - 'org' moves the output position, 'db' writes its data verbatim
      - arguments whose symbol is 'label' or '@label' are replaced with
        the label's value (relative to the current pc for mnemonics in
        `rinst`) and tokenized into constant data
      - the opcode byte and the constant data are written at pc

    f      -- writable, seekable binary file object
    asm    -- list produced by first_pass(); label arguments are
              overwritten in place with their resolved values
    labels -- dict of label name -> address produced by first_pass()

    Returns f.

    Raises ValueError when the constant data is wider than the
    instruction and does not start with a zero byte. Any exception is
    reported together with the offending entry and then re-raised.
    """
    pc = 0

    for line in asm:
        f.seek(pc)
        mne, args, sym, const = line

        try:
            # deal with org and db
            if mne == 'org':
                pc = stoi(args[0])
                continue
            elif mne == 'db':
                f.write(const)
                pc = pc + len(const)
                continue

            # replace labels with addresses
            # NOTE(review): i is not advanced for an empty argument, so it
            # only tracks the position while no empty argument precedes a
            # label -- confirm tokenize() never yields that shape.
            i = 0
            for a in args:
                if not a:
                    continue
                elif sym[i] in ('label', '@label'):
                    if (a[0] == '@') and (a[1:] in labels):
                        # labeled pointer
                        args[i] = '@' + str(labels[a[1:]])
                    elif mne in rinst:
                        # constant needs to be a relative address
                        args[i] = str(labels[a] - pc)
                    else:
                        args[i] = str(labels[a])
                    const = const + tokenize(mne, [args[i]])[1]
                i = i + 1

            # assemble to file
            op, width = iset[mne][sym]

            # paged addresses: bits 8-10 of the target go into the opcode
            # byte and only the last constant byte is written after it
            if mne in ['pcall', 'pjmp']:
                op = op | ((stoi(args[0]) & 0x7FF) >> 8)
                const = const[-1]
            f.write(struct.pack('>B', op))

            # check length and write constant or throw error
            # NOTE(review): only the first byte is checked before `of`
            # bytes are dropped -- confirm `of` never exceeds the number
            # of leading zero bytes.
            of = len(const) - width + 1
            if of > 0:
                if const[0] == '\x00':
                    const = const[of:]
                else:
                    raise ValueError(
                        'constant does not fit instruction width')
            f.write(const)
            pc = pc + width

        except:
            # see first_pass(): report the entry, then propagate
            print('** second pass error **\nline:\n%s' % (line,))
            raise

    return f


if __name__ == '__main__':
    # usage: assembler.py <source> [<output>]   (output defaults to a.out)
    out_path = sys.argv[2] if len(sys.argv) > 2 else 'a.out'

    # the context managers close both files even when a pass raises, and
    # the output is only created once the source has assembled cleanly
    with open(sys.argv[1], 'r') as src:
        asm, labels = first_pass(src)
    with open(out_path, 'wb') as out:
        second_pass(out, asm, labels)