annotate assembler/assembler.py @ 6:3c8b4a4a1787

added some scratch-code for the emulator Wrote my interim report
author james <jb302@eecs.qmul.ac.uk>
date Sat, 07 Dec 2013 16:35:13 +0000
parents d8d210fc1ad9
children 81dd03d17c22
rev   line source
jb302@1 1 #!/usr/bin/env python2
jb302@1 2 # assembler.py
jb302@1 3 import struct
jb302@1 4 import sys
jb302@1 5 from language import *
jb302@1 6
jb302@1 7 # take source file and return preprocessed assembly code
jb302@1 8 # for each non-empty line in the file:
jb302@1 9 # remove comments from source
jb302@1 10 # store label definitions
jb302@1 11 # remove label definitions from source
jb302@1 12 # determine format of arguments:
jb302@1 13 # store constant data
jb302@1 14 # store data type symbols
jb302@1 15 # return all this in a list
def first_pass(f):
    """Preprocess an assembly source file into a list of statements.

    Grammar handled per line::

        <line>      ::= [<statement>] [";" <comment>] <EOL>
        <statement> ::= [<label> ":"] <mnemonic> [<arguments>]
                      | <label> ":"

    Args:
        f: seekable text file object holding the assembly source. It is
            rewound to the start before reading.

    Returns:
        A list with one ``[mnemonic, symbols, constants, label]`` entry per
        non-empty statement, in source order.  ``label`` is the label text
        without its trailing ``:`` or ``None`` when the line has no label.
        A line that carries only a label is stored as
        ``['', [], '', label]``.  ``symbols`` and ``constants`` are whatever
        ``tokenize`` returns for the comma-separated argument list
        (``tokenize`` is not defined in this file; presumably it comes from
        the ``language`` star-import -- confirm there).
    """
    f.seek(0)
    asm = []

    for raw_line in f:
        # Drop everything from the first ';' onwards, then normalise case.
        # Blank and comment-only lines produce no tokens and are skipped.
        statement = raw_line.partition(';')[0].lower().split()
        if not statement:
            continue

        # A leading token ending in ':' is a label definition; peel it off.
        label = None
        if statement[0].endswith(':'):
            label = statement[0][:-1]
            del statement[0]

        # Label-only line: keep the label but use empty (falsy)
        # placeholders for the instruction fields so that later stages can
        # recognise the line as carrying no instruction.
        if not statement:
            asm.append(['', [], '', label])
            continue

        # <statement> ::= <mnemonic> [<arguments>]
        mnemonic = statement[0]
        # Re-join the remaining tokens so whitespace around commas is
        # ignored, then split into individual arguments.  With no arguments
        # this yields [''].
        arguments = ''.join(statement[1:]).split(',')

        symbols, constants = tokenize(arguments)
        asm.append([mnemonic, symbols, constants, label])

    return asm
jb302@1 64
jb302@1 65 # take preprocessed asm and write machine code to binary file
def second_pass(f, asm):
    """Assemble preprocessed statements and write machine code to ``f``.

    Args:
        f: seekable binary file object opened for writing.
        asm: list of ``[mnemonic, symbols, constants, label]`` entries as
            produced by ``first_pass``.  The entries are not modified.

    Returns:
        ``f``, rewound to offset 0, on success.  If a mnemonic/argument
        combination is not found in ``iset``, a ``syntax error`` message is
        printed and the string ``'symbol_error'`` is returned instead;
        anything written for earlier statements stays in ``f``.

    Note:
        A label is added to the index only when its own line is reached, so
        a symbol that names a label defined on a *later* line is not
        replaced by an address here.  NOTE(review): such forward references
        will most likely end up reported as a syntax error; resolving them
        needs instruction sizes ahead of time, which depend on ``tokenize``.
    """
    pc = 0
    label_index = {}

    for mne, sym, const, label in asm:
        # A label refers to the address of the next emitted instruction.
        if label:
            label_index[label] = pc
        # Label-only lines emit nothing.
        if not mne:
            continue

        # Replace known labels with their address.  Work on a copy so the
        # caller's asm stays intact and can be assembled again.
        resolved = list(sym)
        for i, s in enumerate(resolved):
            if s in label_index:
                resolved[i] = num_string(label_index[s])
                # The encoded address becomes part of the operand bytes.
                const = const + tokenize([resolved[i]])[1]

        # Re-tokenize so substituted addresses are classified like any other
        # numeric argument; a tuple is needed to use the result as a key.
        key = tuple(tokenize(resolved)[0])

        f.seek(pc)
        try:
            opcode = iset[mne][key]
        except (KeyError, TypeError):
            # Unrecognised mnemonic or argument format.
            print('syntax error: %s %s' % (mne, key))
            return 'symbol_error'

        # One opcode byte followed by the operand data.
        f.write(struct.pack('>B', opcode))
        f.write(const)

        pc = pc + len(const) + 1

    f.seek(0)
    return f
jb302@1 108
if __name__ == '__main__':
    # usage: assembler.py <source.asm> <output.bin>
    with open(sys.argv[1], 'r') as f:
        asm = first_pass(f)

    # Keep the file handle separate from second_pass's return value: on
    # failure it returns the string 'symbol_error', which has no close().
    with open(sys.argv[2], 'wb') as b:
        result = second_pass(b, asm)

    if result == 'symbol_error':
        # second_pass has already printed the offending statement.
        sys.exit(1)
jb302@1 116