view assembler/assembler.py @ 8:81dd03d17c22

first_pass() didn't need a pc
author james <jb302@eecs.qmul.ac.uk>
date Sat, 07 Dec 2013 20:32:27 +0000
parents d8d210fc1ad9
children e9dc055a0f8b
line wrap: on
line source
#!/usr/bin/env python2
# assembler.py
import struct
import sys
from language import *

def first_pass(f):
    """Preprocess an assembly source file into a list of statements.

    Line grammar handled here:
        <line>      ::= [<statement>] [";"<comment>] <EOL>
        <statement> ::= [<label> ":"] <mnemonic> [<arguments>]
                      | <label> ":"
    ("end" is not special-cased; it is treated like any other mnemonic.)

    f is rewound and read in full.  Each line is stripped, cut at the
    first ';' and lower-cased; lines left with no statement are skipped.

    Returns a list with one entry per remaining line:
        [mnemonic, symbols, constants, label]
    symbols and constants are whatever tokenize() returns for the
    comma-separated argument list.  label is the label name with the
    trailing ':' removed, or None when the line defines no label.
    A line holding only a label yields ['', [], '', label].
    """
    f.seek(0)

    program = []
    for raw in f.readlines():
        # strip the EOL and outer whitespace, then discard the comment
        text = raw.strip().partition(';')[0].lower()
        tokens = text.split()

        # blank and comment-only lines carry no statement
        if not tokens:
            continue

        # a leading token that ends in ':' is a label definition
        if tokens[0].endswith(':'):
            label = tokens[0][:-1]
            tokens = tokens[1:]
        else:
            label = None

        if tokens:
            mnemonic = tokens[0]
            # glue the remaining tokens together so that whitespace
            # around the commas does not affect the argument split
            arguments = ''.join(tokens[1:]).split(',')
            symbols, constants = tokenize(arguments)
            entry = [mnemonic, symbols, constants, label]
        else:
            # label on its own: placeholders that all test False
            entry = ['', [], '', label]
        program.append(entry)

    return program

def second_pass(f, asm):
    """Assemble preprocessed statements and write machine code to f.

    f   -- seekable, writable binary file object.
    asm -- list of [mnemonic, symbols, constants, label] entries as
           produced by first_pass().  It is not modified.

    Each instruction is written at offset pc as one opcode byte
    (iset[mnemonic][tuple(symbols)]) followed by its constant data;
    pc then advances by len(constants) + 1.  A label is recorded at the
    pc of the entry that carries it.  A symbol naming an already
    recorded label is replaced by num_string(address), and the constant
    bytes tokenize() yields for that address are appended to the
    instruction's constants.

    NOTE: labels are only known once their defining entry has been
    visited, so a reference to a label defined further down is left
    unresolved and will presumably fail the opcode lookup.

    Returns f rewound to offset 0 on success.  If the mnemonic/symbol
    combination is not in iset, prints a 'syntax error' line and returns
    the string 'symbol_error'; nothing is written for that instruction.
    Errors raised while writing to f propagate to the caller.
    """
    pc = 0
    label_index = {}

    for mne, sym, const, label in asm:
        # if there is a label tag add label to index
        if label:
            label_index[label] = pc
        # skip instructionless lines
        if not mne:
            continue

        # replace labels with addresses; build a new list so the
        # caller's asm entries are left untouched
        resolved = []
        for s in sym:
            if s in label_index:
                s = num_string(label_index[s])
                const = const + tokenize([s])[1]
            resolved.append(s)
        # re-tokenize, then make the symbols hashable for the lookup
        sym = tuple(tokenize(resolved)[0])

        f.seek(pc)
        # Only the table lookup signals a syntax problem; keep the file
        # writes outside the try so I/O failures are not misreported.
        try:
            opcode = iset[mne][sym]
        except (KeyError, TypeError):
            # unrecognised mnemonic or argument format
            print('syntax error: %s %s' % (mne, sym))
            return 'symbol_error'

        f.write(struct.pack('>B', opcode))
        f.write(const)

        pc = pc + len(const) + 1

    f.seek(0)
    return f
    
if __name__ == '__main__':
    # usage: assembler.py <source file> <output file>
    if len(sys.argv) < 3:
        sys.exit('usage: %s <source> <output>' % sys.argv[0])

    # 'with' closes each file even if a pass raises
    with open(sys.argv[1], 'r') as f:
        asm = first_pass(f)

    # Keep the file handle and the result in separate names:
    # second_pass() returns the string 'symbol_error' on failure, which
    # must not replace the handle that still has to be closed.
    with open(sys.argv[2], 'wb') as b:
        result = second_pass(b, asm)

    if result == 'symbol_error':
        sys.exit(1)