view assembler/assembler.py @ 14:2b8eb2c86602

Major update to assembler design; it now handles labels properly. Next step: unit tests. Added boot procedure and experimental function look-up table to emulator. Started implementing operations; tested SET and CLR operations with success.
author james <jb302@eecs.qmul.ac.uk>
date Wed, 29 Jan 2014 20:11:07 +0000
parents e9dc055a0f8b
children 9bbdf7258cd8
line wrap: on
line source
#!/usr/bin/env python2
# assembler.py
import struct
import sys
from language import *

def first_pass(f):
    """Preprocess an assembly source file into an intermediate listing.

    For each line of ``f`` the ``;`` comment is removed, the text is
    lower-cased and split on whitespace.  A leading token ending in ``:``
    is stored as a label at the current program counter and removed from
    the statement.  The remaining mnemonic and comma-separated arguments
    are handed to ``tokenize`` and the instruction width is read from
    ``iset`` to advance the program counter.

    Arguments:
        f: seekable, readable text file object; it is rewound to the
           start before being read.

    Returns:
        A tuple ``(asm, label_index)``.  ``asm`` holds one
        ``[mnemonic, arguments, symbols, constants]`` entry per statement
        (``['', [], [], '']`` for a line that only defines a label) and
        ``label_index`` maps each label name to the program counter value
        at its definition.

    NOTE(review): the ``iset`` lookup below is not guarded, so an unknown
    mnemonic or argument format presumably raises here (KeyError if
    ``iset`` is a dict) before second_pass can report it -- confirm that
    this is intended.
    """
    f.seek(0)

    asm = []
    label_index = {}
    pc = 0

    # <line> ::= [<statement>] [";"<comment>] <EOL>
    for line in f:
        # everything from the first ';' onwards is a comment
        code = line.partition(';')[0].lower()

        # <statement> ::= [ <label> ":"] <mnemonic> [<arguments>]
        #                 | <label> ":"
        #                 | "end"
        # split() also discards the EOL and surrounding whitespace, so
        # blank and comment-only lines end up as an empty statement
        statement = code.split()
        if not statement:
            continue

        # if needed update index and remove label
        if statement[0].endswith(':'):
            label_index[statement[0][:-1]] = pc
            statement.pop(0)

        # label-only line: store a placeholder whose fields are all falsy
        if not statement:
            asm.append(['', [], [], ''])
            continue

        # <statement> ::= <mnemonic> [<arguments>]
        mnemonic = statement[0]
        # re-join first so whitespace around the commas is ignored
        arguments = ''.join(statement[1:]).split(',')

        symbols, constants = tokenize(arguments)
        width = iset[mnemonic][symbols][1]

        asm.append([mnemonic, arguments, symbols, constants])

        pc = pc + width

    return asm, label_index

def second_pass(f, asm, label_index):
    """Write machine code for a preprocessed listing to a binary file.

    For every entry of ``asm`` that has a mnemonic, arguments naming a
    label are replaced by ``num_string`` of the label's address and the
    tokenized value is appended to the entry's constant data.  The opcode
    found in ``iset`` is then written as one unsigned byte, followed by
    the constant data.

    Arguments:
        f: seekable file object opened for binary writing.
        asm: list of ``[mnemonic, arguments, symbols, constants]`` entries
             as produced by first_pass.  The ``arguments`` lists are
             modified in place when a label is substituted.
        label_index: mapping of label name to address.

    Returns:
        ``f`` rewound to offset 0 on success, or the string
        ``'symbol_error'`` if a mnemonic / argument format is not found in
        ``iset`` (a 'syntax error' line is printed in that case).

    Only the ``iset`` lookup is treated as a syntax error; failures while
    packing or writing propagate to the caller instead of being reported
    as a syntax error.

    NOTE(review): the address advances by ``len(const) + 1`` here while
    first_pass uses the width stored in ``iset``; 16 bit arguments still
    need to be padded for the two to agree -- confirm.
    """
    pc = 0

    for mne, args, sym, const in asm:
        # skip instructionless lines
        if not mne:
            continue

        # replace labels with addresses
        for i, a in enumerate(args):
            if a in label_index:
                args[i] = num_string(label_index[a])
                const = const + tokenize([args[i]])[1]

        # an unrecognised mnemonic or argument format has no opcode
        try:
            opcode = iset[mne][sym][0]
        except (KeyError, TypeError):
            print('syntax error: %s %s' % (mne, sym))
            return 'symbol_error'

        # assemble to file
        f.seek(pc)
        f.write(struct.pack('>B', opcode))
        f.write(const)
        pc = pc + len(const) + 1

    f.seek(0)
    return f
    
if __name__ == '__main__':
    # usage: assembler.py <source file> <output file>
    if len(sys.argv) < 3:
        sys.stderr.write('usage: %s <source file> <output file>\n'
                         % sys.argv[0])
        sys.exit(2)

    # run the first pass before touching the output file, so a failure
    # while reading the source does not truncate an existing binary
    with open(sys.argv[1], 'r') as src:
        asm, label_index = first_pass(src)

    # second_pass returns the string 'symbol_error' on failure, so keep
    # its result separate from the file handle that has to be closed
    with open(sys.argv[2], 'wb') as out:
        result = second_pass(out, asm, label_index)

    if result == 'symbol_error':
        sys.exit(1)