view assembler/assembler.py @ 23:2efb577ac2d7

finished?
author james <jb302@eecs.qmul.ac.uk>
date Tue, 25 Feb 2014 01:55:39 +0000
parents c988e43bf2b5
children 45340c2a38c5
line wrap: on
line source
#!/usr/bin/env python2
# assembler.py
import struct
import sys
from language import *

# take source file and return preprocessed assembly code
# for each non-empty line in the file:
#   remove comments from source
#   store label definitions
#   remove label definitions from source
#   determine format of arguments:
#       store constant data
#       store data type symbols
#   return all this in a list
def first_pass(f):
    """Pre-process an assembly source file into an intermediate listing.

    f is rewound and read in full.  Each line has surrounding whitespace
    and any ';' comment removed, then is split on whitespace.  Tokens after
    the first are substituted with previously stored equates or previously
    defined labels, so both must be defined before they are used here.

    Returns a tuple (asm, label_index):
      asm         -- list of [mnemonic, arguments, symbols, constants]
                     entries, one per 'org', 'db' or instruction line
      label_index -- dict mapping each label name to the value the
                     program counter had where the label was defined

    Any exception raised while handling a line is re-raised after the
    offending line has been printed.
    """
    f.seek(0)
    # only lines that are exactly a newline are dropped here; lines holding
    # just whitespace or a comment are skipped inside the loop
    source_code = [text for text in f.readlines() if text != '\n']

    asm = []
    label_index = {}
    equ_index = {}
    pc = 0

    # <line> ::= [<statement>] [";"<comment>] <EOL>
    for line in source_code:
        try:
            # everything from the first ';' onwards is a comment
            line = line.strip().split(';', 1)[0]

            # <statement> ::= [ <label> ":"] <mnemonic> [<arguments>]
            #                 | <label> ":"
            #                 | "EOF"
            tokens = line.split()
            if not tokens:
                continue

            # substitute equates and already-known labels in every token
            # except the first; a leading type character and a trailing
            # ',' or ')' are set aside for the lookup and put back after
            for pos, word in enumerate(tokens[1:], 1):
                prefix = suffix = ''
                if word[0] in TYPES:
                    prefix, word = word[0], word[1:]
                if word and word[-1] in (',', ')'):
                    suffix, word = word[-1], word[:-1]

                if word in equ_index:
                    tokens[pos] = prefix + equ_index[word] + suffix
                elif word in label_index:
                    tokens[pos] = (prefix + num_string(label_index[word])
                                   + suffix)

            head = tokens[0]

            # 'org' moves the program counter and is recorded so the
            # second pass can reposition its output
            if head.lower() == 'org':
                asm.append(['org', tokens[1:], ('',), ''])
                pc = string_num(tokens[1])
                continue

            # a leading "<label>:" is stored at the current pc and dropped
            if head.endswith(':'):
                label_index[head[:-1]] = pc
                del tokens[0]

            # the line held nothing but a label
            if not tokens:
                continue

            # "<name> equ <value...>": names are case sensitive
            if len(tokens) >= 3 and tokens[1].lower() == 'equ':
                equ_index[tokens[0]] = ' '.join(tokens[2:])
                continue

            # <statement> ::= <mnemonic> [<arguments>]
            mnemonic = tokens[0].lower()
            arguments = ''.join(tokens[1:]).split(',')

            if mnemonic == 'db':
                # each db argument is tokenized with a '#' prefix and the
                # resulting constant data is stored back to back
                constants = ''.join(tokenize(['#' + arg])[1]
                                    for arg in arguments)
                symbols = ('',)
                width = len(constants)
            else:
                symbols, constants = tokenize(arguments)
                width = iset[mnemonic][symbols][1]

            asm.append([mnemonic, arguments, symbols, constants])
            pc = pc + width

        except:
            print ' ** first pass error **\nline:\n', line
            raise

    return asm, label_index

# take preprocessed asm and write machine code to binary file
# for each line of asm
#   check if it's an org or db command and deal with it accordingly
#   check if arguments are labels and replace with value
#   write instruction to file
def second_pass(f, asm, label_index):
    """Write the machine code for a pre-processed listing to a binary file.

    f           -- seekable file object opened for binary writing
    asm         -- list of [mnemonic, arguments, symbols, constants]
                   entries as returned by first_pass; argument lists are
                   updated in place when labels are resolved
    label_index -- dict mapping label names to addresses

    Returns f.

    Raises KeyError carrying the label name when an argument whose symbol
    is 'label' or '@label' cannot be found in label_index.  Any exception
    raised while assembling an entry is re-raised after the entry has been
    printed.
    """
    pc = 0

    for line in asm:
        # the output offset follows the program counter, so an 'org' entry
        # moves the position at which the next bytes are written
        f.seek(pc)
        mne, args, sym, const = line

        try:
            if mne == 'org':
                pc = string_num(args[0])
                continue
            if mne == 'db':
                f.write(const)
                pc = pc + len(const)
                continue

            # replace labels with addresses
            # NOTE(review): i only advances past non-empty arguments, so it
            # would drift from the position in args if an empty argument
            # preceded a label -- confirm against what tokenize() returns
            i = 0
            for a in args:
                if not a:
                    continue
                if sym[i] in ('label', '@label'):
                    if a[0] == '@' and a[1:] in label_index:
                        # labelled pointer: keep the '@' prefix
                        args[i] = '@' + num_string(label_index[a[1:]])
                    elif a in label_index:
                        # if the instruction has a form taking 'rel8' in
                        # this slot, encode the offset from the address of
                        # the current instruction (it may be negative)
                        rel = list(sym)
                        rel[i] = 'rel8'
                        if tuple(rel) in iset[mne]:
                            args[i] = num_string(label_index[a] - pc)
                        else:
                            args[i] = num_string(label_index[a])
                    else:
                        print '** label error **', line
                        # name the missing label in the exception
                        raise KeyError(a)
                    const = const + tokenize([args[i]])[1]
                i = i + 1

            # assemble to file
            op, width = iset[mne][sym]
            # paged addresses: bits 8-10 of the target are folded into the
            # opcode and only the last constant byte is written after it
            if mne in ('pcall', 'pjmp'):
                op = op | ((string_num(args[0]) & 0x7FF) >> 8)
                const = const[-1]
            f.write(struct.pack('>B', op))
            # zero-pad so opcode + padding + constants fill the full width
            # (a non-positive count writes nothing)
            f.write(struct.pack('>B', 0) * (width - len(const) - 1))
            f.write(const)

            pc = pc + width

        except:
            print '** second pass error **\nline:\n', line
            raise

    return f
    
if __name__ == '__main__':
    # usage: assembler.py <source file> <output file>
    if len(sys.argv) < 3:
        sys.exit('usage: %s <source file> <output file>' % sys.argv[0])

    # the with blocks close both files even if one of the passes raises;
    # the output file is still opened before the first pass runs
    with open(sys.argv[1], 'r') as f:
        with open(sys.argv[2], 'wb') as b:
            asm, label_index = first_pass(f)
            second_pass(b, asm, label_index)