view assembler/assembler.py @ 19:c9fbeb852ff2

unmangled if/elif/else structures in assembler
author james <jb302@eecs.qmul.ac.uk>
date Sun, 23 Feb 2014 14:47:41 +0000
parents 256d24488e3f
children c988e43bf2b5
line wrap: on
line source
#!/usr/bin/env python2
# assembler.py
import struct
import sys
from language import *

# take source file and return preprocessed assembly code
# for each non-empty line in the file:
#   remove comments from source
#   store label definitions
#   remove label definitions from source
#   determine format of arguments:
#       store constant data
#       store data type symbols
#   return the preprocessed lines (a list) together with the label index
def first_pass(f):
    """Preprocess assembly source and build the label index.

    Every line of ``f`` is read from the start of the file, stripped of its
    ``;`` comment, lower-cased and split on whitespace.  A statement is then
    handled as one of:

    * ``org <addr>``           -- sets the location counter to ``<addr>``
    * ``db <v> <v> ...``       -- advances the counter by one per operand
    * ``<name> equ <value>``   -- records an equate
    * ``<label>:``             -- records the current location counter
    * ``<mnemonic> [<args>]``  -- advances the counter by the instruction
                                  width looked up in ``iset``

    A label may precede an instruction or an ``org``/``db`` directive on the
    same line.  Equates are substituted into instruction arguments during
    this pass, so they must be defined before they are used.

    f: readable file-like object supporting ``seek`` and ``readlines``.

    Returns ``(asm, label_index)``.  ``asm`` is a list of
    ``[mnemonic, arguments, symbols, constants]`` entries (directives carry
    ``('',)`` and ``''`` for the last two fields); ``label_index`` maps each
    label name to the location counter at its definition.

    Any exception raised while processing a line is re-raised after the
    offending (already comment-stripped, lower-cased) line has been written
    to stdout.
    """
    f.seek(0)

    asm = []
    label_index = {}
    equ_index = {}
    pc = 0

    # <line> ::= [<statement>] [";"<comment>] <EOL>
    for line in f.readlines():

        try:
            # remove EOL, then everything from the first ';' onwards
            line = line.strip().split(';', 1)[0].lower()

            # <statement> ::= [ <label> ":"] <mnemonic> [<arguments>]
            #                 | <label> ":"
            #                 | "EOF"
            statement = line.split()

            # blank and comment-only lines produce nothing
            if not statement:
                continue

            if statement[0] not in ('org', 'db'):
                # store equates
                if len(statement) >= 3 and statement[1] == 'equ':
                    equ_index[statement[0]] = statement[2]
                    continue
                # if needed update index and remove label
                if statement[0].endswith(':'):
                    label_index[statement[0][:-1]] = pc
                    del statement[0]
                    # the line held nothing but a label
                    if not statement:
                        continue

            # deal with org and db; checked after label removal so that
            # "table: db 1 2 3" is treated as data, not as an instruction
            if statement[0] == 'org':
                asm.append([statement[0], statement[1:], ('',), ''])
                pc = string_num(statement[1])
                continue
            if statement[0] == 'db':
                asm.append([statement[0], statement[1:], ('',), ''])
                pc = pc + len(statement[1:])
                continue

            # <statement> ::= <mnemonic> [<arguments>]
            mnemonic = statement[0]
            arguments = ''.join(statement[1:]).split(',')

            # replace equated arguments
            # because this happens on the first pass
            # equates must be assigned before they are used
            arguments = [equ_index.get(a, a) for a in arguments]

            symbols, constants = tokenize(arguments)
            width = iset[mnemonic][symbols][1]

            asm.append([mnemonic, arguments, symbols, constants])
            pc = pc + width

        except Exception:
            # sys.stdout.write keeps this function valid syntax on both
            # Python 2 and Python 3
            sys.stdout.write(' ** first pass error **\nline:\n' + line + '\n')
            raise

    return asm, label_index

# take preprocessed asm and write machine code to binary file
# for each line of asm
#   check if it's an org or db command and deal with it accordingly
#   check if arguments are labels and replace with value
#   write instruction to file
def second_pass(f, asm, label_index):
    """Write machine code for preprocessed assembly to a binary file.

    For each entry of ``asm`` the file position is first moved to the
    current location counter, then:

    * ``org`` sets the location counter to its first argument;
    * ``db`` writes each argument as one unsigned byte;
    * any other mnemonic has its label arguments replaced by numeric
      strings, after which the opcode, zero padding up to the instruction
      width and the constant bytes are written.

    f: seekable, writable binary file-like object.
    asm: list of ``[mnemonic, arguments, symbols, constants]`` entries.
        The argument lists are modified in place when labels are resolved.
    label_index: mapping of label name to address.

    Returns ``f``.

    Raises ``ValueError`` when an argument tagged as a label is not present
    in ``label_index``.  Any exception raised while assembling an entry is
    re-raised after that entry has been written to stdout.
    """
    pc = 0

    for line in asm:
        f.seek(pc)
        mne, args, sym, const = line

        try:
            if mne == 'org':
                pc = string_num(args[0])
                continue
            if mne == 'db':
                data = b''.join(
                    [struct.pack('>B', string_num(a)) for a in args])
                f.write(data)
                pc = pc + len(data)
                continue

            # replace labels with addresses
            # enumerate keeps args[i] and sym[i] aligned even when an
            # empty argument is skipped
            for i, a in enumerate(args):
                if not a or sym[i] not in ('label', '@label'):
                    continue

                # labeled pointer uglyness
                if a[0] == '@' and a[1:] in label_index:
                    args[i] = '@' + num_string(label_index[a[1:]])
                elif a in label_index:
                    # check if constant needs to be a relative address:
                    # does this mnemonic have a form with 'rel8' in the
                    # label's position?
                    r = list(sym)
                    r[i] = 'rel8'
                    if tuple(r) in iset[mne]:
                        # relative addresses can be negative
                        args[i] = num_string(label_index[a] - pc)
                    else:
                        args[i] = num_string(label_index[a])
                else:
                    sys.stdout.write(
                        '** label error **\nline:\n%s\n' % (line,))
                    raise ValueError('undefined label: %s' % a)

                const = const + tokenize([args[i]])[1]

            # assemble to file
            op, width = iset[mne][sym]
            f.write(struct.pack('>B', op))
            # pad if needed (a non-positive count writes nothing)
            f.write(b'\x00' * (width - len(const) - 1))
            f.write(const)

            pc = pc + width

        except Exception:
            # sys.stdout.write keeps this function valid syntax on both
            # Python 2 and Python 3
            sys.stdout.write(
                '** second pass error **\nline:\n%s\n' % (line,))
            raise

    return f
    
if __name__ == '__main__':
    # usage: assembler.py <source file> <output binary>
    # The source is fully preprocessed before the output file is opened, so
    # a first-pass error does not truncate an existing binary.  The with
    # blocks close both files even when a pass raises.
    with open(sys.argv[1], 'r') as f:
        asm, label_index = first_pass(f)
    with open(sys.argv[2], 'wb') as b:
        second_pass(b, asm, label_index)