view assembler/language.py @ 24:19cbcbde19af

.
author james <jb302@eecs.qmul.ac.uk>
date Tue, 25 Feb 2014 02:05:01 +0000
parents 2efb577ac2d7
children 45340c2a38c5
line wrap: on
line source
#!/usr/bin/env python
# language.py
import struct

# these definitions are here to make changing the source code
# representation of numbers easier
# BASE is the radix that string_num() passes to int()
BASE = 16
# PREFIX is the marker tokenize() looks for at the start of a numeric literal
# prefix must be only 2 characters otherwise tokenize() will break
# (tokenize() sizes literals with length checks that assume a 2 character prefix)
PREFIX = '0x'
# leading characters that tag an argument's data type:
# '#' ints, '@' pointers, '$' ports, '*' vectors
TYPES = ['#', '@', '$', '*']

def num_string(num):
    """Return the source-code text form of the integer num, as given by hex()."""
    text = hex(num)
    return text

def string_num(string):
    """Parse the numeric literal string into an int, using BASE as the radix."""
    value = int(string, BASE)
    return value

# valid reserved arguments for this instruction set
# tokenize() lower-cases each argument and, when the result is listed here,
# emits it directly as that argument's symbol.
# the empty string is the symbol iset uses for instructions written
# without an argument (e.g. 'nop')
vargs = ('', '@a+dptr', 'dptr', 'bs', '@a+pc', 
         'a', 'c','r0', 'r1', 'r2', 'r3', 'ie',
         'sp', 'flags', 'dpl', 'dph', '@dptr',
         'sph', 'spl')

# instruction set table: a dictionary of dictionaries.
# for every mnemonic in the instruction set index
# there is an index of possible argument formats ('symbol' tuples, in the
# form returned by tokenize()) for that mnemonic, each mapped to a list
# holding the corresponding op code and its length: [op code, length]
# instructions written without an argument use the key ('',)
# NOTE(review): length is presumably the encoded instruction size in bytes
# (op code plus operand data) -- confirm against the assembler that uses it
iset =  {    'add': {    ('a', '#data8'): [166, 2],
                         ('a', '@dptr'): [167, 1],
                         ('a', 'dph'): [164, 1],
                         ('a', 'dpl'): [165, 1],
                         ('a', 'r0'): [160, 1],
                         ('a', 'r1'): [161, 1],
                         ('a', 'r2'): [162, 1],
                         ('a', 'r3'): [163, 1]},
             'addc': {    ('a', '#data8'): [174, 2],
                          ('a', '@dptr'): [175, 1],
                          ('a', 'dph'): [172, 1],
                          ('a', 'dpl'): [173, 1],
                          ('a', 'r0'): [168, 1],
                          ('a', 'r1'): [169, 1],
                          ('a', 'r2'): [170, 1],
                          ('a', 'r3'): [171, 1]},
             'anl': {    ('a', '#data8'): [134, 2],
                         ('a', '@dptr'): [135, 1],
                         ('a', 'dph'): [132, 1],
                         ('a', 'dpl'): [133, 1],
                         ('a', 'r0'): [128, 1],
                         ('a', 'r1'): [129, 1],
                         ('a', 'r2'): [130, 1],
                         ('a', 'r3'): [131, 1]},
             'cjne': {    ('a', '#data8', 'label'): [223, 3],
                          ('a', '#data8', 'rel8'): [223, 3],
                          ('r0', '#data8', 'label'): [212, 2],
                          ('r0', '#data8', 'rel8'): [212, 2],
                          ('r1', '#data8', 'label'): [213, 2],
                          ('r1', '#data8', 'rel8'): [213, 2],
                          ('r2', '#data8', 'label'): [214, 2],
                          ('r2', '#data8', 'rel8'): [214, 2],
                          ('r3', '#data8', 'label'): [215, 2],
                          ('r3', '#data8', 'rel8'): [215, 2]},
             'clr': {    ('bs',): [11, 1], ('c',): [9, 1], ('ie',): [13, 1]},
             'cpl': {    ('a',): [15, 1], ('c',): [14, 1]},
             'da': {    ('a',): [250, 1]},
             'dec': {    ('a',): [159, 1], ('dptr',): [157, 1]},
             'div': {    ('r0', 'r1'): [249, 1]},
             'djnz': {    ('r0', 'label'): [208, 2],
                          ('r0', 'rel8'): [208, 2],
                          ('r1', 'label'): [209, 2],
                          ('r1', 'rel8'): [209, 2],
                          ('r2', 'label'): [210, 2],
                          ('r2', 'rel8'): [210, 2],
                          ('r3', 'label'): [211, 2],
                          ('r3', 'rel8'): [211, 2]},
             'hlt': {    ('',): [255, 1]},
             'in': {    ('a', 'port_addr'): [252, 2]},
             'inc': {    ('a',): [158, 1], ('dptr',): [156, 1]},
             'int': {    ('vect8',): [254, 2]},
             'jc': {    ('label',): [226, 2], ('rel8',): [226, 2]},
             'jmp': {    ('@a+dptr',): [221, 1], ('@dptr',): [222, 1]},
             'jnc': {    ('label',): [227, 2], ('rel8',): [227, 2]},
             'jns': {    ('label',): [231, 2], ('rel8',): [231, 2]},
             'jnz': {    ('label',): [225, 2], ('rel8',): [225, 2]},
             'jpe': {    ('label',): [229, 2], ('rel8',): [229, 2]},
             'jpo': {    ('label',): [228, 2], ('rel8',): [228, 2]},
             'js': {    ('label',): [230, 2], ('rel8',): [230, 2]},
             'jz': {    ('label',): [224, 2], ('rel8',): [224, 2]},
             'laf': {    ('',): [18, 1]},
             'lcall': {    ('addr16',): [217, 3], ('label',): [217, 3]},
             'ljmp': {    ('addr16',): [216, 3], ('label',): [216, 3]},
             'mov': {    ('@addr16', 'a'): [29, 3],
                         ('@dptr', 'a'): [31, 1],
                         ('@dptr', 'dph'): [36, 1],
                         ('@dptr', 'dpl'): [37, 1],
                         ('@dptr', 'r0'): [32, 1],
                         ('@dptr', 'r1'): [33, 1],
                         ('@dptr', 'r2'): [34, 1],
                         ('@dptr', 'r3'): [35, 1],
                         ('@dptr', 'sph'): [38, 1],
                         ('@dptr', 'spl'): [39, 1],
                         ('@label', 'a'): [29, 3],
                         ('a', '#data8'): [21, 2],
                         ('a', '@a+dptr'): [26, 1],
                         ('a', '@a+pc'): [27, 1],
                         ('a', '@addr16'): [28, 3],
                         ('a', '@dptr'): [30, 1],
                         ('a', '@label'): [28, 3],
                         ('a', 'addr16'): [24, 3],
                         ('a', 'dph'): [60, 1],
                         ('a', 'dpl'): [61, 1],
                         ('a', 'label'): [24, 3],
                         ('a', 'r0'): [56, 1],
                         ('a', 'r1'): [57, 1],
                         ('a', 'r2'): [58, 1],
                         ('a', 'r3'): [59, 1],
                         ('a', 'sph'): [62, 1],
                         ('a', 'spl'): [63, 1],
                         ('addr16', 'a'): [25, 3],
                         ('dph', '#data8'): [44, 2],
                         ('dph', '@dptr'): [100, 1],
                         ('dph', 'a'): [52, 1],
                         ('dph', 'dpl'): [101, 1],
                         ('dph', 'r0'): [96, 1],
                         ('dph', 'r1'): [97, 1],
                         ('dph', 'r2'): [98, 1],
                         ('dph', 'r3'): [99, 1],
                         ('dph', 'sph'): [102, 1],
                         ('dph', 'spl'): [103, 1],
                         ('dpl', '#data8'): [45, 2],
                         ('dpl', '@dptr'): [109, 1],
                         ('dpl', 'a'): [53, 1],
                         ('dpl', 'dph'): [108, 1],
                         ('dpl', 'r0'): [104, 1],
                         ('dpl', 'r1'): [105, 1],
                         ('dpl', 'r2'): [106, 1],
                         ('dpl', 'r3'): [107, 1],
                         ('dpl', 'sph'): [110, 1],
                         ('dpl', 'spl'): [111, 1],
                         ('dptr', '#data16'): [23, 3],
                         ('dptr', 'sp'): [19, 1],
                         ('label', 'a'): [25, 3],
                         ('r0', '#data8'): [40, 2],
                         ('r0', '@dptr'): [64, 1],
                         ('r0', 'a'): [48, 1],
                         ('r0', 'dph'): [68, 1],
                         ('r0', 'dpl'): [69, 1],
                         ('r0', 'r1'): [65, 1],
                         ('r0', 'r2'): [66, 1],
                         ('r0', 'r3'): [67, 1],
                         ('r0', 'sph'): [70, 1],
                         ('r0', 'spl'): [71, 1],
                         ('r1', '#data8'): [41, 2],
                         ('r1', '@dptr'): [73, 1],
                         ('r1', 'a'): [49, 1],
                         ('r1', 'dph'): [76, 1],
                         ('r1', 'dpl'): [77, 1],
                         ('r1', 'r0'): [72, 1],
                         ('r1', 'r2'): [74, 1],
                         ('r1', 'r3'): [75, 1],
                         ('r1', 'sph'): [78, 1],
                         ('r1', 'spl'): [79, 1],
                         ('r2', '#data8'): [42, 2],
                         ('r2', '@dptr'): [82, 1],
                         ('r2', 'a'): [50, 1],
                         ('r2', 'dph'): [84, 1],
                         ('r2', 'dpl'): [85, 1],
                         ('r2', 'r0'): [80, 1],
                         ('r2', 'r1'): [81, 1],
                         ('r2', 'r3'): [83, 1],
                         ('r2', 'sph'): [86, 1],
                         ('r2', 'spl'): [87, 1],
                         ('r3', '#data8'): [43, 2],
                         ('r3', '@dptr'): [91, 1],
                         ('r3', 'a'): [51, 1],
                         ('r3', 'dph'): [92, 1],
                         ('r3', 'dpl'): [93, 1],
                         ('r3', 'r0'): [88, 1],
                         ('r3', 'r1'): [89, 1],
                         ('r3', 'r2'): [90, 1],
                         ('r3', 'sph'): [94, 1],
                         ('r3', 'spl'): [95, 1],
                         ('sp', '#data16'): [22, 3],
                         ('sp', 'dptr'): [20, 1],
                         ('sph', '#data8'): [46, 2],
                         ('sph', '@dptr'): [118, 1],
                         ('sph', 'a'): [54, 1],
                         ('sph', 'dph'): [116, 1],
                         ('sph', 'dpl'): [117, 1],
                         ('sph', 'r0'): [112, 1],
                         ('sph', 'r1'): [113, 1],
                         ('sph', 'r2'): [114, 1],
                         ('sph', 'r3'): [115, 1],
                         ('sph', 'spl'): [119, 1],
                         ('spl', '#data8'): [47, 2],
                         ('spl', '@dptr'): [127, 1],
                         ('spl', 'a'): [55, 1],
                         ('spl', 'dph'): [124, 1],
                         ('spl', 'dpl'): [125, 1],
                         ('spl', 'r0'): [120, 1],
                         ('spl', 'r1'): [121, 1],
                         ('spl', 'r2'): [122, 1],
                         ('spl', 'r3'): [123, 1],
                         ('spl', 'sph'): [126, 1]},
             'mul': {    ('r0', 'r1'): [248, 1]},
             'nop': {    ('',): [0, 1]},
             'orl': {    ('a', '#data8'): [142, 2],
                         ('a', '@dptr'): [143, 1],
                         ('a', 'dph'): [140, 1],
                         ('a', 'dpl'): [141, 1],
                         ('a', 'r0'): [136, 1],
                         ('a', 'r1'): [137, 1],
                         ('a', 'r2'): [138, 1],
                         ('a', 'r3'): [139, 1]},
             'out': {    ('port_addr', 'a'): [253, 2]},
             'pcall': {    ('addr16',): [207, 2], ('label',): [200, 2]},
             'pjmp': {    ('addr16',): [199, 2], ('label',): [192, 2]},
             'pop': {    ('a',): [246, 1],
                         ('dph',): [244, 1],
                         ('dpl',): [245, 1],
                         ('flags',): [247, 1],
                         ('r0',): [240, 1],
                         ('r1',): [241, 1],
                         ('r2',): [242, 1],
                         ('r3',): [243, 1]},
             'push': {    ('a',): [238, 1],
                          ('dph',): [236, 1],
                          ('dpl',): [237, 1],
                          ('flags',): [239, 1],
                          ('r0',): [232, 1],
                          ('r1',): [233, 1],
                          ('r2',): [234, 1],
                          ('r3',): [235, 1]},
             'reserved': {    ('',): [251, 1]},
             'ret': {    ('',): [218, 1]},
             'reti': {    ('',): [219, 1]},
             'rl': {    ('a',): [152, 1]},
             'rlc': {    ('a',): [153, 1]},
             'rr': {    ('a',): [154, 1]},
             'rrc': {    ('a',): [155, 1]},
             'set': {    ('bs',): [10, 1], ('c',): [8, 1], ('ie',): [12, 1]},
             'sfa': {    ('',): [17, 1]},
             'sjmp': {    ('label',): [220, 2], ('rel8',): [220, 2]},
             'sub': {    ('a', '#data8'): [182, 2],
                         ('a', '@dptr'): [183, 1],
                         ('a', 'dph'): [180, 1],
                         ('a', 'dpl'): [181, 1],
                         ('a', 'r0'): [176, 1],
                         ('a', 'r1'): [177, 1],
                         ('a', 'r2'): [178, 1],
                         ('a', 'r3'): [179, 1]},
             'subb': {    ('a', '#data8'): [190, 2],
                          ('a', '@dptr'): [191, 1],
                          ('a', 'dph'): [188, 1],
                          ('a', 'dpl'): [189, 1],
                          ('a', 'r0'): [184, 1],
                          ('a', 'r1'): [185, 1],
                          ('a', 'r2'): [186, 1],
                          ('a', 'r3'): [187, 1]},
             'xcsd': {    ('',): [16, 1]},
             'xrl': {    ('a', '#data8'): [150, 2],
                         ('a', '@dptr'): [151, 1],
                         ('a', 'dph'): [148, 1],
                         ('a', 'dpl'): [149, 1],
                         ('a', 'r0'): [144, 1],
                         ('a', 'r1'): [145, 1],
                         ('a', 'r2'): [146, 1],
                         ('a', 'r3'): [147, 1]}}


# take a list of arguments
# identify constant data:
#       pack that data into a bit string
# return hashable data type symbols and data
def tokenize(args):
    """Translate instruction arguments into a symbol tuple plus packed data.

    args is an iterable of argument strings.  Each one is classified:

    * names listed in vargs (matched case-insensitively) are emitted as
      their lower-case symbol and contribute no data;
    * a leading character from TYPES selects the operand kind ('#' int,
      '@' pointer, '$' port, '*' vector); arguments without one are
      treated as addresses;
    * '(...)' is evaluated as an expression, and quoted text of one or two
      characters is converted to its numeric value, before classification;
    * quoted text of any other length replaces the data collected so far
      and contributes no symbol;
    * anything that does not start with PREFIX (optionally preceded by
      '-') becomes 'label', or '@label' for pointers, with no data.

    Returns (symbols, data): symbols is a tuple suitable as a dictionary
    key and data is the concatenation of the big-endian struct-packed
    operand values.

    struct.pack() may raise struct.error when a value does not fit the
    selected field (for example 0xff as a signed 8 bit relative address).

    NOTE(review): this code concatenates struct.pack() results onto a str,
    which works on Python 2 only; on Python 3 mixing str and bytes raises
    TypeError.
    """
    sym = []
    data = ''
    neg_prefix = '-' + PREFIX

    for a in args:
        # tokenize reserved arguments immediately (not case sensitive)
        if a.lower() in vargs:
            sym.append(a.lower())
            continue

        # determine arg type and remove identifier if needed
        # unprefixed arguments are addresses so this is the default
        arg_type = 'addr'
        if a[0] in TYPES:
            arg_type = a[0]
            a = a[1:]

        # evaluate inline calculations
        if (a[0] == '(') and (a[-1] == ')'):
            # SECURITY: eval() runs arbitrary Python taken from the source
            # being assembled; only assemble trusted input.
            a = num_string(eval(a[1:-1]))
        # evaluate strings
        elif (a[0] == '\'') and (a[-1] == '\''):
            if len(a) == 3:
                # one character -> unsigned 8 bit value
                a = num_string(struct.unpack('>B', a[1:-1])[0])
            elif len(a) == 4:
                # two characters -> unsigned big-endian 16 bit value
                a = num_string(struct.unpack('>H', a[1:-1])[0])
            else:
                # raw string data: replaces (does not extend) data so far
                data = a[1:-1]
                continue

        # check if negative and remove sign if needed
        # (the sign plus PREFIX is longer than PREFIX alone, so a fixed
        # two character slice can never match it; use startswith instead)
        if a.startswith(neg_prefix):
            is_neg = True
            a = a[1:]
        elif a.startswith(PREFIX):
            is_neg = False
        # anything else must be a label or a source code error
        elif arg_type == '@':
            sym.append('@label')
            continue
        else:
            sym.append('label')
            continue

        # from here on a is PREFIX followed by digits; the length limits
        # below (4 and 6) count the two prefix characters as well.
        # literals longer than 6 characters match neither limit and are
        # silently ignored: no symbol and no data are produced for them.

        # addresses
        if arg_type == 'addr':
            # 8 bit relative addresses (always signed)
            if len(a) <= 4:
                sym.append('rel8')
                val = string_num('-' + a) if is_neg else string_num(a)
                data += struct.pack('>b', val)
            # 16 bit absolute addresses (never signed, any sign is dropped)
            elif len(a) <= 6:
                sym.append('addr16')
                data += struct.pack('>H', string_num(a))
        # immediate ints (signed when negative)
        elif arg_type == '#':
            # 8 bit ints
            if len(a) <= 4:
                sym.append('#data8')
                if is_neg:
                    data += struct.pack('>b', string_num('-' + a))
                else:
                    data += struct.pack('>B', string_num(a))
            # 16 bit ints
            elif len(a) <= 6:
                sym.append('#data16')
                if is_neg:
                    data += struct.pack('>h', string_num('-' + a))
                else:
                    data += struct.pack('>H', string_num(a))
        # pointers (always packed as unsigned 16 bit)
        elif arg_type == '@':
            sym.append('@addr16')
            data += struct.pack('>H', string_num(a))
        # ports (always packed as unsigned 8 bit)
        elif arg_type == '$':
            sym.append('port_addr')
            data += struct.pack('>B', string_num(a))
        # vectors (always packed as unsigned 8 bit)
        elif arg_type == '*':
            sym.append('vect8')
            data += struct.pack('>B', string_num(a))

    return tuple(sym), data