view assembler/language.py @ 12:e9dc055a0f8b

emulator skeleton code
author james <jb302@eecs.qmul.ac.uk>
date Sat, 11 Jan 2014 02:33:32 +0000
parents 81dd03d17c22
children 2b8eb2c86602
line wrap: on
line source
#!/usr/bin/env python
# language.py
import struct

# Number-format settings for assembly source. They are gathered here so
# that the textual representation of numbers can be changed in one place.
# BASE is the radix passed to int() when tokenize() parses a numeric
# argument.
BASE = 16
# PREFIX marks a numeric literal in source text. It is expected to be
# exactly 2 characters long; check tokenize() before changing its length.
PREFIX = '0x'
def num_string(num):
    """Return the integer *num* as source-code text.

    Formatting is delegated to the built-in hex(), so the result is
    hexadecimal and carries the '0x' prefix (e.g. 255 -> '0xff').
    """
    formatted = hex(num)
    return formatted


# Valid argument symbols for this instruction set. The empty string is
# the symbol used by mnemonics that are listed without operands in iset
# (e.g. 'nop', 'ret').
# Reference only: not actually in use.
iargs = ('', 'addr11', 'addr16', 'sph', 'spl', '@dptr', 'ie', '@a+dptr', 
         '@addr16', 'vect8', '#data8', 'port_addr', 'dptr', 'bs', '@a+pc', 
         'a', 'c', 'rel8', 'r0', 'r1', 'r2', 'r3', '#data16', 'sp', 'flags', 
         'dpl', 'dph')

# Instruction set table: a dictionary of dictionaries.
#   outer key   - mnemonic
#   inner key   - tuple of argument symbols, one per operand, in source
#                 order; ('',) is used for mnemonics listed without operands
#   inner value - the op code for that mnemonic/argument combination,
#                 written in decimal (all values here lie in 0-255)
iset =  {'add': {('a', '#data8'): 166,
                 ('a', '@dptr'): 167,
                 ('a', 'dph'): 164,
                 ('a', 'dpl'): 165,
                 ('a', 'r0'): 160,
                 ('a', 'r1'): 161,
                 ('a', 'r2'): 162,
                 ('a', 'r3'): 163},
         'addc': {('a', '#data8'): 174,
                  ('a', '@dptr'): 175,
                  ('a', 'dph'): 172,
                  ('a', 'dpl'): 173,
                  ('a', 'r0'): 168,
                  ('a', 'r1'): 169,
                  ('a', 'r2'): 170,
                  ('a', 'r3'): 171},
         'anl': {('a', '#data8'): 134,
                 ('a', '@dptr'): 135,
                 ('a', 'dph'): 132,
                 ('a', 'dpl'): 133,
                 ('a', 'r0'): 128,
                 ('a', 'r1'): 129,
                 ('a', 'r2'): 130,
                 ('a', 'r3'): 131},
         'cjne': {('a', '#data8', 'rel8'): 223,
                  ('r0', '#data8', 'rel8'): 212,
                  ('r1', '#data8', 'rel8'): 213,
                  ('r2', '#data8', 'rel8'): 214,
                  ('r3', '#data8', 'rel8'): 215},
         'clr': {('bs',): 11, ('c',): 9, ('ie',): 13},
         'cpl': {('a',): 15, ('c',): 14},
         'da': {('a',): 250},
         'dec': {('a',): 159, ('dptr',): 157},
         'div': {('r0', 'r1'): 249},
         'djnz': {('r0', 'rel8'): 208,
                  ('r1', 'rel8'): 209,
                  ('r2', 'rel8'): 210,
                  ('r3', 'rel8'): 211},
         'hlt': {('',): 255},
         'in': {('a', 'port_addr'): 252},
         'inc': {('a',): 158, ('dptr',): 156},
         'int': {('vect8',): 254},
         'jc': {('rel8',): 226},
         'jmp': {('@a+dptr',): 221, ('@dptr',): 222},
         'jnc': {('rel8',): 227},
         'jns': {('rel8',): 231},
         'jnz': {('rel8',): 225},
         'jpe': {('rel8',): 229},
         'jpo': {('rel8',): 228},
         'js': {('rel8',): 230},
         'jz': {('rel8',): 224},
         'laf': {('',): 18},
         'lcall': {('addr16',): 217},
         'ljmp': {('addr16',): 216},
         'mov': {('@addr16', 'a'): 29,
                 ('@dptr', 'a'): 31,
                 ('@dptr', 'dph'): 36,
                 ('@dptr', 'dpl'): 37,
                 ('@dptr', 'r0'): 32,
                 ('@dptr', 'r1'): 33,
                 ('@dptr', 'r2'): 34,
                 ('@dptr', 'r3'): 35,
                 ('@dptr', 'sph'): 38,
                 ('@dptr', 'spl'): 39,
                 ('a', '#data8'): 21,
                 ('a', '@a+dptr'): 26,
                 ('a', '@a+pc'): 27,
                 ('a', '@addr16'): 28,
                 ('a', '@dptr'): 30,
                 ('a', 'addr16'): 24,
                 ('a', 'dph'): 60,
                 ('a', 'dpl'): 61,
                 ('a', 'r0'): 56,
                 ('a', 'r1'): 57,
                 ('a', 'r2'): 58,
                 ('a', 'r3'): 59,
                 ('a', 'sph'): 62,
                 ('a', 'spl'): 63,
                 ('addr16', 'a'): 25,
                 ('dph', '#data8'): 44,
                 ('dph', '@dptr'): 100,
                 ('dph', 'a'): 52,
                 ('dph', 'dpl'): 101,
                 ('dph', 'r0'): 96,
                 ('dph', 'r1'): 97,
                 ('dph', 'r2'): 98,
                 ('dph', 'r3'): 99,
                 ('dph', 'sph'): 102,
                 ('dph', 'spl'): 103,
                 ('dpl', '#data8'): 45,
                 ('dpl', '@dptr'): 109,
                 ('dpl', 'a'): 53,
                 ('dpl', 'dph'): 108,
                 ('dpl', 'r0'): 104,
                 ('dpl', 'r1'): 105,
                 ('dpl', 'r2'): 106,
                 ('dpl', 'r3'): 107,
                 ('dpl', 'sph'): 110,
                 ('dpl', 'spl'): 111,
                 ('dptr', '#data16'): 23,
                 ('dptr', 'sp'): 19,
                 ('r0', '#data8'): 40,
                 ('r0', '@dptr'): 64,
                 ('r0', 'a'): 48,
                 ('r0', 'dph'): 68,
                 ('r0', 'dpl'): 69,
                 ('r0', 'r1'): 65,
                 ('r0', 'r2'): 66,
                 ('r0', 'r3'): 67,
                 ('r0', 'sph'): 70,
                 ('r0', 'spl'): 71,
                 ('r1', '#data8'): 41,
                 ('r1', '@dptr'): 73,
                 ('r1', 'a'): 49,
                 ('r1', 'dph'): 76,
                 ('r1', 'dpl'): 77,
                 ('r1', 'r0'): 72,
                 ('r1', 'r2'): 74,
                 ('r1', 'r3'): 75,
                 ('r1', 'sph'): 78,
                 ('r1', 'spl'): 79,
                 ('r2', '#data8'): 42,
                 ('r2', '@dptr'): 82,
                 ('r2', 'a'): 50,
                 ('r2', 'dph'): 84,
                 ('r2', 'dpl'): 85,
                 ('r2', 'r0'): 80,
                 ('r2', 'r1'): 81,
                 ('r2', 'r3'): 83,
                 ('r2', 'sph'): 86,
                 ('r2', 'spl'): 87,
                 ('r3', '#data8'): 43,
                 ('r3', '@dptr'): 91,
                 ('r3', 'a'): 51,
                 ('r3', 'dph'): 92,
                 ('r3', 'dpl'): 93,
                 ('r3', 'r0'): 88,
                 ('r3', 'r1'): 89,
                 ('r3', 'r2'): 90,
                 ('r3', 'sph'): 94,
                 ('r3', 'spl'): 95,
                 ('sp', '#data16'): 22,
                 ('sp', 'dptr'): 20,
                 ('sph', '#data8'): 46,
                 ('sph', '@dptr'): 118,
                 ('sph', 'a'): 54,
                 ('sph', 'dph'): 116,
                 ('sph', 'dpl'): 117,
                 ('sph', 'r0'): 112,
                 ('sph', 'r1'): 113,
                 ('sph', 'r2'): 114,
                 ('sph', 'r3'): 115,
                 ('sph', 'spl'): 119,
                 ('spl', '#data8'): 47,
                 ('spl', '@dptr'): 127,
                 ('spl', 'a'): 55,
                 ('spl', 'dph'): 124,
                 ('spl', 'dpl'): 125,
                 ('spl', 'r0'): 120,
                 ('spl', 'r1'): 121,
                 ('spl', 'r2'): 122,
                 ('spl', 'r3'): 123,
                 ('spl', 'sph'): 126},
         'mul': {('r0', 'r1'): 248},
         'nop': {('',): 0},
         'orl': {('a', '#data8'): 142,
                 ('a', '@dptr'): 143,
                 ('a', 'dph'): 140,
                 ('a', 'dpl'): 141,
                 ('a', 'r0'): 136,
                 ('a', 'r1'): 137,
                 ('a', 'r2'): 138,
                 ('a', 'r3'): 139},
         'out': {('port_addr', 'a'): 253},
         'pcall': {('addr11',): 207},
         'pjmp': {('addr11',): 199},
         'pop': {('a',): 246,
                 ('dph',): 244,
                 ('dpl',): 245,
                 ('flags',): 247,
                 ('r0',): 240,
                 ('r1',): 241,
                 ('r2',): 242,
                 ('r3',): 243},
         'push': {('a',): 238,
                  ('dph',): 236,
                  ('dpl',): 237,
                  ('flags',): 239,
                  ('r0',): 232,
                  ('r1',): 233,
                  ('r2',): 234,
                  ('r3',): 235},
         'reserved': {('',): 251},
         'ret': {('',): 218},
         'reti': {('',): 219},
         'rl': {('a',): 152},
         'rlc': {('a',): 153},
         'rr': {('a',): 154},
         'rrc': {('a',): 155},
         'set': {('bs',): 10, ('c',): 8, ('ie',): 12},
         'sfa': {('',): 17},
         # NOTE(review): sjmp is listed with no operand, unlike every other
         # jump in this table -- confirm whether it should take 'rel8'
         'sjmp': {('',): 220},
         'sub': {('a', '#data8'): 182,
                 ('a', '@dptr'): 183,
                 ('a', 'dph'): 180,
                 ('a', 'dpl'): 181,
                 ('a', 'r0'): 176,
                 ('a', 'r1'): 177,
                 ('a', 'r2'): 178,
                 ('a', 'r3'): 179},
         'subb': {('a', '#data8'): 190,
                  ('a', '@dptr'): 191,
                  ('a', 'dph'): 188,
                  ('a', 'dpl'): 189,
                  ('a', 'r0'): 184,
                  ('a', 'r1'): 185,
                  ('a', 'r2'): 186,
                  ('a', 'r3'): 187},
         'xcsd': {('',): 16},
         'xrl': {('a', '#data8'): 150,
                 ('a', '@dptr'): 151,
                 ('a', 'dph'): 148,
                 ('a', 'dpl'): 149,
                 ('a', 'r0'): 144,
                 ('a', 'r1'): 145,
                 ('a', 'r2'): 146,
                 ('a', 'r3'): 147}}

# take a list of argument strings
# identify constant data:
#       pack that data into a byte string
# return data type symbols and data
def tokenize(args):
    """Classify instruction arguments and pack their constant data.

    Each argument is matched, in this order, against:

    * '#' + PREFIX + digits -- immediate value: '#data8' for up to 2
      digits, '#data16' for 3 or 4 digits
    * PREFIX + digits       -- address: 'rel8' for up to 2 digits,
      'addr16' for 3 or 4 digits
    * '@' + PREFIX + digits -- pointer: always '@addr16'

    The digit-count thresholds assume two digits per byte (hexadecimal).
    8 bit values are packed as one unsigned byte and 16 bit values as a
    big-endian unsigned short. Numeric arguments with more than 4 digits
    and arguments that match none of the forms above are passed through
    unchanged as their own symbol, so the caller decides whether they
    are valid.

    args -- sequence of argument strings
    Returns (sym, data): the list of symbols, one per argument, and the
    packed constants concatenated in argument order.
    Raises ValueError if the digits cannot be parsed in base BASE, and
    struct.error if a value does not fit the field it is packed into.
    """
    immediate = '#' + PREFIX
    pointer = '@' + PREFIX

    sym = []
    # struct.pack() returns bytes on Python 3, which cannot be added to a
    # text string; collect the pieces and join them once at the end
    chunks = []

    for a in args:
        if a.startswith(immediate):
            marker, narrow, wide = '#', '#data8', '#data16'
        elif a.startswith(PREFIX):
            marker, narrow, wide = '', 'rel8', 'addr16'
        elif a.startswith(pointer):
            # pointers are always 16 bit, whatever their digit count
            sym.append('@addr16')
            chunks.append(struct.pack('>H', int(a[1:], BASE)))
            continue
        else:
            # return unknown symbols so language can be extended more easily
            sym.append(a)
            continue

        # the text handed to int() still carries PREFIX
        literal = a[len(marker):]
        ndigits = len(literal) - len(PREFIX)

        if ndigits <= 2:
            sym.append(narrow)
            # big-endian byte
            chunks.append(struct.pack('>B', int(literal, BASE)))
        elif ndigits <= 4:
            sym.append(wide)
            # big-endian short
            chunks.append(struct.pack('>H', int(literal, BASE)))
        else:
            # too wide for any operand: hand the raw text back as junk
            sym.append(a)

    return sym, b''.join(chunks)