Mercurial > hg > ede
view assembler/language.py @ 12:e9dc055a0f8b
emulator skeleton code
author | james <jb302@eecs.qmul.ac.uk> |
---|---|
date | Sat, 11 Jan 2014 02:33:32 +0000 |
parents | 81dd03d17c22 |
children | 2b8eb2c86602 |
line wrap: on
line source
#!/usr/bin/env python # language.py import struct # these definitions are here to make changing the source code # representation of numbers easier BASE = 16 # prefix must be only 2 characters otherwise tokenize() will break PREFIX = '0x' def num_string(num): return hex(num) # valid arguments for this instruction set # not actually in use iargs = ('', 'addr11', 'addr16', 'sph', 'spl', '@dptr', 'ie', '@a+dptr', '@addr16', 'vect8', '#data8', 'port_addr', 'dptr', 'bs', '@a+pc', 'a', 'c', 'rel8', 'r0', 'r1', 'r2', 'r3', '#data16', 'sp', 'flags', 'dpl', 'dph') # dictionary embedded dictionaries? # for every mnemonic in the instruction set index # there is an index of possible argument formats ('symbol') # for that mnemonic and a corresponding op code iset = {'add': {('a', '#data8'): 166, ('a', '@dptr'): 167, ('a', 'dph'): 164, ('a', 'dpl'): 165, ('a', 'r0'): 160, ('a', 'r1'): 161, ('a', 'r2'): 162, ('a', 'r3'): 163}, 'addc': {('a', '#data8'): 174, ('a', '@dptr'): 175, ('a', 'dph'): 172, ('a', 'dpl'): 173, ('a', 'r0'): 168, ('a', 'r1'): 169, ('a', 'r2'): 170, ('a', 'r3'): 171}, 'anl': {('a', '#data8'): 134, ('a', '@dptr'): 135, ('a', 'dph'): 132, ('a', 'dpl'): 133, ('a', 'r0'): 128, ('a', 'r1'): 129, ('a', 'r2'): 130, ('a', 'r3'): 131}, 'cjne': {('a', '#data8', 'rel8'): 223, ('r0', '#data8', 'rel8'): 212, ('r1', '#data8', 'rel8'): 213, ('r2', '#data8', 'rel8'): 214, ('r3', '#data8', 'rel8'): 215}, 'clr': {('bs',): 11, ('c',): 9, ('ie',): 13}, 'cpl': {('a',): 15, ('c',): 14}, 'da': {('a',): 250}, 'dec': {('a',): 159, ('dptr',): 157}, 'div': {('r0', 'r1'): 249}, 'djnz': {('r0', 'rel8'): 208, ('r1', 'rel8'): 209, ('r2', 'rel8'): 210, ('r3', 'rel8'): 211}, 'hlt': {('',): 255}, 'in': {('a', 'port_addr'): 252}, 'inc': {('a',): 158, ('dptr',): 156}, 'int': {('vect8',): 254}, 'jc': {('rel8',): 226}, 'jmp': {('@a+dptr',): 221, ('@dptr',): 222}, 'jnc': {('rel8',): 227}, 'jns': {('rel8',): 231}, 'jnz': {('rel8',): 225}, 'jpe': {('rel8',): 229}, 'jpo': {('rel8',): 228}, 'js': {('rel8',): 230}, 'jz': {('rel8',): 224}, 'laf': {('',): 18}, 'lcall': {('addr16',): 217}, 'ljmp': {('addr16',): 216}, 'mov': {('@addr16', 'a'): 29, ('@dptr', 'a'): 31, ('@dptr', 'dph'): 36, ('@dptr', 'dpl'): 37, ('@dptr', 'r0'): 32, ('@dptr', 'r1'): 33, ('@dptr', 'r2'): 34, ('@dptr', 'r3'): 35, ('@dptr', 'sph'): 38, ('@dptr', 'spl'): 39, ('a', '#data8'): 21, ('a', '@a+dptr'): 26, ('a', '@a+pc'): 27, ('a', '@addr16'): 28, ('a', '@dptr'): 30, ('a', 'addr16'): 24, ('a', 'dph'): 60, ('a', 'dpl'): 61, ('a', 'r0'): 56, ('a', 'r1'): 57, ('a', 'r2'): 58, ('a', 'r3'): 59, ('a', 'sph'): 62, ('a', 'spl'): 63, ('addr16', 'a'): 25, ('dph', '#data8'): 44, ('dph', '@dptr'): 100, ('dph', 'a'): 52, ('dph', 'dpl'): 101, ('dph', 'r0'): 96, ('dph', 'r1'): 97, ('dph', 'r2'): 98, ('dph', 'r3'): 99, ('dph', 'sph'): 102, ('dph', 'spl'): 103, ('dpl', '#data8'): 45, ('dpl', '@dptr'): 109, ('dpl', 'a'): 53, ('dpl', 'dph'): 108, ('dpl', 'r0'): 104, ('dpl', 'r1'): 105, ('dpl', 'r2'): 106, ('dpl', 'r3'): 107, ('dpl', 'sph'): 110, ('dpl', 'spl'): 111, ('dptr', '#data16'): 23, ('dptr', 'sp'): 19, ('r0', '#data8'): 40, ('r0', '@dptr'): 64, ('r0', 'a'): 48, ('r0', 'dph'): 68, ('r0', 'dpl'): 69, ('r0', 'r1'): 65, ('r0', 'r2'): 66, ('r0', 'r3'): 67, ('r0', 'sph'): 70, ('r0', 'spl'): 71, ('r1', '#data8'): 41, ('r1', '@dptr'): 73, ('r1', 'a'): 49, ('r1', 'dph'): 76, ('r1', 'dpl'): 77, ('r1', 'r0'): 72, ('r1', 'r2'): 74, ('r1', 'r3'): 75, ('r1', 'sph'): 78, ('r1', 'spl'): 79, ('r2', '#data8'): 42, ('r2', '@dptr'): 82, ('r2', 'a'): 50, ('r2', 'dph'): 84, ('r2', 'dpl'): 85, ('r2', 'r0'): 80, ('r2', 'r1'): 81, ('r2', 'r3'): 83, ('r2', 'sph'): 86, ('r2', 'spl'): 87, ('r3', '#data8'): 43, ('r3', '@dptr'): 91, ('r3', 'a'): 51, ('r3', 'dph'): 92, ('r3', 'dpl'): 93, ('r3', 'r0'): 88, ('r3', 'r1'): 89, ('r3', 'r2'): 90, ('r3', 'sph'): 94, ('r3', 'spl'): 95, ('sp', '#data16'): 22, ('sp', 'dptr'): 20, ('sph', '#data8'): 46, ('sph', '@dptr'): 118, ('sph', 'a'): 54, ('sph', 'dph'): 116, ('sph', 'dpl'): 117, ('sph', 'r0'): 112, ('sph', 'r1'): 113, ('sph', 'r2'): 114, ('sph', 'r3'): 115, ('sph', 'spl'): 119, ('spl', '#data8'): 47, ('spl', '@dptr'): 127, ('spl', 'a'): 55, ('spl', 'dph'): 124, ('spl', 'dpl'): 125, ('spl', 'r0'): 120, ('spl', 'r1'): 121, ('spl', 'r2'): 122, ('spl', 'r3'): 123, ('spl', 'sph'): 126}, 'mul': {('r0', 'r1'): 248}, 'nop': {('',): 0}, 'orl': {('a', '#data8'): 142, ('a', '@dptr'): 143, ('a', 'dph'): 140, ('a', 'dpl'): 141, ('a', 'r0'): 136, ('a', 'r1'): 137, ('a', 'r2'): 138, ('a', 'r3'): 139}, 'out': {('port_addr', 'a'): 253}, 'pcall': {('addr11',): 207}, 'pjmp': {('addr11',): 199}, 'pop': {('a',): 246, ('dph',): 244, ('dpl',): 245, ('flags',): 247, ('r0',): 240, ('r1',): 241, ('r2',): 242, ('r3',): 243}, 'push': {('a',): 238, ('dph',): 236, ('dpl',): 237, ('flags',): 239, ('r0',): 232, ('r1',): 233, ('r2',): 234, ('r3',): 235}, 'reserved': {('',): 251}, 'ret': {('',): 218}, 'reti': {('',): 219}, 'rl': {('a',): 152}, 'rlc': {('a',): 153}, 'rr': {('a',): 154}, 'rrc': {('a',): 155}, 'set': {('bs',): 10, ('c',): 8, ('ie',): 12}, 'sfa': {('',): 17}, 'sjmp': {('',): 220}, 'sub': {('a', '#data8'): 182, ('a', '@dptr'): 183, ('a', 'dph'): 180, ('a', 'dpl'): 181, ('a', 'r0'): 176, ('a', 'r1'): 177, ('a', 'r2'): 178, ('a', 'r3'): 179}, 'subb': {('a', '#data8'): 190, ('a', '@dptr'): 191, ('a', 'dph'): 188, ('a', 'dpl'): 189, ('a', 'r0'): 184, ('a', 'r1'): 185, ('a', 'r2'): 186, ('a', 'r3'): 187}, 'xcsd': {('',): 16}, 'xrl': {('a', '#data8'): 150, ('a', '@dptr'): 151, ('a', 'dph'): 148, ('a', 'dpl'): 149, ('a', 'r0'): 144, ('a', 'r1'): 145, ('a', 'r2'): 146, ('a', 'r3'): 147}} # take a list of arguments # identify constant data: # pack that data into a bit string # return data type symbols and data def tokenize(args): sym = [] data = '' for a in args: # immediate ints if a[:3] == '#' + PREFIX: # 8 bit ints if len(a[3:]) <= 2: sym.append('#data8') val = int(a[1:], BASE) # big-endian byte data = data + struct.pack('>B', val) # 16 bit ints elif len(a[3:]) <= 4: sym.append('#data16') val = int(a[1:], BASE) # big-endian short data = data + struct.pack('>H', val) else: # bad idea to return junk to throw errors later? sym.append(a) # addresses elif a[:2] == PREFIX: # 8 bit addresses if len(a[2:]) <= 2: sym.append('rel8') val = int(a, BASE) data = data + struct.pack('>B', val) # 16 bit addresses elif len(a[2:]) <= 4: sym.append('addr16') val = int(a, BASE) data = data + struct.pack('>H', val) else: # junk junk junk sym.append(a) # pointers elif a[:3] == '@' + PREFIX: sym.append('@addr16') val = int(a[1:], BASE) data = data + struct.pack('>H', val) # return unknown symbols so language can be extended more easily else: sym.append(a) return sym, data