Mercurial > hg > ede
diff assembler/language.py @ 1:82e82dda442b
alpha version of assembler 'finished'
some more documentation and test files added
author | james <jb302@eecs.qmul.ac.uk> |
---|---|
date | Fri, 06 Dec 2013 23:39:54 +0000 |
parents | |
children | 81dd03d17c22 |
line wrap: on
line diff
#!/usr/bin/env python
# language.py
"""Instruction-set table and operand tokenizer for the assembler."""
import struct

# these definitions are here to make changing the source code
# representation of numbers easier
BASE = 16
# tokenize() measures the prefix with len(PREFIX), so it may be any
# non-empty string that int(text, BASE) accepts in front of the digits
PREFIX = '0x'


def num_string(num):
    """Return num formatted as a hexadecimal string (via hex())."""
    return hex(num)


# nested dictionary:
# for every mnemonic in the instruction set there is a dictionary that
# maps each accepted tuple of argument types ('symbols') to the
# corresponding op code.
# mnemonics without operands use the one-element tuple ('',).
iset = {
    'add': {
        ('a', '#data8'): 166,
        ('a', '@dptr'): 167,
        ('a', 'dph'): 164,
        ('a', 'dpl'): 165,
        ('a', 'r0'): 160,
        ('a', 'r1'): 161,
        ('a', 'r2'): 162,
        ('a', 'r3'): 163,
    },
    'addc': {
        ('a', '#data8'): 174,
        ('a', '@dptr'): 175,
        ('a', 'dph'): 172,
        ('a', 'dpl'): 173,
        ('a', 'r0'): 168,
        ('a', 'r1'): 169,
        ('a', 'r2'): 170,
        ('a', 'r3'): 171,
    },
    'anl': {
        ('a', '#data8'): 134,
        ('a', '@dptr'): 135,
        ('a', 'dph'): 132,
        ('a', 'dpl'): 133,
        ('a', 'r0'): 128,
        ('a', 'r1'): 129,
        ('a', 'r2'): 130,
        ('a', 'r3'): 131,
    },
    'cjne': {
        ('a', '#data8', 'rel8'): 223,
        ('r0', '#data8', 'rel8'): 212,
        ('r1', '#data8', 'rel8'): 213,
        ('r2', '#data8', 'rel8'): 214,
        ('r3', '#data8', 'rel8'): 215,
    },
    'clr': {('bs',): 11, ('c',): 9, ('ie',): 13},
    'cpl': {('a',): 15, ('c',): 14},
    'da': {('a',): 250},
    'dec': {('a',): 159, ('dptr',): 157},
    'div': {('r0', 'r1'): 249},
    'djnz': {
        ('r0', 'rel8'): 208,
        ('r1', 'rel8'): 209,
        ('r2', 'rel8'): 210,
        ('r3', 'rel8'): 211,
    },
    'hlt': {('',): 255},
    'in': {('a', 'port_addr'): 252},
    'inc': {('a',): 158, ('dptr',): 156},
    'int': {('vect8',): 254},
    'jc': {('rel8',): 226},
    'jmp': {('@a+dptr',): 221, ('@dptr',): 222},
    'jnc': {('rel8',): 227},
    'jns': {('rel8',): 231},
    'jnz': {('rel8',): 225},
    'jpe': {('rel8',): 229},
    'jpo': {('rel8',): 228},
    'js': {('rel8',): 230},
    'jz': {('rel8',): 224},
    'laf': {('',): 18},
    'lcall': {('addr16',): 217},
    'ljmp': {('addr16',): 216},
    'mov': {
        ('@addr16', 'a'): 29,
        ('@dptr', 'a'): 31,
        ('@dptr', 'dph'): 36,
        ('@dptr', 'dpl'): 37,
        ('@dptr', 'r0'): 32,
        ('@dptr', 'r1'): 33,
        ('@dptr', 'r2'): 34,
        ('@dptr', 'r3'): 35,
        ('@dptr', 'sph'): 38,
        ('@dptr', 'spl'): 39,
        ('a', '#data8'): 21,
        ('a', '@a+dptr'): 26,
        ('a', '@a+pc'): 27,
        ('a', '@addr16'): 28,
        ('a', '@dptr'): 30,
        ('a', 'addr16'): 24,
        ('a', 'dph'): 60,
        ('a', 'dpl'): 61,
        ('a', 'r0'): 56,
        ('a', 'r1'): 57,
        ('a', 'r2'): 58,
        ('a', 'r3'): 59,
        ('a', 'sph'): 62,
        ('a', 'spl'): 63,
        ('addr16', 'a'): 25,
        ('dph', '#data8'): 44,
        ('dph', '@dptr'): 100,
        ('dph', 'a'): 52,
        ('dph', 'dpl'): 101,
        ('dph', 'r0'): 96,
        ('dph', 'r1'): 97,
        ('dph', 'r2'): 98,
        ('dph', 'r3'): 99,
        ('dph', 'sph'): 102,
        ('dph', 'spl'): 103,
        ('dpl', '#data8'): 45,
        ('dpl', '@dptr'): 109,
        ('dpl', 'a'): 53,
        ('dpl', 'dph'): 108,
        ('dpl', 'r0'): 104,
        ('dpl', 'r1'): 105,
        ('dpl', 'r2'): 106,
        ('dpl', 'r3'): 107,
        ('dpl', 'sph'): 110,
        ('dpl', 'spl'): 111,
        ('dptr', '#data16'): 23,
        ('dptr', 'sp'): 19,
        ('r0', '#data8'): 40,
        ('r0', '@dptr'): 64,
        ('r0', 'a'): 48,
        ('r0', 'dph'): 68,
        ('r0', 'dpl'): 69,
        ('r0', 'r1'): 65,
        ('r0', 'r2'): 66,
        ('r0', 'r3'): 67,
        ('r0', 'sph'): 70,
        ('r0', 'spl'): 71,
        ('r1', '#data8'): 41,
        ('r1', '@dptr'): 73,
        ('r1', 'a'): 49,
        ('r1', 'dph'): 76,
        ('r1', 'dpl'): 77,
        ('r1', 'r0'): 72,
        ('r1', 'r2'): 74,
        ('r1', 'r3'): 75,
        ('r1', 'sph'): 78,
        ('r1', 'spl'): 79,
        ('r2', '#data8'): 42,
        ('r2', '@dptr'): 82,
        ('r2', 'a'): 50,
        ('r2', 'dph'): 84,
        ('r2', 'dpl'): 85,
        ('r2', 'r0'): 80,
        ('r2', 'r1'): 81,
        ('r2', 'r3'): 83,
        ('r2', 'sph'): 86,
        ('r2', 'spl'): 87,
        ('r3', '#data8'): 43,
        ('r3', '@dptr'): 91,
        ('r3', 'a'): 51,
        ('r3', 'dph'): 92,
        ('r3', 'dpl'): 93,
        ('r3', 'r0'): 88,
        ('r3', 'r1'): 89,
        ('r3', 'r2'): 90,
        ('r3', 'sph'): 94,
        ('r3', 'spl'): 95,
        ('sp', '#data16'): 22,
        ('sp', 'dptr'): 20,
        ('sph', '#data8'): 46,
        ('sph', '@dptr'): 118,
        ('sph', 'a'): 54,
        ('sph', 'dph'): 116,
        ('sph', 'dpl'): 117,
        ('sph', 'r0'): 112,
        ('sph', 'r1'): 113,
        ('sph', 'r2'): 114,
        ('sph', 'r3'): 115,
        ('sph', 'spl'): 119,
        ('spl', '#data8'): 47,
        ('spl', '@dptr'): 127,
        ('spl', 'a'): 55,
        ('spl', 'dph'): 124,
        ('spl', 'dpl'): 125,
        ('spl', 'r0'): 120,
        ('spl', 'r1'): 121,
        ('spl', 'r2'): 122,
        ('spl', 'r3'): 123,
        ('spl', 'sph'): 126,
    },
    'mul': {('r0', 'r1'): 248},
    'nop': {('',): 0},
    'orl': {
        ('a', '#data8'): 142,
        ('a', '@dptr'): 143,
        ('a', 'dph'): 140,
        ('a', 'dpl'): 141,
        ('a', 'r0'): 136,
        ('a', 'r1'): 137,
        ('a', 'r2'): 138,
        ('a', 'r3'): 139,
    },
    'out': {('port_addr', 'a'): 253},
    'pcall': {('addr11',): 207},
    'pjmp': {('addr11',): 199},
    'pop': {
        ('a',): 246,
        ('dph',): 244,
        ('dpl',): 245,
        ('flags',): 247,
        ('r0',): 240,
        ('r1',): 241,
        ('r2',): 242,
        ('r3',): 243,
    },
    'push': {
        ('a',): 238,
        ('dph',): 236,
        ('dpl',): 237,
        ('flags',): 239,
        ('r0',): 232,
        ('r1',): 233,
        ('r2',): 234,
        ('r3',): 235,
    },
    'reserved': {('',): 251},
    'ret': {('',): 218},
    'reti': {('',): 219},
    'rl': {('a',): 152},
    'rlc': {('a',): 153},
    'rr': {('a',): 154},
    'rrc': {('a',): 155},
    'set': {('bs',): 10, ('c',): 8, ('ie',): 12},
    'sfa': {('',): 17},
    # NOTE(review): sjmp is listed without an operand although every
    # other relative jump in this table takes 'rel8' -- confirm
    'sjmp': {('',): 220},
    'sub': {
        ('a', '#data8'): 182,
        ('a', '@dptr'): 183,
        ('a', 'dph'): 180,
        ('a', 'dpl'): 181,
        ('a', 'r0'): 176,
        ('a', 'r1'): 177,
        ('a', 'r2'): 178,
        ('a', 'r3'): 179,
    },
    'subb': {
        ('a', '#data8'): 190,
        ('a', '@dptr'): 191,
        ('a', 'dph'): 188,
        ('a', 'dpl'): 189,
        ('a', 'r0'): 184,
        ('a', 'r1'): 185,
        ('a', 'r2'): 186,
        ('a', 'r3'): 187,
    },
    'xcsd': {('',): 16},
    'xrl': {
        ('a', '#data8'): 150,
        ('a', '@dptr'): 151,
        ('a', 'dph'): 148,
        ('a', 'dpl'): 149,
        ('a', 'r0'): 144,
        ('a', 'r1'): 145,
        ('a', 'r2'): 146,
        ('a', 'r3'): 147,
    },
}


def tokenize(args):
    """Classify a list of argument strings and pack their constant data.

    Every argument is matched against three numeric forms:

        '#' + PREFIX + digits   immediate: '#data8' for up to 2 digits,
                                '#data16' for 3 or 4 digits
        PREFIX + digits         address: 'rel8' for up to 2 digits,
                                'addr16' for 3 or 4 digits
        '@' + PREFIX + digits   pointer: always '@addr16'

    An 8 bit value is packed as one byte, a 16 bit value as a
    big-endian short.  Any other argument, including an immediate or
    address with more than 4 digits, is returned unchanged as its own
    symbol and contributes no data, so unknown symbols can be dealt
    with (or rejected) by the caller.

    Args:
        args: iterable of argument strings.

    Returns:
        A (sym, data) tuple: the list of type symbols, one per
        argument, and a byte string holding the packed values in
        argument order.

    Raises:
        ValueError: the digits after the prefix are missing or are not
            valid in BASE.
        struct.error: a pointer value does not fit in 16 bits.
    """
    sym = []
    # packed values are collected here and joined once at the end, which
    # yields a byte string on both Python 2 and Python 3
    chunks = []

    for a in args:
        if a.startswith('#' + PREFIX):
            # immediate ints
            names = ('#data8', '#data16')
            number = a[1:]
        elif a.startswith(PREFIX):
            # addresses
            names = ('rel8', 'addr16')
            number = a
        elif a.startswith('@' + PREFIX):
            # pointers are 16 bit regardless of how many digits are given
            sym.append('@addr16')
            chunks.append(struct.pack('>H', int(a[1:], BASE)))
            continue
        else:
            # return unknown symbols so language can be extended more easily
            sym.append(a)
            continue

        # NOTE: the 2 and 4 digit limits assume BASE == 16
        digits = len(number) - len(PREFIX)
        if digits <= 2:
            # big-endian byte
            sym.append(names[0])
            chunks.append(struct.pack('>B', int(number, BASE)))
        elif digits <= 4:
            # big-endian short
            sym.append(names[1])
            chunks.append(struct.pack('>H', int(number, BASE)))
        else:
            # too wide for any operand: hand it back as an unknown symbol
            sym.append(a)

    return sym, b''.join(chunks)