jb302@1: #!/usr/bin/env python jb302@1: # language.py jb302@1: import struct jb302@1: jb302@1: # these definitions are here to make changing the source code jb302@1: # representation of numbers easier jb302@1: BASE = 16 jb302@1: # prefix must be only 2 characters otherwise tokenize() will break jb302@1: PREFIX = '0x' jb302@1: def num_string(num): jb302@1: return hex(num) jb302@1: jb302@1: # dictionary embedded dictionary? jb302@1: # for every mnemonic in the instruction set index jb302@1: # there is an index of possible argument types ('symbols') jb302@1: # and a corresponding op code jb302@1: iset = {'add': {('a', '#data8'): 166, jb302@1: ('a', '@dptr'): 167, jb302@1: ('a', 'dph'): 164, jb302@1: ('a', 'dpl'): 165, jb302@1: ('a', 'r0'): 160, jb302@1: ('a', 'r1'): 161, jb302@1: ('a', 'r2'): 162, jb302@1: ('a', 'r3'): 163}, jb302@1: 'addc': {('a', '#data8'): 174, jb302@1: ('a', '@dptr'): 175, jb302@1: ('a', 'dph'): 172, jb302@1: ('a', 'dpl'): 173, jb302@1: ('a', 'r0'): 168, jb302@1: ('a', 'r1'): 169, jb302@1: ('a', 'r2'): 170, jb302@1: ('a', 'r3'): 171}, jb302@1: 'anl': {('a', '#data8'): 134, jb302@1: ('a', '@dptr'): 135, jb302@1: ('a', 'dph'): 132, jb302@1: ('a', 'dpl'): 133, jb302@1: ('a', 'r0'): 128, jb302@1: ('a', 'r1'): 129, jb302@1: ('a', 'r2'): 130, jb302@1: ('a', 'r3'): 131}, jb302@1: 'cjne': {('a', '#data8', 'rel8'): 223, jb302@1: ('r0', '#data8', 'rel8'): 212, jb302@1: ('r1', '#data8', 'rel8'): 213, jb302@1: ('r2', '#data8', 'rel8'): 214, jb302@1: ('r3', '#data8', 'rel8'): 215}, jb302@1: 'clr': {('bs',): 11, ('c',): 9, ('ie',): 13}, jb302@1: 'cpl': {('a',): 15, ('c',): 14}, jb302@1: 'da': {('a',): 250}, jb302@1: 'dec': {('a',): 159, ('dptr',): 157}, jb302@1: 'div': {('r0', 'r1'): 249}, jb302@1: 'djnz': {('r0', 'rel8'): 208, jb302@1: ('r1', 'rel8'): 209, jb302@1: ('r2', 'rel8'): 210, jb302@1: ('r3', 'rel8'): 211}, jb302@1: 'hlt': {('',): 255}, jb302@1: 'in': {('a', 'port_addr'): 252}, jb302@1: 'inc': {('a',): 158, ('dptr',): 156}, jb302@1: 'int': {('vect8',): 254}, jb302@1: 'jc': {('rel8',): 226}, jb302@1: 'jmp': {('@a+dptr',): 221, ('@dptr',): 222}, jb302@1: 'jnc': {('rel8',): 227}, jb302@1: 'jns': {('rel8',): 231}, jb302@1: 'jnz': {('rel8',): 225}, jb302@1: 'jpe': {('rel8',): 229}, jb302@1: 'jpo': {('rel8',): 228}, jb302@1: 'js': {('rel8',): 230}, jb302@1: 'jz': {('rel8',): 224}, jb302@1: 'laf': {('',): 18}, jb302@1: 'lcall': {('addr16',): 217}, jb302@1: 'ljmp': {('addr16',): 216}, jb302@1: 'mov': {('@addr16', 'a'): 29, jb302@1: ('@dptr', 'a'): 31, jb302@1: ('@dptr', 'dph'): 36, jb302@1: ('@dptr', 'dpl'): 37, jb302@1: ('@dptr', 'r0'): 32, jb302@1: ('@dptr', 'r1'): 33, jb302@1: ('@dptr', 'r2'): 34, jb302@1: ('@dptr', 'r3'): 35, jb302@1: ('@dptr', 'sph'): 38, jb302@1: ('@dptr', 'spl'): 39, jb302@1: ('a', '#data8'): 21, jb302@1: ('a', '@a+dptr'): 26, jb302@1: ('a', '@a+pc'): 27, jb302@1: ('a', '@addr16'): 28, jb302@1: ('a', '@dptr'): 30, jb302@1: ('a', 'addr16'): 24, jb302@1: ('a', 'dph'): 60, jb302@1: ('a', 'dpl'): 61, jb302@1: ('a', 'r0'): 56, jb302@1: ('a', 'r1'): 57, jb302@1: ('a', 'r2'): 58, jb302@1: ('a', 'r3'): 59, jb302@1: ('a', 'sph'): 62, jb302@1: ('a', 'spl'): 63, jb302@1: ('addr16', 'a'): 25, jb302@1: ('dph', '#data8'): 44, jb302@1: ('dph', '@dptr'): 100, jb302@1: ('dph', 'a'): 52, jb302@1: ('dph', 'dpl'): 101, jb302@1: ('dph', 'r0'): 96, jb302@1: ('dph', 'r1'): 97, jb302@1: ('dph', 'r2'): 98, jb302@1: ('dph', 'r3'): 99, jb302@1: ('dph', 'sph'): 102, jb302@1: ('dph', 'spl'): 103, jb302@1: ('dpl', '#data8'): 45, jb302@1: ('dpl', '@dptr'): 109, jb302@1: ('dpl', 'a'): 53, jb302@1: ('dpl', 'dph'): 108, jb302@1: ('dpl', 'r0'): 104, jb302@1: ('dpl', 'r1'): 105, jb302@1: ('dpl', 'r2'): 106, jb302@1: ('dpl', 'r3'): 107, jb302@1: ('dpl', 'sph'): 110, jb302@1: ('dpl', 'spl'): 111, jb302@1: ('dptr', '#data16'): 23, jb302@1: ('dptr', 'sp'): 19, jb302@1: ('r0', '#data8'): 40, jb302@1: ('r0', '@dptr'): 64, jb302@1: ('r0', 'a'): 48, jb302@1: ('r0', 'dph'): 68, jb302@1: ('r0', 'dpl'): 69, jb302@1: ('r0', 'r1'): 65, jb302@1: ('r0', 'r2'): 66, jb302@1: ('r0', 'r3'): 67, jb302@1: ('r0', 'sph'): 70, jb302@1: ('r0', 'spl'): 71, jb302@1: ('r1', '#data8'): 41, jb302@1: ('r1', '@dptr'): 73, jb302@1: ('r1', 'a'): 49, jb302@1: ('r1', 'dph'): 76, jb302@1: ('r1', 'dpl'): 77, jb302@1: ('r1', 'r0'): 72, jb302@1: ('r1', 'r2'): 74, jb302@1: ('r1', 'r3'): 75, jb302@1: ('r1', 'sph'): 78, jb302@1: ('r1', 'spl'): 79, jb302@1: ('r2', '#data8'): 42, jb302@1: ('r2', '@dptr'): 82, jb302@1: ('r2', 'a'): 50, jb302@1: ('r2', 'dph'): 84, jb302@1: ('r2', 'dpl'): 85, jb302@1: ('r2', 'r0'): 80, jb302@1: ('r2', 'r1'): 81, jb302@1: ('r2', 'r3'): 83, jb302@1: ('r2', 'sph'): 86, jb302@1: ('r2', 'spl'): 87, jb302@1: ('r3', '#data8'): 43, jb302@1: ('r3', '@dptr'): 91, jb302@1: ('r3', 'a'): 51, jb302@1: ('r3', 'dph'): 92, jb302@1: ('r3', 'dpl'): 93, jb302@1: ('r3', 'r0'): 88, jb302@1: ('r3', 'r1'): 89, jb302@1: ('r3', 'r2'): 90, jb302@1: ('r3', 'sph'): 94, jb302@1: ('r3', 'spl'): 95, jb302@1: ('sp', '#data16'): 22, jb302@1: ('sp', 'dptr'): 20, jb302@1: ('sph', '#data8'): 46, jb302@1: ('sph', '@dptr'): 118, jb302@1: ('sph', 'a'): 54, jb302@1: ('sph', 'dph'): 116, jb302@1: ('sph', 'dpl'): 117, jb302@1: ('sph', 'r0'): 112, jb302@1: ('sph', 'r1'): 113, jb302@1: ('sph', 'r2'): 114, jb302@1: ('sph', 'r3'): 115, jb302@1: ('sph', 'spl'): 119, jb302@1: ('spl', '#data8'): 47, jb302@1: ('spl', '@dptr'): 127, jb302@1: ('spl', 'a'): 55, jb302@1: ('spl', 'dph'): 124, jb302@1: ('spl', 'dpl'): 125, jb302@1: ('spl', 'r0'): 120, jb302@1: ('spl', 'r1'): 121, jb302@1: ('spl', 'r2'): 122, jb302@1: ('spl', 'r3'): 123, jb302@1: ('spl', 'sph'): 126}, jb302@1: 'mul': {('r0', 'r1'): 248}, jb302@1: 'nop': {('',): 0}, jb302@1: 'orl': {('a', '#data8'): 142, jb302@1: ('a', '@dptr'): 143, jb302@1: ('a', 'dph'): 140, jb302@1: ('a', 'dpl'): 141, jb302@1: ('a', 'r0'): 136, jb302@1: ('a', 'r1'): 137, jb302@1: ('a', 'r2'): 138, jb302@1: ('a', 'r3'): 139}, jb302@1: 'out': {('port_addr', 'a'): 253}, jb302@1: 'pcall': {('addr11',): 207}, jb302@1: 'pjmp': {('addr11',): 199}, jb302@1: 'pop': {('a',): 246, jb302@1: ('dph',): 244, jb302@1: ('dpl',): 245, jb302@1: ('flags',): 247, jb302@1: ('r0',): 240, jb302@1: ('r1',): 241, jb302@1: ('r2',): 242, jb302@1: ('r3',): 243}, jb302@1: 'push': {('a',): 238, jb302@1: ('dph',): 236, jb302@1: ('dpl',): 237, jb302@1: ('flags',): 239, jb302@1: ('r0',): 232, jb302@1: ('r1',): 233, jb302@1: ('r2',): 234, jb302@1: ('r3',): 235}, jb302@1: 'reserved': {('',): 251}, jb302@1: 'ret': {('',): 218}, jb302@1: 'reti': {('',): 219}, jb302@1: 'rl': {('a',): 152}, jb302@1: 'rlc': {('a',): 153}, jb302@1: 'rr': {('a',): 154}, jb302@1: 'rrc': {('a',): 155}, jb302@1: 'set': {('bs',): 10, ('c',): 8, ('ie',): 12}, jb302@1: 'sfa': {('',): 17}, jb302@1: 'sjmp': {('',): 220}, jb302@1: 'sub': {('a', '#data8'): 182, jb302@1: ('a', '@dptr'): 183, jb302@1: ('a', 'dph'): 180, jb302@1: ('a', 'dpl'): 181, jb302@1: ('a', 'r0'): 176, jb302@1: ('a', 'r1'): 177, jb302@1: ('a', 'r2'): 178, jb302@1: ('a', 'r3'): 179}, jb302@1: 'subb': {('a', '#data8'): 190, jb302@1: ('a', '@dptr'): 191, jb302@1: ('a', 'dph'): 188, jb302@1: ('a', 'dpl'): 189, jb302@1: ('a', 'r0'): 184, jb302@1: ('a', 'r1'): 185, jb302@1: ('a', 'r2'): 186, jb302@1: ('a', 'r3'): 187}, jb302@1: 'xcsd': {('',): 16}, jb302@1: 'xrl': {('a', '#data8'): 150, jb302@1: ('a', '@dptr'): 151, jb302@1: ('a', 'dph'): 148, jb302@1: ('a', 'dpl'): 149, jb302@1: ('a', 'r0'): 144, jb302@1: ('a', 'r1'): 145, jb302@1: ('a', 'r2'): 146, jb302@1: ('a', 'r3'): 147}} jb302@1: jb302@1: # take a list of arguments jb302@1: # identify dataant data: jb302@1: # pack that data into a bit string jb302@1: # return data type symbols and data jb302@1: def tokenize(args): jb302@1: sym = [] jb302@1: data = '' jb302@1: jb302@1: for a in args: jb302@1: jb302@1: # immediate ints jb302@1: if a[:3] == '#' + PREFIX: jb302@1: # 8 bit ints jb302@1: if len(a[3:]) <= 2: jb302@1: sym.append('#data8') jb302@1: val = int(a[1:], BASE) jb302@1: # big-endian byte jb302@1: data = data + struct.pack('>B', val) jb302@1: jb302@1: # 16 bit ints jb302@1: elif len(a[3:]) <= 4: jb302@1: sym.append('#data16') jb302@1: val = int(a[1:], BASE) jb302@1: # big-endian short jb302@1: data = data + struct.pack('>H', val) jb302@1: jb302@1: else: jb302@1: # bad idea to return junk to throw errors later? jb302@1: sysm.append(a) jb302@1: jb302@1: # addresses jb302@1: elif a[:2] == PREFIX: jb302@1: # 8 bit addresses jb302@1: if len(a[2:]) <= 2: jb302@1: sym.append('rel8') jb302@1: val = int(a, BASE) jb302@1: data = data + struct.pack('>B', val) jb302@1: jb302@1: # 16 bit addresses jb302@1: elif len(a[2:]) <= 4: jb302@1: sym.append('addr16') jb302@1: val = int(a, BASE) jb302@1: data = data + struct.pack('>H', val) jb302@1: jb302@1: else: jb302@1: # junk junk junk jb302@1: sym.append(a) jb302@1: jb302@1: # pointers jb302@1: elif a[:3] == '@' + PREFIX: jb302@1: sym.append('@addr16') jb302@1: val = int(a[1:], BASE) jb302@1: data = data + struct.pack('>H', val) jb302@1: jb302@1: # return unknown symbols so language can be extended more easily jb302@1: else: jb302@1: sym.append(a) jb302@1: jb302@1: return sym, data jb302@1: