Mercurial > hg > ede
view assembler/language.py @ 14:2b8eb2c86602
Major update to assembler design; it now handles labels properly.
Next step unit test
Added boot procedure and experimental function look up table to emulator
Started implementing operations
tested SET and CLR operations with success
author | james <jb302@eecs.qmul.ac.uk> |
---|---|
date | Wed, 29 Jan 2014 20:11:07 +0000 |
parents | e9dc055a0f8b |
children | 256d24488e3f |
line wrap: on
line source
#!/usr/bin/env python
# language.py
import struct

# these definitions are here to make changing the source code
# representation of numbers easier
BASE = 16
# tokenize() detects the prefix with startswith(), so its length is not
# fixed; however the prefixed text is still handed to int(text, BASE), so
# PREFIX must be a prefix that int() accepts for BASE (e.g. '0x' for 16).
PREFIX = '0x'


def num_string(num):
    """Return the source-code text representation of the integer num."""
    return hex(num)


# valid arguments for this instruction set
# ('@dptr', 'sph' and 'spl' are listed because iset uses them as literal
# operand symbols; without them tokenize() would report them as 'label')
vargs = ('', '@a+dptr', 'dptr', 'bs', '@a+pc',
         'a', 'c', 'r0', 'r1', 'r2', 'r3', 'ie',
         'sp', 'flags', 'dpl', 'dph',
         '@dptr', 'sph', 'spl')

# dictionary of embedded dictionaries:
# for every mnemonic in the instruction set index
# there is an index of possible argument formats ('symbol')
# for that mnemonic and a corresponding [op code, length] pair
#
# NOTE(review): the symbols '#data', 'port_addr', 'vect8' and 'addr11' are
# used as keys below but are never produced by tokenize(), so those forms
# cannot currently be matched from tokenize() output -- confirm intent.
# NOTE(review): the 'cjne rN' forms list length 2 while 'cjne a' lists 3 for
# what looks like the same operand shape -- confirm against the ISA.
iset = {
    'add': {
        ('a', '#data8'): [166, 2],
        ('a', '@dptr'): [167, 1],
        ('a', 'dph'): [164, 1],
        ('a', 'dpl'): [165, 1],
        ('a', 'r0'): [160, 1],
        ('a', 'r1'): [161, 1],
        ('a', 'r2'): [162, 1],
        ('a', 'r3'): [163, 1]},
    'addc': {
        ('a', '#data8'): [174, 2],
        ('a', '@dptr'): [175, 1],
        ('a', 'dph'): [172, 1],
        ('a', 'dpl'): [173, 1],
        ('a', 'r0'): [168, 1],
        ('a', 'r1'): [169, 1],
        ('a', 'r2'): [170, 1],
        ('a', 'r3'): [171, 1]},
    'anl': {
        ('a', '#data8'): [134, 2],
        ('a', '@dptr'): [135, 1],
        ('a', 'dph'): [132, 1],
        ('a', 'dpl'): [133, 1],
        ('a', 'r0'): [128, 1],
        ('a', 'r1'): [129, 1],
        ('a', 'r2'): [130, 1],
        ('a', 'r3'): [131, 1]},
    'cjne': {
        ('a', '#data8', 'label'): [223, 3],
        ('a', '#data8', 'rel8'): [223, 3],
        ('r0', '#data', 'label'): [212, 2],
        ('r0', '#data', 'rel8'): [212, 2],
        ('r1', '#data', 'label'): [213, 2],
        ('r1', '#data', 'rel8'): [213, 2],
        ('r2', '#data', 'label'): [214, 2],
        ('r2', '#data', 'rel8'): [214, 2],
        ('r3', '#data', 'label'): [215, 2],
        ('r3', '#data', 'rel8'): [215, 2]},
    'clr': {
        ('bs',): [11, 1],
        ('c',): [9, 1],
        ('ie',): [13, 1]},
    'cpl': {
        ('a',): [15, 1],
        ('c',): [14, 1]},
    'da': {
        ('a',): [250, 1]},
    'dec': {
        ('a',): [159, 1],
        ('dptr',): [157, 1]},
    'div': {
        ('r0', 'r1'): [249, 1]},
    'djnz': {
        ('r0', 'label'): [208, 2],
        ('r0', 'rel8'): [208, 2],
        ('r1', 'label'): [209, 2],
        ('r1', 'rel8'): [209, 2],
        ('r2', 'label'): [210, 2],
        ('r2', 'rel8'): [210, 2],
        ('r3', 'label'): [211, 2],
        ('r3', 'rel8'): [211, 2]},
    'hlt': {
        ('',): [255, 1]},
    'in': {
        ('a', 'port_addr'): [252, 2]},
    'inc': {
        ('a',): [158, 1],
        ('dptr',): [156, 1]},
    'int': {
        ('vect8',): [254, 2]},
    'jc': {
        ('label',): [226, 2],
        ('rel8',): [226, 2]},
    'jmp': {
        ('@a+dptr',): [221, 1],
        ('@dptr',): [222, 1]},
    'jnc': {
        ('label',): [227, 2],
        ('rel8',): [227, 2]},
    'jns': {
        ('label',): [231, 2],
        ('rel8',): [231, 2]},
    'jnz': {
        ('label',): [225, 2],
        ('rel8',): [225, 2]},
    'jpe': {
        ('label',): [229, 2],
        ('rel8',): [229, 2]},
    'jpo': {
        ('label',): [228, 2],
        ('rel8',): [228, 2]},
    'js': {
        ('label',): [230, 2],
        ('rel8',): [230, 2]},
    'jz': {
        ('label',): [224, 2],
        ('rel8',): [224, 2]},
    'laf': {
        ('',): [18, 1]},
    'lcall': {
        ('addr16',): [217, 3],
        ('label',): [217, 3]},
    'ljmp': {
        ('addr16',): [216, 3],
        ('label',): [216, 3]},
    'mov': {
        ('@addr16', 'a'): [29, 3],
        ('@dptr', 'a'): [31, 1],
        ('@dptr', 'dph'): [36, 1],
        ('@dptr', 'dpl'): [37, 1],
        ('@dptr', 'r0'): [32, 1],
        ('@dptr', 'r1'): [33, 1],
        ('@dptr', 'r2'): [34, 1],
        ('@dptr', 'r3'): [35, 1],
        ('@dptr', 'sph'): [38, 1],
        ('@dptr', 'spl'): [39, 1],
        ('a', '#data8'): [21, 2],
        ('a', '@a+dptr'): [26, 1],
        ('a', '@a+pc'): [27, 1],
        ('a', '@addr16'): [28, 3],
        ('a', '@dptr'): [30, 1],
        ('a', 'addr16'): [24, 3],
        ('a', 'dph'): [60, 1],
        ('a', 'dpl'): [61, 1],
        ('a', 'label'): [24, 3],
        ('a', 'r0'): [56, 1],
        ('a', 'r1'): [57, 1],
        ('a', 'r2'): [58, 1],
        ('a', 'r3'): [59, 1],
        ('a', 'sph'): [62, 1],
        ('a', 'spl'): [63, 1],
        ('addr16', 'a'): [25, 3],
        ('dph', '#data8'): [44, 2],
        ('dph', '@dptr'): [100, 1],
        ('dph', 'a'): [52, 1],
        ('dph', 'dpl'): [101, 1],
        ('dph', 'r0'): [96, 1],
        ('dph', 'r1'): [97, 1],
        ('dph', 'r2'): [98, 1],
        ('dph', 'r3'): [99, 1],
        ('dph', 'sph'): [102, 1],
        ('dph', 'spl'): [103, 1],
        ('dpl', '#data8'): [45, 2],
        ('dpl', '@dptr'): [109, 1],
        ('dpl', 'a'): [53, 1],
        ('dpl', 'dph'): [108, 1],
        ('dpl', 'r0'): [104, 1],
        ('dpl', 'r1'): [105, 1],
        ('dpl', 'r2'): [106, 1],
        ('dpl', 'r3'): [107, 1],
        ('dpl', 'sph'): [110, 1],
        ('dpl', 'spl'): [111, 1],
        ('dptr', '#data16'): [23, 3],
        ('dptr', 'sp'): [19, 1],
        ('r0', '#data8'): [40, 2],
        ('r0', '@dptr'): [64, 1],
        ('r0', 'a'): [48, 1],
        ('r0', 'dph'): [68, 1],
        ('r0', 'dpl'): [69, 1],
        ('r0', 'r1'): [65, 1],
        ('r0', 'r2'): [66, 1],
        ('r0', 'r3'): [67, 1],
        ('r0', 'sph'): [70, 1],
        ('r0', 'spl'): [71, 1],
        ('r1', '#data8'): [41, 2],
        ('r1', '@dptr'): [73, 1],
        ('r1', 'a'): [49, 1],
        ('r1', 'dph'): [76, 1],
        ('r1', 'dpl'): [77, 1],
        ('r1', 'r0'): [72, 1],
        ('r1', 'r2'): [74, 1],
        ('r1', 'r3'): [75, 1],
        ('r1', 'sph'): [78, 1],
        ('r1', 'spl'): [79, 1],
        ('r2', '#data8'): [42, 2],
        ('r2', '@dptr'): [82, 1],
        ('r2', 'a'): [50, 1],
        ('r2', 'dph'): [84, 1],
        ('r2', 'dpl'): [85, 1],
        ('r2', 'r0'): [80, 1],
        ('r2', 'r1'): [81, 1],
        ('r2', 'r3'): [83, 1],
        ('r2', 'sph'): [86, 1],
        ('r2', 'spl'): [87, 1],
        ('r3', '#data8'): [43, 2],
        ('r3', '@dptr'): [91, 1],
        ('r3', 'a'): [51, 1],
        ('r3', 'dph'): [92, 1],
        ('r3', 'dpl'): [93, 1],
        ('r3', 'r0'): [88, 1],
        ('r3', 'r1'): [89, 1],
        ('r3', 'r2'): [90, 1],
        ('r3', 'sph'): [94, 1],
        ('r3', 'spl'): [95, 1],
        ('sp', '#data16'): [22, 3],
        ('sp', 'dptr'): [20, 1],
        ('sph', '#data8'): [46, 2],
        ('sph', '@dptr'): [118, 1],
        ('sph', 'a'): [54, 1],
        ('sph', 'dph'): [116, 1],
        ('sph', 'dpl'): [117, 1],
        ('sph', 'r0'): [112, 1],
        ('sph', 'r1'): [113, 1],
        ('sph', 'r2'): [114, 1],
        ('sph', 'r3'): [115, 1],
        ('sph', 'spl'): [119, 1],
        ('spl', '#data8'): [47, 2],
        ('spl', '@dptr'): [127, 1],
        ('spl', 'a'): [55, 1],
        ('spl', 'dph'): [124, 1],
        ('spl', 'dpl'): [125, 1],
        ('spl', 'r0'): [120, 1],
        ('spl', 'r1'): [121, 1],
        ('spl', 'r2'): [122, 1],
        ('spl', 'r3'): [123, 1],
        ('spl', 'sph'): [126, 1]},
    'mul': {
        ('r0', 'r1'): [248, 1]},
    'nop': {
        ('',): [0, 1]},
    'orl': {
        ('a', '#data8'): [142, 2],
        ('a', '@dptr'): [143, 1],
        ('a', 'dph'): [140, 1],
        ('a', 'dpl'): [141, 1],
        ('a', 'r0'): [136, 1],
        ('a', 'r1'): [137, 1],
        ('a', 'r2'): [138, 1],
        ('a', 'r3'): [139, 1]},
    'out': {
        ('port_addr', 'a'): [253, 2]},
    'pcall': {
        ('addr11',): [207, 2],
        ('label',): [207, 2]},
    'pjmp': {
        ('addr11',): [199, 2],
        ('label',): [199, 2]},
    'pop': {
        ('a',): [246, 1],
        ('dph',): [244, 1],
        ('dpl',): [245, 1],
        ('flags',): [247, 1],
        ('r0',): [240, 1],
        ('r1',): [241, 1],
        ('r2',): [242, 1],
        ('r3',): [243, 1]},
    'push': {
        ('a',): [238, 1],
        ('dph',): [236, 1],
        ('dpl',): [237, 1],
        ('flags',): [239, 1],
        ('r0',): [232, 1],
        ('r1',): [233, 1],
        ('r2',): [234, 1],
        ('r3',): [235, 1]},
    'reserved': {
        ('',): [251, 1]},
    'ret': {
        ('',): [218, 1]},
    'reti': {
        ('',): [219, 1]},
    'rl': {
        ('a',): [152, 1]},
    'rlc': {
        ('a',): [153, 1]},
    'rr': {
        ('a',): [154, 1]},
    'rrc': {
        ('a',): [155, 1]},
    'set': {
        ('bs',): [10, 1],
        ('c',): [8, 1],
        ('ie',): [12, 1]},
    'sfa': {
        ('',): [17, 1]},
    'sjmp': {
        ('label',): [220, 2],
        ('rel8',): [220, 2]},
    'sub': {
        ('a', '#data8'): [182, 2],
        ('a', '@dptr'): [183, 1],
        ('a', 'dph'): [180, 1],
        ('a', 'dpl'): [181, 1],
        ('a', 'r0'): [176, 1],
        ('a', 'r1'): [177, 1],
        ('a', 'r2'): [178, 1],
        ('a', 'r3'): [179, 1]},
    'subb': {
        ('a', '#data8'): [190, 2],
        ('a', '@dptr'): [191, 1],
        ('a', 'dph'): [188, 1],
        ('a', 'dpl'): [189, 1],
        ('a', 'r0'): [184, 1],
        ('a', 'r1'): [185, 1],
        ('a', 'r2'): [186, 1],
        ('a', 'r3'): [187, 1]},
    'xcsd': {
        ('',): [16, 1]},
    'xrl': {
        ('a', '#data8'): [150, 2],
        ('a', '@dptr'): [151, 1],
        ('a', 'dph'): [148, 1],
        ('a', 'dpl'): [149, 1],
        ('a', 'r0'): [144, 1],
        ('a', 'r1'): [145, 1],
        ('a', 'r2'): [146, 1],
        ('a', 'r3'): [147, 1]},
}


def tokenize(args):
    """Classify instruction arguments and pack their constant data.

    Takes a list of argument strings, identifies constant data, packs that
    data into a byte string and returns the data type symbols and the data.

    Classification, checked in this order for every argument:
      '#' + PREFIX + digits   immediate: '#data8' for up to 2 digits,
                              '#data16' for up to 4 digits
      PREFIX + digits         address: 'rel8' for up to 2 digits,
                              'addr16' for up to 4 digits
      '@' + PREFIX + digits   pointer: '@addr16' (always packed as 16 bit)
      a member of vargs       returned unchanged
      anything else           'label'

    Immediates and addresses with more than 4 digits are returned verbatim
    as their own symbol and contribute no data, leaving it to the caller to
    reject them when no instruction form matches.

    args    -- iterable of argument strings
    returns -- (symbols, data): a tuple of symbol strings, one per argument,
               and the big-endian packed constants concatenated in argument
               order (a byte string; empty when there are no constants)
    raises  -- ValueError if the digits are not valid in BASE;
               struct.error if a pointer value does not fit in 16 bits
    """
    sym = []
    # packed constants are collected and joined once at the end; joining
    # with b'' keeps the result a byte string on both Python 2 and 3
    chunks = []
    immediate = '#' + PREFIX
    pointer = '@' + PREFIX

    for a in args:
        # immediate ints
        if a.startswith(immediate):
            digits = len(a) - len(immediate)
            # 8 bit ints
            if digits <= 2:
                sym.append('#data8')
                # big-endian byte
                chunks.append(struct.pack('>B', int(a[1:], BASE)))
            # 16 bit ints
            elif digits <= 4:
                sym.append('#data16')
                # big-endian short
                chunks.append(struct.pack('>H', int(a[1:], BASE)))
            else:
                # too wide: hand the raw text back so the lookup fails later
                sym.append(a)
        # addresses
        elif a.startswith(PREFIX):
            digits = len(a) - len(PREFIX)
            # 8 bit addresses
            if digits <= 2:
                sym.append('rel8')
                chunks.append(struct.pack('>B', int(a, BASE)))
            # 16 bit addresses
            elif digits <= 4:
                sym.append('addr16')
                chunks.append(struct.pack('>H', int(a, BASE)))
            else:
                # too wide: hand the raw text back so the lookup fails later
                sym.append(a)
        # pointers
        elif a.startswith(pointer):
            sym.append('@addr16')
            chunks.append(struct.pack('>H', int(a[1:], BASE)))
        # register / flag names are their own symbol
        elif a in vargs:
            sym.append(a)
        # unknown strings are either labels or source code errors
        else:
            sym.append('label')

    return tuple(sym), b''.join(chunks)