#!/usr/bin/env python
# language.py
import struct

# identifiers: immediate ints, pointers
ids = ['#', '@']

# valid reserved arguments for this instruction set
# (the empty string stands for "no argument")
vargs = ('', '@a+dptr', 'dptr', 'bs', '@a+pc',
         'a', 'c', 'r0', 'r1', 'r2', 'r3', 'ie',
         'sp', 'flags', 'dpl', 'dph', '@dptr',
         'sph', 'spl')

# instructions that use relative addresses
rinst = ('djnz', 'cjne', 'sjmp', 'jz', 'jnz',
         'jc', 'jnc', 'jpo', 'jpe', 'js', 'jns')

# dictionary of embedded dictionaries:
# for every mnemonic in the instruction set index
# there is an index of possible argument formats (symbols)
# with corresponding op codes and width, stored as [opcode, width]
iset = {
    'add': {
        ('a', 'data'): [166, 2],
        ('a', '@dptr'): [167, 1],
        ('a', 'dph'): [164, 1],
        ('a', 'dpl'): [165, 1],
        ('a', 'r0'): [160, 1],
        ('a', 'r1'): [161, 1],
        ('a', 'r2'): [162, 1],
        ('a', 'r3'): [163, 1],
    },
    'addc': {
        ('a', 'data'): [174, 2],
        ('a', '@dptr'): [175, 1],
        ('a', 'dph'): [172, 1],
        ('a', 'dpl'): [173, 1],
        ('a', 'r0'): [168, 1],
        ('a', 'r1'): [169, 1],
        ('a', 'r2'): [170, 1],
        ('a', 'r3'): [171, 1],
    },
    'anl': {
        ('a', 'data'): [134, 2],
        ('a', '@dptr'): [135, 1],
        ('a', 'dph'): [132, 1],
        ('a', 'dpl'): [133, 1],
        ('a', 'r0'): [128, 1],
        ('a', 'r1'): [129, 1],
        ('a', 'r2'): [130, 1],
        ('a', 'r3'): [131, 1],
    },
    'cjne': {
        ('a', 'data', 'label'): [223, 3],
        ('a', 'data', 'rel8'): [223, 3],
        ('r0', 'data', 'label'): [212, 2],
        ('r0', 'data', 'rel8'): [212, 2],
        ('r1', 'data', 'label'): [213, 2],
        ('r1', 'data', 'rel8'): [213, 2],
        ('r2', 'data', 'label'): [214, 2],
        ('r2', 'data', 'rel8'): [214, 2],
        ('r3', 'data', 'label'): [215, 2],
        ('r3', 'data', 'rel8'): [215, 2],
    },
    'clr': {('bs',): [11, 1], ('c',): [9, 1], ('ie',): [13, 1]},
    'cpl': {('a',): [15, 1], ('c',): [14, 1]},
    'da': {('a',): [250, 1]},
    'dec': {('a',): [159, 1], ('dptr',): [157, 1]},
    'div': {('r0', 'r1'): [249, 1]},
    'djnz': {
        ('r0', 'label'): [208, 2],
        ('r0', 'rel8'): [208, 2],
        ('r1', 'label'): [209, 2],
        ('r1', 'rel8'): [209, 2],
        ('r2', 'label'): [210, 2],
        ('r2', 'rel8'): [210, 2],
        ('r3', 'label'): [211, 2],
        ('r3', 'rel8'): [211, 2],
    },
    'hlt': {('',): [255, 1]},
    'in': {('a', 'addr'): [252, 2]},
    'inc': {('a',): [158, 1], ('dptr',): [156, 1]},
    'int': {('addr',): [254, 2]},
    'jc': {('label',): [226, 2], ('rel8',): [226, 2]},
    'jmp': {('@a+dptr',): [221, 1], ('@dptr',): [222, 1]},
    'jnc': {('label',): [227, 2], ('rel8',): [227, 2]},
    'jns': {('label',): [231, 2], ('rel8',): [231, 2]},
    'jnz': {('label',): [225, 2], ('rel8',): [225, 2]},
    'jpe': {('label',): [229, 2], ('rel8',): [229, 2]},
    'jpo': {('label',): [228, 2], ('rel8',): [228, 2]},
    'js': {('label',): [230, 2], ('rel8',): [230, 2]},
    'jz': {('label',): [224, 2], ('rel8',): [224, 2]},
    'laf': {('',): [18, 1]},
    'lcall': {('addr',): [217, 3], ('label',): [217, 3]},
    'ljmp': {('addr',): [216, 3], ('label',): [216, 3]},
    'mov': {
        ('@addr', 'a'): [29, 3],
        ('@dptr', 'a'): [31, 1],
        ('@dptr', 'dph'): [36, 1],
        ('@dptr', 'dpl'): [37, 1],
        ('@dptr', 'r0'): [32, 1],
        ('@dptr', 'r1'): [33, 1],
        ('@dptr', 'r2'): [34, 1],
        ('@dptr', 'r3'): [35, 1],
        ('@dptr', 'sph'): [38, 1],
        ('@dptr', 'spl'): [39, 1],
        ('@label', 'a'): [29, 3],
        ('a', 'data'): [21, 2],
        ('a', '@a+dptr'): [26, 1],
        ('a', '@a+pc'): [27, 1],
        ('a', '@addr'): [28, 3],
        ('a', '@dptr'): [30, 1],
        ('a', '@label'): [28, 3],
        ('a', 'addr'): [24, 3],
        ('a', 'dph'): [60, 1],
        ('a', 'dpl'): [61, 1],
        ('a', 'label'): [24, 3],
        ('a', 'r0'): [56, 1],
        ('a', 'r1'): [57, 1],
        ('a', 'r2'): [58, 1],
        ('a', 'r3'): [59, 1],
        ('a', 'sph'): [62, 1],
        ('a', 'spl'): [63, 1],
        ('addr', 'a'): [25, 3],
        ('dph', 'data'): [44, 2],
        ('dph', '@dptr'): [100, 1],
        ('dph', 'a'): [52, 1],
        ('dph', 'dpl'): [101, 1],
        ('dph', 'r0'): [96, 1],
        ('dph', 'r1'): [97, 1],
        ('dph', 'r2'): [98, 1],
        ('dph', 'r3'): [99, 1],
        ('dph', 'sph'): [102, 1],
        ('dph', 'spl'): [103, 1],
        ('dpl', 'data'): [45, 2],
        ('dpl', '@dptr'): [109, 1],
        ('dpl', 'a'): [53, 1],
        ('dpl', 'dph'): [108, 1],
        ('dpl', 'r0'): [104, 1],
        ('dpl', 'r1'): [105, 1],
        ('dpl', 'r2'): [106, 1],
        ('dpl', 'r3'): [107, 1],
        ('dpl', 'sph'): [110, 1],
        ('dpl', 'spl'): [111, 1],
        ('dptr', 'data'): [23, 3],
        ('dptr', 'sp'): [19, 1],
        ('label', 'a'): [25, 3],
        ('r0', 'data'): [40, 2],
        ('r0', '@dptr'): [64, 1],
        ('r0', 'a'): [48, 1],
        ('r0', 'dph'): [68, 1],
        ('r0', 'dpl'): [69, 1],
        ('r0', 'r1'): [65, 1],
        ('r0', 'r2'): [66, 1],
        ('r0', 'r3'): [67, 1],
        ('r0', 'sph'): [70, 1],
        ('r0', 'spl'): [71, 1],
        ('r1', 'data'): [41, 2],
        ('r1', '@dptr'): [73, 1],
        ('r1', 'a'): [49, 1],
        ('r1', 'dph'): [76, 1],
        ('r1', 'dpl'): [77, 1],
        ('r1', 'r0'): [72, 1],
        ('r1', 'r2'): [74, 1],
        ('r1', 'r3'): [75, 1],
        ('r1', 'sph'): [78, 1],
        ('r1', 'spl'): [79, 1],
        ('r2', 'data'): [42, 2],
        ('r2', '@dptr'): [82, 1],
        ('r2', 'a'): [50, 1],
        ('r2', 'dph'): [84, 1],
        ('r2', 'dpl'): [85, 1],
        ('r2', 'r0'): [80, 1],
        ('r2', 'r1'): [81, 1],
        ('r2', 'r3'): [83, 1],
        ('r2', 'sph'): [86, 1],
        ('r2', 'spl'): [87, 1],
        ('r3', 'data'): [43, 2],
        ('r3', '@dptr'): [91, 1],
        ('r3', 'a'): [51, 1],
        ('r3', 'dph'): [92, 1],
        ('r3', 'dpl'): [93, 1],
        ('r3', 'r0'): [88, 1],
        ('r3', 'r1'): [89, 1],
        ('r3', 'r2'): [90, 1],
        ('r3', 'sph'): [94, 1],
        ('r3', 'spl'): [95, 1],
        ('sp', 'data'): [22, 3],
        ('sp', 'dptr'): [20, 1],
        ('sph', 'data'): [46, 2],
        ('sph', '@dptr'): [118, 1],
        ('sph', 'a'): [54, 1],
        ('sph', 'dph'): [116, 1],
        ('sph', 'dpl'): [117, 1],
        ('sph', 'r0'): [112, 1],
        ('sph', 'r1'): [113, 1],
        ('sph', 'r2'): [114, 1],
        ('sph', 'r3'): [115, 1],
        ('sph', 'spl'): [119, 1],
        ('spl', 'data'): [47, 2],
        ('spl', '@dptr'): [127, 1],
        ('spl', 'a'): [55, 1],
        ('spl', 'dph'): [124, 1],
        ('spl', 'dpl'): [125, 1],
        ('spl', 'r0'): [120, 1],
        ('spl', 'r1'): [121, 1],
        ('spl', 'r2'): [122, 1],
        ('spl', 'r3'): [123, 1],
        ('spl', 'sph'): [126, 1],
    },
    'mul': {('r0', 'r1'): [248, 1]},
    'nop': {('',): [0, 1]},
    'orl': {
        ('a', 'data'): [142, 2],
        ('a', '@dptr'): [143, 1],
        ('a', 'dph'): [140, 1],
        ('a', 'dpl'): [141, 1],
        ('a', 'r0'): [136, 1],
        ('a', 'r1'): [137, 1],
        ('a', 'r2'): [138, 1],
        ('a', 'r3'): [139, 1],
    },
    'out': {('addr', 'a'): [253, 2]},
    'pcall': {('addr',): [207, 2], ('label',): [200, 2]},
    'pjmp': {('addr',): [199, 2], ('label',): [192, 2]},
    'pop': {
        ('a',): [246, 1],
        ('dph',): [244, 1],
        ('dpl',): [245, 1],
        ('flags',): [247, 1],
        ('r0',): [240, 1],
        ('r1',): [241, 1],
        ('r2',): [242, 1],
        ('r3',): [243, 1],
    },
    'push': {
        ('a',): [238, 1],
        ('dph',): [236, 1],
        ('dpl',): [237, 1],
        ('flags',): [239, 1],
        ('r0',): [232, 1],
        ('r1',): [233, 1],
        ('r2',): [234, 1],
        ('r3',): [235, 1],
    },
    'ret': {('',): [218, 1]},
    'reti': {('',): [219, 1]},
    'rl': {('a',): [152, 1]},
    'rlc': {('a',): [153, 1]},
    'rr': {('a',): [154, 1]},
    'rrc': {('a',): [155, 1]},
    'set': {('bs',): [10, 1], ('c',): [8, 1], ('ie',): [12, 1]},
    'sfa': {('',): [17, 1]},
    'sjmp': {('label',): [220, 2], ('rel8',): [220, 2]},
    'sub': {
        ('a', 'data'): [182, 2],
        ('a', '@dptr'): [183, 1],
        ('a', 'dph'): [180, 1],
        ('a', 'dpl'): [181, 1],
        ('a', 'r0'): [176, 1],
        ('a', 'r1'): [177, 1],
        ('a', 'r2'): [178, 1],
        ('a', 'r3'): [179, 1],
    },
    'subb': {
        ('a', 'data'): [190, 2],
        ('a', '@dptr'): [191, 1],
        ('a', 'dph'): [188, 1],
        ('a', 'dpl'): [189, 1],
        ('a', 'r0'): [184, 1],
        ('a', 'r1'): [185, 1],
        ('a', 'r2'): [186, 1],
        ('a', 'r3'): [187, 1],
    },
    'xcsd': {('',): [16, 1]},
    'xrl': {
        ('a', 'data'): [150, 2],
        ('a', '@dptr'): [151, 1],
        ('a', 'dph'): [148, 1],
        ('a', 'dpl'): [149, 1],
        ('a', 'r0'): [144, 1],
        ('a', 'r1'): [145, 1],
        ('a', 'r2'): [146, 1],
        ('a', 'r3'): [147, 1],
    },
}

# take integer representation as string and return int:
# supports:
#   decimal (no prefix)
#   octal   (0 or 0o prefix)
#   hex     (0x)
#   binary  (0b)
# return 'NaN' if it is none of the above
def stoi(s):
    """Convert an integer literal given as a string to an int.

    The base is taken from the literal's prefix (none, ``0``/``0o``,
    ``0x``, ``0b``); an optional sign is accepted.

    Returns the int, or the sentinel string 'NaN' when *s* is not a
    valid integer literal (callers compare against 'NaN').
    """
    try:
        return int(s, 0)
    except (TypeError, ValueError):
        # only conversion failures mean "not a number"; anything else
        # (e.g. KeyboardInterrupt) must propagate
        pass

    # Python 3's int(s, 0) rejects legacy "0NNN" octal literals that
    # Python 2 accepted, so parse them explicitly to keep the documented
    # "octal (0)" form working on both.
    if isinstance(s, str):
        text = s.strip()
        sign = ''
        if text[:1] in ('+', '-'):
            sign, text = text[0], text[1:]
        if len(text) > 1 and text[0] == '0' and text.isdigit():
            try:
                return int(sign + text, 8)
            except ValueError:
                pass
    return 'NaN'


def _as_bytes(text):
    """Return *text* as a byte string, one byte per 8-bit character."""
    if isinstance(text, bytes):
        return text
    return text.encode('latin-1')


# take a mnemonic and its arguments
# identify constant data:
#   pack that data into a byte string
# return hashable format symbol and data
def tokenize(mne, args):
    """Classify the arguments of one instruction and pack its constants.

    mne  -- instruction mnemonic; only used to decide whether a bare
            number is a relative ('rel8') or absolute ('addr') address.
    args -- iterable of argument strings as written in the source.

    Returns ``(symbol, data)``: *symbol* is a tuple with one entry per
    classified argument (a reserved name from ``vargs``, 'data', 'addr',
    'rel8', '@addr', 'label' or '@label') and *data* is a byte string
    holding the packed big-endian constants in argument order.

    Raises struct.error when a number does not fit the field it is packed
    into, which includes negative absolute addresses and negative pointers.
    """
    sym = []
    data = b''

    for a in args:
        # tokenize reserved arguments immediately (not case sensitive)
        if a.lower() in vargs:
            sym.append(a.lower())
            continue

        # determine arg type and remove identifier if needed
        # unprefixed arguments are addresses so this is the default
        arg_type = 'addr'
        if a[:1] in ids:
            arg_type = a[0]
            a = a[1:]

        # evaluate inline calculations
        if a.startswith('(') and a.endswith(')'):
            # NOTE(review): eval() runs arbitrary Python taken from the
            # assembly source -- only assemble trusted input.
            a = str(eval(a[1:-1]))
        # evaluate strings
        elif a.startswith('\'') and a.endswith('\''):
            text = _as_bytes(a[1:-1])
            if len(a) == 3:
                # one character: treat it as an 8 bit number
                a = str(struct.unpack('>B', text)[0])
            elif len(a) == 4:
                # two characters: treat them as a 16 bit number
                a = str(struct.unpack('>H', text)[0])
            else:
                # longer strings are raw data; append so that data packed
                # from earlier arguments is not lost
                data = data + text
                continue

        # non-numbers must be a label or a source code error
        val = stoi(a)
        if val == 'NaN':
            if arg_type == '@':
                sym.append('@label')
            else:
                sym.append('label')
            continue

        # absolute addresses and immediate ints
        # are always 16 bits. second_pass() checks
        # if values are too long for instruction.
        if arg_type == 'addr':
            # addresses: signed 8 bit offset for relative instructions
            if mne in rinst:
                sym.append('rel8')
                fmt = '>b'
            else:
                sym.append('addr')
                fmt = '>H'
        elif arg_type == '#':
            # immediate ints (signed when negative)
            sym.append('data')
            if val < 0:
                fmt = '>h'
            else:
                fmt = '>H'
        elif arg_type == '@':
            # pointers: the sign is kept, so a negative pointer is
            # rejected by struct.pack instead of being assembled as its
            # absolute value
            sym.append('@addr')
            fmt = '>H'
        else:
            continue
        data = data + struct.pack(fmt, val)

    return tuple(sym), data