annotate asm/language.py @ 42:792da050d8c4 tip

more dox
author james <jb302@eecs.qmul.ac.uk>
date Tue, 22 Apr 2014 14:25:14 +0100
parents c0c2e99b6bb0
children
rev   line source
jb302@28 1 #!/usr/bin/env python
jb302@28 2 # language.py
jb302@28 3 import struct
jb302@28 4
# argument prefixes: '#' marks an immediate int, '@' marks a pointer
ids = ['#', '@']

# reserved argument names accepted by this instruction set
# (the empty string stands for "no argument")
vargs = (
    '', '@a+dptr', 'dptr', 'bs', '@a+pc',
    'a', 'c', 'r0', 'r1', 'r2', 'r3', 'ie',
    'sp', 'flags', 'dpl', 'dph', '@dptr',
    'sph', 'spl',
)

# mnemonics whose address operand is relative rather than absolute
rinst = (
    'djnz', 'cjne', 'sjmp', 'jz', 'jnz',
    'jc', 'jnc', 'jpo', 'jpe', 'js', 'jns',
)
jb302@28 17
# Instruction set index: a dictionary of dictionaries.
#   outer key   -> mnemonic
#   inner key   -> tuple of argument symbols (one accepted argument format)
#   inner value -> [op code, width]
iset = {
    'add': {
        ('a', 'data'): [166, 2],
        ('a', '@dptr'): [167, 1],
        ('a', 'dph'): [164, 1],
        ('a', 'dpl'): [165, 1],
        ('a', 'r0'): [160, 1],
        ('a', 'r1'): [161, 1],
        ('a', 'r2'): [162, 1],
        ('a', 'r3'): [163, 1],
    },
    'addc': {
        ('a', 'data'): [174, 2],
        ('a', '@dptr'): [175, 1],
        ('a', 'dph'): [172, 1],
        ('a', 'dpl'): [173, 1],
        ('a', 'r0'): [168, 1],
        ('a', 'r1'): [169, 1],
        ('a', 'r2'): [170, 1],
        ('a', 'r3'): [171, 1],
    },
    'anl': {
        ('a', 'data'): [134, 2],
        ('a', '@dptr'): [135, 1],
        ('a', 'dph'): [132, 1],
        ('a', 'dpl'): [133, 1],
        ('a', 'r0'): [128, 1],
        ('a', 'r1'): [129, 1],
        ('a', 'r2'): [130, 1],
        ('a', 'r3'): [131, 1],
    },
    # NOTE(review): the r0-r3 forms are listed with width 2 although they
    # take the same operand shapes as the 3-wide 'a' form -- confirm
    # against the instruction set definition.
    'cjne': {
        ('a', 'data', 'label'): [223, 3],
        ('a', 'data', 'rel8'): [223, 3],
        ('r0', 'data', 'label'): [212, 2],
        ('r0', 'data', 'rel8'): [212, 2],
        ('r1', 'data', 'label'): [213, 2],
        ('r1', 'data', 'rel8'): [213, 2],
        ('r2', 'data', 'label'): [214, 2],
        ('r2', 'data', 'rel8'): [214, 2],
        ('r3', 'data', 'label'): [215, 2],
        ('r3', 'data', 'rel8'): [215, 2],
    },
    'clr': {
        ('bs',): [11, 1],
        ('c',): [9, 1],
        ('ie',): [13, 1],
    },
    'cpl': {
        ('a',): [15, 1],
        ('c',): [14, 1],
    },
    'da': {
        ('a',): [250, 1],
    },
    'dec': {
        ('a',): [159, 1],
        ('dptr',): [157, 1],
    },
    'div': {
        ('r0', 'r1'): [249, 1],
    },
    'djnz': {
        ('r0', 'label'): [208, 2],
        ('r0', 'rel8'): [208, 2],
        ('r1', 'label'): [209, 2],
        ('r1', 'rel8'): [209, 2],
        ('r2', 'label'): [210, 2],
        ('r2', 'rel8'): [210, 2],
        ('r3', 'label'): [211, 2],
        ('r3', 'rel8'): [211, 2],
    },
    'hlt': {
        ('',): [255, 1],
    },
    'in': {
        ('a', 'addr'): [252, 2],
    },
    'inc': {
        ('a',): [158, 1],
        ('dptr',): [156, 1],
    },
    'int': {
        ('addr',): [254, 2],
    },
    'jc': {
        ('label',): [226, 2],
        ('rel8',): [226, 2],
    },
    'jmp': {
        ('@a+dptr',): [221, 1],
        ('@dptr',): [222, 1],
    },
    'jnc': {
        ('label',): [227, 2],
        ('rel8',): [227, 2],
    },
    'jns': {
        ('label',): [231, 2],
        ('rel8',): [231, 2],
    },
    'jnz': {
        ('label',): [225, 2],
        ('rel8',): [225, 2],
    },
    'jpe': {
        ('label',): [229, 2],
        ('rel8',): [229, 2],
    },
    'jpo': {
        ('label',): [228, 2],
        ('rel8',): [228, 2],
    },
    'js': {
        ('label',): [230, 2],
        ('rel8',): [230, 2],
    },
    'jz': {
        ('label',): [224, 2],
        ('rel8',): [224, 2],
    },
    'laf': {
        ('',): [18, 1],
    },
    'lcall': {
        ('addr',): [217, 3],
        ('label',): [217, 3],
    },
    'ljmp': {
        ('addr',): [216, 3],
        ('label',): [216, 3],
    },
    'mov': {
        ('@addr', 'a'): [29, 3],
        ('@dptr', 'a'): [31, 1],
        ('@dptr', 'dph'): [36, 1],
        ('@dptr', 'dpl'): [37, 1],
        ('@dptr', 'r0'): [32, 1],
        ('@dptr', 'r1'): [33, 1],
        ('@dptr', 'r2'): [34, 1],
        ('@dptr', 'r3'): [35, 1],
        ('@dptr', 'sph'): [38, 1],
        ('@dptr', 'spl'): [39, 1],
        ('@label', 'a'): [29, 3],
        ('a', 'data'): [21, 2],
        ('a', '@a+dptr'): [26, 1],
        ('a', '@a+pc'): [27, 1],
        ('a', '@addr'): [28, 3],
        ('a', '@dptr'): [30, 1],
        ('a', '@label'): [28, 3],
        ('a', 'addr'): [24, 3],
        ('a', 'dph'): [60, 1],
        ('a', 'dpl'): [61, 1],
        ('a', 'label'): [24, 3],
        ('a', 'r0'): [56, 1],
        ('a', 'r1'): [57, 1],
        ('a', 'r2'): [58, 1],
        ('a', 'r3'): [59, 1],
        ('a', 'sph'): [62, 1],
        ('a', 'spl'): [63, 1],
        ('addr', 'a'): [25, 3],
        ('dph', 'data'): [44, 2],
        ('dph', '@dptr'): [100, 1],
        ('dph', 'a'): [52, 1],
        ('dph', 'dpl'): [101, 1],
        ('dph', 'r0'): [96, 1],
        ('dph', 'r1'): [97, 1],
        ('dph', 'r2'): [98, 1],
        ('dph', 'r3'): [99, 1],
        ('dph', 'sph'): [102, 1],
        ('dph', 'spl'): [103, 1],
        ('dpl', 'data'): [45, 2],
        ('dpl', '@dptr'): [109, 1],
        ('dpl', 'a'): [53, 1],
        ('dpl', 'dph'): [108, 1],
        ('dpl', 'r0'): [104, 1],
        ('dpl', 'r1'): [105, 1],
        ('dpl', 'r2'): [106, 1],
        ('dpl', 'r3'): [107, 1],
        ('dpl', 'sph'): [110, 1],
        ('dpl', 'spl'): [111, 1],
        ('dptr', 'data'): [23, 3],
        ('dptr', 'sp'): [19, 1],
        ('label', 'a'): [25, 3],
        ('r0', 'data'): [40, 2],
        ('r0', '@dptr'): [64, 1],
        ('r0', 'a'): [48, 1],
        ('r0', 'dph'): [68, 1],
        ('r0', 'dpl'): [69, 1],
        ('r0', 'r1'): [65, 1],
        ('r0', 'r2'): [66, 1],
        ('r0', 'r3'): [67, 1],
        ('r0', 'sph'): [70, 1],
        ('r0', 'spl'): [71, 1],
        ('r1', 'data'): [41, 2],
        ('r1', '@dptr'): [73, 1],
        ('r1', 'a'): [49, 1],
        ('r1', 'dph'): [76, 1],
        ('r1', 'dpl'): [77, 1],
        ('r1', 'r0'): [72, 1],
        ('r1', 'r2'): [74, 1],
        ('r1', 'r3'): [75, 1],
        ('r1', 'sph'): [78, 1],
        ('r1', 'spl'): [79, 1],
        ('r2', 'data'): [42, 2],
        ('r2', '@dptr'): [82, 1],
        ('r2', 'a'): [50, 1],
        ('r2', 'dph'): [84, 1],
        ('r2', 'dpl'): [85, 1],
        ('r2', 'r0'): [80, 1],
        ('r2', 'r1'): [81, 1],
        ('r2', 'r3'): [83, 1],
        ('r2', 'sph'): [86, 1],
        ('r2', 'spl'): [87, 1],
        ('r3', 'data'): [43, 2],
        ('r3', '@dptr'): [91, 1],
        ('r3', 'a'): [51, 1],
        ('r3', 'dph'): [92, 1],
        ('r3', 'dpl'): [93, 1],
        ('r3', 'r0'): [88, 1],
        ('r3', 'r1'): [89, 1],
        ('r3', 'r2'): [90, 1],
        ('r3', 'sph'): [94, 1],
        ('r3', 'spl'): [95, 1],
        ('sp', 'data'): [22, 3],
        ('sp', 'dptr'): [20, 1],
        ('sph', 'data'): [46, 2],
        ('sph', '@dptr'): [118, 1],
        ('sph', 'a'): [54, 1],
        ('sph', 'dph'): [116, 1],
        ('sph', 'dpl'): [117, 1],
        ('sph', 'r0'): [112, 1],
        ('sph', 'r1'): [113, 1],
        ('sph', 'r2'): [114, 1],
        ('sph', 'r3'): [115, 1],
        ('sph', 'spl'): [119, 1],
        ('spl', 'data'): [47, 2],
        ('spl', '@dptr'): [127, 1],
        ('spl', 'a'): [55, 1],
        ('spl', 'dph'): [124, 1],
        ('spl', 'dpl'): [125, 1],
        ('spl', 'r0'): [120, 1],
        ('spl', 'r1'): [121, 1],
        ('spl', 'r2'): [122, 1],
        ('spl', 'r3'): [123, 1],
        ('spl', 'sph'): [126, 1],
    },
    'mul': {
        ('r0', 'r1'): [248, 1],
    },
    'nop': {
        ('',): [0, 1],
    },
    'orl': {
        ('a', 'data'): [142, 2],
        ('a', '@dptr'): [143, 1],
        ('a', 'dph'): [140, 1],
        ('a', 'dpl'): [141, 1],
        ('a', 'r0'): [136, 1],
        ('a', 'r1'): [137, 1],
        ('a', 'r2'): [138, 1],
        ('a', 'r3'): [139, 1],
    },
    'out': {
        ('addr', 'a'): [253, 2],
    },
    # NOTE(review): unlike lcall/ljmp, the 'addr' and 'label' forms of
    # pcall/pjmp carry different op codes -- confirm this is intended.
    'pcall': {
        ('addr',): [207, 2],
        ('label',): [200, 2],
    },
    'pjmp': {
        ('addr',): [199, 2],
        ('label',): [192, 2],
    },
    'pop': {
        ('a',): [246, 1],
        ('dph',): [244, 1],
        ('dpl',): [245, 1],
        ('flags',): [247, 1],
        ('r0',): [240, 1],
        ('r1',): [241, 1],
        ('r2',): [242, 1],
        ('r3',): [243, 1],
    },
    'push': {
        ('a',): [238, 1],
        ('dph',): [236, 1],
        ('dpl',): [237, 1],
        ('flags',): [239, 1],
        ('r0',): [232, 1],
        ('r1',): [233, 1],
        ('r2',): [234, 1],
        ('r3',): [235, 1],
    },
    'ret': {
        ('',): [218, 1],
    },
    'reti': {
        ('',): [219, 1],
    },
    'rl': {
        ('a',): [152, 1],
    },
    'rlc': {
        ('a',): [153, 1],
    },
    'rr': {
        ('a',): [154, 1],
    },
    'rrc': {
        ('a',): [155, 1],
    },
    'set': {
        ('bs',): [10, 1],
        ('c',): [8, 1],
        ('ie',): [12, 1],
    },
    'sfa': {
        ('',): [17, 1],
    },
    'sjmp': {
        ('label',): [220, 2],
        ('rel8',): [220, 2],
    },
    'sub': {
        ('a', 'data'): [182, 2],
        ('a', '@dptr'): [183, 1],
        ('a', 'dph'): [180, 1],
        ('a', 'dpl'): [181, 1],
        ('a', 'r0'): [176, 1],
        ('a', 'r1'): [177, 1],
        ('a', 'r2'): [178, 1],
        ('a', 'r3'): [179, 1],
    },
    'subb': {
        ('a', 'data'): [190, 2],
        ('a', '@dptr'): [191, 1],
        ('a', 'dph'): [188, 1],
        ('a', 'dpl'): [189, 1],
        ('a', 'r0'): [184, 1],
        ('a', 'r1'): [185, 1],
        ('a', 'r2'): [186, 1],
        ('a', 'r3'): [187, 1],
    },
    'xcsd': {
        ('',): [16, 1],
    },
    'xrl': {
        ('a', 'data'): [150, 2],
        ('a', '@dptr'): [151, 1],
        ('a', 'dph'): [148, 1],
        ('a', 'dpl'): [149, 1],
        ('a', 'r0'): [144, 1],
        ('a', 'r1'): [145, 1],
        ('a', 'r2'): [146, 1],
        ('a', 'r3'): [147, 1],
    },
}
jb302@28 261
def stoi(s):
    """Convert the string representation of an integer to an int.

    Supported notations:
        decimal (no prefix)
        octal   (leading 0, or 0o)
        hex     (0x)
        binary  (0b)

    Returns the string 'NaN' if *s* is none of the above (or is not a
    string at all), so callers can compare the result against 'NaN'.
    """
    try:
        return int(s, 0)
    except (TypeError, ValueError):
        pass
    # Python 3 rejects the legacy leading-zero octal form ("010") when the
    # base is auto-detected, so retry explicitly in base 8. Anything that
    # is not such an octal literal fails here as well.
    try:
        return int(s, 8)
    except (TypeError, ValueError):
        return 'NaN'
jb302@28 274
jb302@28 275
def _as_bytes(text):
    """Return *text* as a byte string (latin-1: one byte per character)."""
    if isinstance(text, bytes):
        # Python 2 str (or already-encoded input): nothing to do.
        return text
    return text.encode('latin-1')


def tokenize(mne, args):
    """Classify an instruction's arguments and pack its constant data.

    mne  -- the mnemonic; only used to decide whether a plain numeric
            argument is a relative ('rel8') or absolute ('addr') address.
    args -- iterable of argument strings as written in the source.

    Returns a tuple ``(symbols, data)``:
        symbols -- tuple of format symbols, one per argument that is a
                   reserved word, label or number (hashable, so it can be
                   used as a key into an instruction's format table);
        data    -- byte string holding the packed numeric constants in
                   argument order (big-endian).

    Raises struct.error when a value does not fit its packed format
    (for example a negative absolute address or pointer).
    """
    sym = []
    # A byte string, so struct.pack() output can be appended on Python 3
    # as well (b'' is a plain str on Python 2).
    data = b''

    for a in args:
        # reserved arguments are matched case-insensitively and need no
        # further processing
        lowered = a.lower()
        if lowered in vargs:
            sym.append(lowered)
            continue

        # unprefixed arguments are addresses, so that is the default;
        # otherwise remember the identifier and strip it
        arg_type = 'addr'
        if a[0] in ids:
            arg_type = a[0]
            a = a[1:]

        if a[0] == '(' and a[-1] == ')':
            # inline calculation
            # NOTE(review): eval() executes arbitrary Python taken from the
            # assembly source -- only assemble trusted input.
            a = str(eval(a[1:-1]))
        elif a[0] == '\'' and a[-1] == '\'':
            # quoted string: one or two characters become a number,
            # anything else is taken as raw data
            text = _as_bytes(a[1:-1])
            if len(a) == 3:
                a = str(struct.unpack('>B', text)[0])
            elif len(a) == 4:
                a = str(struct.unpack('>H', text)[0])
            else:
                # NOTE(review): this replaces any data packed so far and
                # adds no symbol -- confirm that is intended.
                data = text
                continue

        # non-numbers must be a label or a source code error
        val = stoi(a)
        if val == 'NaN':
            sym.append('@label' if arg_type == '@' else 'label')
            continue

        if arg_type == 'addr':
            # relative addresses are a signed byte, absolute addresses
            # an unsigned 16 bit word
            if mne in rinst:
                sym.append('rel8')
                fmt = '>b'
            else:
                sym.append('addr')
                fmt = '>H'
        elif arg_type == '#':
            # immediate ints are 16 bits, signed when negative
            sym.append('data')
            fmt = '>h' if val < 0 else '>H'
        elif arg_type == '@':
            # pointers are unsigned 16 bit words; the sign is kept so a
            # negative pointer is rejected exactly like a negative
            # absolute address instead of being silently made positive
            sym.append('@addr')
            fmt = '>H'
        else:
            continue
        data += struct.pack(fmt, val)

    return tuple(sym), data
jb302@28 360