annotate assembler/language.py @ 1:82e82dda442b

alpha version of assembler 'finished' some more documentation and test files added
author james <jb302@eecs.qmul.ac.uk>
date Fri, 06 Dec 2013 23:39:54 +0000
parents
children 81dd03d17c22
rev   line source
jb302@1 1 #!/usr/bin/env python
jb302@1 2 # language.py
jb302@1 3 import struct
jb302@1 4
# The source-code representation of numbers is defined in one place so
# that it can be changed without hunting through the assembler.
# BASE is the radix handed to int() when a literal is parsed.
BASE = 16
# PREFIX marks a numeric literal in the source text.  Keep it exactly two
# characters long: tokenize() was written for a two-character prefix.
PREFIX = '0x'


def num_string(num):
    """Return num rendered in the source-code number notation.

    The rendering is whatever the built-in hex() produces for num.
    """
    rendered = hex(num)
    return rendered
jb302@1 12
# Instruction set table.
#
# iset maps every mnemonic to a dictionary of the argument forms it
# accepts.  A form is a tuple of argument type symbols (for example
# '#data8' or 'rel8') and register names; its value is the op code for
# that form.  Mnemonics listed with the single empty symbol ('',)
# presumably take no operand.
#
# Most of the table is regular, so it is generated from the op code base
# of each family instead of being spelled out entry by entry.
def _build_iset():
    """Build and return the mnemonic -> {argument symbols: op code} table."""
    gp_regs = ('r0', 'r1', 'r2', 'r3')
    # operand order inside each accumulator arithmetic/logic family
    alu_operands = gp_regs + ('dph', 'dpl', '#data8', '@dptr')
    # operand order inside each push/pop family
    stack_operands = gp_regs + ('dph', 'dpl', 'a', 'flags')
    # register order used by every mov family
    mov_regs = gp_regs + ('dph', 'dpl', 'sph', 'spl')

    table = {}

    # accumulator arithmetic/logic: eight consecutive op codes per mnemonic
    alu_bases = (('anl', 128), ('orl', 136), ('xrl', 144), ('add', 160),
                 ('addc', 168), ('sub', 176), ('subb', 184))
    for name, base in alu_bases:
        table[name] = dict((('a', operand), base + offset)
                           for offset, operand in enumerate(alu_operands))

    # stack operations: eight consecutive op codes per mnemonic
    for name, base in (('push', 232), ('pop', 240)):
        table[name] = dict(((operand,), base + offset)
                           for offset, operand in enumerate(stack_operands))

    # loop/compare instructions on the general purpose registers
    table['djnz'] = dict(((reg, 'rel8'), 208 + offset)
                         for offset, reg in enumerate(gp_regs))
    table['cjne'] = dict(((reg, '#data8', 'rel8'), 212 + offset)
                         for offset, reg in enumerate(gp_regs))
    table['cjne'][('a', '#data8', 'rel8')] = 223

    # conditional relative jumps occupy op codes 224..231
    conditional = ('jz', 'jnz', 'jc', 'jnc', 'jpo', 'jpe', 'js', 'jns')
    for offset, name in enumerate(conditional):
        table[name] = {('rel8',): 224 + offset}

    # forms keyed by the single empty symbol
    # NOTE(review): 'sjmp' is listed here without a 'rel8' operand, exactly
    # as in the original table -- confirm that this is intended.
    no_operand = (('nop', 0), ('xcsd', 16), ('sfa', 17), ('laf', 18),
                  ('ret', 218), ('reti', 219), ('sjmp', 220),
                  ('reserved', 251), ('hlt', 255))
    for name, opcode in no_operand:
        table[name] = {('',): opcode}

    # operations that only act on the accumulator
    acc_only = (('rl', 152), ('rlc', 153), ('rr', 154), ('rrc', 155),
                ('da', 250))
    for name, opcode in acc_only:
        table[name] = {('a',): opcode}

    # irregular entries, spelled out individually
    table['set'] = {('c',): 8, ('bs',): 10, ('ie',): 12}
    table['clr'] = {('c',): 9, ('bs',): 11, ('ie',): 13}
    table['cpl'] = {('c',): 14, ('a',): 15}
    table['inc'] = {('dptr',): 156, ('a',): 158}
    table['dec'] = {('dptr',): 157, ('a',): 159}
    table['pjmp'] = {('addr11',): 199}
    table['pcall'] = {('addr11',): 207}
    table['ljmp'] = {('addr16',): 216}
    table['lcall'] = {('addr16',): 217}
    table['jmp'] = {('@a+dptr',): 221, ('@dptr',): 222}
    table['mul'] = {('r0', 'r1'): 248}
    table['div'] = {('r0', 'r1'): 249}
    table['in'] = {('a', 'port_addr'): 252}
    table['out'] = {('port_addr', 'a'): 253}
    table['int'] = {('vect8',): 254}

    # mov: thirteen irregular forms first ...
    mov = {
        ('dptr', 'sp'): 19,
        ('sp', 'dptr'): 20,
        ('a', '#data8'): 21,
        ('sp', '#data16'): 22,
        ('dptr', '#data16'): 23,
        ('a', 'addr16'): 24,
        ('addr16', 'a'): 25,
        ('a', '@a+dptr'): 26,
        ('a', '@a+pc'): 27,
        ('a', '@addr16'): 28,
        ('@addr16', 'a'): 29,
        ('a', '@dptr'): 30,
        ('@dptr', 'a'): 31,
    }
    # ... then the register families, eight op codes each
    for row, dest in enumerate(mov_regs):
        mov[('@dptr', dest)] = 32 + row
        mov[(dest, '#data8')] = 40 + row
        mov[(dest, 'a')] = 48 + row
        mov[('a', dest)] = 56 + row
        for column, source in enumerate(mov_regs):
            # the slot where source and destination would be the same
            # register holds the (dest, '@dptr') form instead
            if source == dest:
                source = '@dptr'
            mov[(dest, source)] = 64 + 8 * row + column
    table['mov'] = mov

    # keep mnemonics and forms in sorted order, matching the order of the
    # hand-written table this replaces
    return dict((name, dict(sorted(forms.items())))
                for name, forms in sorted(table.items()))


iset = _build_iset()
jb302@1 244
def tokenize(args):
    """Classify instruction arguments and pack their numeric operands.

    Every argument is compared against three literal forms built from
    PREFIX, in this order:

    * '#' + PREFIX + digits -- immediate value: '#data8' for up to two
      digits, '#data16' for up to four
    * PREFIX + digits -- address: 'rel8' for up to two digits, 'addr16'
      for up to four
    * '@' + PREFIX + digits -- pointer: always '@addr16'

    A recognised literal (PREFIX included) is parsed with int(..., BASE)
    and packed big-endian: one byte for the 8 bit symbols, two bytes for
    the 16 bit ones.  Every other argument -- including immediates and
    addresses with more than four digits -- is returned unchanged as its
    own symbol, so unknown symbols can be handled by the caller and the
    language can be extended more easily.

    args -- sequence of argument strings

    Returns a (symbols, data) tuple: the list of symbols in argument
    order, and the packed operands concatenated in the same order as a
    byte string (empty when no argument carried a number).

    Raises ValueError when the digits are not valid for BASE, and
    struct.error when a value does not fit its packed width (for example
    a pointer with more than four digits).
    """
    immediate = '#' + PREFIX
    pointer = '@' + PREFIX

    sym = []
    # packed operands are collected as byte strings and joined once at the
    # end; struct.pack() returns bytes, which cannot be added to a text
    # string on Python 3
    packed = []

    for a in args:

        # immediate ints
        if a[:len(immediate)] == immediate:
            digits = a[len(immediate):]
            if len(digits) <= 2:
                # 8 bit int, big-endian byte
                sym.append('#data8')
                packed.append(struct.pack('>B', int(a[1:], BASE)))
            elif len(digits) <= 4:
                # 16 bit int, big-endian short
                sym.append('#data16')
                packed.append(struct.pack('>H', int(a[1:], BASE)))
            else:
                # too wide: hand the raw argument back, like the address
                # branch below does
                sym.append(a)

        # addresses
        elif a[:len(PREFIX)] == PREFIX:
            digits = a[len(PREFIX):]
            if len(digits) <= 2:
                # 8 bit address
                sym.append('rel8')
                packed.append(struct.pack('>B', int(a, BASE)))
            elif len(digits) <= 4:
                # 16 bit address
                sym.append('addr16')
                packed.append(struct.pack('>H', int(a, BASE)))
            else:
                sym.append(a)

        # pointers
        elif a[:len(pointer)] == pointer:
            sym.append('@addr16')
            packed.append(struct.pack('>H', int(a[1:], BASE)))

        # return unknown symbols so language can be extended more easily
        else:
            sym.append(a)

    return sym, b''.join(packed)
jb302@1 304