comparison assembler/language.py @ 1:82e82dda442b

alpha version of assembler 'finished' some more documentation and test files added
author james <jb302@eecs.qmul.ac.uk>
date Fri, 06 Dec 2013 23:39:54 +0000
parents
children 81dd03d17c22
comparison
equal deleted inserted replaced
0:f598703553ef 1:82e82dda442b
1 #!/usr/bin/env python
2 # language.py
3 import struct
4
# Source-level number representation.  Numeric literals in assembly
# source are written with PREFIX in front and parsed in base BASE; the
# values are kept here so the notation can be changed in one place.
BASE = 16
# NOTE: tokenize() has historically sliced fixed 2-character widths when
# matching this prefix, so keep PREFIX exactly two characters long unless
# tokenize() is confirmed to derive its widths from len(PREFIX).
PREFIX = '0x'


def num_string(num):
    """Return the source-code text for the integer *num*.

    The rendering is whatever the built-in hex() produces, e.g. 255
    becomes '0xff'.
    """
    rendered = hex(num)
    return rendered
12
# Instruction set table (a dictionary of dictionaries).
#
# The outer key is the mnemonic.  Each inner dictionary maps a tuple of
# argument-type symbols to the op code for that form of the instruction.
#
# Forms keyed by ('',) are a 1-tuple holding the empty string --
# presumably the instructions that take no operand.
#
# NOTE(review): 'sjmp' is keyed by ('',) although every other relative
# jump here (jz, jnz, jc, ...) takes 'rel8' -- confirm this is intended.
iset = {
    'add': {
        ('a', '#data8'): 166,
        ('a', '@dptr'): 167,
        ('a', 'dph'): 164,
        ('a', 'dpl'): 165,
        ('a', 'r0'): 160,
        ('a', 'r1'): 161,
        ('a', 'r2'): 162,
        ('a', 'r3'): 163,
    },
    'addc': {
        ('a', '#data8'): 174,
        ('a', '@dptr'): 175,
        ('a', 'dph'): 172,
        ('a', 'dpl'): 173,
        ('a', 'r0'): 168,
        ('a', 'r1'): 169,
        ('a', 'r2'): 170,
        ('a', 'r3'): 171,
    },
    'anl': {
        ('a', '#data8'): 134,
        ('a', '@dptr'): 135,
        ('a', 'dph'): 132,
        ('a', 'dpl'): 133,
        ('a', 'r0'): 128,
        ('a', 'r1'): 129,
        ('a', 'r2'): 130,
        ('a', 'r3'): 131,
    },
    'cjne': {
        ('a', '#data8', 'rel8'): 223,
        ('r0', '#data8', 'rel8'): 212,
        ('r1', '#data8', 'rel8'): 213,
        ('r2', '#data8', 'rel8'): 214,
        ('r3', '#data8', 'rel8'): 215,
    },
    'clr': {
        ('bs',): 11,
        ('c',): 9,
        ('ie',): 13,
    },
    'cpl': {
        ('a',): 15,
        ('c',): 14,
    },
    'da': {
        ('a',): 250,
    },
    'dec': {
        ('a',): 159,
        ('dptr',): 157,
    },
    'div': {
        ('r0', 'r1'): 249,
    },
    'djnz': {
        ('r0', 'rel8'): 208,
        ('r1', 'rel8'): 209,
        ('r2', 'rel8'): 210,
        ('r3', 'rel8'): 211,
    },
    'hlt': {
        ('',): 255,
    },
    'in': {
        ('a', 'port_addr'): 252,
    },
    'inc': {
        ('a',): 158,
        ('dptr',): 156,
    },
    'int': {
        ('vect8',): 254,
    },
    'jc': {
        ('rel8',): 226,
    },
    'jmp': {
        ('@a+dptr',): 221,
        ('@dptr',): 222,
    },
    'jnc': {
        ('rel8',): 227,
    },
    'jns': {
        ('rel8',): 231,
    },
    'jnz': {
        ('rel8',): 225,
    },
    'jpe': {
        ('rel8',): 229,
    },
    'jpo': {
        ('rel8',): 228,
    },
    'js': {
        ('rel8',): 230,
    },
    'jz': {
        ('rel8',): 224,
    },
    'laf': {
        ('',): 18,
    },
    'lcall': {
        ('addr16',): 217,
    },
    'ljmp': {
        ('addr16',): 216,
    },
    'mov': {
        ('@addr16', 'a'): 29,
        ('@dptr', 'a'): 31,
        ('@dptr', 'dph'): 36,
        ('@dptr', 'dpl'): 37,
        ('@dptr', 'r0'): 32,
        ('@dptr', 'r1'): 33,
        ('@dptr', 'r2'): 34,
        ('@dptr', 'r3'): 35,
        ('@dptr', 'sph'): 38,
        ('@dptr', 'spl'): 39,
        ('a', '#data8'): 21,
        ('a', '@a+dptr'): 26,
        ('a', '@a+pc'): 27,
        ('a', '@addr16'): 28,
        ('a', '@dptr'): 30,
        ('a', 'addr16'): 24,
        ('a', 'dph'): 60,
        ('a', 'dpl'): 61,
        ('a', 'r0'): 56,
        ('a', 'r1'): 57,
        ('a', 'r2'): 58,
        ('a', 'r3'): 59,
        ('a', 'sph'): 62,
        ('a', 'spl'): 63,
        ('addr16', 'a'): 25,
        ('dph', '#data8'): 44,
        ('dph', '@dptr'): 100,
        ('dph', 'a'): 52,
        ('dph', 'dpl'): 101,
        ('dph', 'r0'): 96,
        ('dph', 'r1'): 97,
        ('dph', 'r2'): 98,
        ('dph', 'r3'): 99,
        ('dph', 'sph'): 102,
        ('dph', 'spl'): 103,
        ('dpl', '#data8'): 45,
        ('dpl', '@dptr'): 109,
        ('dpl', 'a'): 53,
        ('dpl', 'dph'): 108,
        ('dpl', 'r0'): 104,
        ('dpl', 'r1'): 105,
        ('dpl', 'r2'): 106,
        ('dpl', 'r3'): 107,
        ('dpl', 'sph'): 110,
        ('dpl', 'spl'): 111,
        ('dptr', '#data16'): 23,
        ('dptr', 'sp'): 19,
        ('r0', '#data8'): 40,
        ('r0', '@dptr'): 64,
        ('r0', 'a'): 48,
        ('r0', 'dph'): 68,
        ('r0', 'dpl'): 69,
        ('r0', 'r1'): 65,
        ('r0', 'r2'): 66,
        ('r0', 'r3'): 67,
        ('r0', 'sph'): 70,
        ('r0', 'spl'): 71,
        ('r1', '#data8'): 41,
        ('r1', '@dptr'): 73,
        ('r1', 'a'): 49,
        ('r1', 'dph'): 76,
        ('r1', 'dpl'): 77,
        ('r1', 'r0'): 72,
        ('r1', 'r2'): 74,
        ('r1', 'r3'): 75,
        ('r1', 'sph'): 78,
        ('r1', 'spl'): 79,
        ('r2', '#data8'): 42,
        ('r2', '@dptr'): 82,
        ('r2', 'a'): 50,
        ('r2', 'dph'): 84,
        ('r2', 'dpl'): 85,
        ('r2', 'r0'): 80,
        ('r2', 'r1'): 81,
        ('r2', 'r3'): 83,
        ('r2', 'sph'): 86,
        ('r2', 'spl'): 87,
        ('r3', '#data8'): 43,
        ('r3', '@dptr'): 91,
        ('r3', 'a'): 51,
        ('r3', 'dph'): 92,
        ('r3', 'dpl'): 93,
        ('r3', 'r0'): 88,
        ('r3', 'r1'): 89,
        ('r3', 'r2'): 90,
        ('r3', 'sph'): 94,
        ('r3', 'spl'): 95,
        ('sp', '#data16'): 22,
        ('sp', 'dptr'): 20,
        ('sph', '#data8'): 46,
        ('sph', '@dptr'): 118,
        ('sph', 'a'): 54,
        ('sph', 'dph'): 116,
        ('sph', 'dpl'): 117,
        ('sph', 'r0'): 112,
        ('sph', 'r1'): 113,
        ('sph', 'r2'): 114,
        ('sph', 'r3'): 115,
        ('sph', 'spl'): 119,
        ('spl', '#data8'): 47,
        ('spl', '@dptr'): 127,
        ('spl', 'a'): 55,
        ('spl', 'dph'): 124,
        ('spl', 'dpl'): 125,
        ('spl', 'r0'): 120,
        ('spl', 'r1'): 121,
        ('spl', 'r2'): 122,
        ('spl', 'r3'): 123,
        ('spl', 'sph'): 126,
    },
    'mul': {
        ('r0', 'r1'): 248,
    },
    'nop': {
        ('',): 0,
    },
    'orl': {
        ('a', '#data8'): 142,
        ('a', '@dptr'): 143,
        ('a', 'dph'): 140,
        ('a', 'dpl'): 141,
        ('a', 'r0'): 136,
        ('a', 'r1'): 137,
        ('a', 'r2'): 138,
        ('a', 'r3'): 139,
    },
    'out': {
        ('port_addr', 'a'): 253,
    },
    'pcall': {
        ('addr11',): 207,
    },
    'pjmp': {
        ('addr11',): 199,
    },
    'pop': {
        ('a',): 246,
        ('dph',): 244,
        ('dpl',): 245,
        ('flags',): 247,
        ('r0',): 240,
        ('r1',): 241,
        ('r2',): 242,
        ('r3',): 243,
    },
    'push': {
        ('a',): 238,
        ('dph',): 236,
        ('dpl',): 237,
        ('flags',): 239,
        ('r0',): 232,
        ('r1',): 233,
        ('r2',): 234,
        ('r3',): 235,
    },
    'reserved': {
        ('',): 251,
    },
    'ret': {
        ('',): 218,
    },
    'reti': {
        ('',): 219,
    },
    'rl': {
        ('a',): 152,
    },
    'rlc': {
        ('a',): 153,
    },
    'rr': {
        ('a',): 154,
    },
    'rrc': {
        ('a',): 155,
    },
    'set': {
        ('bs',): 10,
        ('c',): 8,
        ('ie',): 12,
    },
    'sfa': {
        ('',): 17,
    },
    'sjmp': {
        ('',): 220,
    },
    'sub': {
        ('a', '#data8'): 182,
        ('a', '@dptr'): 183,
        ('a', 'dph'): 180,
        ('a', 'dpl'): 181,
        ('a', 'r0'): 176,
        ('a', 'r1'): 177,
        ('a', 'r2'): 178,
        ('a', 'r3'): 179,
    },
    'subb': {
        ('a', '#data8'): 190,
        ('a', '@dptr'): 191,
        ('a', 'dph'): 188,
        ('a', 'dpl'): 189,
        ('a', 'r0'): 184,
        ('a', 'r1'): 185,
        ('a', 'r2'): 186,
        ('a', 'r3'): 187,
    },
    'xcsd': {
        ('',): 16,
    },
    'xrl': {
        ('a', '#data8'): 150,
        ('a', '@dptr'): 151,
        ('a', 'dph'): 148,
        ('a', 'dpl'): 149,
        ('a', 'r0'): 144,
        ('a', 'r1'): 145,
        ('a', 'r2'): 146,
        ('a', 'r3'): 147,
    },
}
244
def tokenize(args):
    """Classify instruction arguments and pack their numeric values.

    Each argument is matched against the numeric notations built from
    PREFIX:

    * '#' + PREFIX + digits -- immediate value: '#data8' for up to two
      digits, '#data16' for three or four digits
    * PREFIX + digits       -- address: 'rel8' for up to two digits,
      'addr16' for three or four digits
    * '@' + PREFIX + digits -- pointer: always '@addr16'

    Any other argument (register names, '', ...) is passed through
    unchanged as its own symbol so the language can be extended without
    touching this function.  Numeric literals with more than four digits
    are passed through unchanged as well and contribute no data.

    Args:
        args: iterable of argument strings for one instruction.

    Returns:
        A tuple ``(sym, data)``: ``sym`` is the list of symbols, one per
        argument and in the same order; ``data`` is a byte string with
        the big-endian encoding of every recognised number, concatenated
        in argument order.

    Raises:
        ValueError: if the digits of a recognised literal cannot be
            parsed by int() in base BASE.
        struct.error: if a pointer value does not fit in 16 bits.
    """
    immediate_prefix = '#' + PREFIX
    pointer_prefix = '@' + PREFIX

    sym = []
    # Collect the packed pieces and join once at the end.  struct.pack
    # returns bytes, so accumulating into a text string fails on Python 3.
    chunks = []

    for a in args:

        # immediate ints
        if a.startswith(immediate_prefix):
            digits = len(a) - len(immediate_prefix)
            # int() accepts the prefix itself, so only the '#' is dropped
            literal = a[1:]
            if digits <= 2:
                # 8 bit int, big-endian byte
                sym.append('#data8')
                chunks.append(struct.pack('>B', int(literal, BASE)))
            elif digits <= 4:
                # 16 bit int, big-endian short
                sym.append('#data16')
                chunks.append(struct.pack('>H', int(literal, BASE)))
            else:
                # too wide: hand the raw token back so the instruction
                # lookup fails on it later
                sym.append(a)

        # addresses
        elif a.startswith(PREFIX):
            digits = len(a) - len(PREFIX)
            if digits <= 2:
                # 8 bit address
                sym.append('rel8')
                chunks.append(struct.pack('>B', int(a, BASE)))
            elif digits <= 4:
                # 16 bit address
                sym.append('addr16')
                chunks.append(struct.pack('>H', int(a, BASE)))
            else:
                # too wide: pass the raw token through
                sym.append(a)

        # pointers
        elif a.startswith(pointer_prefix):
            sym.append('@addr16')
            chunks.append(struct.pack('>H', int(a[1:], BASE)))

        # return unknown symbols so language can be extended more easily
        else:
            sym.append(a)

    return sym, b''.join(chunks)
304