Mercurial > hg > ede
comparison assembler/language.py @ 1:82e82dda442b
alpha version of assembler 'finished'
some more documentation and test files added
author | james <jb302@eecs.qmul.ac.uk> |
---|---|
date | Fri, 06 Dec 2013 23:39:54 +0000 |
parents | |
children | 81dd03d17c22 |
comparison
equal
deleted
inserted
replaced
0:f598703553ef | 1:82e82dda442b |
---|---|
1 #!/usr/bin/env python | |
2 # language.py | |
3 import struct | |
4 | |
# Numeric-literal conventions for assembly source, kept together so the
# notation can be changed in one place.

# radix handed to int() when a literal is parsed
BASE = 16
# literal prefix; tokenize() slices operands at fixed offsets, so this must
# stay exactly 2 characters long or tokenize() will break
PREFIX = '0x'


def num_string(num):
    """Return ``num`` formatted as a number in the source notation.

    This currently delegates to the built-in hex(), which matches the
    BASE / PREFIX values defined alongside it.
    """
    literal = hex(num)
    return literal
12 | |
# Instruction set table: a dictionary of dictionaries.
#
# The outer key is the mnemonic. The inner key is a tuple of operand
# symbols in source order, and the inner value is the corresponding op code
# (written in decimal; every op code in this table lies in 0-255).
#
# Operand symbols are either literal operand names ('a', 'r0', 'dptr', ...)
# or operand classes such as '#data8', 'rel8' and 'addr16'.
# Instructions that take no operand are keyed by the 1-tuple ('',).
#
# NOTE(review): tokenize() in this file only emits the classes '#data8',
# '#data16', 'rel8', 'addr16' and '@addr16'; 'port_addr', 'vect8' and
# 'addr11' are never produced by it -- presumably handled elsewhere or not
# implemented yet; confirm.
iset = {'add': {('a', '#data8'): 166,
                ('a', '@dptr'): 167,
                ('a', 'dph'): 164,
                ('a', 'dpl'): 165,
                ('a', 'r0'): 160,
                ('a', 'r1'): 161,
                ('a', 'r2'): 162,
                ('a', 'r3'): 163},
        'addc': {('a', '#data8'): 174,
                 ('a', '@dptr'): 175,
                 ('a', 'dph'): 172,
                 ('a', 'dpl'): 173,
                 ('a', 'r0'): 168,
                 ('a', 'r1'): 169,
                 ('a', 'r2'): 170,
                 ('a', 'r3'): 171},
        'anl': {('a', '#data8'): 134,
                ('a', '@dptr'): 135,
                ('a', 'dph'): 132,
                ('a', 'dpl'): 133,
                ('a', 'r0'): 128,
                ('a', 'r1'): 129,
                ('a', 'r2'): 130,
                ('a', 'r3'): 131},
        'cjne': {('a', '#data8', 'rel8'): 223,
                 ('r0', '#data8', 'rel8'): 212,
                 ('r1', '#data8', 'rel8'): 213,
                 ('r2', '#data8', 'rel8'): 214,
                 ('r3', '#data8', 'rel8'): 215},
        'clr': {('bs',): 11, ('c',): 9, ('ie',): 13},
        'cpl': {('a',): 15, ('c',): 14},
        'da': {('a',): 250},
        'dec': {('a',): 159, ('dptr',): 157},
        'div': {('r0', 'r1'): 249},
        'djnz': {('r0', 'rel8'): 208,
                 ('r1', 'rel8'): 209,
                 ('r2', 'rel8'): 210,
                 ('r3', 'rel8'): 211},
        'hlt': {('',): 255},
        'in': {('a', 'port_addr'): 252},
        'inc': {('a',): 158, ('dptr',): 156},
        'int': {('vect8',): 254},
        'jc': {('rel8',): 226},
        'jmp': {('@a+dptr',): 221, ('@dptr',): 222},
        'jnc': {('rel8',): 227},
        'jns': {('rel8',): 231},
        'jnz': {('rel8',): 225},
        'jpe': {('rel8',): 229},
        'jpo': {('rel8',): 228},
        'js': {('rel8',): 230},
        'jz': {('rel8',): 224},
        'laf': {('',): 18},
        'lcall': {('addr16',): 217},
        'ljmp': {('addr16',): 216},
        # mov is by far the largest group: one entry per legal
        # (destination, source) operand pair
        'mov': {('@addr16', 'a'): 29,
                ('@dptr', 'a'): 31,
                ('@dptr', 'dph'): 36,
                ('@dptr', 'dpl'): 37,
                ('@dptr', 'r0'): 32,
                ('@dptr', 'r1'): 33,
                ('@dptr', 'r2'): 34,
                ('@dptr', 'r3'): 35,
                ('@dptr', 'sph'): 38,
                ('@dptr', 'spl'): 39,
                ('a', '#data8'): 21,
                ('a', '@a+dptr'): 26,
                ('a', '@a+pc'): 27,
                ('a', '@addr16'): 28,
                ('a', '@dptr'): 30,
                ('a', 'addr16'): 24,
                ('a', 'dph'): 60,
                ('a', 'dpl'): 61,
                ('a', 'r0'): 56,
                ('a', 'r1'): 57,
                ('a', 'r2'): 58,
                ('a', 'r3'): 59,
                ('a', 'sph'): 62,
                ('a', 'spl'): 63,
                ('addr16', 'a'): 25,
                ('dph', '#data8'): 44,
                ('dph', '@dptr'): 100,
                ('dph', 'a'): 52,
                ('dph', 'dpl'): 101,
                ('dph', 'r0'): 96,
                ('dph', 'r1'): 97,
                ('dph', 'r2'): 98,
                ('dph', 'r3'): 99,
                ('dph', 'sph'): 102,
                ('dph', 'spl'): 103,
                ('dpl', '#data8'): 45,
                ('dpl', '@dptr'): 109,
                ('dpl', 'a'): 53,
                ('dpl', 'dph'): 108,
                ('dpl', 'r0'): 104,
                ('dpl', 'r1'): 105,
                ('dpl', 'r2'): 106,
                ('dpl', 'r3'): 107,
                ('dpl', 'sph'): 110,
                ('dpl', 'spl'): 111,
                ('dptr', '#data16'): 23,
                ('dptr', 'sp'): 19,
                ('r0', '#data8'): 40,
                ('r0', '@dptr'): 64,
                ('r0', 'a'): 48,
                ('r0', 'dph'): 68,
                ('r0', 'dpl'): 69,
                ('r0', 'r1'): 65,
                ('r0', 'r2'): 66,
                ('r0', 'r3'): 67,
                ('r0', 'sph'): 70,
                ('r0', 'spl'): 71,
                ('r1', '#data8'): 41,
                ('r1', '@dptr'): 73,
                ('r1', 'a'): 49,
                ('r1', 'dph'): 76,
                ('r1', 'dpl'): 77,
                ('r1', 'r0'): 72,
                ('r1', 'r2'): 74,
                ('r1', 'r3'): 75,
                ('r1', 'sph'): 78,
                ('r1', 'spl'): 79,
                ('r2', '#data8'): 42,
                ('r2', '@dptr'): 82,
                ('r2', 'a'): 50,
                ('r2', 'dph'): 84,
                ('r2', 'dpl'): 85,
                ('r2', 'r0'): 80,
                ('r2', 'r1'): 81,
                ('r2', 'r3'): 83,
                ('r2', 'sph'): 86,
                ('r2', 'spl'): 87,
                ('r3', '#data8'): 43,
                ('r3', '@dptr'): 91,
                ('r3', 'a'): 51,
                ('r3', 'dph'): 92,
                ('r3', 'dpl'): 93,
                ('r3', 'r0'): 88,
                ('r3', 'r1'): 89,
                ('r3', 'r2'): 90,
                ('r3', 'sph'): 94,
                ('r3', 'spl'): 95,
                ('sp', '#data16'): 22,
                ('sp', 'dptr'): 20,
                ('sph', '#data8'): 46,
                ('sph', '@dptr'): 118,
                ('sph', 'a'): 54,
                ('sph', 'dph'): 116,
                ('sph', 'dpl'): 117,
                ('sph', 'r0'): 112,
                ('sph', 'r1'): 113,
                ('sph', 'r2'): 114,
                ('sph', 'r3'): 115,
                ('sph', 'spl'): 119,
                ('spl', '#data8'): 47,
                ('spl', '@dptr'): 127,
                ('spl', 'a'): 55,
                ('spl', 'dph'): 124,
                ('spl', 'dpl'): 125,
                ('spl', 'r0'): 120,
                ('spl', 'r1'): 121,
                ('spl', 'r2'): 122,
                ('spl', 'r3'): 123,
                ('spl', 'sph'): 126},
        'mul': {('r0', 'r1'): 248},
        'nop': {('',): 0},
        'orl': {('a', '#data8'): 142,
                ('a', '@dptr'): 143,
                ('a', 'dph'): 140,
                ('a', 'dpl'): 141,
                ('a', 'r0'): 136,
                ('a', 'r1'): 137,
                ('a', 'r2'): 138,
                ('a', 'r3'): 139},
        'out': {('port_addr', 'a'): 253},
        'pcall': {('addr11',): 207},
        'pjmp': {('addr11',): 199},
        'pop': {('a',): 246,
                ('dph',): 244,
                ('dpl',): 245,
                ('flags',): 247,
                ('r0',): 240,
                ('r1',): 241,
                ('r2',): 242,
                ('r3',): 243},
        'push': {('a',): 238,
                 ('dph',): 236,
                 ('dpl',): 237,
                 ('flags',): 239,
                 ('r0',): 232,
                 ('r1',): 233,
                 ('r2',): 234,
                 ('r3',): 235},
        'reserved': {('',): 251},
        'ret': {('',): 218},
        'reti': {('',): 219},
        'rl': {('a',): 152},
        'rlc': {('a',): 153},
        'rr': {('a',): 154},
        'rrc': {('a',): 155},
        'set': {('bs',): 10, ('c',): 8, ('ie',): 12},
        'sfa': {('',): 17},
        # NOTE(review): sjmp is keyed with no operand, unlike the other
        # relative jumps in this table, which take 'rel8' -- confirm intended
        'sjmp': {('',): 220},
        'sub': {('a', '#data8'): 182,
                ('a', '@dptr'): 183,
                ('a', 'dph'): 180,
                ('a', 'dpl'): 181,
                ('a', 'r0'): 176,
                ('a', 'r1'): 177,
                ('a', 'r2'): 178,
                ('a', 'r3'): 179},
        'subb': {('a', '#data8'): 190,
                 ('a', '@dptr'): 191,
                 ('a', 'dph'): 188,
                 ('a', 'dpl'): 189,
                 ('a', 'r0'): 184,
                 ('a', 'r1'): 185,
                 ('a', 'r2'): 186,
                 ('a', 'r3'): 187},
        'xcsd': {('',): 16},
        'xrl': {('a', '#data8'): 150,
                ('a', '@dptr'): 151,
                ('a', 'dph'): 148,
                ('a', 'dpl'): 149,
                ('a', 'r0'): 144,
                ('a', 'r1'): 145,
                ('a', 'r2'): 146,
                ('a', 'r3'): 147}}
244 | |
def tokenize(args):
    """Classify instruction operands and pack their numeric data.

    Takes a list of operand strings, identifies the ones that carry numeric
    data, packs that data into a byte string and returns the operand type
    symbols together with the packed data.

    Recognised numeric forms (PREFIX is assumed to be 2 characters long):

    * '#' + PREFIX + digits  -- immediate: up to 2 digits -> '#data8',
      3-4 digits -> '#data16'
    * PREFIX + digits        -- address: up to 2 digits -> 'rel8',
      3-4 digits -> 'addr16'
    * '@' + PREFIX + digits  -- pointer: up to 4 digits -> '@addr16'
      (always packed as 16 bits)

    The width is chosen from the number of digits written, not from the
    value. A numeric operand with more than 4 digits, and any operand that
    matches none of the forms above, is returned unchanged as its own symbol
    and contributes no data, so that a later stage can reject it or the
    language can be extended.

    Args:
        args: list of operand strings.

    Returns:
        A ``(sym, data)`` tuple: ``sym`` is a list with one symbol per
        operand, in order; ``data`` is the concatenated big-endian encoding
        of the numeric operands (a ``str`` on Python 2, ``bytes`` on
        Python 3).

    Raises:
        ValueError: if the text after a recognised prefix is not a valid
            number in BASE.
    """
    sym = []
    # struct.pack() results are collected here and joined once at the end;
    # starting from '' and using + fails on Python 3 (str + bytes)
    packed = []

    for a in args:
        # select the numeric text (marker stripped, prefix kept) and the
        # symbols for its 8 bit and 16 bit forms
        if a[:3] == '#' + PREFIX:
            # immediate ints
            literal, sym8, sym16 = a[1:], '#data8', '#data16'
        elif a[:2] == PREFIX:
            # addresses
            literal, sym8, sym16 = a, 'rel8', 'addr16'
        elif a[:3] == '@' + PREFIX:
            # pointers have no 8 bit form
            literal, sym8, sym16 = a[1:], None, '@addr16'
        else:
            # return unknown symbols so language can be extended more easily
            sym.append(a)
            continue

        digits = len(literal[2:])
        if sym8 is not None and digits <= 2:
            sym.append(sym8)
            # big-endian byte
            packed.append(struct.pack('>B', int(literal, BASE)))
        elif digits <= 4:
            sym.append(sym16)
            # big-endian short
            packed.append(struct.pack('>H', int(literal, BASE)))
        else:
            # too many digits for 16 bits: hand the operand back untouched
            # (junk symbol) instead of failing here
            sym.append(a)

    return sym, b''.join(packed)
304 |