jb302@1
|
1 #!/usr/bin/env python
|
jb302@1
|
2 # language.py
|
jb302@1
|
3 import struct
|
jb302@1
|
4
|
jb302@1
|
# Number-literal conventions used by the assembler. They are defined here
# so that the source-code representation of numbers is easy to change.
BASE = 16
# PREFIX must be exactly 2 characters: tokenize() compares fixed-width
# slices of each argument (a[:2] and a[:3]) against it.
PREFIX = '0x'
|
jb302@1
|
def num_string(num):
    """Return the integer ``num`` as a number literal string.

    The conversion is done with the built-in ``hex``, so the result
    carries a ``0x`` prefix (for example ``255`` becomes ``'0xff'``).
    """
    return hex(num)
|
jb302@1
|
12
|
jb302@1
|
# Instruction set table: a dictionary of dictionaries.
#
# The outer key is the mnemonic. Each inner dictionary maps a tuple of
# argument type symbols (for example ('a', '#data8')) to the corresponding
# op code. Instructions written without operands use the key ('',).
#
# NOTE(review): 'sjmp' is listed with no operand, while the other relative
# jumps (jz, jnz, jc, ...) take 'rel8' -- confirm this is intended.
iset = {
    'add': {
        ('a', '#data8'): 166,
        ('a', '@dptr'): 167,
        ('a', 'dph'): 164,
        ('a', 'dpl'): 165,
        ('a', 'r0'): 160,
        ('a', 'r1'): 161,
        ('a', 'r2'): 162,
        ('a', 'r3'): 163,
    },
    'addc': {
        ('a', '#data8'): 174,
        ('a', '@dptr'): 175,
        ('a', 'dph'): 172,
        ('a', 'dpl'): 173,
        ('a', 'r0'): 168,
        ('a', 'r1'): 169,
        ('a', 'r2'): 170,
        ('a', 'r3'): 171,
    },
    'anl': {
        ('a', '#data8'): 134,
        ('a', '@dptr'): 135,
        ('a', 'dph'): 132,
        ('a', 'dpl'): 133,
        ('a', 'r0'): 128,
        ('a', 'r1'): 129,
        ('a', 'r2'): 130,
        ('a', 'r3'): 131,
    },
    'cjne': {
        ('a', '#data8', 'rel8'): 223,
        ('r0', '#data8', 'rel8'): 212,
        ('r1', '#data8', 'rel8'): 213,
        ('r2', '#data8', 'rel8'): 214,
        ('r3', '#data8', 'rel8'): 215,
    },
    'clr': {('bs',): 11, ('c',): 9, ('ie',): 13},
    'cpl': {('a',): 15, ('c',): 14},
    'da': {('a',): 250},
    'dec': {('a',): 159, ('dptr',): 157},
    'div': {('r0', 'r1'): 249},
    'djnz': {
        ('r0', 'rel8'): 208,
        ('r1', 'rel8'): 209,
        ('r2', 'rel8'): 210,
        ('r3', 'rel8'): 211,
    },
    'hlt': {('',): 255},
    'in': {('a', 'port_addr'): 252},
    'inc': {('a',): 158, ('dptr',): 156},
    'int': {('vect8',): 254},
    'jc': {('rel8',): 226},
    'jmp': {('@a+dptr',): 221, ('@dptr',): 222},
    'jnc': {('rel8',): 227},
    'jns': {('rel8',): 231},
    'jnz': {('rel8',): 225},
    'jpe': {('rel8',): 229},
    'jpo': {('rel8',): 228},
    'js': {('rel8',): 230},
    'jz': {('rel8',): 224},
    'laf': {('',): 18},
    'lcall': {('addr16',): 217},
    'ljmp': {('addr16',): 216},
    'mov': {
        ('@addr16', 'a'): 29,
        ('@dptr', 'a'): 31,
        ('@dptr', 'dph'): 36,
        ('@dptr', 'dpl'): 37,
        ('@dptr', 'r0'): 32,
        ('@dptr', 'r1'): 33,
        ('@dptr', 'r2'): 34,
        ('@dptr', 'r3'): 35,
        ('@dptr', 'sph'): 38,
        ('@dptr', 'spl'): 39,
        ('a', '#data8'): 21,
        ('a', '@a+dptr'): 26,
        ('a', '@a+pc'): 27,
        ('a', '@addr16'): 28,
        ('a', '@dptr'): 30,
        ('a', 'addr16'): 24,
        ('a', 'dph'): 60,
        ('a', 'dpl'): 61,
        ('a', 'r0'): 56,
        ('a', 'r1'): 57,
        ('a', 'r2'): 58,
        ('a', 'r3'): 59,
        ('a', 'sph'): 62,
        ('a', 'spl'): 63,
        ('addr16', 'a'): 25,
        ('dph', '#data8'): 44,
        ('dph', '@dptr'): 100,
        ('dph', 'a'): 52,
        ('dph', 'dpl'): 101,
        ('dph', 'r0'): 96,
        ('dph', 'r1'): 97,
        ('dph', 'r2'): 98,
        ('dph', 'r3'): 99,
        ('dph', 'sph'): 102,
        ('dph', 'spl'): 103,
        ('dpl', '#data8'): 45,
        ('dpl', '@dptr'): 109,
        ('dpl', 'a'): 53,
        ('dpl', 'dph'): 108,
        ('dpl', 'r0'): 104,
        ('dpl', 'r1'): 105,
        ('dpl', 'r2'): 106,
        ('dpl', 'r3'): 107,
        ('dpl', 'sph'): 110,
        ('dpl', 'spl'): 111,
        ('dptr', '#data16'): 23,
        ('dptr', 'sp'): 19,
        ('r0', '#data8'): 40,
        ('r0', '@dptr'): 64,
        ('r0', 'a'): 48,
        ('r0', 'dph'): 68,
        ('r0', 'dpl'): 69,
        ('r0', 'r1'): 65,
        ('r0', 'r2'): 66,
        ('r0', 'r3'): 67,
        ('r0', 'sph'): 70,
        ('r0', 'spl'): 71,
        ('r1', '#data8'): 41,
        ('r1', '@dptr'): 73,
        ('r1', 'a'): 49,
        ('r1', 'dph'): 76,
        ('r1', 'dpl'): 77,
        ('r1', 'r0'): 72,
        ('r1', 'r2'): 74,
        ('r1', 'r3'): 75,
        ('r1', 'sph'): 78,
        ('r1', 'spl'): 79,
        ('r2', '#data8'): 42,
        ('r2', '@dptr'): 82,
        ('r2', 'a'): 50,
        ('r2', 'dph'): 84,
        ('r2', 'dpl'): 85,
        ('r2', 'r0'): 80,
        ('r2', 'r1'): 81,
        ('r2', 'r3'): 83,
        ('r2', 'sph'): 86,
        ('r2', 'spl'): 87,
        ('r3', '#data8'): 43,
        ('r3', '@dptr'): 91,
        ('r3', 'a'): 51,
        ('r3', 'dph'): 92,
        ('r3', 'dpl'): 93,
        ('r3', 'r0'): 88,
        ('r3', 'r1'): 89,
        ('r3', 'r2'): 90,
        ('r3', 'sph'): 94,
        ('r3', 'spl'): 95,
        ('sp', '#data16'): 22,
        ('sp', 'dptr'): 20,
        ('sph', '#data8'): 46,
        ('sph', '@dptr'): 118,
        ('sph', 'a'): 54,
        ('sph', 'dph'): 116,
        ('sph', 'dpl'): 117,
        ('sph', 'r0'): 112,
        ('sph', 'r1'): 113,
        ('sph', 'r2'): 114,
        ('sph', 'r3'): 115,
        ('sph', 'spl'): 119,
        ('spl', '#data8'): 47,
        ('spl', '@dptr'): 127,
        ('spl', 'a'): 55,
        ('spl', 'dph'): 124,
        ('spl', 'dpl'): 125,
        ('spl', 'r0'): 120,
        ('spl', 'r1'): 121,
        ('spl', 'r2'): 122,
        ('spl', 'r3'): 123,
        ('spl', 'sph'): 126,
    },
    'mul': {('r0', 'r1'): 248},
    'nop': {('',): 0},
    'orl': {
        ('a', '#data8'): 142,
        ('a', '@dptr'): 143,
        ('a', 'dph'): 140,
        ('a', 'dpl'): 141,
        ('a', 'r0'): 136,
        ('a', 'r1'): 137,
        ('a', 'r2'): 138,
        ('a', 'r3'): 139,
    },
    'out': {('port_addr', 'a'): 253},
    'pcall': {('addr11',): 207},
    'pjmp': {('addr11',): 199},
    'pop': {
        ('a',): 246,
        ('dph',): 244,
        ('dpl',): 245,
        ('flags',): 247,
        ('r0',): 240,
        ('r1',): 241,
        ('r2',): 242,
        ('r3',): 243,
    },
    'push': {
        ('a',): 238,
        ('dph',): 236,
        ('dpl',): 237,
        ('flags',): 239,
        ('r0',): 232,
        ('r1',): 233,
        ('r2',): 234,
        ('r3',): 235,
    },
    'reserved': {('',): 251},
    'ret': {('',): 218},
    'reti': {('',): 219},
    'rl': {('a',): 152},
    'rlc': {('a',): 153},
    'rr': {('a',): 154},
    'rrc': {('a',): 155},
    'set': {('bs',): 10, ('c',): 8, ('ie',): 12},
    'sfa': {('',): 17},
    'sjmp': {('',): 220},
    'sub': {
        ('a', '#data8'): 182,
        ('a', '@dptr'): 183,
        ('a', 'dph'): 180,
        ('a', 'dpl'): 181,
        ('a', 'r0'): 176,
        ('a', 'r1'): 177,
        ('a', 'r2'): 178,
        ('a', 'r3'): 179,
    },
    'subb': {
        ('a', '#data8'): 190,
        ('a', '@dptr'): 191,
        ('a', 'dph'): 188,
        ('a', 'dpl'): 189,
        ('a', 'r0'): 184,
        ('a', 'r1'): 185,
        ('a', 'r2'): 186,
        ('a', 'r3'): 187,
    },
    'xcsd': {('',): 16},
    'xrl': {
        ('a', '#data8'): 150,
        ('a', '@dptr'): 151,
        ('a', 'dph'): 148,
        ('a', 'dpl'): 149,
        ('a', 'r0'): 144,
        ('a', 'r1'): 145,
        ('a', 'r2'): 146,
        ('a', 'r3'): 147,
    },
}
|
jb302@1
|
244
|
jb302@1
|
def tokenize(args):
    """Classify instruction arguments and pack their numeric data.

    Each argument in ``args`` is a string. Arguments that carry a number
    are replaced by a type symbol and their value is packed big-endian:

    * ``'#' + PREFIX`` + up to 2 digits -> ``'#data8'``,  1 byte
    * ``'#' + PREFIX`` + 3-4 digits     -> ``'#data16'``, 2 bytes
    * ``PREFIX`` + up to 2 digits       -> ``'rel8'``,    1 byte
    * ``PREFIX`` + 3-4 digits           -> ``'addr16'``,  2 bytes
    * ``'@' + PREFIX`` + digits         -> ``'@addr16'``, 2 bytes

    Anything else (register names, over-long numbers, unknown text) is
    passed through unchanged as its own symbol and contributes no data,
    so the language can be extended without touching this function.

    Returns a ``(sym, data)`` tuple: ``sym`` is the list of symbols in
    argument order and ``data`` is the packed values concatenated in the
    same order (a byte string).

    Raises ``ValueError`` if the digits are not valid in ``BASE`` and
    ``struct.error`` if a pointer value does not fit in 16 bits.
    """
    sym = []
    # Packed values are collected and joined once at the end. struct.pack
    # returns a byte string, so the accumulator must be bytes as well.
    chunks = []

    immediate = '#' + PREFIX
    pointer = '@' + PREFIX

    for a in args:

        # immediate ints
        if a[:3] == immediate:
            width = len(a[3:])
            if width <= 2:
                # 8 bit int, packed as a big-endian byte
                sym.append('#data8')
                chunks.append(struct.pack('>B', int(a[1:], BASE)))
            elif width <= 4:
                # 16 bit int, packed as a big-endian short
                sym.append('#data16')
                chunks.append(struct.pack('>H', int(a[1:], BASE)))
            else:
                # too wide: hand the text back as an unknown symbol so
                # the caller can report it
                sym.append(a)

        # addresses
        elif a[:2] == PREFIX:
            width = len(a[2:])
            if width <= 2:
                # 8 bit address
                sym.append('rel8')
                chunks.append(struct.pack('>B', int(a, BASE)))
            elif width <= 4:
                # 16 bit address
                sym.append('addr16')
                chunks.append(struct.pack('>H', int(a, BASE)))
            else:
                # too wide: unknown symbol
                sym.append(a)

        # pointers
        elif a[:3] == pointer:
            sym.append('@addr16')
            chunks.append(struct.pack('>H', int(a[1:], BASE)))

        # return unknown symbols so language can be extended more easily
        else:
            sym.append(a)

    return sym, b''.join(chunks)
|
jb302@1
|
304
|