jb302@28
|
1 #!/usr/bin/env python
|
jb302@28
|
2 # language.py
|
jb302@28
|
3 import struct
|
jb302@28
|
4
|
jb302@28
|
# identifier prefixes: '#' marks an immediate int, '@' marks a pointer
ids = ['#', '@']

# reserved argument names accepted by this instruction set
# (the empty string stands for "no argument")
vargs = (
    '', '@a+dptr', 'dptr', 'bs', '@a+pc',
    'a', 'c', 'r0', 'r1', 'r2', 'r3', 'ie',
    'sp', 'flags', 'dpl', 'dph', '@dptr',
    'sph', 'spl',
)

# mnemonics whose numeric target is tokenized as a relative address
rinst = (
    'djnz', 'cjne', 'sjmp', 'jz', 'jnz',
    'jc', 'jnc', 'jpo', 'jpe', 'js', 'jns',
)
|
jb302@28
|
17
|
jb302@28
|
# Instruction set table: a dictionary of dictionaries.
# For every mnemonic in the instruction set there is an inner dictionary
# keyed by the possible argument formats (tuples of symbols, the same
# shape as the symbol tuple returned by tokenize()).  Each value is a
# two-item list: [op code, width].
# NOTE(review): 'width' is presumably the encoded instruction length in
# bytes -- confirm against the code that consumes this table.
# NOTE(review): pcall/pjmp use different op codes for their 'addr' and
# 'label' forms, unlike lcall/ljmp where both forms share one op code --
# confirm this is intentional.
iset = { 'add': { ('a', 'data'): [166, 2],
                  ('a', '@dptr'): [167, 1],
                  ('a', 'dph'): [164, 1],
                  ('a', 'dpl'): [165, 1],
                  ('a', 'r0'): [160, 1],
                  ('a', 'r1'): [161, 1],
                  ('a', 'r2'): [162, 1],
                  ('a', 'r3'): [163, 1]},
         'addc': { ('a', 'data'): [174, 2],
                   ('a', '@dptr'): [175, 1],
                   ('a', 'dph'): [172, 1],
                   ('a', 'dpl'): [173, 1],
                   ('a', 'r0'): [168, 1],
                   ('a', 'r1'): [169, 1],
                   ('a', 'r2'): [170, 1],
                   ('a', 'r3'): [171, 1]},
         'anl': { ('a', 'data'): [134, 2],
                  ('a', '@dptr'): [135, 1],
                  ('a', 'dph'): [132, 1],
                  ('a', 'dpl'): [133, 1],
                  ('a', 'r0'): [128, 1],
                  ('a', 'r1'): [129, 1],
                  ('a', 'r2'): [130, 1],
                  ('a', 'r3'): [131, 1]},
         'cjne': { ('a', 'data', 'label'): [223, 3],
                   ('a', 'data', 'rel8'): [223, 3],
                   ('r0', 'data', 'label'): [212, 2],
                   ('r0', 'data', 'rel8'): [212, 2],
                   ('r1', 'data', 'label'): [213, 2],
                   ('r1', 'data', 'rel8'): [213, 2],
                   ('r2', 'data', 'label'): [214, 2],
                   ('r2', 'data', 'rel8'): [214, 2],
                   ('r3', 'data', 'label'): [215, 2],
                   ('r3', 'data', 'rel8'): [215, 2]},
         'clr': { ('bs',): [11, 1], ('c',): [9, 1], ('ie',): [13, 1]},
         'cpl': { ('a',): [15, 1], ('c',): [14, 1]},
         'da': { ('a',): [250, 1]},
         'dec': { ('a',): [159, 1], ('dptr',): [157, 1]},
         'div': { ('r0', 'r1'): [249, 1]},
         'djnz': { ('r0', 'label'): [208, 2],
                   ('r0', 'rel8'): [208, 2],
                   ('r1', 'label'): [209, 2],
                   ('r1', 'rel8'): [209, 2],
                   ('r2', 'label'): [210, 2],
                   ('r2', 'rel8'): [210, 2],
                   ('r3', 'label'): [211, 2],
                   ('r3', 'rel8'): [211, 2]},
         'hlt': { ('',): [255, 1]},
         'in': { ('a', 'addr'): [252, 2]},
         'inc': { ('a',): [158, 1], ('dptr',): [156, 1]},
         'int': { ('addr',): [254, 2]},
         'jc': { ('label',): [226, 2], ('rel8',): [226, 2]},
         'jmp': { ('@a+dptr',): [221, 1], ('@dptr',): [222, 1]},
         'jnc': { ('label',): [227, 2], ('rel8',): [227, 2]},
         'jns': { ('label',): [231, 2], ('rel8',): [231, 2]},
         'jnz': { ('label',): [225, 2], ('rel8',): [225, 2]},
         'jpe': { ('label',): [229, 2], ('rel8',): [229, 2]},
         'jpo': { ('label',): [228, 2], ('rel8',): [228, 2]},
         'js': { ('label',): [230, 2], ('rel8',): [230, 2]},
         'jz': { ('label',): [224, 2], ('rel8',): [224, 2]},
         'laf': { ('',): [18, 1]},
         'lcall': { ('addr',): [217, 3], ('label',): [217, 3]},
         'ljmp': { ('addr',): [216, 3], ('label',): [216, 3]},
         'mov': { ('@addr', 'a'): [29, 3],
                  ('@dptr', 'a'): [31, 1],
                  ('@dptr', 'dph'): [36, 1],
                  ('@dptr', 'dpl'): [37, 1],
                  ('@dptr', 'r0'): [32, 1],
                  ('@dptr', 'r1'): [33, 1],
                  ('@dptr', 'r2'): [34, 1],
                  ('@dptr', 'r3'): [35, 1],
                  ('@dptr', 'sph'): [38, 1],
                  ('@dptr', 'spl'): [39, 1],
                  ('@label', 'a'): [29, 3],
                  ('a', 'data'): [21, 2],
                  ('a', '@a+dptr'): [26, 1],
                  ('a', '@a+pc'): [27, 1],
                  ('a', '@addr'): [28, 3],
                  ('a', '@dptr'): [30, 1],
                  ('a', '@label'): [28, 3],
                  ('a', 'addr'): [24, 3],
                  ('a', 'dph'): [60, 1],
                  ('a', 'dpl'): [61, 1],
                  ('a', 'label'): [24, 3],
                  ('a', 'r0'): [56, 1],
                  ('a', 'r1'): [57, 1],
                  ('a', 'r2'): [58, 1],
                  ('a', 'r3'): [59, 1],
                  ('a', 'sph'): [62, 1],
                  ('a', 'spl'): [63, 1],
                  ('addr', 'a'): [25, 3],
                  ('dph', 'data'): [44, 2],
                  ('dph', '@dptr'): [100, 1],
                  ('dph', 'a'): [52, 1],
                  ('dph', 'dpl'): [101, 1],
                  ('dph', 'r0'): [96, 1],
                  ('dph', 'r1'): [97, 1],
                  ('dph', 'r2'): [98, 1],
                  ('dph', 'r3'): [99, 1],
                  ('dph', 'sph'): [102, 1],
                  ('dph', 'spl'): [103, 1],
                  ('dpl', 'data'): [45, 2],
                  ('dpl', '@dptr'): [109, 1],
                  ('dpl', 'a'): [53, 1],
                  ('dpl', 'dph'): [108, 1],
                  ('dpl', 'r0'): [104, 1],
                  ('dpl', 'r1'): [105, 1],
                  ('dpl', 'r2'): [106, 1],
                  ('dpl', 'r3'): [107, 1],
                  ('dpl', 'sph'): [110, 1],
                  ('dpl', 'spl'): [111, 1],
                  ('dptr', 'data'): [23, 3],
                  ('dptr', 'sp'): [19, 1],
                  ('label', 'a'): [25, 3],
                  ('r0', 'data'): [40, 2],
                  ('r0', '@dptr'): [64, 1],
                  ('r0', 'a'): [48, 1],
                  ('r0', 'dph'): [68, 1],
                  ('r0', 'dpl'): [69, 1],
                  ('r0', 'r1'): [65, 1],
                  ('r0', 'r2'): [66, 1],
                  ('r0', 'r3'): [67, 1],
                  ('r0', 'sph'): [70, 1],
                  ('r0', 'spl'): [71, 1],
                  ('r1', 'data'): [41, 2],
                  ('r1', '@dptr'): [73, 1],
                  ('r1', 'a'): [49, 1],
                  ('r1', 'dph'): [76, 1],
                  ('r1', 'dpl'): [77, 1],
                  ('r1', 'r0'): [72, 1],
                  ('r1', 'r2'): [74, 1],
                  ('r1', 'r3'): [75, 1],
                  ('r1', 'sph'): [78, 1],
                  ('r1', 'spl'): [79, 1],
                  ('r2', 'data'): [42, 2],
                  ('r2', '@dptr'): [82, 1],
                  ('r2', 'a'): [50, 1],
                  ('r2', 'dph'): [84, 1],
                  ('r2', 'dpl'): [85, 1],
                  ('r2', 'r0'): [80, 1],
                  ('r2', 'r1'): [81, 1],
                  ('r2', 'r3'): [83, 1],
                  ('r2', 'sph'): [86, 1],
                  ('r2', 'spl'): [87, 1],
                  ('r3', 'data'): [43, 2],
                  ('r3', '@dptr'): [91, 1],
                  ('r3', 'a'): [51, 1],
                  ('r3', 'dph'): [92, 1],
                  ('r3', 'dpl'): [93, 1],
                  ('r3', 'r0'): [88, 1],
                  ('r3', 'r1'): [89, 1],
                  ('r3', 'r2'): [90, 1],
                  ('r3', 'sph'): [94, 1],
                  ('r3', 'spl'): [95, 1],
                  ('sp', 'data'): [22, 3],
                  ('sp', 'dptr'): [20, 1],
                  ('sph', 'data'): [46, 2],
                  ('sph', '@dptr'): [118, 1],
                  ('sph', 'a'): [54, 1],
                  ('sph', 'dph'): [116, 1],
                  ('sph', 'dpl'): [117, 1],
                  ('sph', 'r0'): [112, 1],
                  ('sph', 'r1'): [113, 1],
                  ('sph', 'r2'): [114, 1],
                  ('sph', 'r3'): [115, 1],
                  ('sph', 'spl'): [119, 1],
                  ('spl', 'data'): [47, 2],
                  ('spl', '@dptr'): [127, 1],
                  ('spl', 'a'): [55, 1],
                  ('spl', 'dph'): [124, 1],
                  ('spl', 'dpl'): [125, 1],
                  ('spl', 'r0'): [120, 1],
                  ('spl', 'r1'): [121, 1],
                  ('spl', 'r2'): [122, 1],
                  ('spl', 'r3'): [123, 1],
                  ('spl', 'sph'): [126, 1]},
         'mul': { ('r0', 'r1'): [248, 1]},
         'nop': { ('',): [0, 1]},
         'orl': { ('a', 'data'): [142, 2],
                  ('a', '@dptr'): [143, 1],
                  ('a', 'dph'): [140, 1],
                  ('a', 'dpl'): [141, 1],
                  ('a', 'r0'): [136, 1],
                  ('a', 'r1'): [137, 1],
                  ('a', 'r2'): [138, 1],
                  ('a', 'r3'): [139, 1]},
         'out': { ('addr', 'a'): [253, 2]},
         'pcall': { ('addr',): [207, 2], ('label',): [200, 2]},
         'pjmp': { ('addr',): [199, 2], ('label',): [192, 2]},
         'pop': { ('a',): [246, 1],
                  ('dph',): [244, 1],
                  ('dpl',): [245, 1],
                  ('flags',): [247, 1],
                  ('r0',): [240, 1],
                  ('r1',): [241, 1],
                  ('r2',): [242, 1],
                  ('r3',): [243, 1]},
         'push': { ('a',): [238, 1],
                   ('dph',): [236, 1],
                   ('dpl',): [237, 1],
                   ('flags',): [239, 1],
                   ('r0',): [232, 1],
                   ('r1',): [233, 1],
                   ('r2',): [234, 1],
                   ('r3',): [235, 1]},
         'ret': { ('',): [218, 1]},
         'reti': { ('',): [219, 1]},
         'rl': { ('a',): [152, 1]},
         'rlc': { ('a',): [153, 1]},
         'rr': { ('a',): [154, 1]},
         'rrc': { ('a',): [155, 1]},
         'set': { ('bs',): [10, 1], ('c',): [8, 1], ('ie',): [12, 1]},
         'sfa': { ('',): [17, 1]},
         'sjmp': { ('label',): [220, 2], ('rel8',): [220, 2]},
         'sub': { ('a', 'data'): [182, 2],
                  ('a', '@dptr'): [183, 1],
                  ('a', 'dph'): [180, 1],
                  ('a', 'dpl'): [181, 1],
                  ('a', 'r0'): [176, 1],
                  ('a', 'r1'): [177, 1],
                  ('a', 'r2'): [178, 1],
                  ('a', 'r3'): [179, 1]},
         'subb': { ('a', 'data'): [190, 2],
                   ('a', '@dptr'): [191, 1],
                   ('a', 'dph'): [188, 1],
                   ('a', 'dpl'): [189, 1],
                   ('a', 'r0'): [184, 1],
                   ('a', 'r1'): [185, 1],
                   ('a', 'r2'): [186, 1],
                   ('a', 'r3'): [187, 1]},
         'xcsd': { ('',): [16, 1]},
         'xrl': { ('a', 'data'): [150, 2],
                  ('a', '@dptr'): [151, 1],
                  ('a', 'dph'): [148, 1],
                  ('a', 'dpl'): [149, 1],
                  ('a', 'r0'): [144, 1],
                  ('a', 'r1'): [145, 1],
                  ('a', 'r2'): [146, 1],
                  ('a', 'r3'): [147, 1]}}
|
jb302@28
|
261
|
jb302@28
|
# take integer representation as string and return int:
# supports:
#   decimal (no prefix)
#   octal (0 or 0o)
#   hex (0x)
#   binary (0b)
# return 'NaN' if it is none of the above
def stoi(s):
    """Convert the string form of an integer literal to an int.

    An optional leading sign is accepted.  Supported notations are
    decimal (no prefix), octal ('0' or '0o' prefix), hex ('0x') and
    binary ('0b').

    Returns the int value, or the sentinel string 'NaN' when ``s`` is
    not a string or is not a valid literal in any supported notation.
    """
    # Base 0 lets int() infer the radix from the prefix.  Python 3
    # rejects the legacy leading-zero octal form ('017') in base 0, so
    # base 8 is tried as a fallback to keep that documented notation
    # working; base 8 only accepts strings made of octal digits, so it
    # cannot turn a label into a number.
    for base in (0, 8):
        try:
            return int(s, base)
        except (ValueError, TypeError):
            # ValueError: not a literal in this base.
            # TypeError: ``s`` is not a string at all.
            continue
    return 'NaN'
|
jb302@28
|
274
|
jb302@28
|
275
|
jb302@28
|
# take a mnemonic and its arguments
# identify constant data:
#   pack that data into a byte string
# return hashable format symbol and data
def _as_bytes(text):
    """Return ``text`` as a byte string (unchanged if it already is one).

    On Python 2 a ``str`` is already a byte string.  On Python 3 the text
    is encoded as latin-1 so that each character maps to exactly one byte.
    """
    if isinstance(text, bytes):
        return text
    return text.encode('latin-1')


def tokenize(mne, args):
    """Classify the arguments of one instruction and pack its constants.

    mne  -- the instruction mnemonic; only used to decide whether a
            numeric address is relative (mnemonic listed in ``rinst``).
    args -- iterable of argument strings as written in the source.

    Returns ``(symbols, data)`` where ``symbols`` is a tuple with one
    format symbol per classified argument (a reserved name from
    ``vargs``, or one of 'label', '@label', 'rel8', 'addr', 'data',
    '@addr') and ``data`` is a byte string holding the packed numeric
    constants in argument order, big-endian.

    struct.error propagates when a value does not fit its pack format.
    """
    sym = []
    # A byte string, so struct.pack() results can be appended on both
    # Python 2 and Python 3.
    data = b''

    for a in args:
        # tokenize reserved arguments immediately (not case sensitive)
        if a.lower() in vargs:
            sym.append(a.lower())
            continue

        # determine arg type and remove identifier if needed;
        # unprefixed arguments are addresses so this is the default
        arg_type = 'addr'
        if a[0] in ids:
            arg_type = a[0]
            a = a[1:]

        # evaluate inline calculations
        if (a[0] == '(') and (a[-1] == ')'):
            # NOTE(security): eval() executes arbitrary Python taken from
            # the assembly source -- only assemble trusted input.
            a = str(eval(a[1:-1]))
        # evaluate quoted strings
        elif (a[0] == '\'') and (a[-1] == '\''):
            raw = _as_bytes(a[1:-1])
            if len(a) == 3:
                # one character: its value as an unsigned byte
                a = str(struct.unpack('>B', raw)[0])
            elif len(a) == 4:
                # two characters: one big-endian unsigned 16-bit value
                a = str(struct.unpack('>H', raw)[0])
            else:
                # Any other length is kept as raw data with no symbol.
                # NOTE(review): this replaces (does not append to) data
                # packed for earlier arguments -- confirm intended.
                data = raw
                continue

        # non-numbers must be a label or a source code error
        val = stoi(a)
        if val == 'NaN':
            sym.append('@label' if arg_type == '@' else 'label')
            continue

        # a written minus sign selects signed packing for immediates
        is_neg = a[0] == '-'

        # absolute addresses and immediate ints are always 16 bits.
        # second_pass() checks if values are too long for instruction.
        if arg_type == 'addr':
            # addresses: signed 8-bit offset for relative instructions,
            # unsigned 16-bit otherwise
            if mne in rinst:
                sym.append('rel8')
                fmt = '>b'
            else:
                sym.append('addr')
                fmt = '>H'
        elif arg_type == '#':
            # immediate ints (signed when negative)
            sym.append('data')
            fmt = '>h' if is_neg else '>H'
        else:
            # pointers ('@'): packed as an unsigned 16-bit value.
            # NOTE(review): a leading minus sign is dropped here, so
            # '@-5' packs as 5 -- confirm this is intended.
            sym.append('@addr')
            fmt = '>H'
            if is_neg:
                val = -val

        data = data + struct.pack(fmt, val)

    return tuple(sym), data
|
jb302@28
|
360
|