jb302@1
|
1 #!/usr/bin/env python2
|
jb302@1
|
2 # assembler.py
|
jb302@1
|
3 import struct
|
jb302@1
|
4 import sys
|
jb302@1
|
5 from language import *
|
jb302@1
|
6
|
jb302@1
|
7 # take source file and return preprocessed assembly code
|
jb302@1
|
8 # for each non-empty line in the file:
|
jb302@1
|
9 # remove comments from source
|
jb302@25
|
10 # replace equated strings
|
jb302@25
|
11 # store label definitions and remove label from source
|
jb302@25
|
12 # store new equates
|
jb302@25
|
13 # make hashable format symbol from arguments
|
jb302@25
|
14 # identify and save constant data
|
jb302@25
|
15 # save instruction, arguments, symbol and data to list
|
jb302@25
|
16 # also prepares org and db instructions for second_pass()
|
jb302@1
|
def first_pass(f):
    """Preprocess an assembly source file into a list of statement records.

    Every non-empty line has its comment removed, has already-defined
    equates and labels substituted into its tokens, and is then stored as
    ``[mnemonic, args, symbol, constant]``.  The program counter is tracked
    so that each label can be mapped to the address where it is defined.

    Substitution happens in this single pass, so an equate (or a label used
    in an equate) only takes effect on lines after its definition.  Tokens
    that match neither an equate nor a known label are left untouched.

    Arguments:
        f -- seekable, readable file object containing the assembly source

    Returns:
        (asm, labels): asm is the list of statement records in source
        order, labels maps label name -> program counter at its definition.

    Raises:
        Whatever exception a line triggers; the offending line is written
        to stdout first and the exception is then re-raised unchanged.
    """
    asm = []
    labels = {}
    equates = {}
    pc = 0

    # read the whole file, dropping lines that consist only of a newline
    f.seek(0)
    source_code = [l for l in f.readlines() if l != '\n']

    # <line> ::= [<statement>] [";"<comment>] <EOL>
    for line in source_code:
        try:
            # strip surrounding whitespace, then cut at the first ';'
            line = line.strip().split(';', 1)[0]

            # <statement> ::= [ <label> ":"] <mnemonic> [<arguments>]
            #               | <label> ":"
            #               | "EOF"
            statement = line.split()
            if not statement:
                continue

            # A leading "<label>:" pushes the mnemonic to the second token.
            # The relative-address decision below has to look at the
            # mnemonic; testing statement[0] would test the label instead
            # and labelled relative instructions would get absolute values.
            if statement[0].endswith(':') and len(statement) > 1:
                mnemonic = statement[1].lower()
            else:
                mnemonic = statement[0].lower()

            # replace equated strings
            # because this happens on the first pass
            # equates must be assigned before they are used
            for i, s in enumerate(statement[1:], 1):
                # peel off prefix/suffix characters so the bare name can be
                # looked up, then glue them back on after substitution
                prefix = suffix = ''
                # prefixes (ids comes from the language module; presumably
                # the operand-type marker characters -- TODO confirm)
                if s[0] in ids:
                    prefix = prefix + s[0]
                    s = s[1:]
                if s[0] == '(':
                    prefix = prefix + s[0]
                    s = s[1:]
                # suffixes
                if s and (s[-1] == ','):
                    suffix = suffix + s[-1]
                    s = s[:-1]
                if s and (s[-1] == ')'):
                    suffix = s[-1] + suffix
                    s = s[:-1]

                if s in equates:
                    statement[i] = prefix + equates[s] + suffix
                # labels can be used in equates but they have
                # to be assigned before they are used as well
                elif s in labels:
                    if mnemonic in rinst:
                        # same offset second_pass() computes: target minus
                        # the address of the current instruction
                        value = labels[s] - pc
                    else:
                        value = labels[s]
                    statement[i] = prefix + str(value) + suffix

            # deal with org
            if statement[0].lower() == 'org':
                asm.append(['org', statement[1:], ('',), ''])
                pc = stoi(statement[1])
                continue
            # record the label at the current address and drop it
            elif statement[0].endswith(':'):
                labels[statement[0][:-1]] = pc
                del statement[0]
            # store equates
            # these are case sensitive
            elif (len(statement) >= 3) and (statement[1].lower() == 'equ'):
                equates[statement[0]] = ' '.join(statement[2:])
                continue

            # the line held nothing but a label
            if not statement:
                continue

            # <statement> ::= <mnemonic> [<arguments>]
            mne = statement[0].lower()
            args = ''.join(statement[1:]).split(',')

            # deal with db
            if mne == 'db':
                pieces = []
                for a in args:
                    data = tokenize(mne, ['#' + a])[1]
                    if data == '\x00\x00':
                        # zero is the only number: keep a single zero byte
                        pieces.append('\x00')
                    else:
                        # drop leading zero bytes only; zero bytes after
                        # the first non-zero byte are real data
                        pieces.append(data.lstrip('\x00'))
                const = ''.join(pieces)
                asm.append([mne, args, ('',), const])
                pc = pc + len(const)
                continue
            # ds emits nothing here; it only advances pc by its argument
            elif mne == 'ds':
                asm.append([mne, args, ('',), ''])
                pc = pc + stoi(args[0])
                continue

            # tokenize
            sym, const = tokenize(mne, args)
            asm.append([mne, args, sym, const])
            # increase pc by the instruction width from the instruction set
            width = iset[mne][sym][1]
            pc = pc + width

        except:
            # report the offending line, then let the error propagate
            # (same bytes the Python 2 print statement produced)
            sys.stdout.write(' ** first pass error **\nline:\n'
                             + str(line) + '\n')
            raise

    return asm, labels
|
jb302@1
|
140
|
jb302@25
|
141 # take a preprocessed object asm and write machine code to binary file
|
jb302@25
|
142 # for each line of asm:
|
jb302@19
|
143 # check if it's an org, db or ds command and deal with it accordingly
|
jb302@19
|
144 # check if arguments are labels and replace with value
|
jb302@19
|
145 # write instruction to file
|
jb302@34
|
def second_pass(f, asm, labels, d=None):
    """Write machine code for the statement records in asm to f.

    For every record the output file is positioned at the current program
    counter.  The org, db and ds directives are handled directly; for
    everything else, arguments still marked as labels are replaced by
    their addresses, and the opcode and constant bytes are written.

    Arguments:
        f      -- seekable, writable binary file object for the output
        asm    -- list of [mnemonic, args, symbol, constant] records as
                  produced by first_pass(); args lists are updated in place
                  when labels are resolved
        labels -- dict mapping label name -> address
        d      -- optional writable file object; when given, one
                  tab-separated debug line is written per instruction

    Returns:
        (f, d), the same objects that were passed in.

    Raises:
        ValueError if an instruction's constant is longer than the operand
        space allowed by its width and does not start with a zero byte.
        Any exception is re-raised after the offending record has been
        written to stdout.
    """
    pc = 0

    for line in asm:
        f.seek(pc)
        mne, args, sym, const = line

        try:
            # deal with org, db and ds
            if mne == 'org':
                pc = stoi(args[0])
                continue
            if mne == 'db':
                f.write(const)
                pc = pc + len(const)
                continue
            if mne == 'ds':
                # reserve space: nothing is written, only pc moves on
                pc = pc + stoi(args[0])
                continue

            # replace labels with addresses
            # enumerate keeps i in step with sym even when an empty
            # argument is skipped
            for i, a in enumerate(args):
                if not a:
                    continue
                if sym[i] not in ('label', '@label'):
                    continue
                # labeled pointer ugliness
                if (a[0] == '@') and (a[1:] in labels):
                    args[i] = '@' + str(labels[a[1:]])
                # check if constant needs to be a relative address
                elif mne in rinst:
                    args[i] = str(labels[a] - pc)
                else:
                    args[i] = str(labels[a])
                const = const + tokenize(mne, [args[i]])[1]

            # assemble to file
            op, width = iset[mne][sym]
            # paged addresses: bits 8-10 of the target are folded into the
            # opcode and only the last constant byte is kept
            if mne in ['pcall', 'pjmp']:
                op = op | ((stoi(args[0]) & 0x7FF) >> 8)
                const = const[-1]
            f.write(struct.pack('>B', op))

            # check length and write constant or throw error
            # width counts the opcode byte, so width - 1 bytes remain
            overflow = len(const) - width + 1
            if overflow > 0:
                if const[0] == '\x00':
                    const = const[overflow:]
                else:
                    raise ValueError('constant too long for instruction')
            f.write(const)

            # write debug file
            if d is not None:
                if len(const) == 0:
                    uconst = ' '
                elif len(const) == 2:
                    uconst = hex(struct.unpack('>H', const)[0])
                else:
                    # single byte: signed for instructions in rinst
                    fmt = '>b' if mne in rinst else '>B'
                    uconst = hex(struct.unpack(fmt, const)[0])
                argstr = ', '.join(args)
                d.write(hex(pc) + '\t' + hex(op) + '\t' + uconst + '\t'
                        + mne + '\t' + argstr + '\n')

            pc = pc + width

        except:
            # report the offending record, then let the error propagate
            # (same bytes the Python 2 print statement produced)
            sys.stdout.write('** second pass error **\nline:\n'
                             + str(line) + '\n')
            raise

    return f, d
|
jb302@1
|
229
|
jb302@1
|
if __name__ == '__main__':
    # usage: assembler.py <source> [<output>]
    # without <output> the binary goes to a.out and the debug listing to
    # a.dsm; otherwise the listing is written to <output>.dsm
    if len(sys.argv) > 2:
        bin_path = sys.argv[2]
        dsm_path = sys.argv[2] + '.dsm'
    else:
        bin_path = 'a.out'
        dsm_path = 'a.dsm'

    # assemble the source before touching the output files, so a source
    # error does not truncate an existing binary
    with open(sys.argv[1], 'r') as f:
        asm, labels = first_pass(f)

    # the with-block closes both files even if second_pass() raises
    with open(bin_path, 'wb') as b, open(dsm_path, 'w') as d:
        second_pass(b, asm, labels, d)
|
jb302@1
|
243
|