annotate asm/asm.py @ 42:792da050d8c4 tip

more dox
author james <jb302@eecs.qmul.ac.uk>
date Tue, 22 Apr 2014 14:25:14 +0100
parents 6b947f6d69d9
children
rev   line source
jb302@1 1 #!/usr/bin/env python2
jb302@1 2 # assembler.py
jb302@1 3 import struct
jb302@1 4 import sys
jb302@1 5 from language import *
jb302@1 6
jb302@1 7 # take source file and return preprocessed assembly code
jb302@1 8 # for each non-empty line in the file:
jb302@1 9 # remove comments from source
jb302@25 10 # replace equated strings
jb302@25 11 # store label definitions and remove label from source
jb302@25 12 # store new equates
jb302@25 13 # make hashable format symbol from arguments
jb302@25 14 # identify and save constant data
jb302@25 15 # save instruction, arguments, symbol and data to list
jb302@25 16 # also prepares org and db instructions for second_pass()
def _resolve_symbol(token, mnemonic, equates, labels, pc):
    """Return token with an already-known equate or label substituted.

    token    -- one whitespace-separated word of the statement (non-empty)
    mnemonic -- lower-cased mnemonic of the statement, used to decide
                whether a label becomes a relative or absolute value
    equates  -- dict of equate name -> replacement text
    labels   -- dict of label name -> address
    pc       -- address of the statement being processed

    A leading character found in ids, a leading '(', a trailing ')' and a
    trailing ',' are peeled off before the lookup and put back around the
    replacement. Equates take priority over labels. A token that matches
    neither is returned unchanged.
    """
    prefix = suffix = ''
    name = token
    # prefixes
    if name[0] in ids:
        prefix = prefix + name[0]
        name = name[1:]
    # guard against a token that was nothing but a prefix character
    if name and name[0] == '(':
        prefix = prefix + name[0]
        name = name[1:]
    # suffixes
    if name and name[-1] == ',':
        suffix = name[-1] + suffix
        name = name[:-1]
    if name and name[-1] == ')':
        suffix = name[-1] + suffix
        name = name[:-1]

    if name in equates:
        return prefix + equates[name] + suffix
    if name in labels:
        # instructions listed in rinst take an offset from the current pc
        if mnemonic in rinst:
            return prefix + str(labels[name] - pc) + suffix
        return prefix + str(labels[name]) + suffix
    return token


def first_pass(f):
    """Preprocess an assembly source file.

    f -- seekable file object containing the source text

    For each non-empty line: strip the comment, substitute equates and
    labels that have already been defined, record label definitions and
    new equates, and tokenize the remaining instruction.

    Returns (asm, labels):
      asm    -- list of [mnemonic, args, symbol, const] entries, one per
                instruction or org/db/ds directive, in source order
      labels -- dict mapping each label name to the pc at its definition

    Any exception is reported together with the offending line and then
    re-raised.
    """
    asm = []
    labels = {}
    equates = {}
    pc = 0

    # read file into list, remove blank lines
    f.seek(0)
    source_code = [l for l in f.readlines() if l != '\n']

    # <line> ::= [<statement>] [";"<comment>] <EOL>
    for line in source_code:
        try:
            # remove surrounding whitespace and everything after the first ';'
            line = line.strip().split(';', 1)[0]

            # <statement> ::= [ <label> ":"] <mnemonic> [<arguments>]
            #               | <label> ":"
            #               | "EOF"
            statement = line.split()
            if not statement:
                continue

            # The relative/absolute decision needs the mnemonic, which is
            # the second word when the line starts with a label definition.
            head = statement[0]
            if head[-1] == ':' and len(statement) > 1:
                mne_hint = statement[1].lower()
            else:
                mne_hint = head.lower()

            # replace equated strings and known labels
            # because this happens on the first pass they must be
            # assigned before they are used
            for i, word in enumerate(statement[1:], 1):
                statement[i] = _resolve_symbol(word, mne_hint, equates,
                                               labels, pc)

            # deal with org
            if statement[0].lower() == 'org':
                asm.append(['org', statement[1:], ('',), ''])
                pc = stoi(statement[1])
                continue
            # if needed update index and remove label
            elif statement[0][-1] == ':':
                labels[statement[0][:-1]] = pc
                del statement[0]
            # store equates
            # these are case sensitive
            elif (len(statement) >= 3) and (statement[1].lower() == 'equ'):
                equates[statement[0]] = ' '.join(statement[2:])
                continue

            # the line held only a label
            if not statement:
                continue

            # <statement> ::= <mnemonic> [<arguments>]
            mne = statement[0].lower()
            args = ''.join(statement[1:]).split(',')

            # deal with db
            if mne == 'db':
                pieces = []
                for a in args:
                    data = tokenize(mne, ['#' + a])[1]
                    if data == '\x00\x00':
                        # zero is stored as a single zero byte
                        pieces.append('\x00')
                    else:
                        # Strip leading zero bytes only: zeros after the
                        # first non-zero byte are part of the value
                        # (e.g. 0x0100).
                        # NOTE(review): assumes tokenize returns the most
                        # significant byte first -- confirm.
                        pieces.append(data.lstrip('\x00'))
                const = ''.join(pieces)
                asm.append([mne, args, ('',), const])
                pc = pc + len(const)
                continue
            # ds only reserves space: advance pc by the requested amount
            elif mne == 'ds':
                asm.append([mne, args, ('',), ''])
                pc = pc + stoi(args[0])
                continue

            # tokenize
            sym, const = tokenize(mne, args)
            asm.append([mne, args, sym, const])
            # increase pc by the instruction width from the instruction set
            pc = pc + iset[mne][sym][1]

        except Exception:
            print(' ** first pass error **\nline:\n' + line)
            raise

    return asm, labels
jb302@1 140
jb302@25 141 # take a preprocessed object asm and write machine code to binary file
jb302@25 142 # for each line of asm:
jb302@19 143 # check if it's an org, db or ds command and deal with it accordingly
jb302@19 144 # check if arguments are labels and replace with value
jb302@19 145 # write instruction to file
def second_pass(f, asm, labels, d=None):
    """Write machine code for a preprocessed program.

    f      -- seekable binary file object receiving the machine code
    asm    -- list of [mnemonic, args, symbol, const] entries as produced
              by first_pass(); args lists are updated in place when
              labels are resolved
    labels -- dict mapping label names to addresses
    d      -- optional text file object; when given, one tab-separated
              listing line (pc, opcode, constant, mnemonic, arguments)
              is written per instruction

    Returns (f, d).

    Raises ValueError when an instruction's constant is wider than its
    encoding allows. Any exception is reported together with the
    offending entry and then re-raised.
    """
    pc = 0

    for line in asm:
        f.seek(pc)
        mne, args, sym, const = line

        try:
            # deal with org, db and ds
            if mne == 'org':
                pc = stoi(args[0])
                continue
            elif mne == 'db':
                f.write(const)
                pc = pc + len(const)
                continue
            elif mne == 'ds':
                # reserve space without writing anything
                pc = pc + stoi(args[0])
                continue

            # replace labels with addresses
            # enumerate keeps the index in step with args even when an
            # empty argument is skipped
            for i, a in enumerate(args):
                if not a:
                    continue
                if sym[i] in ('label', '@label'):
                    # labeled pointer: keep the '@' in front of the address
                    if (a[0] == '@') and (a[1:] in labels):
                        args[i] = '@' + str(labels[a[1:]])
                    # instructions in rinst take an offset from pc
                    elif mne in rinst:
                        args[i] = str(labels[a] - pc)
                    else:
                        args[i] = str(labels[a])
                    const = const + tokenize(mne, [args[i]])[1]

            # assemble to file
            op, width = iset[mne][sym]
            # paged addresses: bits 8-10 of the target go into the opcode,
            # only the last constant byte follows it
            if mne in ['pcall', 'pjmp']:
                op = op | ((stoi(args[0]) & 0x7FF) >> 8)
                const = const[-1]
            f.write(struct.pack('>B', op))

            # check length and write constant or throw error
            # 'of' is the number of bytes by which the constant exceeds
            # the room left after the opcode byte
            of = len(const) - width + 1
            if of > 0:
                if const[0] == '\x00':
                    const = const[of:]
                else:
                    raise ValueError('constant too wide for instruction')
            f.write(const)

            # write debug file
            if d is not None:
                if len(const) == 0:
                    uconst = ' '
                elif len(const) == 2:
                    uconst = hex(struct.unpack('>H', const)[0])
                else:
                    # relative offsets are shown signed
                    fmt = '>b' if mne in rinst else '>B'
                    uconst = hex(struct.unpack(fmt, const)[0])
                argstr = ', '.join(args)
                d.write('\t'.join([hex(pc), hex(op), uconst, mne, argstr])
                        + '\n')

            pc = pc + width

        except Exception:
            print('** second pass error **\nline:\n' + str(line))
            raise

    return f, d
jb302@1 229
if __name__ == '__main__':
    # usage: asm.py <source> [<output>]
    # The machine code goes to <output> and the debug listing to
    # <output>.dsm; without <output> they default to a.out and a.dsm.
    if len(sys.argv) > 2:
        bin_name = sys.argv[2]
        dsm_name = sys.argv[2] + '.dsm'
    else:
        bin_name = 'a.out'
        dsm_name = 'a.dsm'

    # with-blocks close all three files even when a pass raises; the
    # listing is text, so it is opened in text mode in both cases
    with open(sys.argv[1], 'r') as f:
        with open(bin_name, 'wb') as b:
            with open(dsm_name, 'w') as d:
                asm, labels = first_pass(f)
                second_pass(b, asm, labels, d)
jb302@1 243