changeset 22:c988e43bf2b5

committing to backup
author james <jb302@eecs.qmul.ac.uk>
date Mon, 24 Feb 2014 18:59:47 +0000
parents 5953026ed47e
children 2efb577ac2d7
files assembler/assembler.py assembler/language.py assembler/language.pyc tests/.full.asm.swp tests/full.asm tests/full.bin
diffstat 6 files changed, 169 insertions(+), 134 deletions(-) [+]
line wrap: on
line diff
--- a/assembler/assembler.py	Sun Feb 23 19:17:25 2014 +0000
+++ b/assembler/assembler.py	Mon Feb 24 18:59:47 2014 +0000
@@ -24,60 +24,72 @@
     pc = 0
     
     # <line> ::= [<statement>] [";"<comment>] <EOL>
-    for line in source_code:
-        
+    for line in source_code:  
         try:
-            # remove EOL
+            # remove trailing whitespace and comments
             line = line.strip()
-            
-            # remove comments
             for i in range(len(line)):
                 if line[i] == ';':
                     line = line[:i]
                     break
             
-            line = line.lower()
-            
             # <statement> ::= [ <label> ":"] <mnemonic> [<arguments>]
             #                 | <label> ":"
             #                 | "EOF"
             statement = line.split()
+            if not statement:
+                continue
+
+            # replace equated strings
+            # because this happens on the first pass
+            # equates must be assigned before they are used
+            n = 1
+            for i in statement[1:]:
+                # replace any equates already stored
+                # remove prefixes and suffixes before attempting to replace
+                t = e = ''
+                if i[0] in TYPES:
+                    t = i[0]
+                    i = i[1:]
+                if i and (i[-1] in [',', ')']):
+                    e = i[-1]
+                    i = i[:-1]
+                # replace and put removed characters back
+                for j in equ_index.keys():
+                    if i == j:
+                        statement[n] = t + equ_index[j] + e
+                n = n + 1                   
             
-            if not statement: continue
-            # deal with org and db
-            elif statement[0] == 'org': 
+            # deal with org on second pass
+            if statement[0].lower() == 'org':
                 asm.append([statement[0], statement[1:], ('',), ''])
                 pc = string_num(statement[1])
                 continue
-            elif statement[0] == 'db':
-                asm.append([statement[0], statement[1:], ('',), ''])
-                pc = pc + len(statement[1:])
-                continue
 
             # store equates
-            if (len(statement) >= 3) and (statement[1] == 'equ'):
-                equ_index[statement[0]] = statement[2]
+            # these are case sensitive
+            if (len(statement) >= 3) and (statement[1].lower() == 'equ'):
+                equ_index[statement[0]] = ' '.join(statement[2:])
                 continue
             # if needed update index and remove label 
-            elif statement[0][-1:] == ':':
+            elif statement[0][-1] == ':':
                 label_index[statement[0][:-1]] = pc;
                 del statement[0]
                 
-            if not statement: continue
+            if not statement:
+                continue
             
             # <statement> ::= <mnemonic> [<arguments>]
-            mnemonic = statement[0]
+            mnemonic = statement[0].lower()
             arguments = ''.join(statement[1:]).split(',')
             
-            # replace equated arguments
-            # because this happens on the first pass
-            # equates must be assigned before they are used
-            i = 0
-            for a in arguments:
-                if a in equ_index:
-                    arguments[i] = equ_index[a]
-                i = i + 1
-
+            # deal with db
+            if statement[0].lower() == 'db':
+                symbols, constants = tokenize(arguments)
+                asm.append([mnemonic, arguments, symbols, constants])
+                pc = pc + len(constants)
+                continue
+            
             symbols, constants = tokenize(arguments)
             width = iset[mnemonic][symbols][1]
 
@@ -109,7 +121,7 @@
             elif mne == 'db':
                 data = ''
                 for a in args:
-                    data = data + struct.pack('>B', string_num(a))
+                    data = data + tokenize(a)[1]
                 f.write(data)
                 pc = pc + len(data)
                 continue
@@ -117,8 +129,8 @@
             # replace labels with addresses
             i = 0
             for a in args:
-                
-                if not a: continue
+                if not a:
+                    continue
                 elif (sym[i] == 'label') or (sym[i] == '@label'):
                     # labeled pointer uglyness
                     if (a[0] == '@') and (a[1:] in label_index):
@@ -135,13 +147,18 @@
                         else:
                             args[i] = num_string(label_index[a])
                         const = const + tokenize([args[i]])[1]
-                    else:
-                        print '** label error **\nline:\n', line
-                        raise
+                    elif a not in label_index:
+                        print '** label error **', line
+                        raise KeyError
+                        
                 i = i + 1
             
             # assemble to file
             op, width = iset[mne][sym]
+            # there's got to be a better way to deal with paged addresses
+            if mne in ['pcall', 'pjmp']:
+                op = op | ((string_num(args[0]) &  0x7FF) >> 8)
+                const = const[-1]
             f.write(struct.pack('>B', op))
             # pad if needed
             for i in range(width - len(const) - 1):
--- a/assembler/language.py	Sun Feb 23 19:17:25 2014 +0000
+++ b/assembler/language.py	Mon Feb 24 18:59:47 2014 +0000
@@ -7,13 +7,17 @@
 BASE = 16
 # prefix must be only 2 characters otherwise tokenize() will break
 PREFIX = '0x'
+# data types: ints, pointers, ports, vectors
+TYPES = ['#', '@', '$', '*']
+
 def num_string(num):
     return hex(num)
 
 def string_num(string):
     return int(string, BASE)
 
-# valid arguments for this instruction set
+# valid reserved arguments for this instruction set
+# lower and uppercase versions
 vargs = ('', '@a+dptr', 'dptr', 'bs', '@a+pc', 
          'a', 'c','r0', 'r1', 'r2', 'r3', 'ie',
          'sp', 'flags', 'dpl', 'dph', '@dptr',
@@ -210,8 +214,8 @@
                          ('a', 'r2'): [138, 1],
                          ('a', 'r3'): [139, 1]},
              'out': {    ('port_addr', 'a'): [253, 2]},
-             'pcall': {    ('addr11',): [207, 2], ('label',): [207, 2]},
-             'pjmp': {    ('addr11',): [199, 2], ('label',): [199, 2]},
+             'pcall': {    ('addr16',): [207, 2], ('label',): [200, 2]},
+             'pjmp': {    ('addr16',): [199, 2], ('label',): [192, 2]},
              'pop': {    ('a',): [246, 1],
                          ('dph',): [244, 1],
                          ('dpl',): [245, 1],
@@ -272,92 +276,106 @@
 def tokenize(args):
     sym = []
     data = ''
-    
-    i = 0
+
     for a in args:   
+        # tokenize reserved arguments immediately (not case sensitive)
+        # determine arg type and remove identifier if needed
+        # unprefixed arguments are addresses so this is the default
+        arg_type = 'addr'
+        if a.lower() in vargs:
+            sym.append(a.lower())
+            continue
+        elif a[0] in TYPES:
+            arg_type = a[0]
+            a = a[1:]
         
-        # things that can't be negative
-        is_neg = 0
-        # reserved words
-        if a in vargs:
-            sym.append(a)
+        # evaluate inline calculations
+        if (a[0] == '(') and (a[-1] == ')'):
+            a = num_string(eval(a[1:-1]))
+        # evaluate strings
+        elif (a[0] == '\'') and (a[-1] == '\''):
+            if len(a) == 3:
+                a = num_string(struct.unpack('>B', a[1:-1])[0])
+            elif len(a) == 4:
+                a = num_string(struct.unpack('>H', a[1:-1])[0])
+            else:
+                data = a[1:-1]
+                continue
+                    
+        # check if negative and remove sign if needed
+        if a[:2] == '-' + PREFIX:
+            is_neg = 1
+            a = a[1:]
+        elif a[:2] == PREFIX:
+            is_neg = 0
+        # anything else must be a label or a source code error
+        # labeled pointer ugliness
+        elif (arg_type == '@') & (a[:2] != PREFIX):
+            sym.append('@label')
             continue
-        # 16 bit addresses
-        elif (a[:2] == PREFIX) and (2 < len(a[2:]) <= 4):
-            sym.append('addr16')
+        else:
+            sym.append('label')
+            continue
+
+        # addresses
+        if arg_type == 'addr':
+            # 8 bit relative addresses (always signed)
+            if len(a) <= 4:
+                sym.append('rel8')
+                if is_neg:
+                    val = string_num('-' + a)
+                else:
+                    val = string_num(a)
+                data = data + struct.pack('>b', val)
+                continue
+            # 16 bit absolute addresses (never signed)
+            elif len(a) <= 6:
+                sym.append('addr16')
+                val = string_num(a)
+                data = data + struct.pack('>H', val)
+                continue
+        # immediate ints (signed when negative)
+        elif arg_type == '#':
+            # 8 bit ints
+            if len(a) <= 4:
+                sym.append('#data8')
+                if is_neg:
+                    val = string_num('-' + a)
+                    fmt = '>b'
+                else:
+                    val = string_num(a)
+                    fmt = '>B'
+                data = data + struct.pack(fmt, val)
+                continue
+            # 16 bit ints
+            elif len(a) <= 6:
+                sym.append('#data16')
+                if is_neg:
+                    val = string_num('-' + a)
+                    fmt = '>h'
+                else:
+                    val = string_num(a)
+                    fmt = '>H'
+                data = data + struct.pack(fmt, val)
+                continue
+        # pointers
+        elif arg_type == '@':
+            sym.append('@addr16')
             val = string_num(a)
             data = data + struct.pack('>H', val)
             continue
-        # pointers
-        elif a[0] == '@':
-            if a[:3] == '@' + PREFIX:
-                sym.append('@addr16')
-                val = string_num(a[1:])
-                data = data + struct.pack('>H', val)
-                continue
-            # labeled pointers make for ugly code considering
-            # only two instructions can use them
-            else:
-                sym.append('@label')
-                continue
         # ports
-        elif a[:3] == 'p' + PREFIX:
+        elif arg_type == '$':
             sym.append('port_addr')
-            val = string_num(a[1:])
+            val = string_num(a)
             data = data + struct.pack('>B', val)
             continue
         # vectors
-        elif a[:3] == 'v' + PREFIX:
+        elif arg_type == '*':
             sym.append('vect8')
-            val = string_num(a[1:])
+            val = string_num(a)
             data = data + struct.pack('>B', val)
             continue
-        # check if other values are negative and remove sign
-        elif a[0] == '-':
-            a = a[1:]
-            is_neg = 1
-        
-        # things that can be negative
-        # immediate ints (signed when negative)
-        if (a[:3] == '#' + PREFIX):
-            # 8 bit ints
-            if len(a[3:]) <= 2:
-                sym.append('#data8')
-                if is_neg:
-                    val = string_num('-' + a[1:])
-                    fmt = '>b'
-                else:
-                    val = string_num(a[1:])
-                    fmt = '>B'
-                # signed big-endian byte
-                data = data + struct.pack(fmt, val)
-                continue
-            # 16 bit ints
-            elif len(a[3:]) <= 4:
-                sym.append('#data16')
-                if is_neg:
-                    val = string_num('-' + a[1:])
-                    fmt = '>h'
-                else:
-                    val = string_num(a[1:])
-                    fmt = '>H'
-                # signed big-endian short
-                data = data + struct.pack(fmt, val)
-                continue
-        # 8 bit relative addresses (always signed)
-        elif (a[:2] == PREFIX) and (len(a[2:]) <= 2):
-            sym.append('rel8')
-            if is_neg:
-                val = string_num('-' + a)
-            else:
-                val = string_num(a)
-            # signed
-            data = data + struct.pack('>b', val)
-            continue
-        # unknown strings are either labels or source code errors
-        else:
-            sym.append('label')
-            continue
-    
+
     return tuple(sym), data
 
Binary file assembler/language.pyc has changed
Binary file tests/.full.asm.swp has changed
--- a/tests/full.asm	Sun Feb 23 19:17:25 2014 +0000
+++ b/tests/full.asm	Mon Feb 24 18:59:47 2014 +0000
@@ -1,14 +1,15 @@
-#data8 EQU #0x42
-#data16 EQU #0x5757
-port_addr EQU P0x50
-vect8 EQU V0x56
+db 'ABCDEF', 0x0, (0x1 * 2)
+data8 EQU 0x42
+data16 EQU ((0x2BAB * 2) - data8 + 0x01 + data8)
+port_addr EQU $0x50
+vect8 EQU *0x56
 
 NOP
 SET C
 CLR C
 SET BS
 CLR BS
-SET IE
+SeT iE
 CLR IE
 CPL C
 CPL A
@@ -17,8 +18,8 @@
 LAF
 MOV DPTR, SP
 MOV SP, DPTR
-MOV A, #data8
-MOV SP, #data16
+MOV A, #'B'
+MOV SP, #'WW'
 MOV DPTR, #data16
 MOV A, addr16
 MOV addr16, A
@@ -188,22 +189,22 @@
 SUBB A, DPL
 SUBB A, #data8
 SUBB A, @DPTR
-PJMP addr11
-PJMP addr11
-PJMP addr11
-PJMP addr11
-PJMP addr11
-PJMP addr11
-PJMP addr11
-PJMP addr11
-PCALL addr11
-PCALL addr11
-PCALL addr11
-PCALL addr11
-PCALL addr11
-PCALL addr11
-PCALL addr11
-PCALL addr11
+PJMP addr16
+PJMP addr16
+PJMP addr16
+PJMP addr16
+PJMP addr16
+PJMP addr16
+PJMP addr16
+PJMP addr16
+PCALL addr16
+PCALL addr16
+PCALL addr16
+PCALL addr16
+PCALL addr16
+PCALL addr16
+PCALL addr16
+PCALL addr16
 DJNZ R0, rel8
 DJNZ R1, rel8
 DJNZ R2, rel8
@@ -255,6 +256,5 @@
 
 org 0x0151
 rel8:
-addr11:
 addr16:
 NOP
Binary file tests/full.bin has changed