changeset 25:45340c2a38c5

tidier and less buggier
author james <jb302@eecs.qmul.ac.uk>
date Fri, 28 Feb 2014 17:21:11 +0000
parents 19cbcbde19af
children 84716cd835dd
files assembler/assembler.py assembler/language.py assembler/language.pyc emulator/a.out emulator/emu.c tests/.full.asm.swp tests/a.out tests/full.asm tests/full.bin tests/ljmp.asm tests/test.asm tests/test.bin
diffstat 12 files changed, 223 insertions(+), 220 deletions(-) [+]
line wrap: on
line diff
--- a/assembler/assembler.py	Tue Feb 25 02:05:01 2014 +0000
+++ b/assembler/assembler.py	Fri Feb 28 17:21:11 2014 +0000
@@ -7,21 +7,22 @@
 # take source file and return preprocessed assembly code
 # for each non-empty line in the file:
 #   remove comments from source
-#   store label definitions
-#   remove label definitions from source
-#   determine format of arguments:
-#       store constant data
-#       store data type symbols
-#   return all this in a list
+#   replace equated strings
+#   store label definitions and remove label from source
+#   store new equates
+#   make hashable format symbol from arguments
+#   identify and save constant data
+#   save instruction, arguments, symbol and data to list
+# also prepares org and db instructions for second_pass()
 def first_pass(f):
+    asm = []
+    labels = {}
+    equates = {}
+    pc = 0
+    
     # read file into list, remove blank line
     f.seek(0)
-    source_code = filter(lambda l: l != '\n', f.readlines())
-    
-    asm = []
-    label_index = {}
-    equ_index = {}
-    pc = 0
+    source_code = filter(lambda l: l != '\n', f.readlines())  
     
     # <line> ::= [<statement>] [";"<comment>] <EOL>
     for line in source_code:  
@@ -43,76 +44,98 @@
             # replace equated strings
             # because this happens on the first pass
             # equates must be assigned before they are used
-            n = 1
-            for i in statement[1:]:
+            i = 1
+            for s in statement[1:]:
                 # replace any equates already stored
                 # remove prefixes and suffixes before attempting to replace
-                t = e = ''
-                if i[0] in TYPES:
-                    t = i[0]
-                    i = i[1:]
-                if i and (i[-1] in [',', ')']):
-                    e = i[-1]
-                    i = i[:-1]
+                prefix = suffix = ''
+                # prefixes
+                if s[0] in ids:
+                    prefix = prefix + s[0]
+                    s = s[1:]
+                if s[0] == '(':
+                    prefix = prefix + s[0]
+                    s = s[1:]
+                # suffixes
+                if s and (s[-1] == ','):
+                    suffix = suffix + s[-1]
+                    s = s[:-1]
+                if s and (s[-1] == ')'):
+                    suffix = s[-1] + suffix
+                    s = s[:-1]
                 # replace and put removed characters back
-                if i in equ_index:
-                    statement[n] = t + equ_index[i] + e
+                if s in equates:
+                    statement[i] = prefix + equates[s] + suffix
                 # labels can be used in equates but they have
-                # to defined before they are used
-                elif i in label_index:
-                    statement[n] = t + num_string(label_index[i]) + e
-                n = n + 1                   
+                # to be assigned before they are used as well
+                elif s in labels:
+                    statement[i] = prefix + str(labels[s]) + suffix
+                i = i + 1                   
             
             # deal with org
             if statement[0].lower() == 'org':
                 asm.append(['org', statement[1:], ('',), ''])
-                pc = string_num(statement[1])
+                pc = stoi(statement[1])
                 continue
             # if needed update index and remove label 
             elif statement[0][-1] == ':':
-                label_index[statement[0][:-1]] = pc;
+                labels[statement[0][:-1]] = pc;
                 del statement[0]
             
             # store equates
             # these are case sensative
             if (len(statement) >= 3) and (statement[1].lower() == 'equ'):
-                equ_index[statement[0]] = ' '.join(statement[2:])
+                equates[statement[0]] = ' '.join(statement[2:])
                 continue
                 
             if not statement:
                 continue
             
             # <statement> ::= <mnemonic> [<arguments>]
-            mnemonic = statement[0].lower()
-            arguments = ''.join(statement[1:]).split(',')
-            
+            mne = statement[0].lower()
+            args = ''.join(statement[1:]).split(',')
+        
             # deal with db
-            if mnemonic == 'db':
-                constants = ''
-                for a in arguments:
-                    constants = constants +  tokenize(['#' + a])[1]
-                asm.append([mnemonic, arguments, ('',), constants])
-                pc = pc + len(constants)
+            if mne == 'db':
+                const = ''
+                for a in args:
+                    data = tokenize(mne, ['#' + a])[1]
+                    # deal with leading zeros
+                    # skip zeros unless zero is the 
+                    # only number
+                    if data == '\x00\x00':
+                        const = const + '\x00'
+                        continue
+                    i = 0
+                    for c in data:
+                        if c == '\x00':
+                            i = i + 1
+                        else:
+                            pass
+                    const = const + data[i:]
+                asm.append([mne, args, ('',), const])
+                pc = pc + len(const)
                 continue
             
-            symbols, constants = tokenize(arguments)
-            width = iset[mnemonic][symbols][1]
-
-            asm.append([mnemonic, arguments, symbols, constants])
+            # tokenize
+            sym, const = tokenize(mne, args)
+            asm.append([mne, args, sym, const])
+            # increase pc
+            width = iset[mne][sym][1]
             pc = pc + width
         
         except:
             print ' ** first pass error **\nline:\n', line
             raise
     
-    return asm, label_index
+    return asm, labels
 
-# take preprocessed asm and write machine code to binary file
-# for each line of asm
+# take a preprocessed object asm and write machine code to binary file
+# for each line of asm:
 #   check if it's an org or db command deal with it accordingly
 #   check if arguments are labels and replace with value
 #   write instruction to file
-def second_pass(f, asm, label_index):
+def second_pass(f, asm, labels):
     pc = 0
 
     for line in asm:
@@ -120,8 +143,9 @@
         mne, args, sym, const = line
     
         try:
+            # deal with org and db
             if mne == 'org':
-                pc = string_num(args[0])
+                pc = stoi(args[0])
                 continue
             elif mne == 'db':
                 f.write(const)
@@ -135,37 +159,39 @@
                     continue
                 elif (sym[i] == 'label') or (sym[i] == '@label'):
                     # labeled pointer uglyness
-                    if (a[0] == '@') and (a[1:] in label_index):
-                        args[i] = '@' + num_string(label_index[a[1:]])
-                        const = const + tokenize([args[i]])[1]
-                    elif a in label_index:
+                    if (a[0] == '@') and (a[1:] in labels):
+                        args[i] = '@' + str(labels[a[1:]])
+                        const = const + tokenize(mne, [args[i]])[1]
+                    else:
                         # check if constant needs to be a relative address
-                        r = list(sym)
-                        r.insert(i, 'rel8')
-                        r.pop(i + 1)
-                        if tuple(r) in iset[mne].keys():
-                            # relative addresses can be negative
-                            args[i] = num_string(label_index[a] - pc)
+                        if mne in rinst:
+                            args[i] = str(labels[a] - pc)
                         else:
-                            args[i] = num_string(label_index[a])
-                        const = const + tokenize([args[i]])[1]
-                    elif a not in label_index:
-                        print '** label error **', line
-                        raise KeyError       
+                            args[i] = str(labels[a])
+                        const = const + tokenize(mne, [args[i]])[1]  
                 i = i + 1
             
             # assemble to file
             op, width = iset[mne][sym]
             # theres gotta be a better way do deal with paged addresses
             if mne in ['pcall', 'pjmp']:
-                op = op | ((string_num(args[0]) &  0x7FF) >> 8)
+                op = op | ((stoi(args[0]) &  0x7FF) >> 8)
                 const = const[-1]
             f.write(struct.pack('>B', op))
+            
             # pad if needed
-            for i in range(width - len(const) - 1):
-                f.write(struct.pack('>B', 0))
+            # i don't think this ever happens
+            #for i in range(width - len(const) - 1):
+            #    f.write(struct.pack('>B', 0))
+            
+            # check length and write constant or throw error
+            of = len(const) - width + 1
+            if of > 0:
+                if const[0] == ('\x00'):
+                    const = const[of:]
+                else:
+                    raise ValueError
             f.write(const)
-            
             pc = pc + width
         
         except:
@@ -176,9 +202,12 @@
     
 if __name__ == '__main__':
     f = open(sys.argv[1], 'r')
-    b = open(sys.argv[2], 'wb')
-    asm, label_index = first_pass(f)
-    b = second_pass(b, asm, label_index)
+    try:
+        b = open(sys.argv[2], 'wb')
+    except IndexError:
+        b = open('a.out', 'wb')
+    asm, labels = first_pass(f)
+    b = second_pass(b, asm, labels)
     f.close()
     b.close()
 
--- a/assembler/language.py	Tue Feb 25 02:05:01 2014 +0000
+++ b/assembler/language.py	Fri Feb 28 17:21:11 2014 +0000
@@ -2,19 +2,8 @@
 # language.py
 import struct
 
-# these definitions are here to make changing the source code
-# representation of numbers easier
-BASE = 16
-# prefix must be only 2 characters otherwise tokenize() will break
-PREFIX = '0x'
-# data types: ints, pointers, ports, vectors
-TYPES = ['#', '@', '$', '*']
-
-def num_string(num):
-    return hex(num)
-
-def string_num(string):
-    return int(string, BASE)
+# identifiers: immediate ints, pointers
+ids = ['#', '@']
 
 # valid reserved arguments for this instruction set
 vargs = ('', '@a+dptr', 'dptr', 'bs', '@a+pc', 
@@ -22,11 +11,15 @@
          'sp', 'flags', 'dpl', 'dph', '@dptr',
          'sph', 'spl')
 
+# instructions that use relative addresses
+rinst = ('djnz', 'cjne', 'sjmp', 'jz', 'jnz', 
+         'jc', 'jnc', 'jpo', 'jpe', 'js', 'jns')
+
 # dictionary embedded dictionaries?
-# for every mnemonic in the instruction set index
-# there is an index of possible argument formats ('symbol')
-# for that mnemonic and a corresponding op code and it's length
-iset =  {    'add': {    ('a', '#data8'): [166, 2],
+#     for every mnemonic in the instruction set index
+#     there is an index of possible argument formats (symbols)
+#     with corresponding op codes and width
+iset =  {    'add': {    ('a', 'data'): [166, 2],
                          ('a', '@dptr'): [167, 1],
                          ('a', 'dph'): [164, 1],
                          ('a', 'dpl'): [165, 1],
@@ -34,7 +27,7 @@
                          ('a', 'r1'): [161, 1],
                          ('a', 'r2'): [162, 1],
                          ('a', 'r3'): [163, 1]},
-             'addc': {    ('a', '#data8'): [174, 2],
+             'addc': {    ('a', 'data'): [174, 2],
                           ('a', '@dptr'): [175, 1],
                           ('a', 'dph'): [172, 1],
                           ('a', 'dpl'): [173, 1],
@@ -42,7 +35,7 @@
                           ('a', 'r1'): [169, 1],
                           ('a', 'r2'): [170, 1],
                           ('a', 'r3'): [171, 1]},
-             'anl': {    ('a', '#data8'): [134, 2],
+             'anl': {    ('a', 'data'): [134, 2],
                          ('a', '@dptr'): [135, 1],
                          ('a', 'dph'): [132, 1],
                          ('a', 'dpl'): [133, 1],
@@ -50,16 +43,16 @@
                          ('a', 'r1'): [129, 1],
                          ('a', 'r2'): [130, 1],
                          ('a', 'r3'): [131, 1]},
-             'cjne': {    ('a', '#data8', 'label'): [223, 3],
-                          ('a', '#data8', 'rel8'): [223, 3],
-                          ('r0', '#data8', 'label'): [212, 2],
-                          ('r0', '#data8', 'rel8'): [212, 2],
-                          ('r1', '#data8', 'label'): [213, 2],
-                          ('r1', '#data8', 'rel8'): [213, 2],
-                          ('r2', '#data8', 'label'): [214, 2],
-                          ('r2', '#data8', 'rel8'): [214, 2],
-                          ('r3', '#data8', 'label'): [215, 2],
-                          ('r3', '#data8', 'rel8'): [215, 2]},
+             'cjne': {    ('a', 'data', 'label'): [223, 3],
+                          ('a', 'data', 'rel8'): [223, 3],
+                          ('r0', 'data', 'label'): [212, 2],
+                          ('r0', 'data', 'rel8'): [212, 2],
+                          ('r1', 'data', 'label'): [213, 2],
+                          ('r1', 'data', 'rel8'): [213, 2],
+                          ('r2', 'data', 'label'): [214, 2],
+                          ('r2', 'data', 'rel8'): [214, 2],
+                          ('r3', 'data', 'label'): [215, 2],
+                          ('r3', 'data', 'rel8'): [215, 2]},
              'clr': {    ('bs',): [11, 1], ('c',): [9, 1], ('ie',): [13, 1]},
              'cpl': {    ('a',): [15, 1], ('c',): [14, 1]},
              'da': {    ('a',): [250, 1]},
@@ -74,9 +67,9 @@
                           ('r3', 'label'): [211, 2],
                           ('r3', 'rel8'): [211, 2]},
              'hlt': {    ('',): [255, 1]},
-             'in': {    ('a', 'port_addr'): [252, 2]},
+             'in': {    ('a', 'addr'): [252, 2]},
              'inc': {    ('a',): [158, 1], ('dptr',): [156, 1]},
-             'int': {    ('vect8',): [254, 2]},
+             'int': {    ('addr',): [254, 2]},
              'jc': {    ('label',): [226, 2], ('rel8',): [226, 2]},
              'jmp': {    ('@a+dptr',): [221, 1], ('@dptr',): [222, 1]},
              'jnc': {    ('label',): [227, 2], ('rel8',): [227, 2]},
@@ -87,9 +80,9 @@
              'js': {    ('label',): [230, 2], ('rel8',): [230, 2]},
              'jz': {    ('label',): [224, 2], ('rel8',): [224, 2]},
              'laf': {    ('',): [18, 1]},
-             'lcall': {    ('addr16',): [217, 3], ('label',): [217, 3]},
-             'ljmp': {    ('addr16',): [216, 3], ('label',): [216, 3]},
-             'mov': {    ('@addr16', 'a'): [29, 3],
+             'lcall': {    ('addr',): [217, 3], ('label',): [217, 3]},
+             'ljmp': {    ('addr',): [216, 3], ('label',): [216, 3]},
+             'mov': {    ('@addr', 'a'): [29, 3],
                          ('@dptr', 'a'): [31, 1],
                          ('@dptr', 'dph'): [36, 1],
                          ('@dptr', 'dpl'): [37, 1],
@@ -100,13 +93,13 @@
                          ('@dptr', 'sph'): [38, 1],
                          ('@dptr', 'spl'): [39, 1],
                          ('@label', 'a'): [29, 3],
-                         ('a', '#data8'): [21, 2],
+                         ('a', 'data'): [21, 2],
                          ('a', '@a+dptr'): [26, 1],
                          ('a', '@a+pc'): [27, 1],
-                         ('a', '@addr16'): [28, 3],
+                         ('a', '@addr'): [28, 3],
                          ('a', '@dptr'): [30, 1],
                          ('a', '@label'): [28, 3],
-                         ('a', 'addr16'): [24, 3],
+                         ('a', 'addr'): [24, 3],
                          ('a', 'dph'): [60, 1],
                          ('a', 'dpl'): [61, 1],
                          ('a', 'label'): [24, 3],
@@ -116,8 +109,8 @@
                          ('a', 'r3'): [59, 1],
                          ('a', 'sph'): [62, 1],
                          ('a', 'spl'): [63, 1],
-                         ('addr16', 'a'): [25, 3],
-                         ('dph', '#data8'): [44, 2],
+                         ('addr', 'a'): [25, 3],
+                         ('dph', 'data'): [44, 2],
                          ('dph', '@dptr'): [100, 1],
                          ('dph', 'a'): [52, 1],
                          ('dph', 'dpl'): [101, 1],
@@ -127,7 +120,7 @@
                          ('dph', 'r3'): [99, 1],
                          ('dph', 'sph'): [102, 1],
                          ('dph', 'spl'): [103, 1],
-                         ('dpl', '#data8'): [45, 2],
+                         ('dpl', 'data'): [45, 2],
                          ('dpl', '@dptr'): [109, 1],
                          ('dpl', 'a'): [53, 1],
                          ('dpl', 'dph'): [108, 1],
@@ -137,10 +130,10 @@
                          ('dpl', 'r3'): [107, 1],
                          ('dpl', 'sph'): [110, 1],
                          ('dpl', 'spl'): [111, 1],
-                         ('dptr', '#data16'): [23, 3],
+                         ('dptr', 'data'): [23, 3],
                          ('dptr', 'sp'): [19, 1],
                          ('label', 'a'): [25, 3],
-                         ('r0', '#data8'): [40, 2],
+                         ('r0', 'data'): [40, 2],
                          ('r0', '@dptr'): [64, 1],
                          ('r0', 'a'): [48, 1],
                          ('r0', 'dph'): [68, 1],
@@ -150,7 +143,7 @@
                          ('r0', 'r3'): [67, 1],
                          ('r0', 'sph'): [70, 1],
                          ('r0', 'spl'): [71, 1],
-                         ('r1', '#data8'): [41, 2],
+                         ('r1', 'data'): [41, 2],
                          ('r1', '@dptr'): [73, 1],
                          ('r1', 'a'): [49, 1],
                          ('r1', 'dph'): [76, 1],
@@ -160,7 +153,7 @@
                          ('r1', 'r3'): [75, 1],
                          ('r1', 'sph'): [78, 1],
                          ('r1', 'spl'): [79, 1],
-                         ('r2', '#data8'): [42, 2],
+                         ('r2', 'data'): [42, 2],
                          ('r2', '@dptr'): [82, 1],
                          ('r2', 'a'): [50, 1],
                          ('r2', 'dph'): [84, 1],
@@ -170,7 +163,7 @@
                          ('r2', 'r3'): [83, 1],
                          ('r2', 'sph'): [86, 1],
                          ('r2', 'spl'): [87, 1],
-                         ('r3', '#data8'): [43, 2],
+                         ('r3', 'data'): [43, 2],
                          ('r3', '@dptr'): [91, 1],
                          ('r3', 'a'): [51, 1],
                          ('r3', 'dph'): [92, 1],
@@ -180,9 +173,9 @@
                          ('r3', 'r2'): [90, 1],
                          ('r3', 'sph'): [94, 1],
                          ('r3', 'spl'): [95, 1],
-                         ('sp', '#data16'): [22, 3],
+                         ('sp', 'data'): [22, 3],
                          ('sp', 'dptr'): [20, 1],
-                         ('sph', '#data8'): [46, 2],
+                         ('sph', 'data'): [46, 2],
                          ('sph', '@dptr'): [118, 1],
                          ('sph', 'a'): [54, 1],
                          ('sph', 'dph'): [116, 1],
@@ -192,7 +185,7 @@
                          ('sph', 'r2'): [114, 1],
                          ('sph', 'r3'): [115, 1],
                          ('sph', 'spl'): [119, 1],
-                         ('spl', '#data8'): [47, 2],
+                         ('spl', 'data'): [47, 2],
                          ('spl', '@dptr'): [127, 1],
                          ('spl', 'a'): [55, 1],
                          ('spl', 'dph'): [124, 1],
@@ -204,7 +197,7 @@
                          ('spl', 'sph'): [126, 1]},
              'mul': {    ('r0', 'r1'): [248, 1]},
              'nop': {    ('',): [0, 1]},
-             'orl': {    ('a', '#data8'): [142, 2],
+             'orl': {    ('a', 'data'): [142, 2],
                          ('a', '@dptr'): [143, 1],
                          ('a', 'dph'): [140, 1],
                          ('a', 'dpl'): [141, 1],
@@ -212,9 +205,9 @@
                          ('a', 'r1'): [137, 1],
                          ('a', 'r2'): [138, 1],
                          ('a', 'r3'): [139, 1]},
-             'out': {    ('port_addr', 'a'): [253, 2]},
-             'pcall': {    ('addr16',): [207, 2], ('label',): [200, 2]},
-             'pjmp': {    ('addr16',): [199, 2], ('label',): [192, 2]},
+             'out': {    ('addr', 'a'): [253, 2]},
+             'pcall': {    ('addr',): [207, 2], ('label',): [200, 2]},
+             'pjmp': {    ('addr',): [199, 2], ('label',): [192, 2]},
              'pop': {    ('a',): [246, 1],
                          ('dph',): [244, 1],
                          ('dpl',): [245, 1],
@@ -231,7 +224,6 @@
                           ('r1',): [233, 1],
                           ('r2',): [234, 1],
                           ('r3',): [235, 1]},
-             'reserved': {    ('',): [251, 1]},
              'ret': {    ('',): [218, 1]},
              'reti': {    ('',): [219, 1]},
              'rl': {    ('a',): [152, 1]},
@@ -241,7 +233,7 @@
              'set': {    ('bs',): [10, 1], ('c',): [8, 1], ('ie',): [12, 1]},
              'sfa': {    ('',): [17, 1]},
              'sjmp': {    ('label',): [220, 2], ('rel8',): [220, 2]},
-             'sub': {    ('a', '#data8'): [182, 2],
+             'sub': {    ('a', 'data'): [182, 2],
                          ('a', '@dptr'): [183, 1],
                          ('a', 'dph'): [180, 1],
                          ('a', 'dpl'): [181, 1],
@@ -249,7 +241,7 @@
                          ('a', 'r1'): [177, 1],
                          ('a', 'r2'): [178, 1],
                          ('a', 'r3'): [179, 1]},
-             'subb': {    ('a', '#data8'): [190, 2],
+             'subb': {    ('a', 'data'): [190, 2],
                           ('a', '@dptr'): [191, 1],
                           ('a', 'dph'): [188, 1],
                           ('a', 'dpl'): [189, 1],
@@ -258,7 +250,7 @@
                           ('a', 'r2'): [186, 1],
                           ('a', 'r3'): [187, 1]},
              'xcsd': {    ('',): [16, 1]},
-             'xrl': {    ('a', '#data8'): [150, 2],
+             'xrl': {    ('a', 'data'): [150, 2],
                          ('a', '@dptr'): [151, 1],
                          ('a', 'dph'): [148, 1],
                          ('a', 'dpl'): [149, 1],
@@ -267,12 +259,37 @@
                          ('a', 'r2'): [146, 1],
                          ('a', 'r3'): [147, 1]}}
 
+# take integer representation as string and return int:
+# supports:
+#   decimal (no prefix)
+#   octal (0)
+#   hex (0x) 
+#   binary (0b)
+# return 'NaN' if it is none of the above
+def stoi(s):
+    try:
+        if s[0] == '0':
+            return int(s, 8)
+        else:
+            raise 
+    except:
+        try:
+           return int(s)
+        except ValueError:
+            try:
+                return int(s, 16)
+            except ValueError:
+                try:
+                    return int(s, 2)
+                except ValueError:
+                    return 'NaN'
 
-# take a list of arguments
+
+# take a mnemonic and its arguments
 # identify constant data:
-#       pack that data into a bit string
-# return hashable data type symbols and data
-def tokenize(args):
+#   pack that data into a bit string
+# return hashable format symbol and data
+def tokenize(mne, args):
     sym = []
     data = ''
 
@@ -284,97 +301,69 @@
         if a.lower() in vargs:
             sym.append(a.lower())
             continue
-        elif a[0] in TYPES:
+        elif a[0] in ids:
             arg_type = a[0]
             a = a[1:]
         
         # evaluate inline calculations
         if (a[0] == '(') and (a[-1] == ')'):
-            a = num_string(eval(a[1:-1]))
+            a = str(eval(a[1:-1]))
         # evaluate strings
         elif (a[0] == '\'') and (a[-1] == '\''):
             if len(a) == 3:
-                a = num_string(struct.unpack('>B', a[1:-1])[0])
+                a = str(struct.unpack('>B', a[1:-1])[0])
             elif len(a) == 4:
-                a = num_string(struct.unpack('>H', a[1:-1])[0])
+                a = str(struct.unpack('>H', a[1:-1])[0])
             else:
                 data = a[1:-1]
                 continue
-                    
-        # check if negative and remove sign if needed
-        if a[:2] == '-' + PREFIX:
+
+        # non-numbers must be a label or a source code error
+        if stoi(a) == 'NaN':
+            if arg_type == '@':
+                sym.append('@label')
+                continue
+            else:
+                sym.append('label')
+                continue   
+        # check if numbers are negative and remove sign if needed
+        elif a[0] == '-':
             is_neg = 1
             a = a[1:]
-        elif a[:2] == PREFIX:
+        else:
             is_neg = 0
-        # anything else must be a label or a source code error
-        # labeled pointer uglyness
-        elif (arg_type == '@') & (a[:2] != PREFIX):
-            sym.append('@label')
-            continue
-        else:
-            sym.append('label')
-            continue
-
+        
+        # absolute addresses and immediate ints
+        # are always 16 bits. second_pass() checks
+        # if values are too long for instruction.
         # addresses
         if arg_type == 'addr':
-            # 8 bit relative addresses (always signed)
-            if len(a) <= 4:
+            if mne in rinst:
                 sym.append('rel8')
-                if is_neg:
-                    val = string_num('-' + a)
-                else:
-                    val = string_num(a)
-                data = data + struct.pack('>b', val)
-                continue
-            # 16 bit absolute addresses (never signed)
-            elif len(a) <= 6:
-                sym.append('addr16')
-                val = string_num(a)
-                data = data + struct.pack('>H', val)
-                continue
+                fmt = '>b'
+            else:
+                sym.append('addr')
+                fmt = '>H'
+            val = stoi(a)
+            data = data + struct.pack(fmt, val)
+            continue
         # immediate ints (signed when negative)
         elif arg_type == '#':
-            # 8 bit ints
-            if len(a) <= 4:
-                sym.append('#data8')
-                if is_neg:
-                    val = string_num('-' + a)
-                    fmt = '>b'
-                else:
-                    val = string_num(a)
-                    fmt = '>B'
-                data = data + struct.pack(fmt, val)
-                continue
-            # 16 bit ints
-            elif len(a) <= 6:
-                sym.append('#data16')
-                if is_neg:
-                    val = string_num('-' + a)
-                    fmt = '>h'
-                else:
-                    val = string_num(a)
-                    fmt = '>H'
-                data = data + struct.pack(fmt, val)
-                continue
+            sym.append('data')
+            if is_neg:
+                val = stoi('-' + a)
+                fmt = '>h'
+            else:
+                val = stoi(a)
+                fmt = '>H'
+            data = data + struct.pack(fmt, val)
+            continue
         # pointers
         elif arg_type == '@':
-            sym.append('@addr16')
-            val = string_num(a)
+            sym.append('@addr')
+            val = stoi(a)
             data = data + struct.pack('>H', val)
             continue
-        # ports
-        elif arg_type == '$':
-            sym.append('port_addr')
-            val = string_num(a)
-            data = data + struct.pack('>B', val)
-            continue
-        # vectors
-        elif arg_type == '*':
-            sym.append('vect8')
-            val = string_num(a)
-            data = data + struct.pack('>B', val)
-            continue
 
     return tuple(sym), data
 
Binary file assembler/language.pyc has changed
Binary file emulator/a.out has changed
--- a/emulator/emu.c	Tue Feb 25 02:05:01 2014 +0000
+++ b/emulator/emu.c	Fri Feb 28 17:21:11 2014 +0000
@@ -7,7 +7,7 @@
 
 /* instruction table */
 void (*iset[256])(void) = {
-    NOP, RESERVED, RESERVED, RESERVED, RESERVED, RESERVED, RESERVED, RESERVED,
+    NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP,
     SET, CLR, SET, CLR, SET, CLR, CPL, CPL,
     XCSD, SFA, LAF, MOV, MOV, MOV, MOV, MOV,
     MOV, MOV, MOV, MOV, MOV, MOV, MOV, MOV,
@@ -38,10 +38,9 @@
     JZ, JNZ, JC, JNC, JPO, JPE, JS, JNS,
     PUSH, PUSH, PUSH, PUSH, PUSH, PUSH, PUSH, PUSH,
     POP, POP, POP, POP, POP, POP, POP, POP,
-    MUL, DIV, DA, RESERVED, IN, OUT, INT, HLT
+    MUL, DIV, DA, NOP, IN, OUT, INT, HLT
 };
 
-
 void
 boot(void) {
     /* set everything to zero */
@@ -70,9 +69,6 @@
     boot();
     for (;;) { 
         step();
-        //debug
-        //putchar(registers.flags);
-        putchar((BYTE)(registers.PC & 0x0F)); 
     }
 }
 
Binary file tests/.full.asm.swp has changed
Binary file tests/a.out has changed
--- a/tests/full.asm	Tue Feb 25 02:05:01 2014 +0000
+++ b/tests/full.asm	Fri Feb 28 17:21:11 2014 +0000
@@ -1,9 +1,9 @@
 zero:
-db 'ABCDEF', 0x0, (0x1 * 2 + zero)
-data8 EQU (0x42 + zero)
+db 'DB', 0104, 66, 0x4442, (0 + zero), zero
+data8 EQU 0x42
 data16 EQU ((0x2BAB * 2) - data8 + 0x01 + data8)
-port_addr EQU $0x50
-vect8 EQU *0x56
+port_addr EQU 'P'
+vect8 EQU 'V'
 
 NOP
 SET C
@@ -21,7 +21,7 @@
 MOV SP, DPTR
 MOV A, #'B'
 MOV SP, #'WW'
-MOV DPTR, #((0x2BAB * 2) - data8 + 0x01 + data8)
+MOV DPTR, #data16
 MOV A, addr16
 MOV addr16, A
 MOV A, @A+DPTR
@@ -249,13 +249,11 @@
 MUL R0, R1
 DIV R0, R1
 DA A
-reserved
 IN A, port_addr
 OUT port_addr, A
 INT vect8
 HLT
 
-ORG 0x0152
+ORG 0x0161
 rel8:
 addr16:
-NOP
Binary file tests/full.bin has changed
--- a/tests/ljmp.asm	Tue Feb 25 02:05:01 2014 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-LJMP label
-CLR C
-label:
-SET C
-label2:
-SJMP label2
-
--- a/tests/test.asm	Tue Feb 25 02:05:01 2014 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-org 0x0010
-db 0x42 0x57 0x42 0x57 0x00 0x00 0x00
Binary file tests/test.bin has changed