diff assembler/language.py @ 1:82e82dda442b

alpha version of assembler 'finished' some more documentation and test files added
author james <jb302@eecs.qmul.ac.uk>
date Fri, 06 Dec 2013 23:39:54 +0000
parents
children 81dd03d17c22
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/assembler/language.py	Fri Dec 06 23:39:54 2013 +0000
@@ -0,0 +1,304 @@
+#!/usr/bin/env python
+# language.py
+import struct
+
+# Source-code representation of numbers, kept here so it is easy to change.
+BASE = 16
+# PREFIX must be exactly 2 characters, otherwise tokenize() will break
+PREFIX = '0x'
+def num_string(num):
+    """Return num formatted with the built-in hex()."""
+    return hex(num)
+
+# iset: the instruction set, stored as a dictionary of dictionaries.
+# For every mnemonic there is an inner dictionary that maps each accepted
+# tuple of argument types ('symbols') to the corresponding op code.
+# Entries keyed by ('',) (e.g. nop, ret) presumably take no arguments.
+iset =  {'add': {('a', '#data8'): 166,
+                 ('a', '@dptr'): 167,
+                 ('a', 'dph'): 164,
+                 ('a', 'dpl'): 165,
+                 ('a', 'r0'): 160,
+                 ('a', 'r1'): 161,
+                 ('a', 'r2'): 162,
+                 ('a', 'r3'): 163},
+         'addc': {('a', '#data8'): 174,
+                  ('a', '@dptr'): 175,
+                  ('a', 'dph'): 172,
+                  ('a', 'dpl'): 173,
+                  ('a', 'r0'): 168,
+                  ('a', 'r1'): 169,
+                  ('a', 'r2'): 170,
+                  ('a', 'r3'): 171},
+         'anl': {('a', '#data8'): 134,
+                 ('a', '@dptr'): 135,
+                 ('a', 'dph'): 132,
+                 ('a', 'dpl'): 133,
+                 ('a', 'r0'): 128,
+                 ('a', 'r1'): 129,
+                 ('a', 'r2'): 130,
+                 ('a', 'r3'): 131},
+         'cjne': {('a', '#data8', 'rel8'): 223,
+                  ('r0', '#data8', 'rel8'): 212,
+                  ('r1', '#data8', 'rel8'): 213,
+                  ('r2', '#data8', 'rel8'): 214,
+                  ('r3', '#data8', 'rel8'): 215},
+         'clr': {('bs',): 11, ('c',): 9, ('ie',): 13},
+         'cpl': {('a',): 15, ('c',): 14},
+         'da': {('a',): 250},
+         'dec': {('a',): 159, ('dptr',): 157},
+         'div': {('r0', 'r1'): 249},
+         'djnz': {('r0', 'rel8'): 208,
+                  ('r1', 'rel8'): 209,
+                  ('r2', 'rel8'): 210,
+                  ('r3', 'rel8'): 211},
+         'hlt': {('',): 255},
+         'in': {('a', 'port_addr'): 252},
+         'inc': {('a',): 158, ('dptr',): 156},
+         'int': {('vect8',): 254},
+         'jc': {('rel8',): 226},
+         'jmp': {('@a+dptr',): 221, ('@dptr',): 222},
+         'jnc': {('rel8',): 227},
+         'jns': {('rel8',): 231},
+         'jnz': {('rel8',): 225},
+         'jpe': {('rel8',): 229},
+         'jpo': {('rel8',): 228},
+         'js': {('rel8',): 230},
+         'jz': {('rel8',): 224},
+         'laf': {('',): 18},
+         'lcall': {('addr16',): 217},
+         'ljmp': {('addr16',): 216},
+         'mov': {('@addr16', 'a'): 29,
+                 ('@dptr', 'a'): 31,
+                 ('@dptr', 'dph'): 36,
+                 ('@dptr', 'dpl'): 37,
+                 ('@dptr', 'r0'): 32,
+                 ('@dptr', 'r1'): 33,
+                 ('@dptr', 'r2'): 34,
+                 ('@dptr', 'r3'): 35,
+                 ('@dptr', 'sph'): 38,
+                 ('@dptr', 'spl'): 39,
+                 ('a', '#data8'): 21,
+                 ('a', '@a+dptr'): 26,
+                 ('a', '@a+pc'): 27,
+                 ('a', '@addr16'): 28,
+                 ('a', '@dptr'): 30,
+                 ('a', 'addr16'): 24,
+                 ('a', 'dph'): 60,
+                 ('a', 'dpl'): 61,
+                 ('a', 'r0'): 56,
+                 ('a', 'r1'): 57,
+                 ('a', 'r2'): 58,
+                 ('a', 'r3'): 59,
+                 ('a', 'sph'): 62,
+                 ('a', 'spl'): 63,
+                 ('addr16', 'a'): 25,
+                 ('dph', '#data8'): 44,
+                 ('dph', '@dptr'): 100,
+                 ('dph', 'a'): 52,
+                 ('dph', 'dpl'): 101,
+                 ('dph', 'r0'): 96,
+                 ('dph', 'r1'): 97,
+                 ('dph', 'r2'): 98,
+                 ('dph', 'r3'): 99,
+                 ('dph', 'sph'): 102,
+                 ('dph', 'spl'): 103,
+                 ('dpl', '#data8'): 45,
+                 ('dpl', '@dptr'): 109,
+                 ('dpl', 'a'): 53,
+                 ('dpl', 'dph'): 108,
+                 ('dpl', 'r0'): 104,
+                 ('dpl', 'r1'): 105,
+                 ('dpl', 'r2'): 106,
+                 ('dpl', 'r3'): 107,
+                 ('dpl', 'sph'): 110,
+                 ('dpl', 'spl'): 111,
+                 ('dptr', '#data16'): 23,
+                 ('dptr', 'sp'): 19,
+                 ('r0', '#data8'): 40,
+                 ('r0', '@dptr'): 64,
+                 ('r0', 'a'): 48,
+                 ('r0', 'dph'): 68,
+                 ('r0', 'dpl'): 69,
+                 ('r0', 'r1'): 65,
+                 ('r0', 'r2'): 66,
+                 ('r0', 'r3'): 67,
+                 ('r0', 'sph'): 70,
+                 ('r0', 'spl'): 71,
+                 ('r1', '#data8'): 41,
+                 ('r1', '@dptr'): 73,
+                 ('r1', 'a'): 49,
+                 ('r1', 'dph'): 76,
+                 ('r1', 'dpl'): 77,
+                 ('r1', 'r0'): 72,
+                 ('r1', 'r2'): 74,
+                 ('r1', 'r3'): 75,
+                 ('r1', 'sph'): 78,
+                 ('r1', 'spl'): 79,
+                 ('r2', '#data8'): 42,
+                 ('r2', '@dptr'): 82,
+                 ('r2', 'a'): 50,
+                 ('r2', 'dph'): 84,
+                 ('r2', 'dpl'): 85,
+                 ('r2', 'r0'): 80,
+                 ('r2', 'r1'): 81,
+                 ('r2', 'r3'): 83,
+                 ('r2', 'sph'): 86,
+                 ('r2', 'spl'): 87,
+                 ('r3', '#data8'): 43,
+                 ('r3', '@dptr'): 91,
+                 ('r3', 'a'): 51,
+                 ('r3', 'dph'): 92,
+                 ('r3', 'dpl'): 93,
+                 ('r3', 'r0'): 88,
+                 ('r3', 'r1'): 89,
+                 ('r3', 'r2'): 90,
+                 ('r3', 'sph'): 94,
+                 ('r3', 'spl'): 95,
+                 ('sp', '#data16'): 22,
+                 ('sp', 'dptr'): 20,
+                 ('sph', '#data8'): 46,
+                 ('sph', '@dptr'): 118,
+                 ('sph', 'a'): 54,
+                 ('sph', 'dph'): 116,
+                 ('sph', 'dpl'): 117,
+                 ('sph', 'r0'): 112,
+                 ('sph', 'r1'): 113,
+                 ('sph', 'r2'): 114,
+                 ('sph', 'r3'): 115,
+                 ('sph', 'spl'): 119,
+                 ('spl', '#data8'): 47,
+                 ('spl', '@dptr'): 127,
+                 ('spl', 'a'): 55,
+                 ('spl', 'dph'): 124,
+                 ('spl', 'dpl'): 125,
+                 ('spl', 'r0'): 120,
+                 ('spl', 'r1'): 121,
+                 ('spl', 'r2'): 122,
+                 ('spl', 'r3'): 123,
+                 ('spl', 'sph'): 126},
+         'mul': {('r0', 'r1'): 248},
+         'nop': {('',): 0},
+         'orl': {('a', '#data8'): 142,
+                 ('a', '@dptr'): 143,
+                 ('a', 'dph'): 140,
+                 ('a', 'dpl'): 141,
+                 ('a', 'r0'): 136,
+                 ('a', 'r1'): 137,
+                 ('a', 'r2'): 138,
+                 ('a', 'r3'): 139},
+         'out': {('port_addr', 'a'): 253},
+         'pcall': {('addr11',): 207},
+         'pjmp': {('addr11',): 199},
+         'pop': {('a',): 246,
+                 ('dph',): 244,
+                 ('dpl',): 245,
+                 ('flags',): 247,
+                 ('r0',): 240,
+                 ('r1',): 241,
+                 ('r2',): 242,
+                 ('r3',): 243},
+         'push': {('a',): 238,
+                  ('dph',): 236,
+                  ('dpl',): 237,
+                  ('flags',): 239,
+                  ('r0',): 232,
+                  ('r1',): 233,
+                  ('r2',): 234,
+                  ('r3',): 235},
+         'reserved': {('',): 251},
+         'ret': {('',): 218},
+         'reti': {('',): 219},
+         'rl': {('a',): 152},
+         'rlc': {('a',): 153},
+         'rr': {('a',): 154},
+         'rrc': {('a',): 155},
+         'set': {('bs',): 10, ('c',): 8, ('ie',): 12},
+         'sfa': {('',): 17},
+         'sjmp': {('',): 220},  # NOTE(review): keyed by ('',) although every other relative jump takes 'rel8' - confirm
+         'sub': {('a', '#data8'): 182,
+                 ('a', '@dptr'): 183,
+                 ('a', 'dph'): 180,
+                 ('a', 'dpl'): 181,
+                 ('a', 'r0'): 176,
+                 ('a', 'r1'): 177,
+                 ('a', 'r2'): 178,
+                 ('a', 'r3'): 179},
+         'subb': {('a', '#data8'): 190,
+                  ('a', '@dptr'): 191,
+                  ('a', 'dph'): 188,
+                  ('a', 'dpl'): 189,
+                  ('a', 'r0'): 184,
+                  ('a', 'r1'): 185,
+                  ('a', 'r2'): 186,
+                  ('a', 'r3'): 187},
+         'xcsd': {('',): 16},
+         'xrl': {('a', '#data8'): 150,
+                 ('a', '@dptr'): 151,
+                 ('a', 'dph'): 148,
+                 ('a', 'dpl'): 149,
+                 ('a', 'r0'): 144,
+                 ('a', 'r1'): 145,
+                 ('a', 'r2'): 146,
+                 ('a', 'r3'): 147}}
+
+# Take a list of argument strings, identify constant data (immediates,
+# addresses and pointers), pack that data into a byte string and return
+# the data type symbols together with the packed data.
+def tokenize(args):
+    """Classify each argument and pack any numeric operand it carries.
+
+    Recognised numeric forms (digits are parsed with int(..., BASE)):
+      '#' + PREFIX + digits -> '#data8' (<= 2 digits), '#data16' (<= 4)
+      PREFIX + digits       -> 'rel8' (<= 2 digits), 'addr16' (<= 4)
+      '@' + PREFIX + digits -> '@addr16' (always packed as 16 bits)
+    Longer immediates/addresses and every other argument are returned
+    unchanged as their own symbol and contribute no data.
+
+    Returns (sym, data): the symbols in argument order, and the operand
+    values packed big-endian in argument order as one byte string.
+    Raises ValueError for digits that are missing or invalid in BASE and
+    struct.error when a pointer value does not fit in 16 bits.
+    """
+    sym = []
+    # struct.pack() returns bytes on Python 3, so the pieces are collected
+    # and joined with b'' (which is the same as '' on Python 2)
+    packed = []
+
+    for a in args:
+        # immediate ints: '#' + PREFIX + digits
+        if a[:3] == '#' + PREFIX:
+            digits = len(a[3:])
+            if digits <= 2:
+                sym.append('#data8')
+                # big-endian byte
+                packed.append(struct.pack('>B', int(a[1:], BASE)))
+            elif digits <= 4:
+                sym.append('#data16')
+                # big-endian short
+                packed.append(struct.pack('>H', int(a[1:], BASE)))
+            else:
+                # too wide: hand it back as an unknown symbol
+                sym.append(a)
+        # addresses: PREFIX + digits
+        elif a[:2] == PREFIX:
+            digits = len(a[2:])
+            if digits <= 2:
+                sym.append('rel8')
+                packed.append(struct.pack('>B', int(a, BASE)))
+            elif digits <= 4:
+                sym.append('addr16')
+                packed.append(struct.pack('>H', int(a, BASE)))
+            else:
+                # too wide: hand it back as an unknown symbol
+                sym.append(a)
+        # pointers: '@' + PREFIX + digits
+        elif a[:3] == '@' + PREFIX:
+            sym.append('@addr16')
+            packed.append(struct.pack('>H', int(a[1:], BASE)))
+        # return unknown symbols so language can be extended more easily
+        else:
+            sym.append(a)
+
+    return sym, b''.join(packed)
+