view docs/lyx/asm.lyx @ 42:792da050d8c4 tip

more dox
author james <jb302@eecs.qmul.ac.uk>
date Tue, 22 Apr 2014 14:25:14 +0100
parents a9bf262f557b
children
line wrap: on
line source
#LyX 2.0 created this file. For more info see http://www.lyx.org/
\lyxformat 413
\begin_document
\begin_header
\textclass article
\use_default_options true
\maintain_unincluded_children false
\language english
\language_package default
\inputencoding auto
\fontencoding global
\font_roman default
\font_sans default
\font_typewriter default
\font_default_family default
\use_non_tex_fonts false
\font_sc false
\font_osf false
\font_sf_scale 100
\font_tt_scale 100

\graphics default
\default_output_format default
\output_sync 0
\bibtex_command default
\index_command default
\paperfontsize default
\use_hyperref false
\papersize default
\use_geometry false
\use_amsmath 1
\use_esint 1
\use_mhchem 1
\use_mathdots 1
\cite_engine basic
\use_bibtopic false
\use_indices false
\paperorientation portrait
\suppress_date false
\use_refstyle 1
\index Index
\shortcut idx
\color #008000
\end_index
\secnumdepth 3
\tocdepth 3
\paragraph_separation indent
\paragraph_indentation default
\quotes_language english
\papercolumns 1
\papersides 1
\paperpagestyle default
\tracking_changes false
\output_changes false
\html_math_output 0
\html_css_as_file 0
\html_be_strict false
\end_header

\begin_body

\begin_layout Part
The Assembler
\end_layout

\begin_layout Section
Assembler Design
\end_layout

\begin_layout Standard
The assembler will take assembly source code as input and produce two output
 files: a binary executable and a debug file.
\end_layout

\begin_layout Standard
\begin_inset ERT
status open

\begin_layout Plain Layout


\backslash
bigskip
\end_layout

\end_inset


\end_layout

\begin_layout Standard
\begin_inset Graphics
	filename /home/jmz/qm/ede/docs/img/asm/assembler.svg
	display false

\end_inset


\end_layout

\begin_layout Standard
\begin_inset ERT
status open

\begin_layout Plain Layout


\backslash
bigskip
\end_layout

\end_inset


\end_layout

\begin_layout Standard
The binary executable will be a simple raw binary file in big-endian format.
 The debug file will be a tabulated text file with the following format:
\end_layout

\begin_layout Standard
\begin_inset ERT
status open

\begin_layout Plain Layout


\backslash
bigskip
\end_layout

\end_inset


\end_layout

\begin_layout Standard
\begin_inset Tabular
<lyxtabular version="3" rows="1" columns="5">
<features tabularvalignment="middle">
<column alignment="center" valignment="top" width="0">
<column alignment="center" valignment="top" width="0">
<column alignment="center" valignment="top" width="0">
<column alignment="center" valignment="top" width="0">
<column alignment="center" valignment="top" width="0">
<row>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
\begin_inset Text

\begin_layout Plain Layout
PC
\end_layout

\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
\begin_inset Text

\begin_layout Plain Layout
OPCODE
\end_layout

\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
\begin_inset Text

\begin_layout Plain Layout
DATA
\end_layout

\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
\begin_inset Text

\begin_layout Plain Layout
MNEMONIC
\end_layout

\end_inset
</cell>
<cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
\begin_inset Text

\begin_layout Plain Layout
ARGUMENTS
\end_layout

\end_inset
</cell>
</row>
</lyxtabular>

\end_inset


\end_layout

\begin_layout Standard
\begin_inset ERT
status open

\begin_layout Plain Layout


\backslash
bigskip
\end_layout

\end_inset


\end_layout

\begin_layout Standard
This file will essentially be the plain text source side by side with the
 binary machine code and address for each instruction (represented as hexadecima
l for greater readability).
 This file will provide the information needed for setting breakpoints
 with the debugger later; it will also be generally useful for debugging
 programs written in the assembler, and the assembler itself.
\end_layout

\begin_layout Standard
\begin_inset Newpage pagebreak
\end_inset


\end_layout

\begin_layout Subsection
Language design in BNF syntax
\end_layout

\begin_layout Standard
The ELB816 specification [1] defines an assembly language in Backus Naur
 form, however the assembler designed for this project differs slightly
 (more details in Assembler Implementation below).
 Here is the language definition for this assembler:
\end_layout

\begin_layout Standard
\begin_inset ERT
status open

\begin_layout Plain Layout


\backslash
bigskip
\end_layout

\end_inset


\end_layout

\begin_layout Standard
\begin_inset listings
lstparams "basicstyle={\ttfamily},captionpos=b,keywordstyle={\color{blue}},tabsize=4"
inline false
status open

\begin_layout Plain Layout

<assembly_code> ::= <line> <assembly_code> | <line> <EOF> 
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

<EOF> ::= <end of file>
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

<line> ::= [<statement> [";"<comment>]] <EOL>
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

<EOL> ::= <end of line character>
\end_layout

\begin_layout Plain Layout

  
\end_layout

\begin_layout Plain Layout

<statement> ::= [<label> ":"] <mnemonic> [<arguments>]
\end_layout

\begin_layout Plain Layout

              | [<label>] <directive> [<arguments>]
\end_layout

\begin_layout Plain Layout

              | <label> ":"
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

<character> ::= <any ASCII character>
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

<mnemonic> ::= "MOV" | "NOP" | "XCSD" | "SFA" | "LAF" | "ANL" 
\end_layout

\begin_layout Plain Layout

             | "ORL" | "XRL" | "RL" | "RLC" | "RR" | "RRC"             
                                                                       
\end_layout

\begin_layout Plain Layout

             | "INC" | "DEC" | "SET" | "CLR" | "CPL" | "ADD" 
\end_layout

\begin_layout Plain Layout

             | "ADDC" | "SUB" | "SUBB" | "PJUMP"
\end_layout

\begin_layout Plain Layout

             | "PCALL" | "LJMP" | "LCALL" | "DJNZ" | "CJNE"
\end_layout

\begin_layout Plain Layout

             | "RET" | "RETI" | "SJMP" | "JMP" | "JZ" | "JNZ"
\end_layout

\begin_layout Plain Layout

             | "JC" | "JNC" | "JPO" | "JPE" | "JS" | "JNS"
\end_layout

\begin_layout Plain Layout

             | "PUSH" | "POP" | "IN" | "OUT" | "HLT"
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

<directive> ::= "ORG" | "EQU" | "DB" | "DS" 
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

<arguments> ::= <argument> | <arguments> "," <argument>
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

<argument> ::= <register> | <reg_pair> | ["@"] <address>
\end_layout

\begin_layout Plain Layout

             | <flag> | ["#"] <aggregate>
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

<register> ::=  "A" | "FLAGS" | "R0" | "R1" | "R2" | "R3"
\end_layout

\begin_layout Plain Layout

              | "DPH" | "DPL" | "SPH" | "SPL"
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

<reg_pair> ::= "DPTR" | "SP" | "PC"
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

<flag> ::= "C" | "IE" | "BS" 
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

<aggregate> ::= ["("] <python arithmetic> [")"] | <python integer>
\end_layout

\end_inset


\end_layout

\begin_layout Section
Assembler Implementation
\end_layout

\begin_layout Standard
The assembler is written in pure Python 2 using only the standard library.
 It assembles the assembly language described in the ELB816 specification
 [1] with a few minor differences.
 These differences are:
\end_layout

\begin_layout Itemize
In-line arithmetic must be wrapped in curved brackets, i.e.
 start with '(' and end with ')'.
 This is a limitation of the design of the program and to change it would
 require a large amount of code to be re-written.
\end_layout

\begin_layout Itemize
The only directives that have been implemented are 
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

ORG
\end_layout

\end_inset

, 
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

EQU
\end_layout

\end_inset

, 
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

DB
\end_layout

\end_inset

 and 
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

DS
\end_layout

\end_inset

.
 The other directives listed in the specification have not been implemented,
 but their omission is only due to time constraints and they could easily
 be implemented in a later version.
\end_layout

\begin_layout Itemize
Macros have not been implemented, also due to time constraints.
\end_layout

\begin_layout Standard
The assembler consists of two files: 
\end_layout

\begin_layout Itemize
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

language.py
\end_layout

\end_inset

 which contains the language definition in an index and some functions to
 help encode instructions.
\end_layout

\begin_layout Itemize
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

assembler.py
\end_layout

\end_inset

 which contains the first and second pass functions and handles opening
 source files and writing binary files.
\end_layout

\begin_layout Standard
The following sections detail the design and behavior of the assembler.
 However it must be noted that these are abstract and high level descriptions
 that do not fully explain minor routines, but give an overview of the entire
 process.
 The full commented source code is provided with the Supporting Material
 and should be referenced for a deeper understanding of the program's operation.
 
\end_layout

\begin_layout Standard
\begin_inset Newpage pagebreak
\end_inset


\end_layout

\begin_layout Subsection
Data Structures
\end_layout

\begin_layout Itemize
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

reserved arguments
\end_layout

\end_inset


\end_layout

\begin_layout Standard
This structure contains a list of string representations of the reserved
 word arguments for the instruction set.
 These all equate to registers or register pointers.
 The full list is as follows:
\end_layout

\begin_layout Standard
\begin_inset listings
lstparams "basicstyle={\ttfamily},frame=tb,framexbottommargin=1em,framextopmargin=1em,keywordstyle={\color{blue}},tabsize=4"
inline false
status open

\begin_layout Plain Layout

a, c, bs, ie, flags, 
\end_layout

\begin_layout Plain Layout

r0, r1, r2, r3, 
\end_layout

\begin_layout Plain Layout

dptr, dpl, dph,  
\end_layout

\begin_layout Plain Layout

sp, sph, spl,
\end_layout

\begin_layout Plain Layout

@a+pc, @a+dptr, @dptr
\end_layout

\end_inset


\end_layout

\begin_layout Itemize
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

relative instructions
\end_layout

\end_inset

 
\end_layout

\begin_layout Standard
This structure contains a list of string representations of the mnemonics
 of instructions that use relative addressing.
 The full list is as follows:
\end_layout

\begin_layout Standard
\begin_inset listings
lstparams "basicstyle={\ttfamily},captionpos=b,frame=tb,framexbottommargin=1em,framextopmargin=1em,keywordstyle={\color{blue}},tabsize=4"
inline false
status open

\begin_layout Plain Layout

djnz, cjne, sjmp, jz,
\end_layout

\begin_layout Plain Layout

jnz, jc, jnc, jpo, 
\end_layout

\begin_layout Plain Layout

jpe, js, jns
\end_layout

\end_inset


\end_layout

\begin_layout Itemize
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

instruction index
\end_layout

\end_inset

 
\end_layout

\begin_layout Standard
This structure contains an index of all possible instructions in the instruction
 set, along with the corresponding opcode and instruction width.
 This is implemented using a combination of Python's dictionary, tuple and
 list objects.
 Its structure is demonstrated below:
\end_layout

\begin_layout Standard
\begin_inset listings
lstparams "basicstyle={\ttfamily},captionpos=b,frame=tb,framexbottommargin=1em,framextopmargin=1em,keywordstyle={\color{blue}},tabsize=4"
inline false
status open

\begin_layout Plain Layout

mnemonic: (arg type, arg type, ...): [opcode, width]
\end_layout

\end_inset


\end_layout

\begin_layout Standard
Each mnemonic has an entry in the parent index which returns another index
 of possible argument formats for that mnemonic with their corresponding
 opcode and length.
 Argument types can either be one of the reserved arguments or one of
 the following values: 
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

address
\end_layout

\end_inset

, 
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

pointer
\end_layout

\end_inset

, 
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

data
\end_layout

\end_inset

 or 
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

label
\end_layout

\end_inset

.
 Width is represented in number of bytes, i.e.
 
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

width = 3
\end_layout

\end_inset

 means 1 byte of opcode and 2 bytes of arguments.
\end_layout

\begin_layout Itemize
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

label index
\end_layout

\end_inset

 
\end_layout

\begin_layout Standard
This structure is used to store an index of label definitions.
\end_layout

\begin_layout Itemize
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

equate index
\end_layout

\end_inset

 
\end_layout

\begin_layout Standard
This structure is used to store an index of equated strings.
\end_layout

\begin_layout Standard
\begin_inset Newpage newpage
\end_inset


\end_layout

\begin_layout Subsection
Functions
\end_layout

\begin_layout Itemize
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

first_pass(source file)
\end_layout

\end_inset

 
\end_layout

\begin_layout Standard
This function pre-processes a source file and stores it in a format containing
 the necessary data for the 
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

second_pass()
\end_layout

\end_inset

 function to assemble it.
 It processes labels and 
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

EQU
\end_layout

\end_inset

 directives by storing strings and their corresponding values in indexes
 and replacing any subsequent appearances of the string with the value.
 It prepares 
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

ORG
\end_layout

\end_inset

 and 
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

DB
\end_layout

\end_inset

 statements for the 
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

second_pass()
\end_layout

\end_inset

.
 It uses the 
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

tokenize()
\end_layout

\end_inset

 function to determine the argument symbols and operand bit string.
 Finally it uses the 
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

instruction index
\end_layout

\end_inset

 to determine the instruction width.
\end_layout

\begin_layout Itemize
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

second_pass(asm, label index)
\end_layout

\end_inset

 
\end_layout

\begin_layout Standard
This function takes the pre-processed assembly code and 
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

label index
\end_layout

\end_inset

 output by 
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

first_pass()
\end_layout

\end_inset

 as input.
 First it checks for 
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

ORG
\end_layout

\end_inset

 and 
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

DB
\end_layout

\end_inset

 statements and handles them if necessary.
 Then it replaces any labels that were used before they were defined and
 therefore not replaced by 
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

first_pass()
\end_layout

\end_inset

.
 It uses the 
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

instruction index 
\end_layout

\end_inset

 to determine the opcode and the width of the instruction, then it writes
 the opcode and operand to the file.
 If the combined width of the opcode and operand is greater than the instruction
 width the function raises an error.
 
\end_layout

\begin_layout Itemize
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

tokenize(mnemonic, arguments)
\end_layout

\end_inset


\end_layout

\begin_layout Standard
This function processes an instruction in order to produce a hashable symbol
 that represents the format of its arguments.
 This symbol is used to look up opcodes in the 
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

instruction index
\end_layout

\end_inset

.
 It also detects string representations of numbers in the arguments and
 stores a C type struct representation of the operands to be returned along
 with the symbol.
 It does this with the help of the 
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

stoi()
\end_layout

\end_inset

 function and Python's 
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

struct
\end_layout

\end_inset

 module.
\end_layout

\begin_layout Itemize
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

stoi(string)
\end_layout

\end_inset

 
\end_layout

\begin_layout Standard
This function is a general purpose function that is actually used throughout
 the code, although mainly in the 
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

tokenize()
\end_layout

\end_inset

 function.
 It takes a string as an input and tries to convert it to an integer using
 Python's integer representation syntax.
 It can recognize decimal, octal, hexadecimal and binary numbers which are
 denoted with different prefixes.
 If it receives a string it cannot represent as an integer, it returns the
 string 'NaN' (Not a Number).
\end_layout

\begin_layout Standard
\begin_inset ERT
status open

\begin_layout Plain Layout


\backslash
bigskip
\end_layout

\end_inset


\end_layout

\begin_layout Standard
Below is an abstract representation of the major components of the assembler.
 The 
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

first_pass()
\end_layout

\end_inset

 and 
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

second_pass()
\end_layout

\end_inset

 are represented in pseudo-code, whereas 
\begin_inset listings
lstparams "basicstyle={\ttfamily}"
inline true
status open

\begin_layout Plain Layout

tokenize()
\end_layout

\end_inset

 is more easily understood when represented as a flowchart.
 
\end_layout

\begin_layout Standard
\begin_inset Newpage newpage
\end_inset


\end_layout

\begin_layout Subsubsection
first_pass
\end_layout

\begin_layout Standard
\begin_inset listings
lstparams "basicstyle={\small\ttfamily},captionpos=b,frame=tb,framexbottommargin=3em,framextopmargin=3em,keywordstyle={\color{blue}},showstringspaces=false,tabsize=4"
inline false
status open

\begin_layout Plain Layout

first_pass(source file):
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

	address = 0
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

	for statement in source file:
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

		remove comments
\end_layout

\begin_layout Plain Layout

	
\end_layout

\begin_layout Plain Layout

		for word in statement:
\end_layout

\begin_layout Plain Layout

		
\end_layout

\begin_layout Plain Layout

			if word is in equate index:
\end_layout

\begin_layout Plain Layout

				replace word with equated value
\end_layout

\begin_layout Plain Layout

			else if word is in label index:
\end_layout

\begin_layout Plain Layout

				replace word with address at label
\end_layout

\begin_layout Plain Layout

	
\end_layout

\begin_layout Plain Layout

			if first word == 'org':
\end_layout

\begin_layout Plain Layout

				address = second word
\end_layout

\begin_layout Plain Layout

			else if last character of first word == ':':
\end_layout

\begin_layout Plain Layout

				remove ':'
\end_layout

\begin_layout Plain Layout

				add word = address to label index
\end_layout

\begin_layout Plain Layout

				next statement
\end_layout

\begin_layout Plain Layout

			else if second word == 'equ':
\end_layout

\begin_layout Plain Layout

				add first word = third word to equate index
\end_layout

\begin_layout Plain Layout

				next statement
\end_layout

\begin_layout Plain Layout

	
\end_layout

\begin_layout Plain Layout

		mnemonic = first word
\end_layout

\begin_layout Plain Layout

		arguments = [second word ...
 last word]
\end_layout

\begin_layout Plain Layout

	
\end_layout

\begin_layout Plain Layout

		symbol, constant = tokenize(arguments)
\end_layout

\begin_layout Plain Layout

		if mnemonic == 'db':
\end_layout

\begin_layout Plain Layout

			address = address + width of constant
\end_layout

\begin_layout Plain Layout

			next statement
\end_layout

\begin_layout Plain Layout

		if mnemonic == 'ds':
\end_layout

\begin_layout Plain Layout

			address = address + first argument
\end_layout

\begin_layout Plain Layout

			next statement
\end_layout

\begin_layout Plain Layout

		
\end_layout

\begin_layout Plain Layout

        width = instruction index[mnemonic][symbol][width]
\end_layout

\begin_layout Plain Layout

		address = address + width
\end_layout

\begin_layout Plain Layout

	
\end_layout

\begin_layout Plain Layout

		append [mnemonic, arguments, symbol, constant] to asm
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

	return asm, label index
\end_layout

\end_inset


\begin_inset Newpage newpage
\end_inset


\end_layout

\begin_layout Subsubsection
second_pass
\end_layout

\begin_layout Standard
\begin_inset listings
lstparams "basicstyle={\small\ttfamily},breaklines=true,captionpos=b,frame=tb,framexbottommargin=3em,framextopmargin=3em,keywordstyle={\color{blue}},tabsize=4"
inline false
status open

\begin_layout Plain Layout

second_pass(file, asm, label index):
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

	address = 0
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

	for line in asm:
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

		file offset = address
\end_layout

\begin_layout Plain Layout

		
\end_layout

\begin_layout Plain Layout

		mnemonic, arguments, symbol, constant = line
\end_layout

\begin_layout Plain Layout

	
\end_layout

\begin_layout Plain Layout

		if mnemonic == 'org':
\end_layout

\begin_layout Plain Layout

			address = first argument
\end_layout

\begin_layout Plain Layout

			next line
\end_layout

\begin_layout Plain Layout

		else if mnemonic == 'db':
\end_layout

\begin_layout Plain Layout

			write constant to file
\end_layout

\begin_layout Plain Layout

			address = address + width of constant
\end_layout

\begin_layout Plain Layout

			next line
\end_layout

\begin_layout Plain Layout

		if mnemonic == 'ds':
\end_layout

\begin_layout Plain Layout

			address = address + first argument
\end_layout

\begin_layout Plain Layout

            next line
\end_layout

\begin_layout Plain Layout

	
\end_layout

\begin_layout Plain Layout

		for argument in arguments:
\end_layout

\begin_layout Plain Layout

			if argument is a label:
\end_layout

\begin_layout Plain Layout

				replace argument with address at label
\end_layout

\begin_layout Plain Layout

				symbol, data = tokenize(argument)
\end_layout

\begin_layout Plain Layout

				append data to constant
\end_layout

\begin_layout Plain Layout

	
\end_layout

\begin_layout Plain Layout

		op, width = instruction index[mnemonic][symbol]
\end_layout

\begin_layout Plain Layout

	
\end_layout

\begin_layout Plain Layout

		write op to file
\end_layout

\begin_layout Plain Layout

\end_layout

\begin_layout Plain Layout

		if width of constant - width + 1 > 0:
\end_layout

\begin_layout Plain Layout

			raise error
\end_layout

\begin_layout Plain Layout

		else:
\end_layout

\begin_layout Plain Layout

			write constant to file
\end_layout

\begin_layout Plain Layout

			address = address + width
\end_layout

\begin_layout Plain Layout

	
\end_layout

\begin_layout Plain Layout

	return file
\end_layout

\end_inset


\end_layout

\begin_layout Standard
\begin_inset Newpage pagebreak
\end_inset


\end_layout

\begin_layout Subsubsection
tokenize
\end_layout

\begin_layout Standard
\begin_inset Graphics
	filename /home/jmz/qm/ede/docs/img/asm/tokenize.svg
	display false

\end_inset


\end_layout

\begin_layout Section
Assembler Testing
\end_layout

\begin_layout Standard
The assembler was tested by assembling the entire instruction table, while
 at the same time testing labels and directives.
 The test file was assembled and then the binary and .dsm files were inspected
 manually by checking their size and content.
 None of this testing is automated and files are inspected by hand.
 The test files and results can be found in the Supporting Material.
\end_layout

\end_body
\end_document