\documentclass{beamer}

\usepackage[T1]{fontenc}
\usepackage{microtype}
\usepackage{multimedia}
\usepackage{tikz}
\usetikzlibrary{matrix}
\usetikzlibrary{patterns}
\usetikzlibrary{arrows}
\usetikzlibrary{calc}
\usepackage{tools}
%\usepackage{amsfonts,amssymb}

\tikzset{every picture/.style=semithick}

%%% font options:
% atypewri, frankgth, gillsans, centuryg, futura, eurostil
%\usepackage{fourier} % Maths in serif Utopia
\usepackage[sf]{frankgth}
%\usepackage[sf]{optima}

%%% Monospace font
%\usepackage[scaled=0.88]{ulgothic} % 0.88 % suits narrow faces
\renewcommand{\ttdefault}{plg} % Adobe Letter Gothic - suits light medium width face
%\renewcommand{\ttdefault}{pcr} % Courier - suits wide faces
% remember to match up size and weight of monospace font to main font

\newcommand{\mytt}[1]{{\texttt{\footnotesize\fontseries{bx}\selectfont #1}}}

\DeclareMathAlphabet{\mathcal}{OMS}{cmsy}{m}{n}


%%% Black on white
\definecolor{base}{rgb}{0,0,0}
\definecolor{comp}{named}{green}
\definecolor{paper}{named}{white}

\logo{%
\includegraphics[height=16pt]{qmul-black}\hspace*{45pt}%
\raisebox{1pt}{\includegraphics[height=12pt]{c4dm-black-white}}%
}

%%% Red on black
\comment{
\definecolor{base}{rgb}{1,0,0}
\definecolor{comp}{rgb}{0,0.8,0.2}
\definecolor{paper}{named}{black}

\logo{%
\includegraphics[height=16pt]{qmul-red}\hspace*{45pt}%
\raisebox{1pt}{\includegraphics[height=12pt]{c4dm-red-black}}%
}
}


\useinnertheme{default}%circles
\useoutertheme{default}
\usefonttheme[onlymath]{serif}

\setbeamercolor{normal text}{bg=paper,fg=base!90!-paper}
\setbeamercolor{background}{bg=comp!50!paper,fg=comp}
%\setbeamercolor{structure}{fg=base!75!-paper}
\setbeamercolor{structure}{fg=red!50!base}
\setbeamercolor{palette primary}{bg=yellow!50!paper,fg=yellow}
\setbeamercolor{palette secondary}{bg=orange!50!paper,fg=orange}
\setbeamercolor{palette tertiary}{bg=blue!50!paper,fg=blue}
\setbeamercolor{palette quaternary}{bg=green!50!paper,fg=green}
\setbeamercolor{block body}{bg=base!20!paper}
\setbeamercolor{block title}{bg=base!60!paper,fg=paper}
\setbeamercolor{navigation symbols}{fg=base!90!paper}
\setbeamercolor{separation line}{bg=blue,fg=yellow}
\setbeamercolor{fine separation line}{bg=blue,fg=orange}

% Title page
% \setbeamercolor{title}{bg=base!20!paper}
% \setbeamercolor{subtitle}{bg=base!20!paper}
% \setbeamercolor{title page}{bg=base!40!paper}

% \setbeamercolor{headline}{bg=blue}
% \setbeamercolor{footline}{bg=blue}
% \setbeamercolor{frametitle}{bg=base!30!paper}
% \setbeamercolor{framesubtitle}{bg=base!40!paper}

% \setbeamercolor{section in toc}{bg=base!25!paper,fg=orange}
% \setbeamercolor{section in toc shaded}{bg=base!25!paper,fg=orange!80!paper}
% \setbeamercolor{subsection in toc}{bg=base!25!paper,fg=orange}
% \setbeamercolor{subsection in toc shaded}{bg=yellow!25!paper,fg=orange!80!paper}
% page number in head/foot
% section in head/foot
% section in head/foot shaded


\setbeamerfont{structure}{series=\bfseries}
\setbeamerfont{title}{series=\mdseries,size=\Large}
%\setbeamerfont{title}{series=\ltseries,size=\huge}
\setbeamerfont{date}{size=\footnotesize}%,series=\mdcseries}
\setbeamerfont{institute}{size=\footnotesize}%,series=\mdcseries}
\setbeamerfont{author}{size=\footnotesize,series=\bfseries}
\setbeamercolor{bibliography item}{parent={normal text}}
\setbeamercolor{bibliography entry author}{fg=base}
\setbeamercolor{bibliography entry location}{fg=base!70!paper}

%%% Templates

\setbeamertemplate{bibliography item}[text]
\setbeamertemplate{bibliography entry title}{ }
\setbeamertemplate{bibliography entry location}{ }
\setbeamertemplate{blocks}[rounded][shadow=false]
\setbeamertemplate{items}[circle]
%\setbeamertemplate{bibliography item}[triangle]
% \setbeamertemplate{title page}[default][rounded=true,shadow=false]
% \setbeamertemplate{frametitle}[default][rounded=true,shadow=false]
\setbeamertemplate{sidebar right}{}
\setbeamertemplate{footline}{
\hspace*{0.2cm}
\insertlogo
\hfill
\usebeamertemplate***{navigation symbols}%
\hfill
\makebox[6ex]{\hfill\insertframenumber/\inserttotalframenumber}%
\hspace*{0.2cm}

\vskip 4pt
}

\setbeamertemplate{navigation symbols}
{%
\hbox{%
\hbox{\insertslidenavigationsymbol}
\hbox{\insertframenavigationsymbol}
% \hbox{\insertsubsectionnavigationsymbol}
\hbox{\insertsectionnavigationsymbol}
\hbox{\insertdocnavigationsymbol}
% \hbox{\insertbackfindforwardnavigationsymbol}%
}%
}


\AtBeginSection[]{
\begin{iframe}[Outline]
\tableofcontents[currentsection]
\end{iframe}
}
%\linespread{1.1}

\setlength{\parskip}{0.5em}

\newenvironment{bframe}[1][untitled]{\begin{frame}[allowframebreaks]\frametitle{#1}}{\end{frame}}
\newenvironment{iframe}[1][untitled]{\begin{frame}\frametitle{#1}}{\end{frame}}
\newenvironment{isframe}[1][untitled]{\begin{frame}[fragile=singleslide,environment=isframe]\frametitle{#1}}{\end{frame}}

\renewenvironment{fig}[1]
{%
\begin{figure}
\def\fglbl{f:#1}
\let\ocap=\caption
\renewcommand{\caption}[2][]{\ocap[##1]{\small ##2}}
\centering\small
}{%
\label{\fglbl}
\end{figure}
}

\newcommand{\paragraph}[1]{\textbf{#1}\qquad}
\newcommand{\colfig}[2][1]{\includegraphics[width=#1\linewidth]{figs/#2}}%
\let\citep=\cite
%\newcommand{\dotmath}[2]{\psfrag{#1}[Bc][Bc]{\small $#2$}}

\title{Cognitive Music Modelling:\\An Information Dynamics Approach}
\author{Samer Abdallah, Henrik Ekeus, Peter Foster,\\Andrew Robertson and Mark Plumbley}
\institute{Centre for Digital Music\\Queen Mary, University of London}

\date{\today}

\def\X{\mathcal{X}}
\def\Y{\mathcal{Y}}
\def\Past{\mathrm{Past}}
\def\Future{\mathrm{Future}}
\def\Present{\mathrm{Present}}
\def\param{\theta}
\def\trans{a}
\def\init{\pi^{\trans}}
%\def\entrorate(#1){\mathcal{H}(#1)}
%\def\entrorate(#1){\dot{\mathcal{H}}(#1)}
\def\entrorate{h}
\def\emcmarg(#1){b_#1}
\def\mcmarg{\vec{b}}
\def\domS{\mathcal{S}}
\def\domA{\mathcal{A}}

\def\Lxz(#1,#2){\mathcal{L}(#1|#2)}
\def\LXz(#1){\overline{\mathcal{L}}(#1)}
\def\LxZ(#1){\underline{\mathcal{L}}(#1)}
\def\LXZ{\overline{\underline{\mathcal{L}}}}
\def\Ixz(#1,#2){\mathcal{I}(#1|#2)}
\def\IXz(#1){\overline{\mathcal{I}}(#1)}
\def\IxZ(#1){\underline{\mathcal{I}}(#1)}
\def\IXZ{\overline{\underline{\mathcal{I}}}}

\def\ev(#1=#2){#1\!\!=\!#2}
\def\sev(#1=#2){#1\!=#2}

\def\FE{\mathcal{F}}

\newcommand\past[1]{\overset{\rule{0pt}{0.2em}\smash{\leftarrow}}{#1}}
\newcommand\fut[1]{\overset{\rule{0pt}{0.1em}\smash{\rightarrow}}{#1}}

\def\cn(#1,#2) {\node[circle,draw,inner sep=0.2em] (#1#2) {${#1}_{#2}$};}
\def\dn(#1) {\node[circle,inner sep=0.2em] (#1) {$\cdots$};}
\def\rl(#1,#2) {\draw (#1) -- (#2);}

\definecolor{un0}{rgb}{0.5,0.0,0.0}
\definecolor{un1}{rgb}{0.6,0.15,0.15}
\definecolor{un2}{rgb}{0.7,0.3,0.3}
\definecolor{un3}{rgb}{0.8,0.45,0.45}
\definecolor{un4}{rgb}{0.9,0.6,0.6}
\definecolor{un5}{rgb}{1.0,0.75,0.75}

%\def\blob(#1){\node[circle,draw,fill=#1,inner sep=0.25em]{};}
\def\bl(#1){\draw[circle,fill=#1] (0,0) circle (0.4em);}
\def\noderow(#1,#2,#3,#4,#5,#6){%
\tikz{\matrix[draw,rounded corners,inner sep=0.4em,column sep=2.1em,ampersand replacement=\&]{%
\bl(#1)\&\bl(#2)\&\bl(#3)\&\bl(#4)\&\bl(#5)\&\bl(#6)\\};}}

\begin{document}
\frame{\titlepage}
\section[Outline]{}
\frame{
\frametitle{Outline}
\tableofcontents
}



\section{Expectation and surprise in music}
\label{s:Intro}

\begin{iframe}[`Unfoldingness']
Music is experienced as a
\uncover<2->{phenomenon}
\uncover<3->{that}
\uncover<4->{`unfolds'} \uncover<5->{in}\\
\only<6>{blancmange}%
\only<7>{(just kidding)}%
\uncover<8->{time,}
\uncover<9->{rather than being apprehended as a static object presented in its
entirety.}

\uncover<10->{[This is recognised in computational linguistics, where the phenomenon is known as \emph{incrementality}, \eg in incremental parsing.]}

\uncover<11->{%
Meyer \cite{Meyer67} argued that musical experience depends on
how we change and revise our conceptions \emph{as events happen},
on how expectation and prediction interact with occurrence, and that, to a large
degree, the way to understand the effect of music is to focus on
this `kinetics' of expectation and surprise.%
}
\end{iframe}

\begin{iframe}[Expectation and surprise in music]

Music creates
\emph{expectations} of what is to come next, which may be fulfilled
immediately, after some delay, or not at all.
Suggested by music theorists, \eg
L. B. Meyer \cite{Meyer67} and Narmour \citep{Narmour77}, but also
noted much earlier by Hanslick \cite{Hanslick1854} in the
1850s:
\begin{quote}
\small
`The most important factor in the mental process which accompanies the
act of listening to music, and which converts it to a source of pleasure, is
\ldots
% frequently overlooked. We here refer to
the intellectual satisfaction which the
listener derives from continually following and anticipating the composer's
intentions---now, to see his expectations fulfilled, and now, to find himself
agreeably mistaken. It is a matter of course that this intellectual flux and
reflux, this perpetual giving and receiving takes place unconsciously, and with
the rapidity of lightning-flashes.'
\end{quote}
\end{iframe}

\begin{iframe}[Probabilistic reasoning]
\uncover<1->{%
Making predictions and assessing surprise is
essentially reasoning with degrees of belief, and (arguably)
the best way to do this is using Bayesian probability theory \cite{Cox1946,Jaynes27}.%

[NB. this is \textbf{subjective} probability as advocated by \eg De Finetti and Jaynes.]
}

% Thus, we assume that musical schemata are encoded as probabilistic % \citep{Meyer56} models, and
\uncover<2->{%
We suppose that familiarity with different styles of music takes the form
of various probabilistic models, and that these models are adapted through listening.%
}
% various stylistic norms is encoded as
% using models that encode the statistics of music in general, the particular styles
% of music that seem best to fit the piece we happen to be listening to, and the emerging
% structures peculiar to the current piece.

\uncover<3->{%
Experimental evidence that humans are able to internalise statistical
knowledge about music \citep{SaffranJohnsonAslin1999,EerolaToiviainenKrumhansl2002}; and also
that statistical models are effective for computational analysis of music, \eg \cite{ConklinWitten95,Pearce2005}.%
}

% analysis of music, \eg \cite{ConklinWitten95,PonsfordWigginsMellish1999,Pearce2005}.
% \cite{Ferrand2002}. Dubnov and Assayag PSTs?
\end{iframe}

\begin{iframe}[Music and information theory]
\uncover<1->{
With probabilistic models in hand we can apply quantitative information theory: we can compute entropies,
relative entropies, mutual information, and all that.
}

\uncover<2->{
Lots of interest in the application of information theory to perception, music and aesthetics since the 50s,
\eg Moles \cite{Moles66}, Meyer \cite{Meyer67}, Cohen \cite{Cohen1962}, Berlyne \cite{Berlyne71}.
(See also Bense, Hiller)
}

\uncover<3->{
The idea is that subjective qualities and
states like uncertainty, surprise, complexity, tension, and interestingness
are determined by information-theoretic quantities.
}

\uncover<4->{
Berlyne \cite{Berlyne71} called such quantities `collative variables', since they are
to do with patterns of occurrence rather than medium-specific details.
\emph{Information aesthetics}.
}
% Listeners then experience greater or lesser levels of surprise
% in response to departures from these norms.
% By careful manipulation
% of the material, the composer can thus define, and induce within the
% listener, a temporal programme of varying
% levels of uncertainty, ambiguity and surprise.
\end{iframe}

\begin{iframe}[Probabilistic model-based observer hypothesis]
\begin{itemize}
\item<1->
As we listen, we maintain a probabilistic model that enables
us to make predictions. As events unfold, we revise our probabilistic `belief state',
including predictions about the future.
\item<2->
Probability distributions and changes in distributions are characterised in terms
of information-theoretic measures such as entropy and relative entropy (KL divergence).
\item<3->
The dynamic evolution of these information measures captures significant structure,
\eg events that are surprising, informative, explanatory \etc
\end{itemize}

\end{iframe}

\begin{iframe}[Features of information dynamics]
\uncover<1->{
\textbf{Abstraction}: sensitive mainly to \emph{patterns} of occurrence,
rather than details of which specific things occur or the sensory medium.
% it operates at a level of abstraction removed from the details of the sensory experience and
% the medium through which it was received, suggesting that the same
% approach could, in principle, be used to analyse and compare information
% flow in different temporal media regardless of whether they are auditory, visual or otherwise.
}

\uncover<2->{
\textbf{Generality}: applicable in principle to any probabilistic model, in particular,
models with time-dependent latent variables such as HMMs.
Many important musical concepts like key, harmony, and beat are essentially `hidden variables'.
}

\uncover<3->{
\textbf{Richness}: when applied to models with latent variables, can result in a many-layered
analysis, capturing information flow about harmony, tempo, \etc
}

\uncover<4->{
\textbf{Subjectivity}: all probabilities are \emph{subjective} probabilities relative to the \emph{observer's}
model, which can depend on the observer's capabilities and prior experience.
}
\end{iframe}

\section{Surprise, entropy and information in random sequences}
\label{s:InfoInRandomProcs}

\begin{iframe}[Information theory primer\nicedot Entropy]
Let $X$ be a discrete-valued random variable (in the sense of \emph{subjective} probability).
Entropy is a measure of \emph{uncertainty}. If the observer expects to see $x$ with probability $p(x)$,
then
\begin{align*}
H(X) &= \sum_{x\in\X} - p(x) \log p(x) \\
&= \expect{[-\log p(X)]}.
\end{align*}
Considering $-\log p(x)$ as the `surprisingness' of $x$, the entropy is the `expected surprisingness':
high for spread-out distributions and low for concentrated ones.
\end{iframe}

\begin{iframe}[Information theory primer\nicedot Relative entropy]
Relative entropy or Kullback-Leibler (KL) divergence quantifies the difference between
probability distributions.
If the observer receives data $\mathcal{D}$, the divergence between the (subjective) prior and
posterior distributions is the
amount of information in $\mathcal{D}$ \emph{about} $X$ for this observer:
\[
I(\mathcal{D}\to X) =
D(p_{X|\mathcal{D}} || p_X)
= \sum_{x\in\X} p(x|\mathcal{D}) \log \frac{p(x|\mathcal{D})}{p(x)}.
\]
If observing $\mathcal{D}$ causes a large change in belief about $X$, then $\mathcal{D}$
contained a lot of information about $X$.

Like Lindley's (1956) information (thanks Lars!).
\end{iframe}

\begin{iframe}[Information theory primer\nicedot Mutual information]
Mutual information (MI) between $X_1$ and $X_2$ is the expected amount of information about
$X_2$ in an observation of $X_1$. Can be written in several ways:
\begin{align*}
I(X_1;X_2) &= \sum_{x_1,x_2} p(x_1,x_2) \log \frac{p(x_1,x_2)}{p(x_1)p(x_2)} \\
&= H(X_1) + H(X_2) - H(X_1,X_2) \\
&= H(X_2) - H(X_2|X_1).
\end{align*}
(1) Expected information about $X_2$ in an observation of $X_1$;\\
(2) Expected reduction in uncertainty about $X_2$ after observing $X_1$;\\
(3) Symmetric: $I(X_1;X_2) = I(X_2;X_1)$.
\end{iframe}

\begin{iframe}[Information theory primer\nicedot Conditional MI]
Information in one variable about another given observations of some third variable.
Formulated analogously by adding conditioning variables to the entropies:
\begin{align*}
I(X_1;X_2|X_3) &= H(X_1|X_3) - H(X_1|X_2,X_3).
\end{align*}
Makes explicit the dependence of information assessment on background knowledge,
represented by conditioning variables.
\end{iframe}


\begin{isframe}[Information theory primer\nicedot I-Diagrams]
\newcommand\rad{2.2em}%
\newcommand\circo{circle (3.4em)}%
\newcommand\labrad{4.3em}
\newcommand\bound{(-6em,-5em) rectangle (6em,6em)}
\newcommand\clipin[1]{\clip (#1) \circo;}%
\newcommand\clipout[1]{\clip \bound (#1) \circo;}%
\newcommand\cliptwo[3]{%
\begin{scope}
\clipin{#1};
\clipin{#2};
\clipout{#3};
\fill[black!30] \bound;
\end{scope}
}%
\newcommand\clipone[3]{%
\begin{scope}
\clipin{#1};
\clipout{#2};
\clipout{#3};
\fill[black!15] \bound;
\end{scope}
}%
Information diagrams are a Venn diagram-like representation of entropies and mutual
informations for a set of random variables.
\begin{center}
\begin{tabular}{c@{\ }c}
\scalebox{0.8}{%
\begin{tikzpicture}[baseline=0pt]
\coordinate (p1) at (90:\rad);
\coordinate (p2) at (210:\rad);
\coordinate (p3) at (-30:\rad);
\clipone{p1}{p2}{p3};
\clipone{p2}{p3}{p1};
\clipone{p3}{p1}{p2};
\cliptwo{p1}{p2}{p3};
\cliptwo{p2}{p3}{p1};
\cliptwo{p3}{p1}{p2};
\begin{scope}
\clip (p1) \circo;
\clip (p2) \circo;
\clip (p3) \circo;
\fill[black!45] \bound;
\end{scope}
\draw (p1) \circo;
\draw (p2) \circo;
\draw (p3) \circo;
\path
(barycentric cs:p3=1,p1=-0.2,p2=-0.1) +(0ex,0) node {$I_{3|12}$}
(barycentric cs:p1=1,p2=-0.2,p3=-0.1) +(0ex,0) node {$I_{1|23}$}
(barycentric cs:p2=1,p3=-0.2,p1=-0.1) +(0ex,0) node {$I_{2|13}$}
(barycentric cs:p3=1,p2=1,p1=-0.55) +(0ex,0) node {$I_{23|1}$}
(barycentric cs:p1=1,p3=1,p2=-0.55) +(0ex,0) node {$I_{13|2}$}
(barycentric cs:p2=1,p1=1,p3=-0.55) +(0ex,0) node {$I_{12|3}$}
(barycentric cs:p3=1,p2=1,p1=1) node {$I_{123}$}
;
\path
(p1) +(140:\labrad) node {$X_1$}
(p2) +(-140:\labrad) node {$X_2$}
(p3) +(-40:\labrad) node {$X_3$};
\end{tikzpicture}%
}
&
\parbox{0.5\linewidth}{
\small
\begin{align*}
I_{1|23} &= H(X_1|X_2,X_3) \\
I_{13|2} &= I(X_1;X_3|X_2) \\
I_{1|23} + I_{13|2} &= H(X_1|X_2) \\
I_{12|3} + I_{123} &= I(X_1;X_2)
\end{align*}
}
\end{tabular}
\end{center}
The areas of
the three circles represent $H(X_1)$, $H(X_2)$ and $H(X_3)$ respectively.
The total shaded area is the joint entropy $H(X_1,X_2,X_3)$.
Each undivided region is an \emph{atom} of the I-diagram.
\end{isframe}




\begin{isframe}[Information theory in sequences]
\def\bx{1.6em}%
\def\cn(#1,#2) {\node[circle,draw,fill=white,inner sep=0.2em] at(#1) {$#2$};}%
\def\dn(#1){\node[circle,inner sep=0.2em] at(#1) {$\cdots$};}%
\def\en(#1){coordinate(#1)}%
\def\tb{++(3.8em,0)}%
\def\lb(#1)#2{\path (#1)+(0,\bx) node[anchor=south] {#2};}
\def\nr(#1,#2,#3){\draw[rounded corners,fill=#3] (#1) rectangle (#2);}%

Consider an observer receiving elements of a random sequence
$(\ldots, X_{-1}, X_0, X_1, X_2, \ldots)$, so that at any time $t$ there is
a `present' $X_t$, an observed past $\past{X}_t$, and an unobserved future
$\fut{X}_t$.
E.g., at time $t=3$:

\begin{figure}
\begin{tikzpicture}%[baseline=-1em]
\path (0,0) \en(X0) \tb \en(X1) \tb \en(X2) \tb \en(X3) \tb \en(X4) \tb \en(X5) \tb \en(X6);
\path (X0)+(-\bx,-\bx) \en(p1) (X2)+(\bx,\bx) \en(p2)
(X3)+(-\bx,-\bx) \en(p3) (X3)+(\bx,\bx) \en(p4)
(X4)+(-\bx,-\bx) \en(p5) (X6)+(\bx,\bx) \en(p6);
\nr(p1,p2,un3) \nr(p3,p4,un4) \nr(p5,p6,un5)
\dn(X0) \cn(X1,X_1) \cn(X2,X_2) \cn(X3,X_3) \cn(X4,X_4) \cn(X5,X_5) \dn(X6)
\lb(X1){Past: $\past{X}_3$}
\lb(X5){Future: $\fut{X}_3$}
\lb(X3){Present}
\end{tikzpicture}%}%
\end{figure}
Consider how the observer's belief state evolves when, having observed up to
$X_2$, it learns the value of $X_3$.
\end{isframe}

\begin{iframe}[`Surprise' based quantities]
To obtain the first set of measures, we ignore the future $\fut{X}_t$
and consider the probability distribution for $X_t$ given the
observed past $\past{X}_t=\past{x}_t$.

\begin{enumerate}
\item<1->
\textbf{Surprisingness}: negative log-probability
$\ell_t = -\log p(x_t|\past{x}_t)$.

\item<2->
Expected surprisingness given the context $\past{X}_t=\past{x}_t$ is the entropy of the predictive distribution,
$H(X_t|\ev(\past{X}_t=\past{x}_t))$: uncertainty about $X_t$ before the observation is made.

\item<3->
Expectation over all possible realisations of the process is the conditional entropy
$H(X_t|\past{X}_t)$ according to the observer's model. For a stationary process, this is the
\emph{entropy rate} $h_\mu$.
\end{enumerate}
\end{iframe}

\begin{iframe}[Predictive information]
The second set of measures is based on the amount of information the observation $\ev(X_t=x_t)$
carries \emph{about} the unobserved future $\fut{X}_t$, \emph{given} that we already
know the past $\ev(\past{X}_t=\past{x}_t)$:
\begin{equation*}
\mathcal{I}_t = I(\ev(X_t=x_t)\to\fut{X}_t|\ev(\past{X}_t=\past{x}_t)).
\end{equation*}
This is the KL divergence between beliefs about the future $\fut{X}_t$ prior and posterior
to the observation $\ev(X_t=x_t)$.
Hence, for continuous-valued variables, it is invariant to invertible
transformations of the observation spaces.
\end{iframe}

\begin{iframe}[Predictive information based quantities]
\begin{enumerate}
\item<1->
\emph{Instantaneous predictive information} (IPI) is just $\mathcal{I}_t$.

% Expectations over $X|\ev(Z=z)$, $Z|\ev(X=x)$, and $(X,Z)$ give 3 more information measures:
\item<2->
Expectation of $\mathcal{I}_t$ before the observation at time $t$ is
$I(X_t;\fut{X}_t | \ev(\past{X}_t=\past{x}_t))$: mutual information conditioned on the
observed past. This is the amount of new information about the future expected from the next observation.
Useful for directing attention towards the next event even before it happens?

% This is different from Itti and Baldi's proposal that Bayesian
% \emph{surprise} attracts attention \cite{IttiBaldi2005}, as it is a mechanism which can
% operate \emph{before} the surprise occurs.

\item<3->
Expectation over all possible realisations is the conditional mutual information
$I(X_t;\fut{X}_t|\past{X}_t)$. For a stationary process, this is the global
\emph{predictive information rate} (PIR), the average rate at which new information arrives about
the future. In terms of conditional entropies, it has two forms:
$H(\fut{X}_t|\past{X}_t) - H(\fut{X}_t|X_t,\past{X}_t)$ or
$H(X_t|\past{X}_t) - H(X_t|\fut{X}_t,\past{X}_t)$.
\end{enumerate}

\end{iframe}

\begin{iframe}[Global measures for stationary processes]
For a stationary random process model, the average levels of surprise and information
are captured by the time-shift invariant process information measures:
\begin{align*}
\text{entropy rate} &: & h_\mu &= H(X_t | \past{X}_t) \\
\text{multi-information rate} &: & \rho_\mu &= I(\past{X}_t;X_t) = H(X_t) - h_\mu \\
\text{residual entropy rate} &: & r_\mu &= H(X_t | \past{X}_t, \fut{X}_t) \\
\text{predictive information rate} &: & b_\mu &= I(X_t;\fut{X}_t|\past{X}_t) = h_\mu - r_\mu
\end{align*}
Residual entropy is also known as \emph{erasure entropy} \cite{VerduWeissman2006}.
\end{iframe}

\begin{isframe}[Process I-diagrams]
% \newcommand\subfig[2]{\shortstack{#2\\[0.75em]#1}}
\newcommand\subfig[2]{#2}
\newcommand\rad{1.75em}%
\newcommand\ovoid[1]{%
++(-#1,\rad)
-- ++(2 * #1,0em) arc (90:-90:\rad)
-- ++(-2 * #1,0em) arc (270:90:\rad)
}%
\newcommand\axis{2.75em}%
\newcommand\olap{0.85em}%
\newcommand\offs{3.6em}
\newcommand\longblob{\ovoid{\axis}}
\newcommand\shortblob{\ovoid{1.75em}}
\begin{figure}
\begin{tikzpicture}%[baseline=-1em]
\newcommand\rc{\rad}
\newcommand\throw{2.5em}
\coordinate (p1) at (180:1.5em);
\coordinate (p2) at (0:0.3em);
\newcommand\bound{(-7em,-2.6em) rectangle (7em,3.0em)}
\newcommand\present{(p2) circle (\rc)}
\newcommand\thepast{(p1) ++(-\throw,0) \ovoid{\throw}}
\newcommand\fillclipped[2]{%
\begin{scope}[even odd rule]
\foreach \thing in {#2} {\clip \thing;}
\fill[black!#1] \bound;
\end{scope}%
}%
\fillclipped{30}{\present,\bound \thepast}
\fillclipped{15}{\present,\bound \thepast}
\fillclipped{45}{\present,\thepast}
\draw \thepast;
\draw \present;
\node at (barycentric cs:p2=1,p1=-0.3) {$h_\mu$};
\node at (barycentric cs:p2=1,p1=1) [shape=rectangle,fill=black!45,inner sep=1pt]{$\rho_\mu$};
\path (p2) +(90:3em) node {$X_0$};
\path (p1) +(-3em,0em) node {\shortstack{infinite\\past}};
\path (p1) +(-4em,\rad) node [anchor=south] {$\ldots,X_{-1}$};
\end{tikzpicture}%
\\[0.25em]
\begin{tikzpicture}%[baseline=-1em]
\newcommand\rc{2.2em}
\newcommand\throw{2.5em}
\coordinate (p1) at (210:1.5em);
\coordinate (p2) at (90:0.8em);
\coordinate (p3) at (-30:1.5em);
\newcommand\bound{(-7em,-2.6em) rectangle (7em,3.0em)}
\newcommand\present{(p2) circle (\rc)}
\newcommand\thepast{(p1) ++(-\throw,0) \ovoid{\throw}}
\newcommand\future{(p3) ++(\throw,0) \ovoid{\throw}}
\newcommand\fillclipped[2]{%
\begin{scope}[even odd rule]
\foreach \thing in {#2} {\clip \thing;}
\fill[black!#1] \bound;
\end{scope}%
}%
% \fillclipped{80}{\future,\thepast}
\fillclipped{30}{\present,\future,\bound \thepast}
\fillclipped{15}{\present,\bound \future,\bound \thepast}
\draw \future;
\fillclipped{45}{\present,\thepast}
\draw \thepast;
\draw \present;
\node at (barycentric cs:p2=0.9,p1=-0.17,p3=-0.17) {$r_\mu$};
\node at (barycentric cs:p1=-0.5,p2=1.0,p3=1) {$b_\mu$};
\node at (barycentric cs:p3=0,p2=1,p1=1.2) [shape=rectangle,fill=black!45,inner sep=1pt]{$\rho_\mu$};
\path (p2) +(140:3.2em) node {$X_0$};
% \node at (barycentric cs:p3=0,p2=1,p1=1) {$\rho_\mu$};
\path (p3) +(3em,0em) node {\shortstack{infinite\\future}};
\path (p1) +(-3em,0em) node {\shortstack{infinite\\past}};
\path (p1) +(-4em,\rad) node [anchor=south] {$\ldots,X_{-1}$};
\path (p3) +(4em,\rad) node [anchor=south] {$X_1,\ldots$};
\end{tikzpicture}%
% \\[0.25em]
% The small dark
% region below $X_0$ is $\sigma_\mu$ and the excess entropy
% is $E = \rho_\mu + \sigma_\mu$.
\end{figure}
Marginal entropy of the `present' $X_0$ is $H(X_0)=\rho_\mu+r_\mu+b_\mu$.\\
Entropy rate is $h_\mu = r_\mu+b_\mu$.
\end{isframe}

\section{Markov chains}
\label{s:InfoInMC}


\begin{iframe}[Markov chains\nicedot Definitions]

% Now we'll look at information dynamics in one of the simplest possible models, a Markov chain.
% To illustrate how the measures defined in \secrf{InfoInRandomProcs} can be computed
% in practice, we will consider one of the simplest random processes, a
% first order Markov chain.
% In this case, the dynamic information measures can be computed in closed-form.
%

Let $X$ be a Markov chain with state space
$\{1, \ldots, K\}$, \ie the $X_t$ take values from $1$ to $K$.
\begin{center}
\begin{tikzpicture}[->]
\matrix[column sep=2em,ampersand replacement=\&]{
\cn(X,1) \& \cn(X,2) \& \cn(X,3) \& \cn(X,4) \& \dn(XT) \\};
\rl(X1,X2) \rl(X2,X3) \rl(X3,X4) \rl(X4,XT)
\end{tikzpicture}
\end{center}
% For the sake of brevity let us assume that $\domA$ is the set of integers from 1 to $K$.
Parameterised by a transition matrix $\trans \in \reals^{K\times K}$,
% encoding the distribution of any element of the sequence given previous one,
\ie $p(\ev(X_{t+1}=i)|\ev(X_t=j))=\trans_{ij}$.
Assume irreducibility, ergodicity \etc to ensure uniqueness of the
stationary distribution $\init$ such that
$p(\ev(X_t=i))=\init_i$ independent of $t$. The entropy rate as a function of
$\trans$ is
% $\entrorate:\reals^{K\times K} \to \reals$:
\[
\entrorate(\trans) = \sum_{j=1}^K \init_j \sum_{i=1}^K -\trans_{ij} \log \trans_{ij}.
\]
\end{iframe}

\begin{iframe}[Markov chains\nicedot PIR]
The predictive information rate for first-order chains comes out in terms of the entropy rate
function as
\[
b_\mu = h(a^2) - h(a),
\]
where $a^2$ is the \emph{two-step} transition matrix.

\uncover<2->{
Can be generalised to higher-order transition matrices:
\[
b_\mu = h(\hat{a}^{N+1}) - Nh(\hat{a}),
\]
where $N$ is the order of the chain and $\hat{a}$ is a sparse
$K^N\times K^N$ transition matrix over the product state space of $N$
consecutive observations (step size 1).
}
\end{iframe}

\begin{iframe}[Entropy rate and PIR in Markov chains]

\begin{fig}{artseq}
\hangbox{\colfig[0.40]{matbase/fig8515}}%
\quad
\hangbox{%
\begin{tabular}{cc}%
\colfig[0.18]{matbase/fig1356} &
\colfig[0.18]{matbase/fig45647} \\
\colfig[0.18]{matbase/fig49938} &
\colfig[0.18]{matbase/fig23355}%
\end{tabular}%
}%
% \end{hanging}\\
\end{fig}
For given $K$, the entropy rate varies between 0 (deterministic sequence)
and $\log K$ when $\trans_{ij}=1/K$ for all $i,j$.
The space of transition matrices was explored by generating
them at random and plotting entropy rate vs PIR. (Note the inverted
`U' relationship.) %Transmat (d) is almost uniform.
\end{iframe}

\begin{iframe}[Samples from processes with different PIR]
\begin{figure}
\colfig[0.75]{matbase/fig847}\\
\colfig[0.75]{matbase/fig61989}\\
\colfig[0.75]{matbase/fig43415}\\
\colfig[0.75]{matbase/fig50385}
\end{figure}
Sequence (a) is a repetition
of state 4 (see transmat (a) on previous slide).
System (b) has the highest PIR.
\end{iframe}

% \begin{tabular}{rl}
% (a) & \raisebox{-1em}{\colfig[0.58]{matbase/fig9048}}\\[1em]
% (b) & \raisebox{-1em}{\colfig[0.58]{matbase/fig58845}}\\[1em]
% (c) & \raisebox{-1em}{\colfig[0.58]{matbase/fig45019}}\\[1em]
% (d) & \raisebox{-1em}{\colfig[0.58]{matbase/fig1511}}
% \end{tabular}

\section{Application: The Melody Triangle}
\begin{iframe}[Complexity and interestingness: the Wundt Curve]
\label{s:Wundt}
Studies looking into the relationship between stochastic complexity
(usually measured as entropy or entropy rate) and aesthetic value reveal
an inverted `U'-shaped curve \citep{Berlyne71}. (Also, the Wundt curve \cite{Wundt1897}.)
Repeated exposure tends to move stimuli leftwards.

\hangbox{%
\only<1>{\colfig[0.5]{wundt}}%
\only<2>{\colfig[0.5]{wundt2}}%
}\hfill
\hangbox{\parbox{0.43\linewidth}{\raggedright
%Too deterministic $\rightarrow$ predictable, boring like a monotone;\\
%Too random $\rightarrow$ are boring like white noise: unstructured,
%featureless, uniform.
samer@74: Explanations for this usually appeal to a need for a `balance' samer@74: between order and chaos, unity and diversity, and so on, in a generally samer@74: imprecise way.}} samer@74: samer@74: samer@74: % Hence, a sequence can be uninteresting in two opposite ways: by samer@74: % being utterly predictable \emph{or} by being utterly samer@74: % unpredictable. samer@74: % Meyer \cite{Meyer2004} suggests something similar: samer@74: % hints at the same thing while discussing samer@74: % the relation between the rate of information flow and aesthetic experience, samer@74: % suggesting that samer@74: %% `unless there is some degree of order, \ldots samer@74: %% there is nothing to be uncertain \emph{about} \ldots samer@74: % `If the amount of information [by which he means entropy and surprisingness] samer@74: % is inordinately increased, the result is a kind of cognitive white noise.' samer@74: samer@74: \end{iframe} samer@74: samer@74: \begin{iframe}[PIR as a measure of cognitive activity] samer@74: samer@74: The predictive information rate incorporates a similar balance automatically: samer@74: is maximal for sequences which are neither deterministic nor samer@74: totally uncorrelated across time. samer@74: samer@74: \vspace{1em} samer@74: \begin{tabular}{rr}% samer@74: \raisebox{0.5em}{too predictable:} & samer@74: \only<1>{\noderow(black,un0,un0,un0,un1,un1)}% samer@74: \only<2>{\noderow(black,black,un0,un0,un0,un1)}% samer@74: \only<3>{\noderow(black,black,black,un0,un0,un0)}% samer@74: \only<4>{\noderow(black,black,black,black,un0,un0)}% samer@74: \\[1.2em] samer@74: \raisebox{0.5em}{intermediate:} & samer@74: \only<1>{\noderow(black,un1,un2,un3,un4,un5)}% samer@74: \only<2>{\noderow(black,black,un1,un2,un3,un4)}% samer@74: \only<3>{\noderow(black,black,black,un1,un2,un3)}% samer@74: \only<4>{\noderow(black,black,black,black,un1,un2)}% samer@74: \\[1.2em] samer@74: \raisebox{0.5em}{too random:} & samer@74: \only<1>{\noderow(black,un5,un5,un5,un5,un5)}% samer@74: \only<2>{\noderow(black,black,un5,un5,un5,un5)}% samer@74: \only<3>{\noderow(black,black,black,un5,un5,un5)}% samer@74: \only<4>{\noderow(black,black,black,black,un5,un5)}% samer@74: \end{tabular} samer@74: \vspace{1em} samer@74: samer@74: (Black: \emph{observed}; red: \emph{unobserved}; paler: \emph{greater uncertainty}.) samer@74: Our interpretation: samer@74: % when each event appears to carry no new information about the unknown future, samer@74: % it is `meaningless' and not worth attending to. samer@74: Things are `interesting' or at least `salient' when each new part supplies new information about parts to come. samer@74: samer@74: % Quantitative information dynamics will enable us to test this experimentally with human samer@74: % subjects. samer@74: \end{iframe} samer@74: samer@74: \begin{iframe}[The Melody Triangle\nicedot Information space] samer@74: \begin{figure} samer@74: \colfig[0.75]{mtriscat} samer@74: \end{figure} samer@74: Population of transition matrices in 3D space of $h_\mu$, $\rho_\mu$ and $b_\mu$. samer@74: % Concentrations of points along redundancy axis correspond to roughly periodic patterns. samer@74: Colour of each point samer@74: represents PIR. samer@74: %---highest values found at intermediate entropy and redundancy. samer@74: Shape is mostly (not completely) hollow inside: forming roughly samer@74: a curved triangular sheet. 
\end{iframe}

\begin{iframe}[The Melody Triangle\nicedot User interface]
\begin{figure}
\colfig[0.55]{TheTriangle.pdf}
\end{figure}
Allows the user to place tokens in the triangle
to trigger sonification of a Markov chain with the corresponding information
`coordinates'.
\end{iframe}

\begin{iframe}[Subjective information]
So far we've assumed that the sequence is actually sampled
from a stationary Markov chain with a transition matrix known
to the observer.
This means time averages of IPI and surprise should equal their
expectations.

\uncover<2->{
What if the sequence is sampled from some other Markov chain,
or is produced by some unknown process?
}

\begin{itemize}
\item<3->
In general, it may be impossible to identify any `true' model. There
are no `objective' probabilities; only subjective ones, as
argued by de Finetti \cite{deFinetti}.


\item<4->
If the sequence \emph{is} sampled from some Markov chain, we can
compute (time) averages of the observer's average subjective surprise
and PI, and also track what happens if the observer gradually learns
the transition matrix from the data.
\end{itemize}
\end{iframe}


\begin{iframe}[Effect of learning on information dynamics]
\begin{figure}
% \colfig{matbase/fig42687} % too small text
% \colfig{matbase/fig60379} % 9*19 too tall
% \colfig{matbase/fig52515} % 9*20 ok, perhaps text still too small
\colfig[0.9]{matbase/fig30461} % 8*19 ok
% \colfig{matbase/fig66022} % 8.5*19 ok
\end{figure}
% Upper row shows actual stochastic learning,
% lower shows the idealised deterministic learning.
\textbf{(a/b/e/f)}: multiple runs starting from the same
initial condition but using different generative transition matrices.
\textbf{(c/d/g/h)}: multiple runs starting from different
initial conditions and converging on transition matrices
with (c/g) high and (d/h) low PIR.
\end{iframe}


\section{More process models}
\begin{iframe}[Exchangeable sequences and parametric models]
De Finetti's theorem says that an exchangeable random process can be represented
as a sequence of variables which are iid \emph{given} some hidden probability
distribution, which we can think of as a parameterised model:
\begin{tabular}{lp{0.45\linewidth}}
\hangbox{\begin{tikzpicture}
[>=stealth',var/.style={circle,draw,inner sep=1pt,text height=10pt,text depth=4pt}]
\matrix[ampersand replacement=\&,matrix of math nodes,row sep=2em,column sep=1.8em,minimum size=17pt] {
\& |(theta) [var]| \Theta \\
|(x1) [var]| X_1 \& |(x2) [var]| X_2 \& |(x3) [var]| X_3 \&
|(etc) [outer sep=2pt]| \dots \\
};
\foreach \n in {x1,x2,x3,etc} \draw[->] (theta)--(\n);
\end{tikzpicture}}
&
\raggedright
\uncover<2->{The observer's belief state at time $t$ includes a probability distribution
over the parameters, $p(\ev(\Theta=\theta)|\ev(\past{X}_t=\past{x}_t))$.}
\end{tabular}\\[1em]
\uncover<3->{
Each observation causes a revision of the belief state
and hence supplies information
$
I(\ev(X_t=x_t)\to\Theta|\ev(\past{X}_t=\past{x}_t))
% = D( p_{\Theta|\ev(X_t=x_t),\ev(\past{X}_t=\past{x}_t)} || p_{\Theta|\ev(\past{X}_t=\past{x}_t)} ).
$ about $\Theta$.
In previous work we called this the `model information rate'.
}
\uncover<4->{(Same as Haussler and Opper's \cite{HausslerOpper1995} IIG or
Itti and Baldi's \cite{IttiBaldi2005} Bayesian surprise.)}
\end{iframe}

\def\circ{circle (9)}%
\def\bs(#1,#2,#3){(barycentric cs:p1=#1,p2=#2,p3=#3)}%
\begin{iframe}[IIG equals IPI in (some) XRPs]
\begin{tabular}{@{}lc}
\parbox[c]{0.5\linewidth}{\raggedright
Mild assumptions yield a relationship between IIG (instantaneous information gain) and IPI.
(Everything here is implicitly conditioned on $\past{X}_t$.)}
&
\pgfsetxvec{\pgfpoint{1mm}{0mm}}%
\pgfsetyvec{\pgfpoint{0mm}{1mm}}%
\begin{tikzpicture}[baseline=0pt]
\coordinate (p1) at (90:6);
\coordinate (p2) at (210:6);
\coordinate (p3) at (330:6);
\only<4->{%
\begin{scope}
\foreach \p in {p1,p2,p3} \clip (\p) \circ;
\fill[lightgray] (-10,-10) rectangle (10,10);
\end{scope}
\path (0,0) node {$\mathcal{I}_t$};}
\foreach \p in {p1,p2,p3} \draw (\p) \circ;
\path (p2) +(210:13) node {$X_t$}
(p3) +(330:13) node {$\fut{X}_t$}
(p1) +(140:12) node {$\Theta$};
\only<2->{\path \bs(-0.25,0.5,0.5) node {$0$};}
\only<3->{\path \bs(0.5,0.5,-0.25) node {$0$};}
\end{tikzpicture}
\end{tabular}\\
\begin{enumerate}
\uncover<2->{\item $X_t \perp \fut{X}_t | \Theta$: observations iid given $\Theta$ for XRPs;}
\uncover<3->{\item $\Theta \perp X_t | \fut{X}_t$:
% $I(X_t;\fut{X}_t|\Theta_t)=0$ due to the conditional independence of
% observables given the parameters $\Theta_t$, and
% $I(\Theta_t;X_t|\fut{X}_t)=0$
assumption that $X_t$ adds no new information about $\Theta$
given the infinitely long sequence $\fut{X}_t =X_{t+1:\infty}$.}
\end{enumerate}
\uncover<4->{Hence, $I(X_t;\Theta_t|\past{X}_t)=I(X_t;\fut{X}_t|\past{X}_t) = \mathcal{I}_t$.\\}
\uncover<5->{Can drop assumption 1 and still get $I(X_t;\Theta_t|\past{X}_t)$ as an additive component (lower bound) of $\mathcal{I}_t$.}
\end{iframe}

\def\fid#1{#1}
\def\specint#1{\frac{1}{2\pi}\int_{-\pi}^\pi #1{S(\omega)} \dd \omega}
\begin{iframe}[Discrete-time Gaussian processes]
Information-theoretic quantities used earlier have analogues for continuous-valued
random variables. For stationary Gaussian processes, we can obtain results in
terms of the power spectral density $S(\omega)$ (which for discrete time is periodic
in $\omega$ with period $2\pi$). Standard methods give
\begin{align*}
H(X_t) &= \frac{1}{2}\left( \log 2\pi e + \log \specint{}\right), \\
h_\mu &= \frac{1}{2} \left( \log 2\pi e + \specint{\log} \right), \\
\rho_\mu &= \frac{1}{2} \left( \log \specint{\fid} - \specint{\log}\right).
\end{align*}
The entropy rate is also known as the Kolmogorov-Sinai entropy.
% $H(X_t)$ is a function of marginal variance which is just the total power in the spectrum.
\end{iframe}

\begin{iframe}[PIR/Multi-information duality]
Analysis yields the PIR:
\[
b_\mu = \frac{1}{2} \left( \log \specint{\frac{1}} - \specint{\log\frac{1}} \right).
\]
Yields a simple expression for finite-order autoregressive processes, but beware: can diverge
for moving-average processes!

\uncover<2->{
Compare with the multi-information rate:
\[
\rho_\mu = \frac{1}{2} \left( \log \specint{\fid} - \specint{\log}\right).
\]
Yields a simple expression for finite-order moving-average processes, but can diverge
for marginally stable autoregressive processes.
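
For instance, assuming a stationary AR(1) process $X_t = a X_{t-1} + \epsilon_t$ with
$|a| < 1$ and innovation variance $\sigma^2$, so that $S(\omega) = \sigma^2/|1 - a e^{-i\omega}|^2$,
the two formulas evaluate to
\[
b_\mu = \tfrac{1}{2} \log (1 + a^2), \qquad
\rho_\mu = -\tfrac{1}{2} \log (1 - a^2),
\]
both finite for $|a| < 1$, with $\rho_\mu \to \infty$ as $|a| \to 1$ (the marginally stable case).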
}

\uncover<3->{
Infinities are troublesome and point to a problem with the notion of infinitely
precise observation of continuous-valued variables.
}
\end{iframe}

% Information gained about model parameters (measured as the KL divergence
% between prior and posterior distributions) is equivalent
% to \textbf{Itti and Baldi's `Bayesian surprise'} \cite{IttiBaldi2005}.


\section{Application: Analysis of minimalist music}
\label{s:Experiments}

\begin{iframe}[Material and Methods]

% Returning to our original goal of modelling the perception of temporal structure
% in music, we computed dynamic information measures for
We took two pieces of minimalist
music by Philip Glass, \emph{Two Pages} (1969) and \emph{Gradus} (1968).
Both are monophonic and isochronous, so representable very simply as
a sequence of symbols (notes), one symbol per beat,
yet they remain ecologically valid examples of `real' music.

We use an elaboration of the Markov chain model---not necessarily
a good model \latin{per se}, but that wasn't the point of the experiment.
The Markov chain model was chosen as it is tractable from an information
dynamics point of view while not being completely trivial.
\end{iframe}

\begin{iframe}[Time-varying transition matrix model]
We allow the transition matrix to vary slowly with time to track
changes in the sequence structure.
Hence, the observer's belief state includes a probability
distribution over transition matrices; we choose a product of
Dirichlet distributions:
\[
\textstyle
p(\trans|\param) = \prod_{j=1}^K p_\mathrm{Dir}(\trans_{:j}|\param_{:j}),
\]
where $\trans_{:j}$ is the \nth{j} column of $\trans$ and $\param$ is a
$K \times K$ parameter matrix.
% (Dirichlet, being conjugate to discrete/multinomial distribution,
% makes processing of observations particularly simple.)
% such that $\param_{:j}$ is the
% parameter tuple for the $K$-component Dirichlet distribution $p_\mathrm{Dir}$.
% \begin{equation}
% \textstyle
% p(\trans|\param) = \prod_{j=1}^K p_\mathrm{Dir}(\trans_{:j}|\param_{:j})
% = \prod_{j=1}^K (\prod_{i=1}^K \trans_{ij}^{\param_{ij}-1}) / B(\param_{:j}),
% \end{equation}
% where $\trans_{:j}$ is the \nth{j} column of $\trans$ and $\param$ is an
% $K \times K$ matrix of parameters.

At each time step, the distribution first \emph{spreads} under the mapping
\[
\param_{ij} \mapsto \frac{\beta\param_{ij}}{(\beta + \param_{ij})}
\]
to model the possibility that the transition matrix
has changed ($\beta=2500$ in our experiments). Then it \emph{contracts}
due to the new observation providing fresh evidence about the transition matrix.
%
% Each observed symbol % provides fresh evidence about current transition matrix,
% enables observer to update its belief state.
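
The contraction is a sketch of the usual Dirichlet--multinomial conjugate update (assuming
standard conjugacy): observing a transition from state $j$ to state $i$ increments a single
parameter,
\[
\param_{ij} \mapsto \param_{ij} + 1,
\]
sharpening the belief about the \nth{j} column of $\trans$.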
\end{iframe}


\begin{iframe}[Two Pages\nicedot Results]

% \begin{fig}{twopages}
\begin{tabular}{c@{\hspace{1.5ex}}l}%
% \hspace*{-1.5em}
% \hangbox{\colfig[0.5]{matbase/fig20304}} % 3 plots
% \hangbox{\colfig[0.52]{matbase/fig39528}} % 4 plots with means
% \hangbox{\colfig[0.52]{matbase/fig63538}} % two pages, 5 plots
% \hangbox{\colfig[0.52]{matbase/fig53706}} % two pages, 5 plots
\hangbox{\colfig[0.72]{matbase/fig33309}} % two pages, 5 plots
&
\hangbox{%
\parbox{0.28\linewidth}{
\raggedright
\textbf{Thick lines:} part boundaries as indicated
by Glass; \textbf{grey lines (top four panels):} changes in the melodic `figures';
% of which the piece is constructed.
\textbf{grey lines (bottom panel):}
six most surprising moments chosen by an expert listener.
}
}
\end{tabular}
% \end{fig}
\end{iframe}

\begin{iframe}[Two Pages\nicedot Rule based analysis]
\begin{figure}
\colfig[0.98]{matbase/fig13377}
% \hangbox{\colfig[0.98]{matbase/fig13377}}
\end{figure}
Analysis of \emph{Two Pages} using (top) Cambouropoulos'
Local Boundary Detection Model (LBDM) and
(bottom) Lerdahl and Jackendoff's
grouping preference rule 3a (GPR3a), which is a function of pitch proximity.
Both analyses indicate `boundary strength'.
\end{iframe}

\begin{iframe}[Two Pages\nicedot Discussion]
The correspondence between the information
measures and the structure of the piece is quite close.
Good agreement between the six `most surprising
moments' chosen by the expert listener and the model information signal.

What appears to be an error in the detection of
the major part boundary (between events 5000 and 6000) actually
reflects a known anomaly in the score, where Glass places the boundary several events
before there is any change in the pattern of notes. Alternative analyses of \emph{Two Pages}
place the boundary in agreement with the peak in our surprisingness signal.
\end{iframe}

\comment{
\begin{iframe}[Gradus\nicedot Results]

% \begin{fig}{gradus}
\begin{tabular}{c@{\hspace{1.5ex}}l}
% &
% \hangbox{\colfig[0.4]{matbase/fig81812}}
% \hangbox{\colfig[0.52]{matbase/fig23177}} % two pages, 5 plots
% \hangbox{\colfig[0.495]{matbase/fig50709}} % Fudged segmentation
% \hangbox{\colfig[0.495]{matbase/fig3124}} % Geraint's segmentation
\hangbox{\colfig[0.715]{matbase/fig11808}} % Geraint's segmentation, corrected
&
% \hangbox{\colfig[0.5]{matbase/fig39914}}
\hangbox{%
\parbox{0.28\linewidth}{
\raggedright
\textbf{Thick lines:} part boundaries as indicated
by the composer.
\textbf{Grey lines:} segmentation by expert listener.

Note: traces smoothed with a Gaussian
window about 16 events wide.
}
}
\end{tabular}
% \end{fig}
\end{iframe}

\begin{iframe}[Gradus\nicedot Rule based analysis]
\begin{figure}
\colfig[0.98]{matbase/fig58691}
\end{figure}
Boundary strength analysis of \emph{Gradus} using (top) Cambouropoulos'
\cite{CambouropoulosPhD} Local Boundary Detection Model and
(bottom) Lerdahl and Jackendoff's \cite{LerdahlJackendoff83}
grouping preference rule 3a.
\end{iframe}
}
\begin{iframe}[Gradus\nicedot Metrical analysis]
\begin{figure}
\begin{tabular}{cc}
\colfig[0.40]{matbase/fig56807} & \colfig[0.41]{matbase/fig27144} \\
\colfig[0.40]{matbase/fig87574} & \colfig[0.41]{matbase/fig13651} \\
\hspace*{1ex}\colfig[0.39]{matbase/fig19913} & \hspace*{1ex}\colfig[0.40]{matbase/fig66144}
\end{tabular}
\end{figure}
\end{iframe}

\comment{
\begin{iframe}[Gradus\nicedot Discussion]

\emph{Gradus} is much less systematically structured than \emph{Two Pages}, and
relies more on the conventions of tonal music, which are not represented in the model.

For example, the initial transition matrix is uniform, which does not correctly represent
prior knowledge about tonal music.

Information dynamic analysis does not give such a
clear picture of the structure; but some of the fine structure can be related
to specific events in the music (see Pearce and Wiggins 2006).
% nonetheless, there are some points of correspondence between the analysis and
% segmentation given by Keith Potter.

\end{iframe}
}

\section{Application: Beat tracking and rhythm}

\begin{iframe}[Bayesian beat tracker]
\uncover<1->{
Works by maintaining a probabilistic belief state about the time of the next
beat and the current tempo.

\begin{figure}
\colfig{beat_prior}
\end{figure}
}

\uncover<2->{
Receives categorised drum events (kick or snare) from an audio analysis front-end.
}

\end{iframe}

\begin{iframe}[Information gain in the beat tracker]
\begin{tabular}{ll}
\parbox[t]{0.43\linewidth}{\raggedright
\uncover<1->{
Each event triggers a change in belief state, so we can compute
the information gain about the beat parameters.}\\[1em]

\uncover<2->{
The relationship between IIG and IPI
means we can treat IIG as a proxy for IPI.}
}
&
\hangbox{\colfig[0.55]{beat_info}}
\end{tabular}
\end{iframe}

\begin{iframe}[Analysis of drum patterns]
We analysed 17 recordings of drummers, playing either solo or with a band.
All patterns were in 4/4.
\begin{itemize}
\item
\uncover<1->{
Information tends to arrive at beat times: a consequence of the structure of the model.
}
\item
\uncover<2->{
Lots of information seems to arrive after drum fills and breaks
as the drummer reestablishes the beat.
}
\item
\uncover<3->{
No consistent pattern of information arrival in relation to metrical
structure, so no obvious metrical structure in the micro-timing of events.
However, it is still possible that metrical structure might emerge from predictive
analysis of the drum pattern.
}
\end{itemize}
\end{iframe}

\section{Summary and conclusions}
\label{s:Conclusions}

\begin{iframe}[Summary]

\begin{itemize}
\item Dynamic, observer-centric information theory.
\item Applicable to any dynamic probabilistic model.
\item PIR potentially a measure of complexity.
\item Simple analysis for Markov chains and Gaussian processes.
\item Applications in music analysis and composition.
\item Search for neural correlates is ongoing (that's another talk\ldots).
\end{itemize}
Thanks!
\end{iframe}

\begin{bframe}[Bibliography]
\bibliographystyle{alpha}
{\small \bibliography{all,c4dm,compsci}}
\end{bframe}
\end{document}