\documentclass{beamer}

\usepackage[T1]{fontenc}
\usepackage{microtype}
\usepackage{multimedia}
\usepackage{tikz}
\usetikzlibrary{matrix}
\usetikzlibrary{patterns}
\usetikzlibrary{arrows}
\usetikzlibrary{calc}
\usepackage{tools}
%\usepackage{amsfonts,amssymb}

\tikzset{every picture/.style=semithick}

%%% font options:
% atypewri, frankgth, gillsans, centuryg, futura, eurostil
%\usepackage{fourier} % Maths in serif Utopia
\usepackage[sf]{frankgth}
%\usepackage[sf]{optima}

%%% Monospace font
%\usepackage[scaled=0.88]{ulgothic} % 0.88 % suits narrow faces
\renewcommand{\ttdefault}{plg} % Adobe Letter Gothic - suits light medium width face
%\renewcommand{\ttdefault}{pcr} % Courier - suits wide faces
% remember to match up size and weight of monospace font to main font

\newcommand{\mytt}[1]{{\texttt{\footnotesize\fontseries{bx}\selectfont #1}}}

\DeclareMathAlphabet{\mathcal}{OMS}{cmsy}{m}{n}


%%% Black on white
\definecolor{base}{rgb}{0,0,0}
\definecolor{comp}{named}{green}
\definecolor{paper}{named}{white}

\logo{%
\includegraphics[height=16pt]{qmul-black}\hspace*{45pt}%
\raisebox{1pt}{\includegraphics[height=12pt]{c4dm-black-white}}%
}

%%% Red on black
\comment{
\definecolor{base}{rgb}{1,0,0}
\definecolor{comp}{rgb}{0,0.8,0.2}
\definecolor{paper}{named}{black}

\logo{%
\includegraphics[height=16pt]{qmul-red}\hspace*{45pt}%
\raisebox{1pt}{\includegraphics[height=12pt]{c4dm-red-black}}%
}
}

\useinnertheme{default}%circles
\useoutertheme{default}
\usefonttheme[onlymath]{serif}

\setbeamercolor{normal text}{bg=paper,fg=base!90!-paper}
\setbeamercolor{background}{bg=comp!50!paper,fg=comp}
%\setbeamercolor{structure}{fg=base!75!-paper}
\setbeamercolor{structure}{fg=red!50!base}
\setbeamercolor{palette primary}{bg=yellow!50!paper,fg=yellow}
\setbeamercolor{palette secondary}{bg=orange!50!paper,fg=orange}
\setbeamercolor{palette tertiary}{bg=blue!50!paper,fg=blue}
\setbeamercolor{palette quaternary}{bg=green!50!paper,fg=green}
\setbeamercolor{block body}{bg=base!20!paper}
\setbeamercolor{block title}{bg=base!60!paper,fg=paper}
\setbeamercolor{navigation symbols}{fg=base!90!paper}
\setbeamercolor{separation line}{bg=blue,fg=yellow}
\setbeamercolor{fine separation line}{bg=blue,fg=orange}

% Title page
% \setbeamercolor{title}{bg=base!20!paper}
% \setbeamercolor{subtitle}{bg=base!20!paper}
% \setbeamercolor{title page}{bg=base!40!paper}

% \setbeamercolor{headline}{bg=blue}
% \setbeamercolor{footline}{bg=blue}
% \setbeamercolor{frametitle}{bg=base!30!paper}
% \setbeamercolor{framesubtitle}{bg=base!40!paper}

% \setbeamercolor{section in toc}{bg=base!25!paper,fg=orange}
% \setbeamercolor{section in toc shaded}{bg=base!25!paper,fg=orange!80!paper}
% \setbeamercolor{subsection in toc}{bg=base!25!paper,fg=orange}
% \setbeamercolor{subsection in toc shaded}{bg=yellow!25!paper,fg=orange!80!paper}
% page number in head/foot
% section in head/foot
% section in head/foot shaded


\setbeamerfont{structure}{series=\bfseries}
\setbeamerfont{title}{series=\mdseries,size=\Large}
%\setbeamerfont{title}{series=\ltseries,size=\huge}
\setbeamerfont{date}{size=\footnotesize}%,series=\mdcseries}
\setbeamerfont{institute}{size=\footnotesize}%,series=\mdcseries}
\setbeamerfont{author}{size=\footnotesize,series=\bfseries}
\setbeamercolor{bibliography item}{parent={normal text}}
\setbeamercolor{bibliography entry author}{fg=base}
\setbeamercolor{bibliography entry location}{fg=base!70!paper}

%%% Templates

\setbeamertemplate{bibliography item}[text]
\setbeamertemplate{bibliography entry title}{ }
\setbeamertemplate{bibliography entry location}{ }
\setbeamertemplate{blocks}[rounded][shadow=false]
\setbeamertemplate{items}[circle]
%\setbeamertemplate{bibliography item}[triangle]
% \setbeamertemplate{title page}[default][rounded=true,shadow=false]
% \setbeamertemplate{frametitle}[default][rounded=true,shadow=false]
\setbeamertemplate{sidebar right}{}
\setbeamertemplate{footline}{
\hspace*{0.2cm}
\insertlogo
\hfill
\usebeamertemplate***{navigation symbols}%
\hfill
\makebox[6ex]{\hfill\insertframenumber/\inserttotalframenumber}%
\hspace*{0.2cm}

\vskip 4pt
}

\setbeamertemplate{navigation symbols}
{%
\hbox{%
\hbox{\insertslidenavigationsymbol}
\hbox{\insertframenavigationsymbol}
% \hbox{\insertsubsectionnavigationsymbol}
\hbox{\insertsectionnavigationsymbol}
\hbox{\insertdocnavigationsymbol}
% \hbox{\insertbackfindforwardnavigationsymbol}%
}%
}


\AtBeginSection[]{
\begin{iframe}[Outline]
\tableofcontents[currentsection]
\end{iframe}
}
%\linespread{1.1}

\setlength{\parskip}{0.5em}

\newenvironment{bframe}[1][untitled]{\begin{frame}[allowframebreaks]\frametitle{#1}}{\end{frame}}
\newenvironment{iframe}[1][untitled]{\begin{frame}\frametitle{#1}}{\end{frame}}
\newenvironment{isframe}[1][untitled]{\begin{frame}[fragile=singleslide,environment=isframe]\frametitle{#1}}{\end{frame}}

\renewenvironment{fig}[1]
{%
\begin{figure}
\def\fglbl{f:#1}
\let\ocap=\caption
\renewcommand{\caption}[2][]{\ocap[##1]{\small ##2}}
\centering\small
}{%
\label{\fglbl}
\end{figure}
}

\newcommand{\paragraph}[1]{\textbf{#1}\qquad}
\newcommand{\colfig}[2][1]{\includegraphics[width=#1\linewidth]{figs/#2}}%
\let\citep=\cite
%\newcommand{\dotmath}[2]{\psfrag{#1}[Bc][Bc]{\small $#2$}}

\title{Cognitive Music Modelling:\\An Information Dynamics Approach}
\author{Samer Abdallah, Henrik Ekeus, Peter Foster,\\Andrew Robertson and Mark Plumbley}
\institute{Centre for Digital Music\\Queen Mary, University of London}

\date{\today}
\def\X{\mathcal{X}}
\def\Y{\mathcal{Y}}
\def\Past{\mathrm{Past}}
\def\Future{\mathrm{Future}}
\def\Present{\mathrm{Present}}
\def\param{\theta}
\def\trans{a}
\def\init{\pi^{\trans}}
%\def\entrorate(#1){\mathcal{H}(#1)}
%\def\entrorate(#1){\dot{\mathcal{H}}(#1)}
\def\entrorate{h}
\def\emcmarg(#1){b_#1}
\def\mcmarg{\vec{b}}
\def\domS{\mathcal{S}}
\def\domA{\mathcal{A}}

\def\Lxz(#1,#2){\mathcal{L}(#1|#2)}
\def\LXz(#1){\overline{\mathcal{L}}(#1)}
\def\LxZ(#1){\underline{\mathcal{L}}(#1)}
\def\LXZ{\overline{\underline{\mathcal{L}}}}
\def\Ixz(#1,#2){\mathcal{I}(#1|#2)}
\def\IXz(#1){\overline{\mathcal{I}}(#1)}
\def\IxZ(#1){\underline{\mathcal{I}}(#1)}
\def\IXZ{\overline{\underline{\mathcal{I}}}}

\def\ev(#1=#2){#1\!\!=\!#2}
\def\sev(#1=#2){#1\!=#2}

\def\FE{\mathcal{F}}

\newcommand\past[1]{\overset{\rule{0pt}{0.2em}\smash{\leftarrow}}{#1}}
\newcommand\fut[1]{\overset{\rule{0pt}{0.1em}\smash{\rightarrow}}{#1}}

\def\cn(#1,#2) {\node[circle,draw,inner sep=0.2em] (#1#2) {${#1}_{#2}$};}
\def\dn(#1) {\node[circle,inner sep=0.2em] (#1) {$\cdots$};}
\def\rl(#1,#2) {\draw (#1) -- (#2);}

\definecolor{un0}{rgb}{0.5,0.0,0.0}
\definecolor{un1}{rgb}{0.6,0.15,0.15}
\definecolor{un2}{rgb}{0.7,0.3,0.3}
\definecolor{un3}{rgb}{0.8,0.45,0.45}
\definecolor{un4}{rgb}{0.9,0.6,0.6}
\definecolor{un5}{rgb}{1.0,0.75,0.75}

%\def\blob(#1){\node[circle,draw,fill=#1,inner sep=0.25em]{};}
\def\bl(#1){\draw[circle,fill=#1] (0,0) circle (0.4em);}
\def\noderow(#1,#2,#3,#4,#5,#6){%
\tikz{\matrix[draw,rounded corners,inner sep=0.4em,column sep=2.1em,ampersand replacement=\&]{%
\bl(#1)\&\bl(#2)\&\bl(#3)\&\bl(#4)\&\bl(#5)\&\bl(#6)\\};}}
\begin{document}
\frame{\titlepage}
\section[Outline]{}
\frame{
\frametitle{Outline}
\tableofcontents
}



\section{Expectation and surprise in music}
\label{s:Intro}

\begin{iframe}[`Unfoldingness']
Music is experienced as a
\uncover<2->{phenomenon}
\uncover<3->{that}
\uncover<4->{`unfolds'} \uncover<5->{in}\\
\only<6>{blancmange}%
\only<7>{(just kidding)}%
\uncover<8->{time,}
\uncover<9->{rather than being apprehended as a static object presented in its
entirety.}

\uncover<10->{[This is recognised in computational linguistics, where the phenomenon is known as \emph{incrementality}, \eg in incremental parsing.]}

\uncover<11->{%
Meyer \cite{Meyer67} argued that musical experience depends on
how we change and revise our conceptions \emph{as events happen},
on how expectation and prediction interact with occurrence, and that, to a large
degree, the way to understand the effect of music is to focus on
this `kinetics' of expectation and surprise.%
}
\end{iframe}
\begin{iframe}[Expectation and surprise in music]

Music creates
\emph{expectations} of what is to come next, which may be fulfilled
immediately, after some delay, or not at all.
Suggested by music theorists, \eg
L. B. Meyer \cite{Meyer67} and Narmour \citep{Narmour77}, but also
noted much earlier by Hanslick \cite{Hanslick1854} in the
1850s:
\begin{quote}
\small
`The most important factor in the mental process which accompanies the
act of listening to music, and which converts it to a source of pleasure, is
\ldots
% frequently overlooked. We here refer to
the intellectual satisfaction which the
listener derives from continually following and anticipating the composer's
intentions---now, to see his expectations fulfilled, and now, to find himself
agreeably mistaken. It is a matter of course that this intellectual flux and
reflux, this perpetual giving and receiving takes place unconsciously, and with
the rapidity of lightning-flashes.'
\end{quote}
\end{iframe}

\begin{iframe}[Probabilistic reasoning]
\uncover<1->{%
Making predictions and assessing surprise is
essentially reasoning with degrees of belief and (arguably)
the best way to do this is using Bayesian probability theory \cite{Cox1946,Jaynes27}.%

[NB. this is \textbf{subjective} probability as advocated by \eg De Finetti and Jaynes.]
}

% Thus, we assume that musical schemata are encoded as probabilistic % \citep{Meyer56} models, and
\uncover<2->{%
We suppose that familiarity with different styles of music takes the form
of various probabilistic models, and that these models are adapted through listening.%
}
% various stylistic norms is encoded as
% using models that encode the statistics of music in general, the particular styles
% of music that seem best to fit the piece we happen to be listening to, and the emerging
% structures peculiar to the current piece.

\uncover<3->{%
There is experimental evidence that humans are able to internalise statistical
knowledge about music \citep{SaffranJohnsonAslin1999,EerolaToiviainenKrumhansl2002}, and also
that statistical models are effective for computational analysis of music, \eg \cite{ConklinWitten95,Pearce2005}.%
}

% analysis of music, \eg \cite{ConklinWitten95,PonsfordWigginsMellish1999,Pearce2005}.
% \cite{Ferrand2002}. Dubnov and Assayag PSTs?
\end{iframe}
\begin{iframe}[Music and information theory]
\uncover<1->{
With probabilistic models in hand we can apply quantitative information theory: we can compute entropies,
relative entropies, mutual information, and all that.
}

\uncover<2->{
There has been a lot of interest in the application of information theory to perception, music and aesthetics since the 1950s,
\eg Moles \cite{Moles66}, Meyer \cite{Meyer67}, Cohen \cite{Cohen1962}, Berlyne \cite{Berlyne71}.
(See also Bense, Hiller.)
}

\uncover<3->{
The idea is that subjective qualities and
states like uncertainty, surprise, complexity, tension, and interestingness
are determined by information-theoretic quantities.
}

\uncover<4->{
Berlyne \cite{Berlyne71} called such quantities `collative variables', since they are
to do with patterns of occurrence rather than medium-specific details.
\emph{Information aesthetics}.
}
% Listeners then experience greater or lesser levels of surprise
% in response to departures from these norms.
% By careful manipulation
% of the material, the composer can thus define, and induce within the
% listener, a temporal programme of varying
% levels of uncertainty, ambiguity and surprise.
\end{iframe}

\begin{iframe}[Probabilistic model-based observer hypothesis]
\begin{itemize}
\item<1->
As we listen, we maintain a probabilistic model that enables
us to make predictions. As events unfold, we revise our probabilistic `belief state',
including predictions about the future.
\item<2->
Probability distributions and changes in distributions are characterised in terms
of information-theoretic measures such as entropy and relative entropy (KL divergence).
\item<3->
The dynamic evolution of these information measures captures significant structure,
\eg events that are surprising, informative, explanatory \etc
\end{itemize}

\end{iframe}
\begin{iframe}[Features of information dynamics]
\uncover<1->{
\textbf{Abstraction}: sensitive mainly to \emph{patterns} of occurrence,
rather than details of which specific things occur or the sensory medium.
% it operates at a level of abstraction removed from the details of the sensory experience and
% the medium through which it was received, suggesting that the same
% approach could, in principle, be used to analyse and compare information
% flow in different temporal media regardless of whether they are auditory, visual or otherwise.
}

\uncover<2->{
\textbf{Generality}: applicable in principle to any probabilistic model, in particular,
models with time-dependent latent variables such as HMMs.
Many important musical concepts like key, harmony, and beat are essentially `hidden variables'.
}

\uncover<3->{
\textbf{Richness}: when applied to models with latent variables, it can result in a many-layered
analysis, capturing information flow about harmony, tempo, \etc
}

\uncover<4->{
\textbf{Subjectivity}: all probabilities are \emph{subjective} probabilities relative to the \emph{observer's}
model, which can depend on the observer's capabilities and prior experience.
}
\end{iframe}

\section{Surprise, entropy and information in random sequences}
\label{s:InfoInRandomProcs}
\begin{iframe}[Information theory primer\nicedot Entropy]
Let $X$ be a discrete-valued random variable (random in the sense of \emph{subjective} probability).
Entropy is a measure of \emph{uncertainty}. If the observer expects to see $x$ with probability $p(x)$,
then
\begin{align*}
H(X) &= \sum_{x\in\X} - p(x) \log p(x) \\
&= \expect{[-\log p(X)]}.
\end{align*}
If we regard $-\log p(x)$ as the `surprisingness' of $x$, then the entropy is the `expected surprisingness':
high for spread-out distributions and low for concentrated ones.
\end{iframe}
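
% A minimal numerical sketch of the quantities on this slide (not part of the
% original deck; the 4-symbol distribution below is invented for illustration):
%
%   import numpy as np
%
%   p = np.array([0.5, 0.25, 0.125, 0.125])   # p(x) over a 4-symbol alphabet
%   surprisal = -np.log2(p)                   # -log p(x) in bits: [1, 2, 3, 3]
%   H = np.sum(p * surprisal)                 # H(X) = E[-log p(X)] = 1.75 bits
%
% A spread-out (uniform) distribution gives the maximum log2(4) = 2 bits, while a
% concentrated one such as [1, 0, 0, 0] gives 0 bits.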

\begin{iframe}[Information theory primer\nicedot Relative entropy]
Relative entropy or Kullback-Leibler (KL) divergence quantifies the difference between
probability distributions.
If the observer receives data $\mathcal{D}$, the divergence between the (subjective) prior and
posterior distributions is the
amount of information in $\mathcal{D}$ \emph{about} $X$ for this observer:
\[
I(\mathcal{D}\to X) =
D(p_{X|\mathcal{D}} || p_X)
= \sum_{x\in\X} p(x|\mathcal{D}) \log \frac{p(x|\mathcal{D})}{p(x)}.
\]
If observing $\mathcal{D}$ causes a large change in belief about $X$, then $\mathcal{D}$
contained a lot of information about $X$.

Like Lindley's (1956) information (thanks Lars!).
\end{iframe}
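
% A small sketch of I(D -> X) as a KL divergence between posterior and prior
% (illustration only; the prior and posterior below are invented, not taken from
% any model in this talk):
%
%   import numpy as np
%
%   def kl(q, p):                                     # D(q || p) in bits
%       return np.sum(q * np.log2(q / p))
%
%   prior     = np.array([0.25, 0.25, 0.25, 0.25])    # beliefs about X before seeing D
%   posterior = np.array([0.70, 0.10, 0.10, 0.10])    # beliefs about X after seeing D
%   info_in_D = kl(posterior, prior)                  # ~0.64 bits of information about X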

\begin{iframe}[Information theory primer\nicedot Mutual information]
Mutual information (MI) between $X_1$ and $X_2$ is the expected amount of information about
$X_2$ in an observation of $X_1$. It can be written in several ways:
\begin{align*}
I(X_1;X_2) &= \sum_{x_1,x_2} p(x_1,x_2) \log \frac{p(x_1,x_2)}{p(x_1)p(x_2)} \\
&= H(X_1) + H(X_2) - H(X_1,X_2) \\
&= H(X_2) - H(X_2|X_1).
\end{align*}
(1) Expected information about $X_2$ in an observation of $X_1$;\\
(2) Expected reduction in uncertainty about $X_2$ after observing $X_1$;\\
(3) Symmetric: $I(X_1;X_2) = I(X_2;X_1)$.
\end{iframe}
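
% Sketch checking that the expressions above agree on a toy joint distribution
% (the joint p(x1, x2) below is invented for illustration):
%
%   import numpy as np
%
%   def H(p):                                 # entropy in bits of a (possibly joint) pmf
%       p = p[p > 0]
%       return -np.sum(p * np.log2(p))
%
%   pj = np.array([[0.4, 0.1],                # p(x1, x2); rows index x1, columns x2
%                  [0.1, 0.4]])
%   p1, p2 = pj.sum(axis=1), pj.sum(axis=0)
%
%   mi_def = np.sum(pj * np.log2(pj / np.outer(p1, p2)))   # direct definition
%   mi_ent = H(p1) + H(p2) - H(pj.ravel())                 # H(X1)+H(X2)-H(X1,X2)
%   # both give I(X1;X2) ~ 0.278 bits; swapping the roles of X1 and X2 changes nothing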

\begin{iframe}[Information theory primer\nicedot Conditional MI]
Conditional MI is the information in one variable about another given observations of some third variable.
It is formulated analogously, by adding conditioning variables to the entropies:
\begin{align*}
I(X_1;X_2|X_3) &= H(X_1|X_3) - H(X_1|X_2,X_3).
\end{align*}
This makes explicit the dependence of information assessment on background knowledge,
represented by the conditioning variables.
\end{iframe}


\begin{isframe}[Information theory primer\nicedot I-Diagrams]
\newcommand\rad{2.2em}%
\newcommand\circo{circle (3.4em)}%
\newcommand\labrad{4.3em}
\newcommand\bound{(-6em,-5em) rectangle (6em,6em)}
\newcommand\clipin[1]{\clip (#1) \circo;}%
\newcommand\clipout[1]{\clip \bound (#1) \circo;}%
\newcommand\cliptwo[3]{%
\begin{scope}
\clipin{#1};
\clipin{#2};
\clipout{#3};
\fill[black!30] \bound;
\end{scope}
}%
\newcommand\clipone[3]{%
\begin{scope}
\clipin{#1};
\clipout{#2};
\clipout{#3};
\fill[black!15] \bound;
\end{scope}
}%
Information diagrams are a Venn-diagram-like representation of the entropies and mutual
informations for a set of random variables.
\begin{center}
\begin{tabular}{c@{\ }c}
\scalebox{0.8}{%
\begin{tikzpicture}[baseline=0pt]
\coordinate (p1) at (90:\rad);
\coordinate (p2) at (210:\rad);
\coordinate (p3) at (-30:\rad);
\clipone{p1}{p2}{p3};
\clipone{p2}{p3}{p1};
\clipone{p3}{p1}{p2};
\cliptwo{p1}{p2}{p3};
\cliptwo{p2}{p3}{p1};
\cliptwo{p3}{p1}{p2};
\begin{scope}
\clip (p1) \circo;
\clip (p2) \circo;
\clip (p3) \circo;
\fill[black!45] \bound;
\end{scope}
\draw (p1) \circo;
\draw (p2) \circo;
\draw (p3) \circo;
\path
(barycentric cs:p3=1,p1=-0.2,p2=-0.1) +(0ex,0) node {$I_{3|12}$}
(barycentric cs:p1=1,p2=-0.2,p3=-0.1) +(0ex,0) node {$I_{1|23}$}
(barycentric cs:p2=1,p3=-0.2,p1=-0.1) +(0ex,0) node {$I_{2|13}$}
(barycentric cs:p3=1,p2=1,p1=-0.55) +(0ex,0) node {$I_{23|1}$}
(barycentric cs:p1=1,p3=1,p2=-0.55) +(0ex,0) node {$I_{13|2}$}
(barycentric cs:p2=1,p1=1,p3=-0.55) +(0ex,0) node {$I_{12|3}$}
(barycentric cs:p3=1,p2=1,p1=1) node {$I_{123}$}
;
\path
(p1) +(140:\labrad) node {$X_1$}
(p2) +(-140:\labrad) node {$X_2$}
(p3) +(-40:\labrad) node {$X_3$};
\end{tikzpicture}%
}
&
\parbox{0.5\linewidth}{
\small
\begin{align*}
I_{1|23} &= H(X_1|X_2,X_3) \\
I_{13|2} &= I(X_1;X_3|X_2) \\
I_{1|23} + I_{13|2} &= H(X_1|X_2) \\
I_{12|3} + I_{123} &= I(X_1;X_2)
\end{align*}
}
\end{tabular}
\end{center}
The areas of
the three circles represent $H(X_1)$, $H(X_2)$ and $H(X_3)$ respectively.
The total shaded area is the joint entropy $H(X_1,X_2,X_3)$.
Each undivided region is an \emph{atom} of the I-diagram.
\end{isframe}



\begin{isframe}[Information theory in sequences]
\def\bx{1.6em}%
\def\cn(#1,#2) {\node[circle,draw,fill=white,inner sep=0.2em] at(#1) {$#2$};}%
\def\dn(#1){\node[circle,inner sep=0.2em] at(#1) {$\cdots$};}%
\def\en(#1){coordinate(#1)}%
\def\tb{++(3.8em,0)}%
\def\lb(#1)#2{\path (#1)+(0,\bx) node[anchor=south] {#2};}
\def\nr(#1,#2,#3){\draw[rounded corners,fill=#3] (#1) rectangle (#2);}%

Consider an observer receiving elements of a random sequence
$(\ldots, X_{-1}, X_0, X_1, X_2, \ldots)$, so that at any time $t$ there is
a `present' $X_t$, an observed past $\past{X}_t$, and an unobserved future
$\fut{X}_t$. For example, at time $t=3$:

\begin{figure}
\begin{tikzpicture}%[baseline=-1em]
\path (0,0) \en(X0) \tb \en(X1) \tb \en(X2) \tb \en(X3) \tb \en(X4) \tb \en(X5) \tb \en(X6);
\path (X0)+(-\bx,-\bx) \en(p1) (X2)+(\bx,\bx) \en(p2)
(X3)+(-\bx,-\bx) \en(p3) (X3)+(\bx,\bx) \en(p4)
(X4)+(-\bx,-\bx) \en(p5) (X6)+(\bx,\bx) \en(p6);
\nr(p1,p2,un3) \nr(p3,p4,un4) \nr(p5,p6,un5)
\dn(X0) \cn(X1,X_1) \cn(X2,X_2) \cn(X3,X_3) \cn(X4,X_4) \cn(X5,X_5) \dn(X6)
\lb(X1){Past: $\past{X}_3$}
\lb(X5){Future: $\fut{X}_3$}
\lb(X3){Present}
\end{tikzpicture}%}%
\end{figure}
Consider how the observer's belief state evolves when, having observed up to
$X_2$, it learns the value of $X_3$.
\end{isframe}

\begin{iframe}[`Surprise' based quantities]
To obtain the first set of measures, we ignore the future $\fut{X}_t$
and consider the probability distribution for $X_t$ given the
observed past $\past{X}_t=\past{x}_t$.

\begin{enumerate}
\item<1->
\textbf{Surprisingness}: negative log-probability
$\ell_t = -\log p(x_t|\past{x}_t)$.

\item<2->
Expected surprisingness given the context $\past{X}_t=\past{x}_t$ is the entropy of the predictive distribution,
$H(X_t|\ev(\past{X}_t=\past{x}_t))$: the uncertainty about $X_t$ before the observation is made.

\item<3->
Its expectation over all possible realisations of the process is the conditional entropy
$H(X_t|\past{X}_t)$ according to the observer's model. For a stationary process, this is the
\emph{entropy rate} $h_\mu$.
\end{enumerate}
\end{iframe}

\begin{iframe}[Predictive information]
The second set of measures is based on the amount of information the observation $\ev(X_t=x_t)$
carries \emph{about} the unobserved future $\fut{X}_t$, \emph{given} that we already
know the past $\ev(\past{X}_t=\past{x}_t)$:
\begin{equation*}
\mathcal{I}_t = I(\ev(X_t=x_t)\to\fut{X}_t|\ev(\past{X}_t=\past{x}_t)).
\end{equation*}
This is the KL divergence between beliefs about the future $\fut{X}_t$ prior and posterior
to the observation $\ev(X_t=x_t)$.
Hence, for continuous-valued variables, it is invariant to invertible
transformations of the observation spaces.
\end{iframe}

\begin{iframe}[Predictive information based quantities]
\begin{enumerate}
\item<1->
\emph{Instantaneous predictive information} (IPI) is just $\mathcal{I}_t$.

% Expectations over $X|\ev(Z=z)$, $Z|\ev(X=x)$, and $(X,Z)$ give 3 more information measures:
\item<2->
The expectation of $\mathcal{I}_t$ before the observation at time $t$ is
$I(X_t;\fut{X}_t | \ev(\past{X}_t=\past{x}_t))$: mutual information conditioned on the
observed past. This is the amount of new information about the future expected from the next observation.
Useful for directing attention towards the next event even before it happens?

% This is different from Itti and Baldi's proposal that Bayesian
% \emph{surprise} attracts attention \cite{IttiBaldi2005}, as it is a mechanism which can
% operate \emph{before} the surprise occurs.


\item<3->
The expectation over all possible realisations is the conditional mutual information
$I(X_t;\fut{X}_t|\past{X}_t)$. For a stationary process, this is the global
\emph{predictive information rate} (PIR), the average rate at which new information arrives about
the future. In terms of conditional entropies, it has two forms:
$H(\fut{X}_t|\past{X}_t) - H(\fut{X}_t|X_t,\past{X}_t)$ or
$H(X_t|\past{X}_t) - H(X_t|\fut{X}_t,\past{X}_t)$.
\end{enumerate}

\end{iframe}

\begin{iframe}[Global measures for stationary processes]
For a stationary random process model, the average levels of surprise and information
are captured by the time-shift invariant process information measures:
\begin{align*}
\text{entropy rate} &: & h_\mu &= H(X_t | \past{X}_t) \\
\text{multi-information rate} &: & \rho_\mu &= I(\past{X}_t;X_t) = H(X_t) - h_\mu \\
\text{residual entropy rate} &: & r_\mu &= H(X_t | \past{X}_t, \fut{X}_t) \\
\text{predictive information rate} &: & b_\mu &= I(X_t;\fut{X}_t|\past{X}_t) = h_\mu - r_\mu
\end{align*}
Residual entropy is also known as \emph{erasure entropy} \cite{VerduWeissman2006}.
\end{iframe}

\begin{isframe}[Process I-diagrams]
% \newcommand\subfig[2]{\shortstack{#2\\[0.75em]#1}}
\newcommand\subfig[2]{#2}
\newcommand\rad{1.75em}%
\newcommand\ovoid[1]{%
++(-#1,\rad)
-- ++(2 * #1,0em) arc (90:-90:\rad)
-- ++(-2 * #1,0em) arc (270:90:\rad)
}%
\newcommand\axis{2.75em}%
\newcommand\olap{0.85em}%
\newcommand\offs{3.6em}
\newcommand\longblob{\ovoid{\axis}}
\newcommand\shortblob{\ovoid{1.75em}}
\begin{figure}
\begin{tikzpicture}%[baseline=-1em]
\newcommand\rc{\rad}
\newcommand\throw{2.5em}
\coordinate (p1) at (180:1.5em);
\coordinate (p2) at (0:0.3em);
\newcommand\bound{(-7em,-2.6em) rectangle (7em,3.0em)}
\newcommand\present{(p2) circle (\rc)}
\newcommand\thepast{(p1) ++(-\throw,0) \ovoid{\throw}}
\newcommand\fillclipped[2]{%
\begin{scope}[even odd rule]
\foreach \thing in {#2} {\clip \thing;}
\fill[black!#1] \bound;
\end{scope}%
}%
\fillclipped{30}{\present,\bound \thepast}
\fillclipped{15}{\present,\bound \thepast}
\fillclipped{45}{\present,\thepast}
\draw \thepast;
\draw \present;
\node at (barycentric cs:p2=1,p1=-0.3) {$h_\mu$};
\node at (barycentric cs:p2=1,p1=1) [shape=rectangle,fill=black!45,inner sep=1pt]{$\rho_\mu$};
\path (p2) +(90:3em) node {$X_0$};
\path (p1) +(-3em,0em) node {\shortstack{infinite\\past}};
\path (p1) +(-4em,\rad) node [anchor=south] {$\ldots,X_{-1}$};
\end{tikzpicture}%
\\[0.25em]
\begin{tikzpicture}%[baseline=-1em]
\newcommand\rc{2.2em}
\newcommand\throw{2.5em}
\coordinate (p1) at (210:1.5em);
\coordinate (p2) at (90:0.8em);
\coordinate (p3) at (-30:1.5em);
\newcommand\bound{(-7em,-2.6em) rectangle (7em,3.0em)}
\newcommand\present{(p2) circle (\rc)}
\newcommand\thepast{(p1) ++(-\throw,0) \ovoid{\throw}}
\newcommand\future{(p3) ++(\throw,0) \ovoid{\throw}}
\newcommand\fillclipped[2]{%
\begin{scope}[even odd rule]
\foreach \thing in {#2} {\clip \thing;}
\fill[black!#1] \bound;
\end{scope}%
}%
% \fillclipped{80}{\future,\thepast}
\fillclipped{30}{\present,\future,\bound \thepast}
\fillclipped{15}{\present,\bound \future,\bound \thepast}
\draw \future;
\fillclipped{45}{\present,\thepast}
\draw \thepast;
\draw \present;
\node at (barycentric cs:p2=0.9,p1=-0.17,p3=-0.17) {$r_\mu$};
\node at (barycentric cs:p1=-0.5,p2=1.0,p3=1) {$b_\mu$};
\node at (barycentric cs:p3=0,p2=1,p1=1.2) [shape=rectangle,fill=black!45,inner sep=1pt]{$\rho_\mu$};
\path (p2) +(140:3.2em) node {$X_0$};
% \node at (barycentric cs:p3=0,p2=1,p1=1) {$\rho_\mu$};
\path (p3) +(3em,0em) node {\shortstack{infinite\\future}};
\path (p1) +(-3em,0em) node {\shortstack{infinite\\past}};
\path (p1) +(-4em,\rad) node [anchor=south] {$\ldots,X_{-1}$};
\path (p3) +(4em,\rad) node [anchor=south] {$X_1,\ldots$};
\end{tikzpicture}%
% \\[0.25em]
% The small dark
% region below $X_0$ is $\sigma_\mu$ and the excess entropy
% is $E = \rho_\mu + \sigma_\mu$.
\end{figure}
Marginal entropy of `present' $X_0$ is $H(X_0)=\rho_\mu+r_\mu+b_\mu$.\\
Entropy rate is $h_\mu = r_\mu+b_\mu$.
\end{isframe}

\section{Markov chains}
\label{s:InfoInMC}


\begin{iframe}[Markov chains\nicedot Definitions]

% Now we'll look at information dynamics in one of the simplest possible models, a Markov chain.
% To illustrate the how the measures defined in \secrf{InfoInRandomProcs} can be computed
% in practice, we will consider one of the simplest random processes, a
% first order Markov chain.
% In this case, the dynamic information measures can be computed in closed-form.
%

Let $X$ be a Markov chain with state space
$\{1, \ldots, K\}$, \ie the $X_t$ take values from $1$ to $K$.
\begin{center}
\begin{tikzpicture}[->]
\matrix[column sep=2em,ampersand replacement=\&]{
\cn(X,1) \& \cn(X,2) \& \cn(X,3) \& \cn(X,4) \& \dn(XT) \\};
\rl(X1,X2) \rl(X2,X3) \rl(X3,X4) \rl(X4,XT)
\end{tikzpicture}
\end{center}
% For the sake of brevity let us assume that $\domA$ is the set of integers from 1 to $K$.
It is parameterised by a transition matrix $\trans \in \reals^{K\times K}$,
% encoding the distribution of any element of the sequence given previous one,
\ie $p(\ev(X_{t+1}=i)|\ev(X_t=j))=\trans_{ij}$.
Assume irreducibility, ergodicity \etc to ensure uniqueness of the
stationary distribution $\pi$ such that
$p(\ev(X_t=i))=\init_i$, independent of $t$. The entropy rate as a function of
$a$ is
% $\entrorate:\reals^{K\times K} \to \reals$:
\[
\entrorate(\trans) = \sum_{j=1}^K \init_j \sum_{i=1}^K -\trans_{ij} \log \trans_{ij}.
\]
\end{iframe}
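
% Numerical sketch of the entropy-rate formula above (the 3-state transition matrix
% is invented for illustration). Columns are `from' states, so that
% a[i, j] = p(X_{t+1} = i | X_t = j) and each column sums to 1:
%
%   import numpy as np
%
%   a = np.array([[0.8, 0.1, 0.2],
%                 [0.1, 0.8, 0.2],
%                 [0.1, 0.1, 0.6]])
%
%   evals, evecs = np.linalg.eig(a)              # stationary distribution: a @ pi = pi
%   pi = np.real(evecs[:, np.argmax(np.real(evals))])
%   pi = pi / pi.sum()
%
%   def h(M):                                    # entropy rate of a column-stochastic M
%       return np.sum(pi * np.sum(-M * np.log2(M + 1e-300), axis=0))
%
%   entropy_rate = h(a)                          # bits per symbol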

\begin{iframe}[Markov chains\nicedot PIR]
The predictive information rate for first-order chains comes out in terms of the entropy rate
function as
\[
b_\mu = h(a^2) - h(a),
\]
where $a^2$ is the \emph{two-step} transition matrix.

\uncover<2->{
This can be generalised to higher-order transition matrices:
\[
b_\mu = h(\hat{a}^{N+1}) - Nh(\hat{a}),
\]
where $N$ is the order of the chain and $\hat{a}$ is a sparse
$K^N\times K^N$ transition matrix over the product state space of $N$
consecutive observations (step size 1).
}
\end{iframe}
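
% Continuing the sketch from the previous frame (same invented 3-state matrix a and
% the same function h; note that a and a @ a share the stationary distribution pi):
%
%   b_mu = h(a @ a) - h(a)                       # PIR in bits per symbol
%
% Both terms vanish for a deterministic permutation matrix such as
% np.eye(3)[:, [1, 2, 0]], and they are equal for the uniform matrix
% np.full((3, 3), 1/3), so b_mu is zero at both extremes.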

\begin{iframe}[Entropy rate and PIR in Markov chains]

\begin{fig}{artseq}
\hangbox{\colfig[0.40]{matbase/fig8515}}%
\quad
\hangbox{%
\begin{tabular}{cc}%
\colfig[0.18]{matbase/fig1356} &
\colfig[0.18]{matbase/fig45647} \\
\colfig[0.18]{matbase/fig49938} &
\colfig[0.18]{matbase/fig23355}%
\end{tabular}%
}%
% \end{hanging}\\
\end{fig}
For a given $K$, the entropy rate varies between 0 (deterministic sequence)
and $\log K$ when $\trans_{ij}=1/K$ for all $i,j$.
The space of transition matrices was explored by generating
them at random and plotting entropy rate vs PIR. (Note the inverted
`U' relationship.) %Transmat (d) is almost uniform.
\end{iframe}

\begin{iframe}[Samples from processes with different PIR]
\begin{figure}
\colfig[0.75]{matbase/fig847}\\
\colfig[0.75]{matbase/fig61989}\\
\colfig[0.75]{matbase/fig43415}\\
\colfig[0.75]{matbase/fig50385}
\end{figure}
Sequence (a) is a repetition
of state 4 (see transition matrix (a) on the previous slide).
System (b) has the highest PIR.
\end{iframe}

% \begin{tabular}{rl}
% (a) & \raisebox{-1em}{\colfig[0.58]{matbase/fig9048}}\\[1em]
% (b) & \raisebox{-1em}{\colfig[0.58]{matbase/fig58845}}\\[1em]
% (c) & \raisebox{-1em}{\colfig[0.58]{matbase/fig45019}}\\[1em]
% (d) & \raisebox{-1em}{\colfig[0.58]{matbase/fig1511}}
% \end{tabular}
\section{Application: The Melody Triangle}
\begin{iframe}[Complexity and interestingness: the Wundt Curve]
\label{s:Wundt}
Studies looking into the relationship between stochastic complexity
(usually measured as entropy or entropy rate) and aesthetic value reveal
an inverted `U' shaped curve \citep{Berlyne71} (also known as the Wundt curve \cite{Wundt1897}).
Repeated exposure tends to move stimuli leftwards.

\hangbox{%
\only<1>{\colfig[0.5]{wundt}}%
\only<2>{\colfig[0.5]{wundt2}}%
}\hfill
\hangbox{\parbox{0.43\linewidth}{\raggedright
%Too deterministic $\rightarrow$ predictable, boring like a monotone;\\
%Too random $\rightarrow$ are boring like white noise: unstructured,
%featureless, uniform.
Explanations for this usually appeal to a need for a `balance'
between order and chaos, unity and diversity, and so on, in a generally
imprecise way.}}


% Hence, a sequence can be uninteresting in two opposite ways: by
% being utterly predictable \emph{or} by being utterly
% unpredictable.
% Meyer \cite{Meyer2004} suggests something similar:
% hints at the same thing while discussing
% the relation between the rate of information flow and aesthetic experience,
% suggesting that
%% `unless there is some degree of order, \ldots
%% there is nothing to be uncertain \emph{about} \ldots
% `If the amount of information [by which he means entropy and surprisingness]
% is inordinately increased, the result is a kind of cognitive white noise.'

\end{iframe}

\begin{iframe}[PIR as a measure of cognitive activity]

The predictive information rate incorporates a similar balance automatically:
it is maximal for sequences which are neither deterministic nor
totally uncorrelated across time.

\vspace{1em}
\begin{tabular}{rr}%
\raisebox{0.5em}{too predictable:} &
\only<1>{\noderow(black,un0,un0,un0,un1,un1)}%
\only<2>{\noderow(black,black,un0,un0,un0,un1)}%
\only<3>{\noderow(black,black,black,un0,un0,un0)}%
\only<4>{\noderow(black,black,black,black,un0,un0)}%
\\[1.2em]
\raisebox{0.5em}{intermediate:} &
\only<1>{\noderow(black,un1,un2,un3,un4,un5)}%
\only<2>{\noderow(black,black,un1,un2,un3,un4)}%
\only<3>{\noderow(black,black,black,un1,un2,un3)}%
\only<4>{\noderow(black,black,black,black,un1,un2)}%
\\[1.2em]
\raisebox{0.5em}{too random:} &
\only<1>{\noderow(black,un5,un5,un5,un5,un5)}%
\only<2>{\noderow(black,black,un5,un5,un5,un5)}%
\only<3>{\noderow(black,black,black,un5,un5,un5)}%
\only<4>{\noderow(black,black,black,black,un5,un5)}%
\end{tabular}
\vspace{1em}

(Black: \emph{observed}; red: \emph{unobserved}; paler: \emph{greater uncertainty}.)
Our interpretation:
% when each event appears to carry no new information about the unknown future,
% it is `meaningless' and not worth attending to.
things are `interesting', or at least `salient', when each new part supplies new information about parts to come.

% Quantitative information dynamics will enable us to test this experimentally with human
% subjects.
\end{iframe}
\begin{iframe}[The Melody Triangle\nicedot Information space]
\begin{figure}
\colfig[0.75]{mtriscat}
\end{figure}
Population of transition matrices in the 3D space of $h_\mu$, $\rho_\mu$ and $b_\mu$.
% Concentrations of points along redundancy axis correspond to roughly periodic patterns.
The colour of each point
represents PIR.
%---highest values found at intermediate entropy and redundancy.
The shape is mostly (not completely) hollow inside, forming roughly
a curved triangular sheet.
\end{iframe}

\begin{iframe}[The Melody Triangle\nicedot User interface]
\begin{figure}
\colfig[0.55]{TheTriangle.pdf}
\end{figure}
The interface allows the user to place tokens in the triangle
to trigger sonification of a Markov chain with the corresponding information
`coordinate'.
\end{iframe}

\begin{iframe}[Subjective information]
So far we've assumed that the sequence is actually sampled
from a stationary Markov chain with a transition matrix known
to the observer.
This means time averages of IPI and surprise should equal their
expectations.

\uncover<2->{
What if the sequence is sampled from some other Markov chain,
or is produced by some unknown process?
}

\begin{itemize}
\item<3->
In general, it may be impossible to identify any `true' model. There
are no `objective' probabilities; only subjective ones, as
argued by de Finetti \cite{deFinetti}.


\item<4->
If the sequence \emph{is} sampled from some Markov chain, we can
compute time averages of the observer's subjective surprise
and PI, and also track what happens if the observer gradually learns
the transition matrix from the data.
\end{itemize}
\end{iframe}


\begin{iframe}[Effect of learning on information dynamics]
\begin{figure}
% \colfig{matbase/fig42687} % too small text
% \colfig{matbase/fig60379} % 9*19 too tall
% \colfig{matbase/fig52515} % 9*20 ok, perhaps text still too small
\colfig[0.9]{matbase/fig30461} % 8*19 ok
% \colfig{matbase/fig66022} % 8.5*19 ok
\end{figure}
% Upper row shows actual stochastic learning,
% lower shows the idealised deterministic learning.
\textbf{(a/b/e/f)}: multiple runs starting from the same
initial condition but using different generative transition matrices.
\textbf{(c/d/g/h)}: multiple runs starting from different
initial conditions and converging on transition matrices
with (c/g) high and (d/h) low PIR.
\end{iframe}


\section{More process models}
\begin{iframe}[Exchangeable sequences and parametric models]
De Finetti's theorem says that an exchangeable random process can be represented
as a sequence of variables which are iid \emph{given} some hidden probability
distribution, which we can think of as a parameterised model:
\begin{tabular}{lp{0.45\linewidth}}
\hangbox{\begin{tikzpicture}
[>=stealth',var/.style={circle,draw,inner sep=1pt,text height=10pt,text depth=4pt}]
\matrix[ampersand replacement=\&,matrix of math nodes,row sep=2em,column sep=1.8em,minimum size=17pt] {
\& |(theta) [var]| \Theta \\
|(x1) [var]| X_1 \& |(x2) [var]| X_2 \& |(x3) [var]| X_3 \&
|(etc) [outer sep=2pt]| \dots \\
};
\foreach \n in {x1,x2,x3,etc} \draw[->] (theta)--(\n);
\end{tikzpicture}}
&
\raggedright
\uncover<2->{The observer's belief state at time $t$ includes a probability distribution
over the parameters, $p(\ev(\Theta=\theta)|\ev(\past{X}_t=\past{x}_t))$.}
\end{tabular}\\[1em]
\uncover<3->{
Each observation causes a revision of the belief state
and hence supplies information
$
I(\ev(X_t=x_t)\to\Theta|\ev(\past{X}_t=\past{x}_t))
% = D( p_{\Theta|\ev(X_t=x_t),\ev(\past{X}_t=\past{x}_t)} || p_{\Theta|\ev(\past{X}_t=\past{x}_t)} ).
$ about $\Theta$.
In previous work we called this the `model information rate'.
}
\uncover<4->{(Same as Haussler and Opper's \cite{HausslerOpper1995} IIG or
Itti and Baldi's \cite{IttiBaldi2005} Bayesian surprise.)}
\end{iframe}
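
% Minimal sketch of this `model information' / Bayesian-surprise quantity for the
% simplest exchangeable case, a Beta-Bernoulli observer (this particular model and
% the numbers are my own illustration, not from the talk):
%
%   import numpy as np
%   from scipy.stats import beta
%   from scipy.integrate import quad
%
%   a0, b0 = 1.0, 1.0                        # prior Beta(a0, b0) over Theta
%   x = 1                                    # next observed symbol (0 or 1)
%   a1, b1 = a0 + x, b0 + (1 - x)            # conjugate posterior update
%
%   def kl_beta(a1, b1, a0, b0):             # D(posterior || prior) by quadrature, in bits
%       f = lambda th: beta.pdf(th, a1, b1) * (beta.logpdf(th, a1, b1)
%                                              - beta.logpdf(th, a0, b0))
%       return quad(f, 0, 1)[0] / np.log(2)
%
%   info_about_theta = kl_beta(a1, b1, a0, b0)   # ~0.28 bits from this first observation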

\def\circ{circle (9)}%
\def\bs(#1,#2,#3){(barycentric cs:p1=#1,p2=#2,p3=#3)}%
\begin{iframe}[IIG equals IPI in (some) XRPs]
\begin{tabular}{@{}lc}
\parbox[c]{0.5\linewidth}{\raggedright
Mild assumptions yield a relationship between IIG (instantaneous information gain) and IPI.
(Everything here is implicitly conditioned on $\past{X}_t$.)}
&
\pgfsetxvec{\pgfpoint{1mm}{0mm}}%
\pgfsetyvec{\pgfpoint{0mm}{1mm}}%
\begin{tikzpicture}[baseline=0pt]
\coordinate (p1) at (90:6);
\coordinate (p2) at (210:6);
\coordinate (p3) at (330:6);
\only<4->{%
\begin{scope}
\foreach \p in {p1,p2,p3} \clip (\p) \circ;
\fill[lightgray] (-10,-10) rectangle (10,10);
\end{scope}
\path (0,0) node {$\mathcal{I}_t$};}
\foreach \p in {p1,p2,p3} \draw (\p) \circ;
\path (p2) +(210:13) node {$X_t$}
(p3) +(330:13) node {$\fut{X}_t$}
(p1) +(140:12) node {$\Theta$};
\only<2->{\path \bs(-0.25,0.5,0.5) node {$0$};}
\only<3->{\path \bs(0.5,0.5,-0.25) node {$0$};}
\end{tikzpicture}
\end{tabular}\\
\begin{enumerate}
\uncover<2->{\item $X_t \perp \fut{X}_t | \Theta$: observations iid given $\Theta$ for XRPs;}
\uncover<3->{\item $\Theta \perp X_t | \fut{X}_t$:
% $I(X_t;\fut{X}_t|\Theta_t)=0$ due to the conditional independence of
% observables given the parameters $\Theta_t$, and
% $I(\Theta_t;X_t|\fut{X}_t)=0$
the assumption that $X_t$ adds no new information about $\Theta$
given the infinitely long sequence $\fut{X}_t =X_{t+1:\infty}$.}
\end{enumerate}
\uncover<4->{Hence, $I(X_t;\Theta_t|\past{X}_t)=I(X_t;\fut{X}_t|\past{X}_t) = \mathcal{I}_t$.\\}
\uncover<5->{We can drop assumption 1 and still get $I(X_t;\Theta_t|\past{X}_t)$ as an additive component (lower bound) of $\mathcal{I}_t$.}
\end{iframe}

\def\fid#1{#1}
\def\specint#1{\frac{1}{2\pi}\int_{-\pi}^\pi #1{S(\omega)} \dd \omega}
\begin{iframe}[Discrete-time Gaussian processes]
Information-theoretic quantities used earlier have analogues for continuous-valued
random variables. For stationary Gaussian processes, we can obtain results in
terms of the power spectral density $S(\omega)$ (which for discrete time is periodic
in $\omega$ with period $2\pi$). Standard methods give
\begin{align*}
H(X_t) &= \frac{1}{2}\left( \log 2\pi e + \log \specint{}\right), \\
h_\mu &= \frac{1}{2} \left( \log 2\pi e + \specint{\log} \right), \\
\rho_\mu &= \frac{1}{2} \left( \log \specint{\fid} - \specint{\log}\right).
\end{align*}
The entropy rate is also known as the Kolmogorov-Sinai entropy.
% $H(X_t)$ is a function of marginal variance which is just the total power in the spectrum.
\end{iframe}

\begin{iframe}[PIR/Multi-information duality]
Analysis yields the PIR:
\[
b_\mu = \frac{1}{2} \left( \log \specint{\frac{1}} - \specint{\log\frac{1}} \right).
\]
This yields a simple expression for finite-order autoregressive processes, but beware: it can diverge
for moving-average processes!

\uncover<2->{
Compare with the multi-information rate:
\[
\rho_\mu = \frac{1}{2} \left( \log \specint{\fid} - \specint{\log}\right).
\]
This yields a simple expression for finite-order moving-average processes, but can diverge
for marginally stable autoregressive processes.
}

\uncover<3->{
Infinities are troublesome and point to a problem with the notion of infinitely
precise observation of continuous-valued variables.
}
\end{iframe}
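
% Numerical sketch of the spectral formulas on this and the previous frame for a
% Gaussian AR(1) process x_t = phi*x_{t-1} + e_t (my choice of example; phi and the
% noise variance are arbitrary). Plugging the AR(1) spectrum into these formulas
% gives rho_mu = -0.5*log(1 - phi^2) and b_mu = 0.5*log(1 + phi^2), which the
% averages below reproduce:
%
%   import numpy as np
%
%   phi, s2 = 0.8, 1.0
%   w = np.linspace(-np.pi, np.pi, 200000, endpoint=False)
%   S = s2 / np.abs(1 - phi * np.exp(-1j * w))**2    # power spectral density
%   avg = lambda f: f.mean()                         # (1/2pi) * integral over one period
%
%   h_mu   = 0.5 * (np.log(2 * np.pi * np.e) + avg(np.log(S)))
%   rho_mu = 0.5 * (np.log(avg(S)) - avg(np.log(S)))
%   b_mu   = 0.5 * (np.log(avg(1 / S)) - avg(np.log(1 / S)))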

% Information gained about model parameters (measured as the KL divergence
% between prior and posterior distributions) is equivalent
% to \textbf{Itti and Baldi's `Bayesian surprise'} \cite{IttiBaldi2005}.


\section{Application: Analysis of minimalist music}
\label{s:Experiments}

\begin{iframe}[Material and Methods]

% Returning to our original goal of modelling the perception of temporal structure
% in music, we computed dynamic information measures for
We took two pieces of minimalist
music by Philip Glass, \emph{Two Pages} (1969) and \emph{Gradus} (1968).
Both are monophonic and isochronous, so they are representable very simply as
a sequence of symbols (notes), one symbol per beat,
yet they remain ecologically valid examples of `real' music.

We use an elaboration of the Markov chain model---not necessarily
a good model \latin{per se}, but that wasn't the point of the experiment.
The Markov chain model was chosen as it is tractable from an information
dynamics point of view while not being completely trivial.
\end{iframe}

\begin{iframe}[Time-varying transition matrix model]
We allow the transition matrix to vary slowly with time, to track
changes in the sequence structure.
Hence, the observer's belief state includes a probability
distribution over transition matrices; we choose a product of
Dirichlet distributions:
\[
\textstyle
p(\trans|\param) = \prod_{j=1}^K p_\mathrm{Dir}(\trans_{:j}|\param_{:j}),
\]
where $\trans_{:j}$ is the \nth{j} column of $\trans$ and $\param$ is a
$K \times K$ parameter matrix.
% (Dirichlet, being conjugate to discrete/multinomial distribution,
% makes processing of observations particularly simple.)
% such that $\param_{:j}$ is the
% parameter tuple for the $K$-component Dirichlet distribution $p_\mathrm{Dir}$.
% \begin{equation}
% \textstyle
% p(\trans|\param) = \prod_{j=1}^K p_\mathrm{Dir}(\trans_{:j}|\param_{:j})
% = \prod_{j=1}^K (\prod_{i=1}^K \trans_{ij}^{\param_{ij}-1}) / B(\param_{:j}),
% \end{equation}
% where $\trans_{:j}$ is the \nth{j} column of $\trans$ and $\param$ is an
% $K \times K$ matrix of parameters.

At each time step, the distribution first \emph{spreads} under the mapping
\[
\param_{ij} \mapsto \frac{\beta\param_{ij}}{(\beta + \param_{ij})}
\]
to model the possibility that the transition matrix
has changed ($\beta=2500$ in our experiments). Then it \emph{contracts}
as each new observation provides fresh evidence about the transition matrix.
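
Concretely, assuming the standard conjugate Dirichlet update, observing the
transition $j \to i$ contracts the belief state by incrementing a single parameter,
\[
\param_{ij} \mapsto \param_{ij} + 1,
\]
leaving all other entries of $\param$ unchanged.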
%
% Each observed symbol % provides fresh evidence about current transition matrix,
% enables observer to update its belief state.
\end{iframe}


\begin{iframe}[Two Pages\nicedot Results]

% \begin{fig}{twopages}
\begin{tabular}{c@{\hspace{1.5ex}}l}%
% \hspace*{-1.5em}
% \hangbox{\colfig[0.5]{matbase/fig20304}} % 3 plots
% \hangbox{\colfig[0.52]{matbase/fig39528}} % 4 plots with means
% \hangbox{\colfig[0.52]{matbase/fig63538}} % two pages, 5 plots
% \hangbox{\colfig[0.52]{matbase/fig53706}} % two pages, 5 plots
\hangbox{\colfig[0.72]{matbase/fig33309}} % two pages, 5 plots
&
\hangbox{%
\parbox{0.28\linewidth}{
\raggedright
\textbf{Thick lines:} part boundaries as indicated
by Glass; \textbf{grey lines (top four panels):} changes in the melodic `figures';
% of which the piece is constructed.
\textbf{grey lines (bottom panel):}
the six most surprising moments chosen by an expert listener.
}
}
\end{tabular}
% \end{fig}
\end{iframe}

\begin{iframe}[Two Pages\nicedot Rule based analysis]
\begin{figure}
\colfig[0.98]{matbase/fig13377}
% \hangbox{\colfig[0.98]{matbase/fig13377}}
\end{figure}
Analysis of \emph{Two Pages} using (top) Cambouropoulos'
Local Boundary Detection Model (LBDM) and
(bottom) Lerdahl and Jackendoff's
grouping preference rule 3a (GPR3a), which is a function of pitch proximity.
Both analyses indicate `boundary strength'.
\end{iframe}

\begin{iframe}[Two Pages\nicedot Discussion]
The correspondence between the information
measures and the structure of the piece is quite close.
There is good agreement between the six `most surprising
moments' chosen by the expert listener and the model's information signal.

What appears to be an error in the detection of
the major part boundary (between events 5000 and 6000) actually
reflects a known anomaly in the score, where Glass places the boundary several events
before there is any change in the pattern of notes. Alternative analyses of \emph{Two Pages}
place the boundary in agreement with the peak in our surprisingness signal.
\end{iframe}

\comment{
\begin{iframe}[Gradus\nicedot Results]

% \begin{fig}{gradus}
\begin{tabular}{c@{\hspace{1.5ex}}l}
% &
% \hangbox{\colfig[0.4]{matbase/fig81812}}
% \hangbox{\colfig[0.52]{matbase/fig23177}} % two pages, 5 plots
% \hangbox{\colfig[0.495]{matbase/fig50709}} % Fudged segmentation
% \hangbox{\colfig[0.495]{matbase/fig3124}} % Geraint's segmentation
\hangbox{\colfig[0.715]{matbase/fig11808}} % Geraint's segmentation, corrected
&
% \hangbox{\colfig[0.5]{matbase/fig39914}}
\hangbox{%
\parbox{0.28\linewidth}{
\raggedright
\textbf{Thick lines:} part boundaries as indicated
by the composer.
\textbf{Grey lines:} segmentation by an expert listener.

Note: traces smoothed with a Gaussian
window about 16 events wide.
}
}
\end{tabular}
% \end{fig}
\end{iframe}

\begin{iframe}[Gradus\nicedot Rule based analysis]
\begin{figure}
\colfig[0.98]{matbase/fig58691}
\end{figure}
Boundary strength analysis of \emph{Gradus} using (top) Cambouropoulos'
\cite{CambouropoulosPhD} Local Boundary Detection Model and
(bottom) Lerdahl and Jackendoff's \cite{LerdahlJackendoff83}
grouping preference rule 3a.
\end{iframe}
}
\begin{iframe}[Gradus\nicedot Metrical analysis]
\begin{figure}
\begin{tabular}{cc}
\colfig[0.40]{matbase/fig56807} & \colfig[0.41]{matbase/fig27144} \\
\colfig[0.40]{matbase/fig87574} & \colfig[0.41]{matbase/fig13651} \\
\hspace*{1ex}\colfig[0.39]{matbase/fig19913} & \hspace*{1ex}\colfig[0.40]{matbase/fig66144}
\end{tabular}
\end{figure}
\end{iframe}

\comment{
\begin{iframe}[Gradus\nicedot Discussion]

\emph{Gradus} is much less systematically structured than \emph{Two Pages}, and
relies more on the conventions of tonal music, which are not represented in the model.

For example, the initial transition matrix is uniform, which does not correctly represent
prior knowledge about tonal music.

Information dynamic analysis does not give such a
clear picture of the structure, but some of the fine structure can be related
to specific events in the music (see Pearce and Wiggins 2006).
% nonetheless, there are some points of correspondence between the analysis and
% segmentation given by Keith Potter.

\end{iframe}
}

\section{Application: Beat tracking and rhythm}

\begin{iframe}[Bayesian beat tracker]
\uncover<1->{
Works by maintaining a probabilistic belief state about the time of the next
beat and the current tempo.

\begin{figure}
\colfig{beat_prior}
\end{figure}
}

\uncover<2->{
Receives categorised drum events (kick or snare) from an audio analysis front-end.
}

\end{iframe}

\begin{iframe}[Information gain in the beat tracker]
\begin{tabular}{ll}
\parbox[t]{0.43\linewidth}{\raggedright
\uncover<1->{
Each event triggers a change in belief state, so we can compute the
information gain about the beat parameters (see below).}\\[1em]

\uncover<2->{
The relationship between IIG and IPI
means we can treat the former as a proxy for the latter.}
}
&
\hangbox{\colfig[0.55]{beat_info}}
\end{tabular}
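
Here, writing $\Lambda$ for the beat parameters (next-beat time and tempo) and
$e_{1:t}$ for the events received so far, the information gain associated with
event $e_t$ can be written as the divergence between the updated and previous
belief states,
\[
D\big( p(\Lambda \mid e_{1:t}) \,\big\|\, p(\Lambda \mid e_{1:t-1}) \big).
\]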
\end{iframe}

\begin{iframe}[Analysis of drum patterns]
We analysed 17 recordings of drummers, playing either solo or with a band.
All patterns were in 4/4.
\begin{itemize}
\item
\uncover<1->{
Information tends to arrive at beat times: a consequence of the structure of the model.
}
\item
\uncover<2->{
A lot of information seems to arrive after drum fills and breaks,
as the drummer re-establishes the beat.
}
\item
\uncover<3->{
No consistent pattern of information arrival in relation to metrical
structure, so no obvious metrical structure in the micro-timing of events.
However, it is still possible that metrical structure might emerge from a predictive
analysis of the drum pattern.
}
\end{itemize}
\end{iframe}

\section{Summary and conclusions}
\label{s:Conclusions}

\begin{iframe}[Summary]

\begin{itemize}
\item Dynamic, observer-centric information theory.
\item Applicable to any dynamic probabilistic model.
\item PIR potentially a measure of complexity.
\item Simple analysis for Markov chains and Gaussian processes.
\item Applications in music analysis and composition.
\item Search for neural correlates is ongoing (that's another talk\ldots).
\end{itemize}
Thanks!
\end{iframe}

\begin{bframe}[Bibliography]
\bibliographystyle{alpha}
{\small \bibliography{all,c4dm,compsci}}
\end{bframe}
\end{document}