samer@74 1 \documentclass{beamer}
samer@74 2
samer@74 3 \usepackage[T1]{fontenc}
samer@74 4 \usepackage{microtype}
samer@74 5 \usepackage{multimedia}
samer@74 6 \usepackage{tikz}
samer@74 7 \usetikzlibrary{matrix}
samer@74 8 \usetikzlibrary{patterns}
samer@74 9 \usetikzlibrary{arrows}
samer@74 10 \usetikzlibrary{calc}
samer@74 11 \usepackage{tools}
samer@74 12 %\usepackage{amsfonts,amssymb}
samer@74 13
samer@74 14 \tikzset{every picture/.style=semithick}
samer@74 15
samer@74 16 %%% font options:
samer@74 17 % atypewri, frankgth, gillsans, centuryg, futura, eurostil
samer@74 18 %\usepackage{fourier} % Maths in serif Utopia
samer@74 19 \usepackage[sf]{frankgth}
samer@74 20 %\usepackage[sf]{optima}
samer@74 21
samer@74 22 %%% Monospace font
samer@74 23 %\usepackage[scaled=0.88]{ulgothic} % 0.88 % suits narrow faces
samer@74 24 \renewcommand{\ttdefault}{plg} % Adobe Letter Gothic - suits light medium width face
samer@74 25 %\renewcommand{\ttdefault}{pcr} % Courier - suits wide faces
samer@74 26 % remember to match up size and weight of monospace font to main font
samer@74 27
samer@74 28 \newcommand{\mytt}[1]{{\texttt{\footnotesize\fontseries{bx}\selectfont #1}}}
samer@74 29
samer@74 30 \DeclareMathAlphabet{\mathcal}{OMS}{cmsy}{m}{n}
samer@74 31
samer@74 32
samer@74 33 %%% Black on white
samer@74 34 \definecolor{base}{rgb}{0,0,0}
samer@74 35 \definecolor{comp}{named}{green}
samer@74 36 \definecolor{paper}{named}{white}
samer@74 37
samer@74 38 \logo{%
samer@74 39 \includegraphics[height=16pt]{qmul-black}\hspace*{45pt}%
samer@74 40 \raisebox{1pt}{\includegraphics[height=12pt]{c4dm-black-white}}%
samer@74 41 }
samer@74 42
samer@74 43 %%% Red on black
samer@74 44 \comment{
samer@74 45 \definecolor{base}{rgb}{1,0,0}
samer@74 46 \definecolor{comp}{rgb}{0,0.8,0.2}
samer@74 47 \definecolor{paper}{named}{black}
samer@74 48
samer@74 49 \logo{%
samer@74 50 \includegraphics[height=16pt]{qmul-red}\hspace*{45pt}%
samer@74 51 \raisebox{1pt}{\includegraphics[height=12pt]{c4dm-red-black}}%
samer@74 52 }
samer@74 53 }
samer@74 54
samer@74 55
samer@74 56 \useinnertheme{default}%circles
samer@74 57 \useoutertheme{default}
samer@74 58 \usefonttheme[onlymath]{serif}
samer@74 59
samer@74 60 \setbeamercolor{normal text}{bg=paper,fg=base!90!-paper}
samer@74 61 \setbeamercolor{background}{bg=comp!50!paper,fg=comp}
samer@74 62 %\setbeamercolor{structure}{fg=base!75!-paper}
samer@74 63 \setbeamercolor{structure}{fg=red!50!base}
samer@74 64 \setbeamercolor{palette primary}{bg=yellow!50!paper,fg=yellow}
samer@74 65 \setbeamercolor{palette secondary}{bg=orange!50!paper,fg=orange}
samer@74 66 \setbeamercolor{palette tertiary}{bg=blue!50!paper,fg=blue}
samer@74 67 \setbeamercolor{palette quaternary}{bg=green!50!paper,fg=green}
samer@74 68 \setbeamercolor{block body}{bg=base!20!paper}
samer@74 69 \setbeamercolor{block title}{bg=base!60!paper,fg=paper}
samer@74 70 \setbeamercolor{navigation symbols}{fg=base!90!paper}
samer@74 71 \setbeamercolor{separation line}{bg=blue,fg=yellow}
samer@74 72 \setbeamercolor{fine separation line}{bg=blue,fg=orange}
samer@74 73
samer@74 74 % Title page
samer@74 75 % \setbeamercolor{title}{bg=base!20!paper}
samer@74 76 % \setbeamercolor{subtitle}{bg=base!20!paper}
samer@74 77 % \setbeamercolor{title page}{bg=base!40!paper}
samer@74 78
samer@74 79 % \setbeamercolor{headline}{bg=blue}
samer@74 80 % \setbeamercolor{footline}{bg=blue}
samer@74 81 % \setbeamercolor{frametitle}{bg=base!30!paper}
samer@74 82 % \setbeamercolor{framesubtitle}{bg=base!40!paper}
samer@74 83
samer@74 84 % \setbeamercolor{section in toc}{bg=base!25!paper,fg=orange}
samer@74 85 % \setbeamercolor{section in toc shaded}{bg=base!25!paper,fg=orange!80!paper}
samer@74 86 % \setbeamercolor{subsection in toc}{bg=base!25!paper,fg=orange}
samer@74 87 % \setbeamercolor{subsection in toc shaded}{bg=yellow!25!paper,fg=orange!80!paper}
samer@74 88 % page number in head/foot
samer@74 89 % section in head/foot
samer@74 90 % section in head/foot shaded
samer@74 91
samer@74 92
samer@74 93 \setbeamerfont{structure}{series=\bfseries}
samer@74 94 \setbeamerfont{title}{series=\mdseries,size=\Large}
samer@74 95 %\setbeamerfont{title}{series=\ltseries,size=\huge}
samer@74 96 \setbeamerfont{date}{size=\footnotesize}%,series=\mdcseries}
samer@74 97 \setbeamerfont{institute}{size=\footnotesize}%,series=\mdcseries}
samer@74 98 \setbeamerfont{author}{size=\footnotesize,series=\bfseries}
samer@74 99 \setbeamercolor{bibliography item}{parent={normal text}}
samer@74 100 \setbeamercolor{bibliography entry author}{fg=base}
samer@74 101 \setbeamercolor{bibliography entry location}{fg=base!70!paper}
samer@74 102
samer@74 103 %%% Templates
samer@74 104
samer@74 105 \setbeamertemplate{bibliography item}[text]
samer@74 106 \setbeamertemplate{bibliography entry title}{ }
samer@74 107 \setbeamertemplate{bibliography entry location}{ }
samer@74 108 \setbeamertemplate{blocks}[rounded][shadow=false]
samer@74 109 \setbeamertemplate{items}[circle]
samer@74 110 %\setbeamertemplate{bibliography item}[triangle]
samer@74 111 % \setbeamertemplate{title page}[default][rounded=true,shadow=false]
samer@74 112 % \setbeamertemplate{frametitle}[default][rounded=true,shadow=false]
samer@74 113 \setbeamertemplate{sidebar right}{}
samer@74 114 \setbeamertemplate{footline}{
samer@74 115 \hspace*{0.2cm}
samer@74 116 \insertlogo
samer@74 117 \hfill
samer@74 118 \usebeamertemplate***{navigation symbols}%
samer@74 119 \hfill
samer@74 120 \makebox[6ex]{\hfill\insertframenumber/\inserttotalframenumber}%
samer@74 121 \hspace*{0.2cm}
samer@74 122
samer@74 123 \vskip 4pt
samer@74 124 }
samer@74 125
samer@74 126 \setbeamertemplate{navigation symbols}
samer@74 127 {%
samer@74 128 \hbox{%
samer@74 129 \hbox{\insertslidenavigationsymbol}
samer@74 130 \hbox{\insertframenavigationsymbol}
samer@74 131 % \hbox{\insertsubsectionnavigationsymbol}
samer@74 132 \hbox{\insertsectionnavigationsymbol}
samer@74 133 \hbox{\insertdocnavigationsymbol}
samer@74 134 % \hbox{\insertbackfindforwardnavigationsymbol}%
samer@74 135 }%
samer@74 136 }
samer@74 137
samer@74 138
samer@74 139 \AtBeginSection[]{
samer@74 140 \begin{iframe}[Outline]
samer@74 141 \tableofcontents[currentsection]
samer@74 142 \end{iframe}
samer@74 143 }
samer@74 144 %\linespread{1.1}
samer@74 145
samer@74 146 \setlength{\parskip}{0.5em}
samer@74 147
samer@74 148 \newenvironment{bframe}[1][untitled]{\begin{frame}[allowframebreaks]\frametitle{#1}}{\end{frame}}
samer@74 149 \newenvironment{iframe}[1][untitled]{\begin{frame}\frametitle{#1}}{\end{frame}}
samer@74 150 \newenvironment{isframe}[1][untitled]{\begin{frame}[fragile=singleslide,environment=isframe]\frametitle{#1}}{\end{frame}}
samer@74 151
samer@74 152 \renewenvironment{fig}[1]
samer@74 153 {%
samer@74 154 \begin{figure}
samer@74 155 \def\fglbl{f:#1}
samer@74 156 \let\ocap=\caption
samer@74 157 \renewcommand{\caption}[2][]{\ocap[##1]{\small ##2}}
samer@74 158 \centering\small
samer@74 159 }{%
samer@74 160 \label{\fglbl}
samer@74 161 \end{figure}
samer@74 162 }
samer@74 163
samer@74 164 \newcommand{\paragraph}[1]{\textbf{#1}\qquad}
samer@74 165 \newcommand{\colfig}[2][1]{\includegraphics[width=#1\linewidth]{figs/#2}}%
samer@74 166 \let\citep=\cite
samer@74 167 %\newcommand{\dotmath}[2]{\psfrag{#1}[Bc][Bc]{\small $#2$}}
samer@74 168
samer@74 169 \title{Cognitive Music Modelling:\\An Information Dynamics Approach}
samer@74 170 \author{Samer Abdallah, Henrik Ekeus, Peter Foster,\\Andrew Robertson and Mark Plumbley}
samer@74 171 \institute{Centre for Digital Music\\Queen Mary, University of London}
samer@74 172
samer@74 173 \date{\today}
samer@74 174
samer@74 175 \def\X{\mathcal{X}}
samer@74 176 \def\Y{\mathcal{Y}}
samer@74 177 \def\Past{\mathrm{Past}}
samer@74 178 \def\Future{\mathrm{Future}}
samer@74 179 \def\Present{\mathrm{Present}}
samer@74 180 \def\param{\theta}
samer@74 181 \def\trans{a}
samer@74 182 \def\init{\pi^{\trans}}
samer@74 183 %\def\entrorate(#1){\mathcal{H}(#1)}
samer@74 184 %\def\entrorate(#1){\dot{\mathcal{H}}(#1)}
samer@74 185 \def\entrorate{h}
samer@74 186 \def\emcmarg(#1){b_#1}
samer@74 187 \def\mcmarg{\vec{b}}
samer@74 188 \def\domS{\mathcal{S}}
samer@74 189 \def\domA{\mathcal{A}}
samer@74 190
samer@74 191 \def\Lxz(#1,#2){\mathcal{L}(#1|#2)}
samer@74 192 \def\LXz(#1){\overline{\mathcal{L}}(#1)}
samer@74 193 \def\LxZ(#1){\underline{\mathcal{L}}(#1)}
samer@74 194 \def\LXZ{\overline{\underline{\mathcal{L}}}}
samer@74 195 \def\Ixz(#1,#2){\mathcal{I}(#1|#2)}
samer@74 196 \def\IXz(#1){\overline{\mathcal{I}}(#1)}
samer@74 197 \def\IxZ(#1){\underline{\mathcal{I}}(#1)}
samer@74 198 \def\IXZ{\overline{\underline{\mathcal{I}}}}
samer@74 199
samer@74 200 \def\ev(#1=#2){#1\!\!=\!#2}
samer@74 201 \def\sev(#1=#2){#1\!=#2}
samer@74 202
samer@74 203 \def\FE{\mathcal{F}}
samer@74 204
samer@74 205 \newcommand\past[1]{\overset{\rule{0pt}{0.2em}\smash{\leftarrow}}{#1}}
samer@74 206 \newcommand\fut[1]{\overset{\rule{0pt}{0.1em}\smash{\rightarrow}}{#1}}
samer@74 207
samer@74 208 \def\cn(#1,#2) {\node[circle,draw,inner sep=0.2em] (#1#2) {${#1}_{#2}$};}
samer@74 209 \def\dn(#1) {\node[circle,inner sep=0.2em] (#1) {$\cdots$};}
samer@74 210 \def\rl(#1,#2) {\draw (#1) -- (#2);}
samer@74 211
samer@74 212 \definecolor{un0}{rgb}{0.5,0.0,0.0}
samer@74 213 \definecolor{un1}{rgb}{0.6,0.15,0.15}
samer@74 214 \definecolor{un2}{rgb}{0.7,0.3,0.3}
samer@74 215 \definecolor{un3}{rgb}{0.8,0.45,0.45}
\definecolor{un4}{rgb}{0.9,0.6,0.6}
samer@74 217 \definecolor{un5}{rgb}{1.0,0.75,0.75}
samer@74 218
samer@74 219 %\def\blob(#1){\node[circle,draw,fill=#1,inner sep=0.25em]{};}
samer@74 220 \def\bl(#1){\draw[circle,fill=#1] (0,0) circle (0.4em);}
samer@74 221 \def\noderow(#1,#2,#3,#4,#5,#6){%
samer@74 222 \tikz{\matrix[draw,rounded corners,inner sep=0.4em,column sep=2.1em,ampersand replacement=\&]{%
samer@74 223 \bl(#1)\&\bl(#2)\&\bl(#3)\&\bl(#4)\&\bl(#5)\&\bl(#6)\\};}}
samer@74 224
samer@74 225 \begin{document}
samer@74 226 \frame{\titlepage}
samer@74 227 \section[Outline]{}
samer@74 228 \frame{
samer@74 229 \frametitle{Outline}
samer@74 230 \tableofcontents
samer@74 231 }
samer@74 232
samer@74 233
samer@74 234
samer@74 235 \section{Expectation and surprise in music}
samer@74 236 \label{s:Intro}
samer@74 237
samer@74 238 \begin{iframe}[`Unfoldingness']
samer@74 239 Music is experienced as a
samer@74 240 \uncover<2->{phenomenon}
samer@74 241 \uncover<3->{that}
samer@74 242 \uncover<4->{`unfolds'} \uncover<5->{in}\\
samer@74 243 \only<6>{blancmange}%
samer@74 244 \only<7>{(just kidding)}%
samer@74 245 \uncover<8->{time,}
samer@74 246 \uncover<9->{rather than being apprehended as a static object presented in its
samer@74 247 entirety.}
samer@74 248
\uncover<10->{[This is recognised in computational linguistics, where the phenomenon is known as \emph{incrementality}, \eg in incremental parsing.]}
samer@74 250
samer@74 251 \uncover<11->{%
samer@74 252 Meyer \cite{Meyer67} argued that musical experience depends on
samer@74 253 how we change and revise our conceptions \emph{as events happen},
samer@74 254 on how expectation and prediction interact with occurrence, and that, to a large
samer@74 255 degree, the way to understand the effect of music is to focus on
samer@74 256 this `kinetics' of expectation and surprise.%
samer@74 257 }
samer@74 258 \end{iframe}
samer@74 259
\begin{iframe}[Expectation and surprise in music]
samer@74 261
samer@74 262 Music creates
samer@74 263 \emph{expectations} of what is to come next, which may be fulfilled
samer@74 264 immediately, after some delay, or not at all.
Suggested by music theorists, \eg
L. B. Meyer \cite{Meyer67} and Narmour \citep{Narmour77}, but also
noted much earlier by Hanslick \cite{Hanslick1854} in the
1850s:
samer@74 269 \begin{quote}
samer@74 270 \small
samer@74 271 `The most important factor in the mental process which accompanies the
samer@74 272 act of listening to music, and which converts it to a source of pleasure, is
samer@74 273 \ldots
samer@74 274 % frequently overlooked. We here refer to
samer@74 275 the intellectual satisfaction which the
samer@74 276 listener derives from continually following and anticipating the composer's
samer@74 277 intentions---now, to see his expectations fulfilled, and now, to find himself
samer@74 278 agreeably mistaken. It is a matter of course that this intellectual flux and
samer@74 279 reflux, this perpetual giving and receiving takes place unconsciously, and with
samer@74 280 the rapidity of lightning-flashes.'
samer@74 281 \end{quote}
samer@74 282 \end{iframe}
samer@74 283
samer@74 284 \begin{iframe}[Probabilistic reasoning]
samer@74 285 \uncover<1->{%
Making predictions and assessing surprise is
essentially reasoning with degrees of belief, and (arguably)
the best way to do this is to use Bayesian probability theory \cite{Cox1946,Jaynes27}.%
samer@74 289
samer@74 290 [NB. this is \textbf{subjective} probability as advocated by \eg De Finetti and Jaynes.]
samer@74 291 }
samer@74 292
samer@74 293 % Thus, we assume that musical schemata are encoded as probabilistic % \citep{Meyer56} models, and
samer@74 294 \uncover<2->{%
samer@74 295 We suppose that familiarity with different styles of music takes the form
samer@74 296 of various probabilistic models, and that these models are adapted through listening.%
samer@74 297 }
samer@74 298 % various stylistic norms is encoded as
samer@74 299 % using models that encode the statistics of music in general, the particular styles
samer@74 300 % of music that seem best to fit the piece we happen to be listening to, and the emerging
samer@74 301 % structures peculiar to the current piece.
samer@74 302
samer@74 303 \uncover<3->{%
There is experimental evidence that humans are able to internalise statistical
knowledge about music \citep{SaffranJohnsonAslin1999,EerolaToiviainenKrumhansl2002}, and also
that statistical models are effective for computational analysis of music, \eg \cite{ConklinWitten95,Pearce2005}.%
samer@74 307 }
samer@74 308
samer@74 309 % analysis of music, \eg \cite{ConklinWitten95,PonsfordWigginsMellish1999,Pearce2005}.
samer@74 310 % \cite{Ferrand2002}. Dubnov and Assayag PSTs?
samer@74 311 \end{iframe}
samer@74 312
samer@74 313 \begin{iframe}[Music and information theory]
samer@74 314 \uncover<1->{
samer@74 315 With probabilistic models in hand we can apply quantitative information theory: we can compute entropies,
samer@74 316 relative entropies, mutual information, and all that.
samer@74 317 }
samer@74 318
samer@74 319 \uncover<2->{
Lots of interest in the application of information theory to perception, music and aesthetics since the 1950s,
samer@74 321 \eg Moles \cite{Moles66}, Meyer \cite{Meyer67}, Cohen \cite{Cohen1962}, Berlyne \cite{Berlyne71}.
samer@74 322 (See also Bense, Hiller)
samer@74 323 }
samer@74 324
samer@74 325 \uncover<3->{
samer@74 326 Idea is that subjective qualities and
samer@74 327 states like uncertainty, surprise, complexity, tension, and interestingness
samer@74 328 are determined by information-theoretic quantities.
samer@74 329 }
samer@74 330
samer@74 331 \uncover<4->{
samer@74 332 Berlyne \cite{Berlyne71} called such quantities `collative variables', since they are
samer@74 333 to do with patterns of occurrence rather than medium-specific details.
samer@74 334 \emph{Information aesthetics}.
samer@74 335 }
samer@74 336 % Listeners then experience greater or lesser levels of surprise
samer@74 337 % in response to departures from these norms.
samer@74 338 % By careful manipulation
samer@74 339 % of the material, the composer can thus define, and induce within the
samer@74 340 % listener, a temporal programme of varying
samer@74 341 % levels of uncertainty, ambiguity and surprise.
samer@74 342 \end{iframe}
samer@74 343
samer@74 344 \begin{iframe}[Probabilistic model-based observer hypothesis]
samer@74 345 \begin{itemize}
samer@74 346 \item<1->
samer@74 347 As we listen, we maintain a probabilistic model that enables
samer@74 348 us to make predictions. As events unfold, we revise our probabilistic `belief state',
samer@74 349 including predictions about the future.
samer@74 350 \item<2->
samer@74 351 Probability distributions and changes in distributions are characterised in terms
samer@74 352 of information theoretic-measures such as entropy and relative entropy (KL divergence).
samer@74 353 \item<3->
samer@74 354 The dynamic evolution of these information measures captures significant structure,
samer@74 355 \eg events that are surprising, informative, explanatory \etc
samer@74 356 \end{itemize}
samer@74 357
samer@74 358 \end{iframe}
samer@74 359
samer@74 360 \begin{iframe}[Features of information dynamics]
samer@74 361 \uncover<1->{
\textbf{Abstraction}: sensitive mainly to \emph{patterns} of occurrence,
rather than details of which specific things occur or the sensory medium.
samer@74 364 % it operates at a level of abstraction removed from the details of the sensory experience and
samer@74 365 % the medium through which it was received, suggesting that the same
samer@74 366 % approach could, in principle, be used to analyse and compare information
samer@74 367 % flow in different temporal media regardless of whether they are auditory, visual or otherwise.
samer@74 368 }
samer@74 369
samer@74 370 \uncover<2->{
samer@74 371 \textbf{Generality}: applicable in principle to any probabilistic model, in particular,
samer@74 372 models with time-dependent latent variables such as HMMs.
samer@74 373 Many important musical concepts like key, harmony, and beat are essentially `hidden variables'.
samer@74 374 }
samer@74 375
samer@74 376 \uncover<3->{
samer@74 377 \textbf{Richness}: when applied to models with latent variables, can result in many-layered
samer@74 378 analysis, capturing information flow about harmony, tempo, \etc
samer@74 379 }
samer@74 380
samer@74 381 \uncover<4->{
\textbf{Subjectivity}: all probabilities are \emph{subjective} probabilities relative to the \emph{observer's}
model, which can depend on the observer's capabilities and prior experience.
samer@74 384 }
samer@74 385 \end{iframe}
samer@74 386
samer@74 387 \section{Surprise, entropy and information in random sequences}
samer@74 388 \label{s:InfoInRandomProcs}
samer@74 389
samer@74 390 \begin{iframe}[Information theory primer\nicedot Entropy]
Let $X$ be a discrete-valued random variable (with probabilities understood in the \emph{subjective} sense).
Entropy is a measure of \emph{uncertainty}. If the observer expects to see $x$ with probability $p(x)$,
then
samer@74 394 \begin{align*}
samer@74 395 H(X) &= \sum_{x\in\X} - p(x) \log p(x) \\
samer@74 396 &= \expect{[-\log p(X)]}.
samer@74 397 \end{align*}
If we regard $-\log p(x)$ as the `surprisingness' of $x$, then the entropy is the `expected surprisingness':
high for spread-out distributions and low for concentrated ones.
samer@74 400 \end{iframe}
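
% Worked example added for illustration: the entropy formula above applied to a small
% hypothetical distribution (all logarithms base 2, i.e. values in bits).
\begin{iframe}[Information theory primer\nicedot Entropy: worked example]
Suppose the observer's distribution over $\X=\{1,2,3\}$ is
$p = (\tfrac{1}{2}, \tfrac{1}{4}, \tfrac{1}{4})$. Working in bits,
\begin{align*}
H(X) &= \tfrac{1}{2}\log_2 2 + \tfrac{1}{4}\log_2 4 + \tfrac{1}{4}\log_2 4
      = 0.5 + 0.5 + 0.5 = 1.5 \text{ bits}.
\end{align*}
For comparison, the uniform distribution over three values gives $\log_2 3 \approx 1.58$ bits
(maximal spread), while a point mass gives $0$ bits (no uncertainty).
\end{iframe}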
samer@74 401
samer@74 402 \begin{iframe}[Information theory primer\nicedot Relative entropy]
samer@74 403 Relative entropy or Kullback-Leibler (KL) divergence quantifies difference between
samer@74 404 probability distributions.
If the observer receives data $\mathcal{D}$, the divergence between the (subjective) prior and
posterior distributions is the
amount of information in $\mathcal{D}$ \emph{about} $X$ for this observer:
samer@74 408 \[
samer@74 409 I(\mathcal{D}\to X) =
samer@74 410 D(p_{X|\mathcal{D}} || p_X)
samer@74 411 = \sum_{x\in\X} p(x|\mathcal{D}) \log \frac{p(x|\mathcal{D})}{p(x)}.
samer@74 412 \]
samer@74 413 If observing $\mathcal{D}$ causes a large change in belief about $X$, then $\mathcal{D}$
samer@74 414 contained a lot of information about $X$.
samer@74 415
samer@74 416 Like Lindley's (1956) information (thanks Lars!).
samer@74 417 \end{iframe}
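
% Worked example added for illustration: a hypothetical prior-to-posterior revision and the
% KL divergence it corresponds to (base-2 logs).
\begin{iframe}[Information theory primer\nicedot Relative entropy: worked example]
Suppose the prior over a binary $X$ is $p_X = (\tfrac{1}{2}, \tfrac{1}{2})$ and, after seeing
$\mathcal{D}$, the posterior is $p_{X|\mathcal{D}} = (\tfrac{3}{4}, \tfrac{1}{4})$. Then
\begin{align*}
I(\mathcal{D}\to X) = D(p_{X|\mathcal{D}} || p_X)
 &= \tfrac{3}{4}\log_2\tfrac{3/4}{1/2} + \tfrac{1}{4}\log_2\tfrac{1/4}{1/2} \\
 &\approx 0.75 \times 0.585 - 0.25 \approx 0.19 \text{ bits}.
\end{align*}
A mild revision of belief, so $\mathcal{D}$ carried little information about $X$ for this observer;
had the posterior become a point mass on the first value, the divergence would have been $1$ bit.
\end{iframe}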
samer@74 418
samer@74 419 \begin{iframe}[Information theory primer\nicedot Mutual information]
The mutual information (MI) between $X_1$ and $X_2$ is the expected amount of information about
$X_2$ in an observation of $X_1$. It can be written in several ways:
samer@74 422 \begin{align*}
samer@74 423 I(X_1;X_2) &= \sum_{x_1,x_2} p(x_1,x_2) \log \frac{p(x_1,x_2)}{p(x_1)p(x_2)} \\
samer@74 424 &= H(X_1) + H(X_2) - H(X_1,X_2) \\
samer@74 425 &= H(X_2) - H(X_2|X_1).
samer@74 426 \end{align*}
samer@74 427 (1) Expected information about $X_2$ in an observation of $X_1$;\\
samer@74 428 (2) Expected reduction in uncertainty about $X_2$ after observing $X_1$;\\
samer@74 429 (3) Symmetric: $I(X_1;X_2) = I(X_2;X_1)$.
samer@74 430 \end{iframe}
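
% Worked example added for illustration, using the H(X_2) - H(X_2|X_1) form above;
% the noisy-copy channel and its numbers are hypothetical (base-2 logs).
\begin{iframe}[Information theory primer\nicedot Mutual information: worked example]
Let $X_1$ be uniform on $\{0,1\}$ and let $X_2$ copy $X_1$ with probability $0.9$
(and flip it with probability $0.1$). By symmetry $X_2$ is also uniform, so $H(X_2)=1$ bit, and
\begin{align*}
H(X_2|X_1) &= -0.9\log_2 0.9 - 0.1\log_2 0.1 \approx 0.47 \text{ bits}, \\
I(X_1;X_2) &= H(X_2) - H(X_2|X_1) \approx 0.53 \text{ bits}.
\end{align*}
A perfect copy would give $1$ bit; an independent $X_2$ would give $0$ bits.
\end{iframe}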
samer@74 431
samer@74 432 \begin{iframe}[Information theory primer\nicedot Conditional MI]
samer@74 433 Information in one variable about another given observations of some third variable.
samer@74 434 Formulated analogously by adding conditioning variables to entropies:
samer@74 435 \begin{align*}
samer@74 436 I(X_1;X_2|X_3) &= H(X_1|X_3) - H(X_1|X_2,X_3).
samer@74 437 \end{align*}
samer@74 438 Makes explicit the dependence of information assessment on background knowledge,
samer@74 439 represented by conditioning variables.
samer@74 440 \end{iframe}
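
% Standard equivalent forms of the conditional MI, added for reference; they follow from the
% definitions above by carrying X_3 along as a conditioning variable.
\begin{iframe}[Information theory primer\nicedot Conditional MI: equivalent forms]
\begin{align*}
I(X_1;X_2|X_3) &= H(X_1|X_3) - H(X_1|X_2,X_3) \\
               &= H(X_2|X_3) - H(X_2|X_1,X_3) \\
               &= H(X_1|X_3) + H(X_2|X_3) - H(X_1,X_2|X_3).
\end{align*}
As in the unconditional case, it is symmetric in $X_1$ and $X_2$, and is zero
exactly when $X_1$ and $X_2$ are conditionally independent given $X_3$.
\end{iframe}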
samer@74 441
samer@74 442
samer@74 443 \begin{isframe}[Information theory primer\nicedot I-Diagrams]
samer@74 444 \newcommand\rad{2.2em}%
samer@74 445 \newcommand\circo{circle (3.4em)}%
samer@74 446 \newcommand\labrad{4.3em}
samer@74 447 \newcommand\bound{(-6em,-5em) rectangle (6em,6em)}
samer@74 448 \newcommand\clipin[1]{\clip (#1) \circo;}%
samer@74 449 \newcommand\clipout[1]{\clip \bound (#1) \circo;}%
samer@74 450 \newcommand\cliptwo[3]{%
samer@74 451 \begin{scope}
samer@74 452 \clipin{#1};
samer@74 453 \clipin{#2};
samer@74 454 \clipout{#3};
samer@74 455 \fill[black!30] \bound;
samer@74 456 \end{scope}
samer@74 457 }%
samer@74 458 \newcommand\clipone[3]{%
samer@74 459 \begin{scope}
samer@74 460 \clipin{#1};
samer@74 461 \clipout{#2};
samer@74 462 \clipout{#3};
samer@74 463 \fill[black!15] \bound;
samer@74 464 \end{scope}
samer@74 465 }%
Information diagrams are a Venn diagram-like representation of entropies and mutual
informations for a set of random variables.
samer@74 468 \begin{center}
samer@74 469 \begin{tabular}{c@{\ }c}
samer@74 470 \scalebox{0.8}{%
samer@74 471 \begin{tikzpicture}[baseline=0pt]
samer@74 472 \coordinate (p1) at (90:\rad);
samer@74 473 \coordinate (p2) at (210:\rad);
samer@74 474 \coordinate (p3) at (-30:\rad);
samer@74 475 \clipone{p1}{p2}{p3};
samer@74 476 \clipone{p2}{p3}{p1};
samer@74 477 \clipone{p3}{p1}{p2};
samer@74 478 \cliptwo{p1}{p2}{p3};
samer@74 479 \cliptwo{p2}{p3}{p1};
samer@74 480 \cliptwo{p3}{p1}{p2};
samer@74 481 \begin{scope}
samer@74 482 \clip (p1) \circo;
samer@74 483 \clip (p2) \circo;
samer@74 484 \clip (p3) \circo;
samer@74 485 \fill[black!45] \bound;
samer@74 486 \end{scope}
samer@74 487 \draw (p1) \circo;
samer@74 488 \draw (p2) \circo;
samer@74 489 \draw (p3) \circo;
samer@74 490 \path
samer@74 491 (barycentric cs:p3=1,p1=-0.2,p2=-0.1) +(0ex,0) node {$I_{3|12}$}
samer@74 492 (barycentric cs:p1=1,p2=-0.2,p3=-0.1) +(0ex,0) node {$I_{1|23}$}
samer@74 493 (barycentric cs:p2=1,p3=-0.2,p1=-0.1) +(0ex,0) node {$I_{2|13}$}
samer@74 494 (barycentric cs:p3=1,p2=1,p1=-0.55) +(0ex,0) node {$I_{23|1}$}
samer@74 495 (barycentric cs:p1=1,p3=1,p2=-0.55) +(0ex,0) node {$I_{13|2}$}
samer@74 496 (barycentric cs:p2=1,p1=1,p3=-0.55) +(0ex,0) node {$I_{12|3}$}
samer@74 497 (barycentric cs:p3=1,p2=1,p1=1) node {$I_{123}$}
samer@74 498 ;
samer@74 499 \path
samer@74 500 (p1) +(140:\labrad) node {$X_1$}
samer@74 501 (p2) +(-140:\labrad) node {$X_2$}
samer@74 502 (p3) +(-40:\labrad) node {$X_3$};
samer@74 503 \end{tikzpicture}%
samer@74 504 }
samer@74 505 &
samer@74 506 \parbox{0.5\linewidth}{
samer@74 507 \small
samer@74 508 \begin{align*}
samer@74 509 I_{1|23} &= H(X_1|X_2,X_3) \\
samer@74 510 I_{13|2} &= I(X_1;X_3|X_2) \\
samer@74 511 I_{1|23} + I_{13|2} &= H(X_1|X_2) \\
samer@74 512 I_{12|3} + I_{123} &= I(X_1;X_2)
samer@74 513 \end{align*}
samer@74 514 }
samer@74 515 \end{tabular}
samer@74 516 \end{center}
samer@74 517 The areas of
samer@74 518 the three circles represent $H(X_1)$, $H(X_2)$ and $H(X_3)$ respectively.
samer@74 519 The total shaded area is the joint entropy $H(X_1,X_2,X_3)$.
samer@74 520 Each undivided region is an \emph{atom} of the I-diagram.
samer@74 521 \end{isframe}
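
% Added for reference: how standard quantities are recovered as sums of I-diagram atoms,
% following directly from the definitions on the previous slide.
\begin{iframe}[Information theory primer\nicedot Reading the I-diagram]
Each circle is the union of the four atoms it contains, and each standard quantity is a sum of atoms:
\begin{align*}
H(X_1) &= I_{1|23} + I_{12|3} + I_{13|2} + I_{123}, \\
I(X_1;X_2) &= I_{12|3} + I_{123}, \\
H(X_1,X_2,X_3) &= I_{1|23} + I_{2|13} + I_{3|12} + I_{12|3} + I_{13|2} + I_{23|1} + I_{123}.
\end{align*}
(For three or more variables the triple overlap $I_{123}$ can be negative, so the areas
are only a schematic guide.)
\end{iframe}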
samer@74 522
samer@74 523
samer@74 524
samer@74 525
samer@74 526 \begin{isframe}[Information theory in sequences]
samer@74 527 \def\bx{1.6em}%
samer@74 528 \def\cn(#1,#2) {\node[circle,draw,fill=white,inner sep=0.2em] at(#1) {$#2$};}%
samer@74 529 \def\dn(#1){\node[circle,inner sep=0.2em] at(#1) {$\cdots$};}%
samer@74 530 \def\en(#1){coordinate(#1)}%
samer@74 531 \def\tb{++(3.8em,0)}%
samer@74 532 \def\lb(#1)#2{\path (#1)+(0,\bx) node[anchor=south] {#2};}
samer@74 533 \def\nr(#1,#2,#3){\draw[rounded corners,fill=#3] (#1) rectangle (#2);}%
samer@74 534
Consider an observer receiving elements of a random sequence
$(\ldots, X_{-1}, X_0, X_1, X_2, \ldots)$, so that at any time $t$ there is
a `present' $X_t$, an observed past $\past{X}_t$, and an unobserved future
$\fut{X}_t$. For example, at time $t=3$:
samer@74 539
samer@74 540 \begin{figure}
samer@74 541 \begin{tikzpicture}%[baseline=-1em]
samer@74 542 \path (0,0) \en(X0) \tb \en(X1) \tb \en(X2) \tb \en(X3) \tb \en(X4) \tb \en(X5) \tb \en(X6);
samer@74 543 \path (X0)+(-\bx,-\bx) \en(p1) (X2)+(\bx,\bx) \en(p2)
samer@74 544 (X3)+(-\bx,-\bx) \en(p3) (X3)+(\bx,\bx) \en(p4)
samer@74 545 (X4)+(-\bx,-\bx) \en(p5) (X6)+(\bx,\bx) \en(p6);
samer@74 546 \nr(p1,p2,un3) \nr(p3,p4,un4) \nr(p5,p6,un5)
samer@74 547 \dn(X0) \cn(X1,X_1) \cn(X2,X_2) \cn(X3,X_3) \cn(X4,X_4) \cn(X5,X_5) \dn(X6)
samer@74 548 \lb(X1){Past: $\past{X}_3$}
\lb(X5){Future: $\fut{X}_3$}
samer@74 550 \lb(X3){Present}
samer@74 551 \end{tikzpicture}%}%
samer@74 552 \end{figure}
samer@74 553 Consider how the observer's belief state evolves when, having observed up to
samer@74 554 $X_2$, it learns the value of $X_3$.
samer@74 555 \end{isframe}
samer@74 556
\begin{iframe}[`Surprise'-based quantities]
To obtain the first set of measures, we ignore the future $\fut{X}_t$
and consider the probability distribution for $X_t$ given the
observed past $\past{X}_t=\past{x}_t$.
samer@74 561
samer@74 562 \begin{enumerate}
samer@74 563 \item<1->
samer@74 564 \textbf{Surprisingness}: negative log-probability
samer@74 565 $\ell_t = -\log p(x_t|\past{x}_t)$.
samer@74 566
samer@74 567 \item<2->
Expected surprisingness given context $\past{X}_t=\past{x}_t$ is the entropy of the predictive distribution,
$H(X_t|\ev(\past{X}_t=\past{x}_t))$: the uncertainty about $X_t$ before the observation is made.
samer@74 570
samer@74 571 \item<3->
The expectation over all possible realisations of the process is the conditional entropy
$H(X_t|\past{X}_t)$ according to the observer's model. For a stationary process, this is the
\emph{entropy rate} $h_\mu$.
samer@74 575 \end{enumerate}
samer@74 576 \end{iframe}
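
% Worked example added for illustration: the surprise-based quantities above evaluated for a
% hypothetical three-event predictive distribution (base-2 logs).
\begin{iframe}[`Surprise'-based quantities\nicedot Worked example]
Suppose the predictive distribution given the observed past is
$p(\cdot|\past{x}_t) = (0.7, 0.2, 0.1)$ over three possible events, and the event with
probability $0.2$ actually occurs. Then
\begin{align*}
\ell_t &= -\log_2 0.2 \approx 2.32 \text{ bits}, \\
H(X_t|\ev(\past{X}_t=\past{x}_t)) &= -0.7\log_2 0.7 - 0.2\log_2 0.2 - 0.1\log_2 0.1 \approx 1.16 \text{ bits}.
\end{align*}
The realised surprisingness exceeds its expectation because a relatively unlikely event occurred.
\end{iframe}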
samer@74 577
samer@74 578 \begin{iframe}[Predictive information]
The second set of measures is based on the amount of information the observation $\ev(X_t=x_t)$
carries \emph{about} the unobserved future $\fut{X}_t$, \emph{given} that we already
know the past $\ev(\past{X}_t=\past{x}_t)$:
samer@74 583 \begin{equation*}
samer@74 584 \mathcal{I}_t = I(\ev(X_t=x_t)\to\fut{X}_t|\ev(\past{X}_t=\past{x}_t)).
samer@74 585 \end{equation*}
This is the KL divergence between beliefs about the future $\fut{X}_t$ before and after
the observation $\ev(X_t=x_t)$.
Hence, for continuous-valued variables, it is invariant to invertible
transformations of the observation space.
samer@74 590 \end{iframe}
samer@74 591
samer@74 592 \begin{iframe}[Predictive information based quantities]
samer@74 593 \begin{enumerate}
samer@74 594 \item<1->
samer@74 595 \emph{Instantaneous predictive information} (IPI) is just $\mathcal{I}_t$.
samer@74 596
samer@74 597 % Expectations over $X|\ev(Z=z)$, $Z|\ev(X=x)$, and $(X,Z)$ give 3 more information measures:
samer@74 598 \item<2->
The expectation of $\mathcal{I}_t$ before the observation at time $t$ is
$I(X_t;\fut{X}_t | \ev(\past{X}_t=\past{x}_t))$: the mutual information conditioned on the
observed past. This is the amount of new information about the future expected from the next observation.
Useful for directing attention towards the next event even before it happens?
samer@74 603
samer@74 604 % This is different from Itti and Baldi's proposal that Bayesian
samer@74 605 % \emph{surprise} attracts attention \cite{IttiBaldi2005}, as it is a mechanism which can
samer@74 606 % operate \emph{before} the surprise occurs.
samer@74 607
samer@74 608
samer@74 609 \item<3->
The expectation over all possible realisations is the conditional mutual information
$I(X_t;\fut{X}_t|\past{X}_t)$. For a stationary process, this is the global
\emph{predictive information rate} (PIR), the average rate at which new information arrives about
the future. In terms of conditional entropies, it has two forms:
$H(\fut{X}_t|\past{X}_t) - H(\fut{X}_t|X_t,\past{X}_t)$ or
$H(X_t|\past{X}_t) - H(X_t|\fut{X}_t,\past{X}_t)$.
samer@74 616 \end{enumerate}
samer@74 617
samer@74 618 \end{iframe}
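
% Added derivation sketch: the two conditional-entropy forms of the PIR quoted above are
% equal by the symmetry of conditional mutual information.
\begin{iframe}[Predictive information rate\nicedot Two equivalent forms]
Applying $I(A;B|C) = H(A|C) - H(A|B,C)$ with $C = \past{X}_t$, in both orders:
\begin{align*}
I(X_t;\fut{X}_t|\past{X}_t) &= H(X_t|\past{X}_t) - H(X_t|\fut{X}_t,\past{X}_t) \\
                            &= H(\fut{X}_t|\past{X}_t) - H(\fut{X}_t|X_t,\past{X}_t).
\end{align*}
The first form reads as `expected surprise minus the part not shared with the future';
the second as `reduction in uncertainty about the future due to the present observation'.
\end{iframe}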
samer@74 619
samer@74 620 \begin{iframe}[Global measures for stationary processes]
For a stationary random process model, the average levels of surprise and information
are captured by the time-shift invariant process information measures:
samer@74 623 \begin{align*}
samer@74 624 \text{entropy rate} &: & h_\mu &= H(X_t | \past{X}_t) \\
samer@74 625 \text{multi-information rate} &: & \rho_\mu &= I(\past{X}_t;X_t) = H(X_t) - h_\mu \\
samer@74 626 \text{residual entropy rate} &: & r_\mu &= H(X_t | \past{X}_t, \fut{X}_t) \\
samer@74 627 \text{predictive information rate} &: & b_\mu &= I(X_t;\fut{X}_t|\past{X}_t) = h_\mu - r_\mu
samer@74 628 \end{align*}
The residual entropy rate is also known as the \emph{erasure entropy} \cite{VerduWeissman2006}.
samer@74 630 \end{iframe}
samer@74 631
samer@74 632 \begin{isframe}[Process I-diagrams]
samer@74 633 % \newcommand\subfig[2]{\shortstack{#2\\[0.75em]#1}}
samer@74 634 \newcommand\subfig[2]{#2}
samer@74 635 \newcommand\rad{1.75em}%
samer@74 636 \newcommand\ovoid[1]{%
samer@74 637 ++(-#1,\rad)
samer@74 638 -- ++(2 * #1,0em) arc (90:-90:\rad)
samer@74 639 -- ++(-2 * #1,0em) arc (270:90:\rad)
samer@74 640 }%
samer@74 641 \newcommand\axis{2.75em}%
samer@74 642 \newcommand\olap{0.85em}%
samer@74 643 \newcommand\offs{3.6em}
samer@74 644 \newcommand\longblob{\ovoid{\axis}}
samer@74 645 \newcommand\shortblob{\ovoid{1.75em}}
samer@74 646 \begin{figure}
samer@74 647 \begin{tikzpicture}%[baseline=-1em]
samer@74 648 \newcommand\rc{\rad}
samer@74 649 \newcommand\throw{2.5em}
samer@74 650 \coordinate (p1) at (180:1.5em);
samer@74 651 \coordinate (p2) at (0:0.3em);
samer@74 652 \newcommand\bound{(-7em,-2.6em) rectangle (7em,3.0em)}
samer@74 653 \newcommand\present{(p2) circle (\rc)}
samer@74 654 \newcommand\thepast{(p1) ++(-\throw,0) \ovoid{\throw}}
samer@74 655 \newcommand\fillclipped[2]{%
samer@74 656 \begin{scope}[even odd rule]
samer@74 657 \foreach \thing in {#2} {\clip \thing;}
samer@74 658 \fill[black!#1] \bound;
samer@74 659 \end{scope}%
samer@74 660 }%
samer@74 661 \fillclipped{30}{\present,\bound \thepast}
samer@74 662 \fillclipped{15}{\present,\bound \thepast}
samer@74 663 \fillclipped{45}{\present,\thepast}
samer@74 664 \draw \thepast;
samer@74 665 \draw \present;
samer@74 666 \node at (barycentric cs:p2=1,p1=-0.3) {$h_\mu$};
samer@74 667 \node at (barycentric cs:p2=1,p1=1) [shape=rectangle,fill=black!45,inner sep=1pt]{$\rho_\mu$};
samer@74 668 \path (p2) +(90:3em) node {$X_0$};
samer@74 669 \path (p1) +(-3em,0em) node {\shortstack{infinite\\past}};
samer@74 670 \path (p1) +(-4em,\rad) node [anchor=south] {$\ldots,X_{-1}$};
samer@74 671 \end{tikzpicture}%
samer@74 672 \\[0.25em]
samer@74 673 \begin{tikzpicture}%[baseline=-1em]
samer@74 674 \newcommand\rc{2.2em}
samer@74 675 \newcommand\throw{2.5em}
samer@74 676 \coordinate (p1) at (210:1.5em);
samer@74 677 \coordinate (p2) at (90:0.8em);
samer@74 678 \coordinate (p3) at (-30:1.5em);
samer@74 679 \newcommand\bound{(-7em,-2.6em) rectangle (7em,3.0em)}
samer@74 680 \newcommand\present{(p2) circle (\rc)}
samer@74 681 \newcommand\thepast{(p1) ++(-\throw,0) \ovoid{\throw}}
samer@74 682 \newcommand\future{(p3) ++(\throw,0) \ovoid{\throw}}
samer@74 683 \newcommand\fillclipped[2]{%
samer@74 684 \begin{scope}[even odd rule]
samer@74 685 \foreach \thing in {#2} {\clip \thing;}
samer@74 686 \fill[black!#1] \bound;
samer@74 687 \end{scope}%
samer@74 688 }%
samer@74 689 % \fillclipped{80}{\future,\thepast}
samer@74 690 \fillclipped{30}{\present,\future,\bound \thepast}
samer@74 691 \fillclipped{15}{\present,\bound \future,\bound \thepast}
samer@74 692 \draw \future;
samer@74 693 \fillclipped{45}{\present,\thepast}
samer@74 694 \draw \thepast;
samer@74 695 \draw \present;
samer@74 696 \node at (barycentric cs:p2=0.9,p1=-0.17,p3=-0.17) {$r_\mu$};
samer@74 697 \node at (barycentric cs:p1=-0.5,p2=1.0,p3=1) {$b_\mu$};
samer@74 698 \node at (barycentric cs:p3=0,p2=1,p1=1.2) [shape=rectangle,fill=black!45,inner sep=1pt]{$\rho_\mu$};
samer@74 699 \path (p2) +(140:3.2em) node {$X_0$};
samer@74 700 % \node at (barycentric cs:p3=0,p2=1,p1=1) {$\rho_\mu$};
samer@74 701 \path (p3) +(3em,0em) node {\shortstack{infinite\\future}};
samer@74 702 \path (p1) +(-3em,0em) node {\shortstack{infinite\\past}};
samer@74 703 \path (p1) +(-4em,\rad) node [anchor=south] {$\ldots,X_{-1}$};
samer@74 704 \path (p3) +(4em,\rad) node [anchor=south] {$X_1,\ldots$};
samer@74 705 \end{tikzpicture}%
samer@74 706 % \\[0.25em]
samer@74 707 % The small dark
samer@74 708 % region below $X_0$ is $\sigma_\mu$ and the excess entropy
samer@74 709 % is $E = \rho_\mu + \sigma_\mu$.
samer@74 710 \end{figure}
samer@74 711 Marginal entropy of `present' $X_0$ is $H(X_0)=\rho_\mu+r_\mu+b_\mu$.\\
samer@74 712 Entropy rate is $h_\mu = r_\mu+b_\mu$.
samer@74 713 \end{isframe}
samer@74 714
samer@74 715 \section{Markov chains}
samer@74 716 \label{s:InfoInMC}
samer@74 717
samer@74 718
samer@74 719 \begin{iframe}[Markov chains\nicedot Definitions]
samer@74 720
samer@74 721 % Now we'll look at information dynamics in one of the simplest possible models, a Markov chain.
samer@74 722 % To illustrate the how the measures defined in \secrf{InfoInRandomProcs} can be computed
samer@74 723 % in practice, we will consider one of the simplest random processes, a
samer@74 724 % first order Markov chain.
samer@74 725 % In this case, the dynamic information measures can be computed in closed-form.
samer@74 726 %
samer@74 727
samer@74 728 Let $X$ be a Markov chain with state space
samer@74 729 $\{1, \ldots, K\}$, \ie the $X_t$ take values from $1$ to $K$.
samer@74 730 \begin{center}
samer@74 731 \begin{tikzpicture}[->]
samer@74 732 \matrix[column sep=2em,ampersand replacement=\&]{
samer@74 733 \cn(X,1) \& \cn(X,2) \& \cn(X,3) \& \cn(X,4) \& \dn(XT) \\};
samer@74 734 \rl(X1,X2) \rl(X2,X3) \rl(X3,X4) \rl(X4,XT)
samer@74 735 \end{tikzpicture}
samer@74 736 \end{center}
samer@74 737 % For the sake of brevity let us assume that $\domA$ is the set of integers from 1 to $K$.
samer@74 738 Parameterised by transition matrix $\trans \in \reals^{K\times K}$,
samer@74 739 % encoding the distribution of any element of the sequence given previous one,
samer@74 740 \ie $p(\ev(X_{t+1}=i)|\ev(X_t=j))=\trans_{ij}$.
Assume irreducibility, ergodicity \etc to ensure uniqueness of the
stationary distribution $\init$, such that
$p(\ev(X_t=i))=\init_i$, independent of $t$. The entropy rate as a function of
$\trans$ is
samer@74 745 % $\entrorate:\reals^{K\times K} \to \reals$:
samer@74 746 \[
samer@74 747 \entrorate(\trans) = \sum_{j=1}^K \init_j \sum_{i=1}^K -\trans_{ij} \log \trans_{ij}.
samer@74 748 \]
samer@74 749 \end{iframe}
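
% Worked example added for illustration: entropy rate of a hypothetical symmetric two-state
% chain, computed from the formula above (base-2 logs).
\begin{iframe}[Markov chains\nicedot Worked example: entropy rate]
Take $K=2$ with
$\trans = \begin{pmatrix} 0.9 & 0.1 \\ 0.1 & 0.9 \end{pmatrix}$,
\ie the chain stays in its current state with probability $0.9$. The stationary
distribution is $\init = (\tfrac{1}{2},\tfrac{1}{2})$, and each column contributes the same
conditional entropy, so
\[
\entrorate(\trans) = -0.9\log_2 0.9 - 0.1\log_2 0.1 \approx 0.47 \text{ bits per observation}.
\]
A deterministic chain would give $0$; a completely unpredictable one, $\log_2 2 = 1$ bit.
\end{iframe}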
samer@74 750
samer@74 751 \begin{iframe}[Markov chains\nicedot PIR]
The predictive information rate for first-order chains comes out in terms of the entropy rate
function as
samer@74 754 \[
samer@74 755 b_\mu = h(a^2) - h(a),
samer@74 756 \]
where $a^2$ is the \emph{two-step} transition matrix.
samer@74 758
samer@74 759 \uncover<2->{
This can be generalised to higher-order chains:
\[
b_\mu = h(\hat{a}^{N+1}) - Nh(\hat{a}),
\]
where $N$ is the order of the chain and $\hat{a}$ is a sparse
$K^N\times K^N$ transition matrix over the product state space of $N$
consecutive observations (step size 1).
samer@74 767 }
samer@74 768 \end{iframe}
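
% Continuation of the two-state worked example above (same hypothetical transition matrix):
% the PIR via the two-step matrix (base-2 logs).
\begin{iframe}[Markov chains\nicedot Worked example: PIR]
For the symmetric two-state chain with switch probability $0.1$, the two-step matrix $\trans^2$
has switch probability $2 \times 0.1 \times 0.9 = 0.18$, so
\begin{align*}
h(\trans^2) &= -0.82\log_2 0.82 - 0.18\log_2 0.18 \approx 0.68 \text{ bits}, \\
b_\mu &= h(\trans^2) - h(\trans) \approx 0.68 - 0.47 = 0.21 \text{ bits per observation}.
\end{align*}
Both a deterministic chain ($h=0$) and an iid sequence ($\trans^2=\trans$) give $b_\mu=0$;
intermediate chains like this one give positive PIR.
\end{iframe}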
samer@74 769
samer@74 770 \begin{iframe}[Entropy rate and PIR in Markov chains]
samer@74 771
samer@74 772 \begin{fig}{artseq}
samer@74 773 \hangbox{\colfig[0.40]{matbase/fig8515}}%
samer@74 774 \quad
samer@74 775 \hangbox{%
samer@74 776 \begin{tabular}{cc}%
samer@74 777 \colfig[0.18]{matbase/fig1356} &
samer@74 778 \colfig[0.18]{matbase/fig45647} \\
samer@74 779 \colfig[0.18]{matbase/fig49938} &
samer@74 780 \colfig[0.18]{matbase/fig23355}%
samer@74 781 \end{tabular}%
samer@74 782 }%
samer@74 783 % \end{hanging}\\
samer@74 784 \end{fig}
For a given $K$, the entropy rate varies between 0 (deterministic sequence)
and $\log K$, attained when $\trans_{ij}=1/K$ for all $i,j$.
The space of transition matrices is explored by generating
them at random and plotting entropy rate against PIR (note the inverted
`U' relationship). %Transmat (d) is almost uniform.
samer@74 790 \end{iframe}
samer@74 791
samer@74 792 \begin{iframe}[Samples from processes with different PIR]
samer@74 793 \begin{figure}
samer@74 794 \colfig[0.75]{matbase/fig847}\\
samer@74 795 \colfig[0.75]{matbase/fig61989}\\
samer@74 796 \colfig[0.75]{matbase/fig43415}\\
samer@74 797 \colfig[0.75]{matbase/fig50385}
samer@74 798 \end{figure}
Sequence (a) is a repetition
of state 4 (see transition matrix (a) on the previous slide).
System (b) has the highest PIR.
samer@74 802 \end{iframe}
samer@74 803
samer@74 804 % \begin{tabular}{rl}
samer@74 805 % (a) & \raisebox{-1em}{\colfig[0.58]{matbase/fig9048}}\\[1em]
samer@74 806 % (b) & \raisebox{-1em}{\colfig[0.58]{matbase/fig58845}}\\[1em]
samer@74 807 % (c) & \raisebox{-1em}{\colfig[0.58]{matbase/fig45019}}\\[1em]
samer@74 808 % (d) & \raisebox{-1em}{\colfig[0.58]{matbase/fig1511}}
samer@74 809 % \end{tabular}
samer@74 810
samer@74 811 \section{Application: The Melody Triangle}
samer@74 812 \begin{iframe}[Complexity and interestingness: the Wundt Curve]
samer@74 813 \label{s:Wundt}
Studies looking into the relationship between stochastic complexity
(usually measured as entropy or entropy rate) and aesthetic value reveal
an inverted `U'-shaped curve \citep{Berlyne71} (also known as the Wundt curve \cite{Wundt1897}).
Repeated exposure tends to move stimuli leftwards.
samer@74 818
samer@74 819 \hangbox{%
samer@74 820 \only<1>{\colfig[0.5]{wundt}}%
samer@74 821 \only<2>{\colfig[0.5]{wundt2}}%
samer@74 822 }\hfill
samer@74 823 \hangbox{\parbox{0.43\linewidth}{\raggedright
samer@74 824 %Too deterministic $\rightarrow$ predictable, boring like a monotone;\\
samer@74 825 %Too random $\rightarrow$ are boring like white noise: unstructured,
samer@74 826 %featureless, uniform.
samer@74 827 Explanations for this usually appeal to a need for a `balance'
samer@74 828 between order and chaos, unity and diversity, and so on, in a generally
samer@74 829 imprecise way.}}
samer@74 830
samer@74 831
samer@74 832 % Hence, a sequence can be uninteresting in two opposite ways: by
samer@74 833 % being utterly predictable \emph{or} by being utterly
samer@74 834 % unpredictable.
samer@74 835 % Meyer \cite{Meyer2004} suggests something similar:
samer@74 836 % hints at the same thing while discussing
samer@74 837 % the relation between the rate of information flow and aesthetic experience,
samer@74 838 % suggesting that
samer@74 839 %% `unless there is some degree of order, \ldots
samer@74 840 %% there is nothing to be uncertain \emph{about} \ldots
samer@74 841 % `If the amount of information [by which he means entropy and surprisingness]
samer@74 842 % is inordinately increased, the result is a kind of cognitive white noise.'
samer@74 843
samer@74 844 \end{iframe}
samer@74 845
samer@74 846 \begin{iframe}[PIR as a measure of cognitive activity]
samer@74 847
The predictive information rate incorporates a similar balance automatically:
it is maximal for sequences which are neither deterministic nor
totally uncorrelated across time.
samer@74 851
samer@74 852 \vspace{1em}
samer@74 853 \begin{tabular}{rr}%
samer@74 854 \raisebox{0.5em}{too predictable:} &
samer@74 855 \only<1>{\noderow(black,un0,un0,un0,un1,un1)}%
samer@74 856 \only<2>{\noderow(black,black,un0,un0,un0,un1)}%
samer@74 857 \only<3>{\noderow(black,black,black,un0,un0,un0)}%
samer@74 858 \only<4>{\noderow(black,black,black,black,un0,un0)}%
samer@74 859 \\[1.2em]
samer@74 860 \raisebox{0.5em}{intermediate:} &
samer@74 861 \only<1>{\noderow(black,un1,un2,un3,un4,un5)}%
samer@74 862 \only<2>{\noderow(black,black,un1,un2,un3,un4)}%
samer@74 863 \only<3>{\noderow(black,black,black,un1,un2,un3)}%
samer@74 864 \only<4>{\noderow(black,black,black,black,un1,un2)}%
samer@74 865 \\[1.2em]
samer@74 866 \raisebox{0.5em}{too random:} &
samer@74 867 \only<1>{\noderow(black,un5,un5,un5,un5,un5)}%
samer@74 868 \only<2>{\noderow(black,black,un5,un5,un5,un5)}%
samer@74 869 \only<3>{\noderow(black,black,black,un5,un5,un5)}%
samer@74 870 \only<4>{\noderow(black,black,black,black,un5,un5)}%
samer@74 871 \end{tabular}
samer@74 872 \vspace{1em}
samer@74 873
samer@74 874 (Black: \emph{observed}; red: \emph{unobserved}; paler: \emph{greater uncertainty}.)
samer@74 875 Our interpretation:
samer@74 876 % when each event appears to carry no new information about the unknown future,
samer@74 877 % it is `meaningless' and not worth attending to.
samer@74 878 Things are `interesting' or at least `salient' when each new part supplies new information about parts to come.
samer@74 879
samer@74 880 % Quantitative information dynamics will enable us to test this experimentally with human
samer@74 881 % subjects.
samer@74 882 \end{iframe}
samer@74 883
samer@74 884 \begin{iframe}[The Melody Triangle\nicedot Information space]
samer@74 885 \begin{figure}
samer@74 886 \colfig[0.75]{mtriscat}
samer@74 887 \end{figure}
A population of transition matrices in the 3D space of $h_\mu$, $\rho_\mu$ and $b_\mu$.
% Concentrations of points along redundancy axis correspond to roughly periodic patterns.
The colour of each point
represents the PIR.
%---highest values found at intermediate entropy and redundancy.
The shape is mostly (though not completely) hollow inside, forming roughly
a curved triangular sheet.
samer@74 895 \end{iframe}
samer@74 896
samer@74 897 \begin{iframe}[The Melody Triangle\nicedot User interface]
samer@74 898 \begin{figure}
samer@74 899 \colfig[0.55]{TheTriangle.pdf}
samer@74 900 \end{figure}
Allows the user to place tokens in the triangle,
each triggering the sonification of a Markov chain with the corresponding information
`coordinate'.
samer@74 904 \end{iframe}
samer@74 905
samer@74 906 \begin{iframe}[Subjective information]
So far we've assumed that the sequence is actually sampled
from a stationary Markov chain with a transition matrix known
to the observer.
This means time averages of IPI and surprise should equal their
expectations.
samer@74 912
samer@74 913 \uncover<2->{
What if the sequence is sampled from some other Markov chain,
or is produced by some unknown process?
samer@74 916 }
samer@74 917
samer@74 918 \begin{itemize}
samer@74 919 \item<3->
samer@74 920 In general, it may be impossible to identify any `true' model. There
samer@74 921 are no `objective' probabilities; only subjective ones, as
samer@74 922 argued by de Finetti \cite{deFinetti}.
samer@74 923
samer@74 924
samer@74 925 \item<4->
If the sequence \emph{is} sampled from some Markov chain, we can
compute (time) averages of the observer's subjective surprise
and PI, and also track what happens if the observer gradually learns
the transition matrix from the data.
samer@74 930 \end{itemize}
samer@74 931 \end{iframe}
samer@74 932
samer@74 933
samer@74 934 \begin{iframe}[Effect of learning on information dynamics]
samer@74 935 \begin{figure}
samer@74 936 % \colfig{matbase/fig42687} % too small text
samer@74 937 % \colfig{matbase/fig60379} % 9*19 too tall
samer@74 938 % \colfig{matbase/fig52515} % 9*20 ok, perhaps text still too small
samer@74 939 \colfig[0.9]{matbase/fig30461} % 8*19 ok
samer@74 940 % \colfig{matbase/fig66022} % 8.5*19 ok
samer@74 941 \end{figure}
samer@74 942 % Upper row shows actual stochastic learning,
samer@74 943 % lower shows the idealised deterministic learning.
samer@74 944 \textbf{(a/b/e/f)}: multiple runs starting from same
samer@74 945 initial condition but using different generative transition matrices.
samer@74 946 \textbf{(c/d/g/h)}: multiple runs starting from different
samer@74 947 initial conditions and converging on transition matrices
samer@74 948 with (c/g) high and (d/h) low PIR.
samer@74 949 \end{iframe}
samer@74 950
samer@74 951
samer@74 952 \section{More process models}
samer@74 953 \begin{iframe}[Exchangeable sequences and parametric models]
De Finetti's theorem says that an exchangeable random process can be represented
as a sequence of variables which are iid \emph{given} some hidden probability
distribution, which we can think of as a parameterised model:
samer@74 957 \begin{tabular}{lp{0.45\linewidth}}
samer@74 958 \hangbox{\begin{tikzpicture}
samer@74 959 [>=stealth',var/.style={circle,draw,inner sep=1pt,text height=10pt,text depth=4pt}]
samer@74 960 \matrix[ampersand replacement=\&,matrix of math nodes,row sep=2em,column sep=1.8em,minimum size=17pt] {
samer@74 961 \& |(theta) [var]| \Theta \\
samer@74 962 |(x1) [var]| X_1 \& |(x2) [var]| X_2 \& |(x3) [var]| X_3 \&
samer@74 963 |(etc) [outer sep=2pt]| \dots \\
samer@74 964 };
samer@74 965 \foreach \n in {x1,x2,x3,etc} \draw[->] (theta)--(\n);
samer@74 966 \end{tikzpicture}}
samer@74 967 &
samer@74 968 \raggedright
\uncover<2->{The observer's belief state at time $t$ includes a probability distribution
over the parameters, $p(\ev(\Theta=\theta)|\ev(\past{X}_t=\past{x}_t))$.}
samer@74 971 \end{tabular}\\[1em]
samer@74 972 \uncover<3->{
Each observation causes a revision of the belief state
and hence supplies information
samer@74 975 $
samer@74 976 I(\ev(X_t=x_t)\to\Theta|\ev(\past{X}_t=\past{x}_t))
samer@74 977 % = D( p_{\Theta|\ev(X_t=x_t),\ev(\past{X}_t=\past{x}_t)} || p_{\Theta|\ev(\past{X}_t=\past{x}_t)} ).
$ about $\Theta$.
In previous work we called this the `model information rate'.
samer@74 980 }
samer@74 981 \uncover<4->{(Same as Haussler and Opper's \cite{HausslerOpper1995} IIG or
samer@74 982 Itti and Baldi's \cite{IttiBaldi2005} Bayesian surprise.)}
samer@74 983 \end{iframe}
samer@74 984
samer@74 985 \def\circ{circle (9)}%
samer@74 986 \def\bs(#1,#2,#3){(barycentric cs:p1=#1,p2=#2,p3=#3)}%
samer@74 987 \begin{iframe}[IIG equals IPI in (some) XRPs]
samer@74 988 \begin{tabular}{@{}lc}
samer@74 989 \parbox[c]{0.5\linewidth}{\raggedright
samer@74 990 Mild assumptions yield a relationship between IIG (instantaneous information gain) and IPI.
samer@74 991 (Everything here implicitly conditioned on $\past{X}_t$).}
samer@74 992 &
samer@74 993 \pgfsetxvec{\pgfpoint{1mm}{0mm}}%
samer@74 994 \pgfsetyvec{\pgfpoint{0mm}{1mm}}%
samer@74 995 \begin{tikzpicture}[baseline=0pt]
samer@74 996 \coordinate (p1) at (90:6);
samer@74 997 \coordinate (p2) at (210:6);
samer@74 998 \coordinate (p3) at (330:6);
samer@74 999 \only<4->{%
samer@74 1000 \begin{scope}
samer@74 1001 \foreach \p in {p1,p2,p3} \clip (\p) \circ;
samer@74 1002 \fill[lightgray] (-10,-10) rectangle (10,10);
samer@74 1003 \end{scope}
samer@74 1004 \path (0,0) node {$\mathcal{I}_t$};}
samer@74 1005 \foreach \p in {p1,p2,p3} \draw (\p) \circ;
samer@74 1006 \path (p2) +(210:13) node {$X_t$}
samer@74 1007 (p3) +(330:13) node {$\fut{X}_t$}
samer@74 1008 (p1) +(140:12) node {$\Theta$};
samer@74 1009 \only<2->{\path \bs(-0.25,0.5,0.5) node {$0$};}
samer@74 1010 \only<3->{\path \bs(0.5,0.5,-0.25) node {$0$};}
samer@74 1011 \end{tikzpicture}
samer@74 1012 \end{tabular}\\
samer@74 1013 \begin{enumerate}
samer@74 1014 \uncover<2->{\item $X_t \perp \fut{X}_t | \Theta$: observations iid given $\Theta$ for XRPs;}
samer@74 1015 \uncover<3->{\item $\Theta \perp X_t | \fut{X}_t$:
samer@74 1016 % $I(X_t;\fut{X}_t|\Theta_t)=0$ due to the conditional independence of
samer@74 1017 % observables given the parameters $\Theta_t$, and
samer@74 1018 % $I(\Theta_t;X_t|\fut{X}_t)=0$
samer@74 1019 assumption that $X_t$ adds no new information about $\Theta$
samer@74 1020 given infinitely long sequence $\fut{X}_t =X_{t+1:\infty}$.}
samer@74 1021 \end{enumerate}
samer@74 1022 \uncover<4->{Hence, $I(X_t;\Theta_t|\past{X}_t)=I(X_t;\fut{X}_t|\past{X}_t) = \mathcal{I}_t$.\\}
samer@74 1023 \uncover<5->{Can drop assumption 1 and still get $I(X_t;\Theta_t|\past{X}_t)$ as an additive component (lower bound) of $\mathcal{I}_t$.}
samer@74 1024 \end{iframe}
samer@74 1025
samer@74 1026 \def\fid#1{#1}
samer@74 1027 \def\specint#1{\frac{1}{2\pi}\int_{-\pi}^\pi #1{S(\omega)} \dd \omega}
samer@74 1028 \begin{iframe}[Discrete-time Gaussian processes]
The information-theoretic quantities used earlier have analogues for continuous-valued
random variables. For stationary Gaussian processes, we can obtain results in
terms of the power spectral density $S(\omega)$ (which for discrete time is periodic
in $\omega$ with period $2\pi$). Standard methods give
samer@74 1033 \begin{align*}
samer@74 1034 H(X_t) &= \frac{1}{2}\left( \log 2\pi e + \log \specint{}\right), \\
samer@74 1035 h_\mu &= \frac{1}{2} \left( \log 2\pi e + \specint{\log} \right), \\
samer@74 1036 \rho_\mu &= \frac{1}{2} \left( \log \specint{\fid} - \specint{\log}\right).
samer@74 1037 \end{align*}
samer@74 1038 Entropy rate is also known as Kolmogorov-Sinai entropy.
samer@74 1039 % $H(X_t)$ is a function of marginal variance which is just the total power in the spectrum.
samer@74 1040 \end{iframe}
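
% Illustrative example: the spectral formulas above evaluated for a first-order
% autoregressive (AR(1)) process, a standard closed-form case (assumes |alpha| < 1).
\begin{iframe}[Discrete-time Gaussian processes\nicedot AR(1) example]
For the AR(1) process $X_t = \alpha X_{t-1} + \epsilon_t$ with $\epsilon_t \sim \mathcal{N}(0,\sigma^2)$
and $|\alpha|<1$, the spectral density is $S(\omega) = \sigma^2 / |1-\alpha e^{-i\omega}|^2$, with
$\specint{\fid} = \sigma^2/(1-\alpha^2)$ and $\specint{\log} = \log\sigma^2$. Hence
\begin{align*}
h_\mu &= \tfrac{1}{2}\log(2\pi e \sigma^2), &
\rho_\mu &= -\tfrac{1}{2}\log(1-\alpha^2).
\end{align*}
As $\alpha \to 1$ the process becomes marginally stable and $\rho_\mu$ diverges, while
$h_\mu$ stays fixed by the innovation variance.
\end{iframe}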
samer@74 1041
samer@74 1042 \begin{iframe}[PIR/Multi-information duality]
Analysis yields the PIR:
samer@74 1044 \[
samer@74 1045 b_\mu = \frac{1}{2} \left( \log \specint{\frac{1}} - \specint{\log\frac{1}} \right).
samer@74 1046 \]
This yields a simple expression for finite-order autoregressive processes, but beware: it can diverge
for moving-average processes!
samer@74 1049
samer@74 1050 \uncover<2->{
samer@74 1051 Compare with multi-information rate:
samer@74 1052 \[
samer@74 1053 \rho_\mu = \frac{1}{2} \left( \log \specint{\fid} - \specint{\log}\right).
samer@74 1054 \]
This yields a simple expression for finite-order moving-average processes, but can diverge
for marginally stable autoregressive processes.
samer@74 1057 }
samer@74 1058
samer@74 1059 \uncover<3->{
Infinities are troublesome and point to a problem with the notion of infinitely
precise observation of continuous-valued variables.
samer@74 1062 }
samer@74 1063 \end{iframe}
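
% Continuation of the AR(1) example above (same assumptions): the PIR and multi-information
% rate in closed form, illustrating the duality described on the previous slide.
\begin{iframe}[PIR/Multi-information duality\nicedot AR(1) example]
For the same AR(1) process, $1/S(\omega) = |1-\alpha e^{-i\omega}|^2/\sigma^2$, so
$\specint{\frac{1}} = (1+\alpha^2)/\sigma^2$ and $\specint{\log\frac{1}} = -\log\sigma^2$, giving
\[
b_\mu = \tfrac{1}{2}\log(1+\alpha^2),
\qquad
\rho_\mu = -\tfrac{1}{2}\log(1-\alpha^2).
\]
Here the PIR stays bounded (at most $\tfrac{1}{2}\log 2$) however strong the dependence, whereas
the multi-information rate diverges as $\alpha \to 1$; the roles are reversed for
moving-average processes.
\end{iframe}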
samer@74 1064
samer@74 1065 % Information gained about model parameters (measured as the KL divergence
samer@74 1066 % between prior and posterior distributions) is equivalent
samer@74 1067 % to \textbf{Itti and Baldi's `Bayesian surprise'} \cite{IttiBaldi2005}.
samer@74 1068
samer@74 1069
samer@74 1070 \section{Application: Analysis of minimalist music}
samer@74 1071 \label{s:Experiments}
samer@74 1072
samer@74 1073 \begin{iframe}[Material and Methods]
samer@74 1074
samer@74 1075 % Returning to our original goal of modelling the perception of temporal structure
samer@74 1076 % in music, we computed dynamic information measures for
We took two pieces of minimalist
music by Philip Glass, \emph{Two Pages} (1969) and \emph{Gradus} (1968).
Both are monophonic and isochronous, so they can be represented very simply as
a sequence of symbols (notes), one symbol per beat,
yet they remain ecologically valid examples of `real' music.
samer@74 1082
We use an elaboration of the Markov chain model---not necessarily
a good model \latin{per se}, but that wasn't the point of the experiment.
The Markov chain model was chosen because it is tractable from an information
dynamics point of view while not being completely trivial.
samer@74 1087 \end{iframe}
samer@74 1088
samer@74 1089 \begin{iframe}[Time-varying transition matrix model]
samer@74 1090 We allow transition matrix to vary slowly with time to track
samer@74 1091 changes in the sequence structure.
samer@74 1092 Hence, observer's belief state includes a probabilitiy
samer@74 1093 distribution over transition matrices; we choose a product of
samer@74 1094 Dirichlet distributions:
samer@74 1095 \[
samer@74 1096 \textstyle
samer@74 1097 p(\trans|\param) = \prod_{j=1}^K p_\mathrm{Dir}(\trans_{:j}|\param_{:j}),
samer@74 1098 \]
where $\trans_{:j}$ is the \nth{j} column of $\trans$ and $\param$ is a
$K \times K$ parameter matrix.
samer@74 1101 % (Dirichlet, being conjugate to discrete/multinomial distribution,
samer@74 1102 % makes processing of observations particularly simple.)
samer@74 1103 % such that $\param_{:j}$ is the
samer@74 1104 % parameter tuple for the $K$-component Dirichlet distribution $p_\mathrm{Dir}$.
samer@74 1105 % \begin{equation}
samer@74 1106 % \textstyle
samer@74 1107 % p(\trans|\param) = \prod_{j=1}^K p_\mathrm{Dir}(\trans_{:j}|\param_{:j})
samer@74 1108 % = \prod_{j=1}^K (\prod_{i=1}^K \trans_{ij}^{\param_{ij}-1}) / B(\param_{:j}),
samer@74 1109 % \end{equation}
samer@74 1110 % where $\trans_{:j}$ is the \nth{j} column of $\trans$ and $\param$ is an
samer@74 1111 % $K \times K$ matrix of parameters.
samer@74 1112
At each time step, the distribution first \emph{spreads} under the mapping
\[
\param_{ij} \mapsto \frac{\beta\param_{ij}}{(\beta + \param_{ij})}
\]
to model the possibility that the transition matrix
has changed ($\beta=2500$ in our experiments). Then it \emph{contracts}
as each new observation provides fresh evidence about the transition matrix.
samer@74 1120 %
samer@74 1121 % Each observed symbol % provides fresh evidence about current transition matrix,
samer@74 1122 % enables observer to update its belief state.
samer@74 1123 \end{iframe}
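
% Update sketch added for concreteness. The spreading map is taken from the slide above; the
% contraction step written here assumes the standard conjugate Dirichlet count update, which
% the slide does not spell out explicitly.
\begin{iframe}[Time-varying transition matrix model\nicedot Update sketch]
One step of the observer's update, on seeing a transition from state $j$ to state $i$:
\begin{align*}
\text{spread: } \param_{i'j'} &\mapsto \frac{\beta\param_{i'j'}}{\beta + \param_{i'j'}}
  \quad \text{for all } i', j', \\
% contraction step below assumes the usual conjugate Dirichlet count update
\text{contract: } \param_{ij} &\mapsto \param_{ij} + 1.
\end{align*}
The spreading map caps the effective evidence at $\beta$, so old observations are gradually
forgotten and the belief state can track a slowly changing transition matrix.
\end{iframe}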
samer@74 1124
samer@74 1125
samer@74 1126 \begin{iframe}[Two Pages\nicedot Results]
samer@74 1127
samer@74 1128 % \begin{fig}{twopages}
samer@74 1129 \begin{tabular}{c@{\hspace{1.5ex}}l}%
samer@74 1130 % \hspace*{-1.5em}
samer@74 1131 % \hangbox{\colfig[0.5]{matbase/fig20304}} % 3 plots
samer@74 1132 % \hangbox{\colfig[0.52]{matbase/fig39528}} % 4 plots with means
samer@74 1133 % \hangbox{\colfig[0.52]{matbase/fig63538}} % two pages, 5 plots
samer@74 1134 % \hangbox{\colfig[0.52]{matbase/fig53706}} % two pages, 5 plots
samer@74 1135 \hangbox{\colfig[0.72]{matbase/fig33309}} % two pages, 5 plots
samer@74 1136 &
samer@74 1137 \hangbox{%
samer@74 1138 \parbox{0.28\linewidth}{
samer@74 1139 \raggedright
\textbf{Thick lines:} part boundaries as indicated
by Glass; \textbf{grey lines (top four panels):} changes in the melodic `figures';
% of which the piece is constructed.
\textbf{grey lines (bottom panel):}
the six most surprising moments chosen by an expert listener.
samer@74 1145 }
samer@74 1146 }
samer@74 1147 \end{tabular}
samer@74 1148 % \end{fig}
samer@74 1149 \end{iframe}
samer@74 1150
samer@74 1151 \begin{iframe}[Two Pages\nicedot Rule based analysis]
samer@74 1152 \begin{figure}
samer@74 1153 \colfig[0.98]{matbase/fig13377}
samer@74 1154 % \hangbox{\colfig[0.98]{matbase/fig13377}}
samer@74 1155 \end{figure}
samer@74 1156 Analysis of \emph{Two Pages} using (top) Cambouropoulos'
samer@74 1157 \cite{CambouropoulosPhD} Local Boundary Detection Model (LBDM) and
samer@74 1158 (bottom) Lerdahl and Jackendoff's \cite{LerdahlJackendoff83}
samer@74 1159 grouping preference rule 3a (GPR3a), which is a function of pitch proximity.
samer@74 1160 Both analyses indicate `boundary strength'.
samer@74 1161 \end{iframe}
samer@74 1162
samer@74 1163 \begin{iframe}[Two Pages\nicedot Discussion]
samer@74 1164 The correspondence between the information
samer@74 1165 measures and the structure of the piece is quite close.
samer@74 1166 There is good agreement between the six `most surprising
samer@74 1167 moments' chosen by the expert listener and the model's information signal.
samer@74 1168
samer@74 1169 What appears to be an error in the detection of
samer@74 1170 the major part boundary (between events 5000 and 6000) actually
samer@74 1171 reflects a known anomaly in the score, where Glass places the boundary several events
samer@74 1172 before there is any change in the pattern of notes. Alternative analyses of \emph{Two Pages}
samer@74 1173 place the boundary in agreement with the peak in our surprisingness signal.
samer@74 1174 \end{iframe}
samer@74 1175
samer@74 1176 \comment{
samer@74 1177 \begin{iframe}[Gradus\nicedot Results]
samer@74 1178
samer@74 1179 % \begin{fig}{gradus}
samer@74 1180 \begin{tabular}{c@{\hspace{1.5ex}}l}
samer@74 1181 % &
samer@74 1182 % \hangbox{\colfig[0.4]{matbase/fig81812}}
samer@74 1183 % \hangbox{\colfig[0.52]{matbase/fig23177}} % two pages, 5 plots
samer@74 1184 % \hangbox{\colfig[0.495]{matbase/fig50709}} % Fudged segmentation
samer@74 1185 % \hangbox{\colfig[0.495]{matbase/fig3124}} % Geraint's segmentation
samer@74 1186 \hangbox{\colfig[0.715]{matbase/fig11808}} % Geraint's segmentation, corrected
samer@74 1187 &
samer@74 1188 % \hangbox{\colfig[0.5]{matbase/fig39914}}
samer@74 1189 \hangbox{%
samer@74 1190 \parbox{0.28\linewidth}{
samer@74 1191 \raggedright
samer@74 1192 \textbf{Thick lines:} part boundaries as indicated
samer@74 1193 by the composer.
samer@74 1194 \textbf{Grey lines:} segmentation by an expert listener.
samer@74 1195
samer@74 1196 Note: traces smoothed with a Gaussian
samer@74 1197 window about 16 events wide.
samer@74 1198 }
samer@74 1199 }
samer@74 1200 \end{tabular}
samer@74 1201 % \end{fig}
samer@74 1202 \end{iframe}
samer@74 1203
samer@74 1204 \begin{iframe}[Gradus\nicedot Rule based analysis]
samer@74 1205 \begin{figure}
samer@74 1206 \colfig[0.98]{matbase/fig58691}
samer@74 1207 \end{figure}
samer@74 1208 Boundary strength analysis of \emph{Gradus} using (top) Cambouropoulos'
samer@74 1209 \cite{CambouropoulosPhD} Local Boundary Detection Model and
samer@74 1210 (bottom) Lerdahl and Jackendoff's \cite{LerdahlJackendoff83}
samer@74 1211 grouping preference rule 3a.
samer@74 1212 \end{iframe}
samer@74 1213 }
samer@74 1214 \begin{iframe}[Gradus\nicedot Metrical analysis]
samer@74 1215 \begin{figure}
samer@74 1216 \begin{tabular}{cc}
samer@74 1217 \colfig[0.40]{matbase/fig56807} & \colfig[0.41]{matbase/fig27144} \\
samer@74 1218 \colfig[0.40]{matbase/fig87574} & \colfig[0.41]{matbase/fig13651} \\
samer@74 1219 \hspace*{1ex}\colfig[0.39]{matbase/fig19913} & \hspace*{1ex}\colfig[0.40]{matbase/fig66144}
samer@74 1220 \end{tabular}
samer@74 1221 \end{figure}
samer@74 1222 \end{iframe}
samer@74 1223
samer@74 1224 \comment{
samer@74 1225 \begin{iframe}[Gradus\nicedot Discussion]
samer@74 1226
samer@74 1227 \emph{Gradus} is much less systematically structured than \emph{Two Pages}, and
samer@74 1228 relies more on the conventions of tonal music, which are not represented in the model.
samer@74 1229
samer@74 1230 For example, the initial transition matrix is uniform, which does not correctly represent
samer@74 1231 prior knowledge about tonal music.
samer@74 1232
samer@74 1233 The information dynamics analysis does not give such a
samer@74 1234 clear picture of the structure, but some of the fine structure can be related
samer@74 1235 to specific events in the music (see Pearce and Wiggins 2006).
samer@74 1236 % nonetheless, there are some points of correspondence between the analysis and
samer@74 1237 % segmentation given by Keith Potter.
samer@74 1238
samer@74 1239 \end{iframe}
samer@74 1240 }
samer@74 1241
samer@74 1242 \section{Application: Beat tracking and rhythm}
samer@74 1243
samer@74 1244 \begin{iframe}[Bayesian beat tracker]
samer@74 1245 \uncover<1->{
samer@74 1246 The tracker works by maintaining a probabilistic belief state about the time of the next
samer@74 1247 beat and the current tempo.
samer@74 1248
samer@74 1249 \begin{figure}
samer@74 1250 \colfig{beat_prior}
samer@74 1251 \end{figure}
samer@74 1252 }
samer@74 1253
samer@74 1254 \uncover<2->{
samer@74 1255 It receives categorised drum events (kick or snare) from an audio analysis front-end.
samer@74 1256 }
samer@74 1257
samer@74 1258 \end{iframe}
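
% Editorial sketch frame: a much-simplified Bayesian filtering step consistent
% with the description above; the notation ($\tau$, $T$, $e_n$) is illustrative
% and is not taken from the original tracker implementation.
\begin{iframe}[Bayesian beat tracker\nicedot Update sketch]
Schematically (a simplified sketch: the prediction step that advances the beat
grid between events is omitted, and events are assumed conditionally independent
given the parameters), with $\tau$ the time of the next beat, $T$ the current
tempo and $e_n$ the \nth{n} categorised drum event, each incoming event updates
the belief state by Bayes' rule:
\[
p(\tau, T \mid e_{1:n}) \propto p(e_n \mid \tau, T)\, p(\tau, T \mid e_{1:n-1}).
\]
\end{iframe}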
samer@74 1259
samer@74 1260 \begin{iframe}[Information gain in the beat tracker]
samer@74 1261 \begin{tabular}{ll}
samer@74 1262 \parbox[t]{0.43\linewidth}{\raggedright
samer@74 1263 \uncover<1->{
samer@74 1264 Each event triggers a change in the belief state, so we can compute the
samer@74 1265 information gain about the beat parameters.}\\[1em]
samer@74 1266
samer@74 1267 \uncover<2->{
samer@74 1268 The relationship between IIG and IPI
samer@74 1269 means we can treat IIG as a proxy for IPI.}
samer@74 1270 }
samer@74 1271 &
samer@74 1272 \hangbox{\colfig[0.55]{beat_info}}
samer@74 1273 \end{tabular}
samer@74 1274 \end{iframe}
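
% Editorial sketch frame: the usual Kullback--Leibler form of information gain,
% assumed (not quoted) from the description above; notation as in the previous
% sketch frame.
\begin{iframe}[Information gain\nicedot Sketch]
Assuming the usual definition, and using the same illustrative notation as the
update sketch above, the information gained from event $e_n$ about the beat
parameters $(\tau, T)$ is the KL divergence from the prior to the posterior
belief state:
\[
\mathcal{I}_n = \int p(\tau, T \mid e_{1:n})
\log \frac{p(\tau, T \mid e_{1:n})}{p(\tau, T \mid e_{1:n-1})}
\,\mathrm{d}\tau\,\mathrm{d}T.
\]
\end{iframe}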
samer@74 1275
samer@74 1276 \begin{iframe}[Analysis of drum patterns]
samer@74 1277 We analysed 17 recordings of drummers, playing either solo or with a band.
samer@74 1278 All patterns were in 4/4.
samer@74 1279 \begin{itemize}
samer@74 1280 \item
samer@74 1281 \uncover<1->{
samer@74 1282 Information tends to arrive at beat times: a consequence of the structure of the model.
samer@74 1283 }
samer@74 1284 \item
samer@74 1285 \uncover<2->{
samer@74 1286 Lots of information seems to arrive after drum fills and breaks
samer@74 1287 as the drummer reestablishes the beat.
samer@74 1288 }
samer@74 1289 \item
samer@74 1290 \uncover<3->{
samer@74 1291 There is no consistent pattern of information arrival in relation to metrical
samer@74 1292 structure, so no obvious metrical structure in the micro-timing of events.
samer@74 1293 However, it is still possible that metrical structure might emerge from predictive
samer@74 1294 analysis of the drum patterns.
samer@74 1295 }
samer@74 1296 \end{itemize}
samer@74 1297 \end{iframe}
samer@74 1298
samer@74 1299 \section{Summary and conclusions}
samer@74 1300 \label{s:Conclusions}
samer@74 1301
samer@74 1302 \begin{iframe}[Summary]
samer@74 1303
samer@74 1304 \begin{itemize}
samer@74 1305 \item Dynamic, observer-centric information theory.
samer@74 1306 \item Applicable to any dynamic probabilistic model.
samer@74 1307 \item PIR potentially a measure of complexity.
samer@74 1308 \item Simple analysis for Markov chains and Gaussian processes.
samer@74 1309 \item Applications in music analysis and composition.
samer@74 1310 \item Search for neural correlates is ongoing (that's another talk\ldots).
samer@74 1311 \end{itemize}
samer@74 1312 Thanks!
samer@74 1313 \end{iframe}
samer@74 1314
samer@74 1315 \begin{bframe}[Bibliography]
samer@74 1316 \bibliographystyle{alpha}
samer@74 1317 {\small \bibliography{all,c4dm,compsci}}
samer@74 1318 \end{bframe}
samer@74 1319 \end{document}