annotate mume2012/MelodyTriangleMUME2012.tex @ 58:a63c438b3f65 tip

Squeezed it into the 6 page limit
author Henrik Ekeus <hekeus@eecs.qmul.ac.uk>
date Tue, 11 Jun 2013 15:17:21 +0100
parents 3d4469f9e172
children
rev   line source
hekeus@51 1 %File: MelodyTriangleMUME2012.tex
hekeus@51 2 \documentclass{article}
hekeus@51 3 \usepackage{aaai}
hekeus@51 4 \usepackage{times}
hekeus@51 5 \usepackage{helvet}
hekeus@51 6 \usepackage{courier}
hekeus@51 7 \frenchspacing
hekeus@51 8 %TODO
hekeus@51 9 \pdfinfo{
hekeus@51 10 /Title (Melody Triangle Mume (todo))
hekeus@51 11 /Subject(todo)
hekeus@51 12 /Author(todo)}
hekeus@51 13 \usepackage{cite}
hekeus@51 14
hekeus@51 15 \usepackage{graphicx}
hekeus@51 16 \usepackage{amssymb}
hekeus@51 17 \usepackage{epstopdf}
hekeus@51 18 \usepackage{url}
hekeus@51 19 \usepackage{listings}
hekeus@51 20 %\usepackage[expectangle]{tools}
hekeus@51 21 \usepackage{tools}
hekeus@51 22 \usepackage{fixfloats}
hekeus@51 23 \usepackage{tikz}
hekeus@51 24 \usetikzlibrary{calc}
hekeus@51 25 \usetikzlibrary{matrix}
hekeus@51 26 \usetikzlibrary{patterns}
hekeus@51 27 \usetikzlibrary{arrows}
hekeus@51 28
hekeus@51 29 \let\citep=\cite
hekeus@51 30 \newcommand{\colfig}[2][1]{\includegraphics[width=#1\linewidth]{figures/#2}}%
hekeus@51 31 \newcommand\preals{\reals_+}
hekeus@51 32 \newcommand\X{\mathcal{X}}
hekeus@51 33 \newcommand\Y{\mathcal{Y}}
hekeus@51 34 \newcommand\domS{\mathcal{S}}
hekeus@51 35 \newcommand\A{\mathcal{A}}
hekeus@51 36 \newcommand\Data{\mathcal{D}}
hekeus@51 37 \newcommand\rvm[1]{\mathrm{#1}}
hekeus@51 38 \newcommand\sps{\,.\,}
hekeus@51 39 \newcommand\Ipred{\mathcal{I}_{\mathrm{pred}}}
hekeus@51 40 \newcommand\Ix{\mathcal{I}}
hekeus@51 41 \newcommand\IXZ{\overline{\underline{\mathcal{I}}}}
hekeus@51 42 \newcommand\x{\vec{x}}
hekeus@51 43 \newcommand\Ham[1]{\mathcal{H}_{#1}}
hekeus@51 44 \newcommand\subsets[2]{[#1]^{(k)}}
hekeus@51 45 \def\bet(#1,#2){#1..#2}
hekeus@51 46
hekeus@51 47
hekeus@51 48 \def\ev(#1=#2){#1\!\!=\!#2}
hekeus@51 49 \newcommand\rv[1]{\Omega \to #1}
hekeus@51 50 \newcommand\ceq{\!\!=\!}
hekeus@51 51 \newcommand\cmin{\!-\!}
hekeus@51 52 \newcommand\modulo[2]{#1\!\!\!\!\!\mod#2}
hekeus@51 53
hekeus@51 54 \newcommand\sumitoN{\sum_{i=1}^N}
hekeus@51 55 \newcommand\sumktoK{\sum_{k=1}^K}
hekeus@51 56 \newcommand\sumjtoK{\sum_{j=1}^K}
hekeus@51 57 \newcommand\sumalpha{\sum_{\alpha\in\A}}
hekeus@51 58 \newcommand\prodktoK{\prod_{k=1}^K}
hekeus@51 59 \newcommand\prodjtoK{\prod_{j=1}^K}
hekeus@51 60
hekeus@51 61 \newcommand\past[1]{\overset{\rule{0pt}{0.2em}\smash{\leftarrow}}{#1}}
hekeus@51 62 \newcommand\fut[1]{\overset{\rule{0pt}{0.1em}\smash{\rightarrow}}{#1}}
hekeus@51 63 \newcommand\parity[2]{P^{#1}_{2,#2}}
hekeus@51 64
hekeus@51 65
hekeus@51 66 %%%%%%%%%%%%%%%%%%%%%%%% Some useful packages %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
hekeus@51 67 %%%%%%%%%%%%%%%%%%%%%%%% See related documentation %%%%%%%%%%%%%%%%%%%%%%%%%%
hekeus@51 68 %\usepackage{amsmath} % popular packages from Am. Math. Soc. Please use the
hekeus@51 69 %\usepackage{amssymb} % related math environments (split, subequation, cases,
hekeus@51 70 %\usepackage{amsfonts}% multline, etc.)
hekeus@51 71 %\usepackage{bm} % Bold Math package, defines the command \bf{}
hekeus@51 72 %\usepackage{paralist}% extended list environments
hekeus@51 73 %%subfig.sty is the modern replacement for subfigure.sty. However, subfig.sty
hekeus@51 74 %%requires and automatically loads caption.sty which overrides class handling
hekeus@51 75 %%of captions. To prevent this problem, preload caption.sty with caption=false
hekeus@51 76 %\usepackage[caption=false]{caption}
hekeus@51 77 %\usepackage[font=footnotesize]{subfig}
hekeus@51 78
hekeus@51 79
hekeus@51 80 %user defined variables
hekeus@51 81 \def\papertitle{The Melody Triangle - Pattern and Predictability in Music}
hekeus@51 82 \def\firstauthor{Henrik Ekeus}
hekeus@51 83 \def\secondauthor{Samer A. Abdallah}
hekeus@51 84 \def\thirdauthor{Mark D. Plumbley}
hekeus@51 85 \def\fourthauthor{Peter W. McOwan}
hekeus@51 86
hekeus@51 87 % adds the automatic
hekeus@51 88 % Saves a lot of output space in PDF... after conversion with the distiller
hekeus@51 89 % Delete if you cannot get PS fonts working on your system.
hekeus@51 90
hekeus@51 91 % pdf-tex settings: detect automatically if run by latex or pdflatex
hekeus@51 92 \newif\ifpdf
hekeus@51 93 \ifx\pdfoutput\relax
hekeus@51 94 \else
hekeus@51 95 \ifcase\pdfoutput
hekeus@51 96 \pdffalse
hekeus@51 97 \else
hekeus@51 98 \pdftrue
hekeus@51 99 \fi\fi % first \fi closes \ifcase, second closes the outer \ifx\pdfoutput\relax
hekeus@51 100
hekeus@51 101 \ifpdf % compiling with pdflatex
hekeus@51 102 \usepackage[pdftex,
hekeus@51 103 pdftitle={\papertitle},
hekeus@51 104 pdfauthor={\firstauthor, \secondauthor, \thirdauthor, \fourthauthor},
hekeus@51 105 bookmarksnumbered, % use section numbers with bookmarks
hekeus@51 106 pdfstartview=XYZ % start with zoom=100% instead of full screen;
hekeus@51 107 % especially useful if working with a big screen :-)
hekeus@51 108 ]{hyperref}
hekeus@51 109 %\pdfcompresslevel=9
hekeus@51 110
hekeus@51 111 %\usepackage[pdftex]{graphicx}
hekeus@51 112 % declare the path(s) where your graphic files are and their extensions so
hekeus@51 113 %you won't have to specify these with every instance of \includegraphics
hekeus@51 114 %\graphicspath{{./figures/}}
hekeus@51 115 %\DeclareGraphicsExtensions{.pdf,.jpeg,.png}
hekeus@51 116
hekeus@51 117 \usepackage[figure,table]{hypcap}
hekeus@51 118
hekeus@51 119 \else % compiling with latex
hekeus@51 120 \usepackage[dvips,
hekeus@51 121 bookmarksnumbered, % use section numbers with bookmarks
hekeus@51 122 pdfstartview=XYZ % start with zoom=100% instead of full screen
hekeus@51 123 ]{hyperref} % hyperrefs are active in the pdf file after conversion
hekeus@51 124
hekeus@51 125 \usepackage[dvips]{epsfig,graphicx}
hekeus@51 126 % declare the path(s) where your graphic files are and their extensions so
hekeus@51 127 %you won't have to specify these with every instance of \includegraphics
hekeus@51 128 \graphicspath{{./figures/}}
hekeus@51 129 \DeclareGraphicsExtensions{.eps}
hekeus@51 130
hekeus@51 131 \usepackage[figure,table]{hypcap}
hekeus@51 132 \fi
hekeus@51 133
hekeus@51 134 %setup the hyperref package - make the links black without a surrounding frame
hekeus@51 135 \hypersetup{
hekeus@51 136 colorlinks,%
hekeus@51 137 citecolor=black,%
hekeus@51 138 filecolor=black,%
hekeus@51 139 linkcolor=black,%
hekeus@51 140 urlcolor=black
hekeus@51 141 }
hekeus@51 142
hekeus@51 143
hekeus@51 144 % Title.
hekeus@51 145 % ------
hekeus@51 146 \title{\papertitle}
hekeus@51 147
hekeus@51 148 % Authors
hekeus@51 149 % Please note that submissions are NOT anonymous, therefore
hekeus@51 150 % authors' names have to be VISIBLE in your manuscript.
hekeus@51 151 %
hekeus@51 152 % Single address
hekeus@51 153 % To use with only one author or several with the same address
hekeus@51 154 % ---------------
hekeus@51 155 \oneauthor
hekeus@51 156 {\firstauthor, \secondauthor, \thirdauthor, \fourthauthor} {Queen Mary University of London \\ Centre for Digital Music \\ School of Electronic Engineering and Computer Science\\%
hekeus@51 157 {\tt \href{mailto:hekeus@eecs.qmul.ac.uk}{\{hekeus,samer.abdallah\}@eecs.qmul.ac.uk}}}
hekeus@51 158
hekeus@51 159 %Two addresses
hekeus@51 160 %--------------
hekeus@51 161 % \twoauthors
hekeus@51 162 % {\firstauthor} {Affiliation1 \\ %
hekeus@51 163 % {\tt \href{mailto:author1@smcnetwork.org}{author1@smcnetwork.org}}}
hekeus@51 164 % {\secondauthor} {Affiliation2 \\ %
hekeus@51 165 % {\tt \href{mailto:author2@smcnetwork.org}{author2@smcnetwork.org}}}
hekeus@51 166
hekeus@51 167 % Three addresses
hekeus@51 168 % --------------
hekeus@51 169 % \threeauthors
hekeus@51 170 % {\firstauthor} {Affiliation1 \\ %
hekeus@51 171 % {\tt \href{mailto:author1@smcnetwork.org}{author1@smcnetwork.org}}}
hekeus@51 172 % {\secondauthor} {Affiliation2 \\ %
hekeus@51 173 % {\tt \href{mailto:author2@smcnetwork.org}{author2@smcnetwork.org}}}
hekeus@51 174 % {\thirdauthor} { Affiliation3 \\ %
hekeus@51 175 % {\tt \href{mailto:author3@smcnetwork.org}{author3@smcnetwork.org}}}
hekeus@51 176 %
hekeus@51 177
hekeus@51 178 % ***************************************** the document starts here ***************
hekeus@51 179 \begin{document}
hekeus@51 180 %
hekeus@51 181 \capstartfalse
hekeus@51 182 \maketitle
hekeus@51 183 \capstarttrue
hekeus@51 184 %
hekeus@51 185 \begin{abstract}
hekeus@51 186 The Melody Triangle is an interface for the discovery of melodic materials, where the input -- positions within a triangle -- directly maps to information-theoretic properties of the output. The measures are the entropy rate, redundancy and \emph{predictive information rate}~\cite{Abdallah:2009p4089} of the random process used to generate the sequence of notes. These are all related to the \emph{predictability} of the sequence and as such address the notions of expectation and surprise in the perception of music. We describe some of the relevant ideas from information dynamics, how the Melody Triangle is defined in terms of these, and describe two physical incarnations of the Melody Triangle. The first is a multi-user installation where collaboration in a performative setting provides a playful yet informative way to explore expectation and surprise in music. The second is a screen-based interface where the Melody Triangle becomes a cognitively-informed compositional aid for the generation of musical textures; the user's control is at the abstract level of randomness and predictability. Finally, we outline a pilot study where the screen-based interface was used under experimental conditions to determine how the three measures of predictive information rate, entropy and redundancy might relate to musical preference.
hekeus@51 187 \end{abstract}
hekeus@51 188 %the generation of musical materials as a cognitively-informed compositional aid
hekeus@51 189
hekeus@51 190 \section{Information Dynamics}\label{sec:Information_dynamics}
hekeus@51 191
hekeus@51 192 The relationship between
hekeus@51 193 Shannon's \cite{Shannon48} information theory and music and art in general has been the
hekeus@51 194 subject of some interest since the 1950s
hekeus@51 195 \cite{Youngblood58,CoonsKraehenbuehl1958,Moles66,Meyer67,Cohen1962}.
hekeus@51 196 The general thesis is that perceptible qualities and subjective states
hekeus@51 197 like uncertainty, surprise, complexity, tension, and interestingness
hekeus@51 198 are closely related to information-theoretic quantities like
hekeus@51 199 entropy, relative entropy, and mutual information.
hekeus@51 200
hekeus@51 201
hekeus@51 202 Music is an inherently dynamic process. The idea that the musical experience is strongly shaped by the generation
hekeus@51 203 and playing out of strong and weak expectations was put forward by, amongst others,
hekeus@51 204 music theorists L. B. Meyer \cite{Meyer:1967} and Narmour \citep{Narmour:1977}.
hekeus@51 205 %Composers commonly, consciously or not, play with this process by setting up expectations which may, or may not be fulfilled, manipulating the expectations of the listener and inducing surprise or not as the music progresses
hekeus@51 206 %and surprise in the listener has been articulated by music theorist Meyer
hekeus@51 207 %\cite{Meyer:1967,Narmour:1977}.
hekeus@51 208 Central to this is the idea that music is not a static object presented as a whole,
hekeus@51 209 %as the grammatical analysis of Lerdahl and Jackendoff \cite{Lerdahl:1983} might imply,
hekeus@51 210 but a phenomenon that `unfolds' and is experienced \emph{in time}; as listeners we continually build and re-evaluate expectations of what is to come next.
hekeus@51 211
hekeus@51 212
hekeus@51 213
hekeus@51 214
hekeus@51 215
hekeus@51 216
hekeus@51 217 Information dynamics\cite{Abdallah:2009p4089} considers several different kinds of predictability in musical patterns, how these might be quantified using the tools of information theory,
hekeus@51 218 %human listeners might perceive these,
hekeus@51 219 and how they shape or affect the listening experience. Our working hypothesis is that listeners maintain a dynamically evolving statistical model that enables them to make predictions about how a piece of music will continue. They do this using both the immediate context of the piece as well as using previous musical experience, such as a familiarity with musical styles and conventions. As the music unfolds, listeners continually revise their model; in other words, they revise their own, subjective probabilistic belief state. These changes in probabilistic beliefs can be associated with
hekeus@51 220 quantities of information; these are the focus of information dynamics.
hekeus@51 221
hekeus@51 222
hekeus@51 223
hekeus@51 224 \section{The Melody Triangle}\label{sec:The_Melody_triangle}
hekeus@51 225 %%%How we created the transition matrixes and created the triangle.
hekeus@51 226 The use of stochastic processes in music composition has been widespread for
hekeus@51 227 decades---for instance Iannis Xenakis applied probabilistic mathematical models
hekeus@51 228 to the creation of musical materials\cite{Xenakis:1992ul}. While such processes
hekeus@51 229 can drive the \emph{generative} phase of the creative process, information dynamics
hekeus@51 230 can serve as a novel framework for a \emph{selective} phase, by
hekeus@51 231 providing a set of criteria to be used in judging which of the
hekeus@51 232 generated materials
hekeus@51 233 are of value. This alternation of generative and selective phases has been
hekeus@51 234 noted before \cite{Boden1990}.
hekeus@51 235 %
hekeus@51 236 Information-dynamic criteria can also be used as \emph{constraints} on the
hekeus@51 237 generative processes, for example, by specifying a certain temporal profile
hekeus@51 238 of surprisingness and uncertainty the composer wishes to induce in the listener
hekeus@51 239 as the piece unfolds.
hekeus@51 240
hekeus@51 241 The Melody Triangle enables the discovery of melodic content matching a set of information theoretic criteria. Positions within the triangle correspond with pairs of values of entropy rate and redundancy. %The relationship with the predictive information rate is not explicitly controlled as this would require a three-dimensional interface, but an implicit relationship emerges, which is described in section \ref{makingthetriangle}.
hekeus@51 242 The physical interface to the Triangle has so far been realised in two forms: as an interactive installation and as a screen based interface.
hekeus@51 243
hekeus@51 244 Given coordinates corresponding to a point in the triangle, we select from a pre-built
hekeus@51 245 library of random processes, choosing one whose entropy rate and redundancy match the desired
hekeus@51 246 values. The implementations discussed in this paper use first order Markov chains as the content generator,
hekeus@51 247 since it is easy to compute the theoretically exact values of entropy rate, redundancy and predictive
hekeus@51 248 information rate given the transition matrix of the Markov chain. However, in principle, any generative system could be used to create the library of sequences, given an appropriate probabilistic listener model supporting
hekeus@51 249 the estimation of entropy rate and redundancy.
hekeus@51 250
hekeus@51 251 The Markov chain based implementation generates streams of symbols in the abstract; the alphabet of symbols is then mapped to a set of distinct sounds, such as pitched notes in a scale or a set of percussive
hekeus@51 252 sounds. Further, by layering these streams, intricate musical textures can be created. The selection of
hekeus@51 253 notes or sounds is arbitrary, as long as they are all distinguishable.
hekeus@51 254 %)le is not a part of the Melody Triangle's core functionality, i
hekeus@51 255 Indeed, the symbols could even be mapped to non-sonic outputs such as visible shapes, colours, or movements.
hekeus@51 256
hekeus@51 257 Any sequence of symbols can be analysed and information theoretic measures estimated from it.
hekeus@51 258 The novelty of the Melody Triangle lies in that we reverse this mapping: given desired values for these measures, as determined from the user interface, we return a stream of symbols with the desired properties.
hekeus@51 259 In the next section we describe the three information theoretic measures that we use.
hekeus@51 260
hekeus@51 261
hekeus@51 262 \section{Sequential Information Measures}\label{sec:Sequential_Information_Measures}
hekeus@51 263 The \emph{entropy rate} of a random process is a basic measure of its randomness or
hekeus@51 264 unpredictability. Consider the viewpoint of an observer at a certain time, and split the
hekeus@51 265 sequence into an infinite \emph{past}, a single symbol in the \emph{present}, and the
hekeus@51 266 infinite \emph{future}. The entropy rate is a conditional entropy; informally:
hekeus@51 267 \begin{equation}
hekeus@51 268 \mathrm{EntropyRate} = H( \mathrm{Present} | \mathrm{Past}),
hekeus@51 269 \end{equation}
hekeus@51 270 that is, it represents our average uncertainty about the present symbol \emph{given}
hekeus@51 271 that we have observed everything before it. Processes with zero entropy rate can
hekeus@51 272 be predicted perfectly given enough of the preceding context.
hekeus@51 273
hekeus@51 274 The \emph{redundancy} of a process, in the sense we are using the term here, is
hekeus@51 275 a measure of how much the predictability of the process depends on knowing the
hekeus@51 276 preceding context. It is the difference between the entropy of a single element of the
hekeus@51 277 sequence in isolation (imagine choosing a note from a musical score at random with your
hekeus@51 278 eyes closed and then trying to guess the note) and its entropy after taking into account
hekeus@51 279 the preceding context:
hekeus@51 280 \begin{equation}
hekeus@51 281 \mathrm{Redundancy} = H( \mathrm{Present} ) - H(\mathrm{Present} | \mathrm{Past}).
hekeus@51 282 \end{equation}
hekeus@51 283 If the previous symbols reduce our uncertainty about the present symbol a great deal, then
hekeus@51 284 the redundancy is high. For example, if we know that a sequence consists of a repeating
hekeus@51 285 cycle such as $ \ldots b, c, d, a, b, c, d, a \ldots$, but we don't know which was the first
hekeus@51 286 symbol, then the redundancy is high, as $H(\mathrm{Present})$ is high (because we
hekeus@51 287 have no idea about the present symbol in isolation), but $H(\mathrm{Present}|\mathrm{Past})$
hekeus@51 288 is zero, because knowing the previous symbol immediately tells us what the present symbol is.
hekeus@51 289
hekeus@51 290 The \emph{predictive information rate} (PIR) brings in our uncertainty about the future. It is a
hekeus@51 291 measure of how much each symbol reduces our uncertainty about the future as it is
hekeus@51 292 observed, \emph{given} that we have observed the past:
hekeus@51 293 \begin{equation}
hekeus@51 294 \mathrm{PIR} = H(\mathrm{Future} | \mathrm{Past}) - H(\mathrm{Future} | \mathrm{Present}, \mathrm{Past}).
hekeus@51 295 \end{equation}
hekeus@51 296 It is a measure of the \emph{new} information in each symbol.
hekeus@51 297 Notice that if the past completely determines both the present and the future (as in the cyclic
hekeus@51 298 pattern above) the PIR is zero, since the present symbol brings no new information. However,
hekeus@51 299 if the symbols in a sequence are generated completely independently, e.g. by rolling a die for each
hekeus@51 300 one, then again, the present symbol provides no information about the future and the PIR
hekeus@51 301 is zero.
hekeus@51 302
hekeus@51 303 %However, there do exist processes that have high predictive information rates as compared
hekeus@51 304 %with their entropy rates: within the class of Markov chains, these are neither the periodic nor the sequentially uncorrellated ones. Rather they tend to yield sequences that have certain recognisable patterns or motifs,
hekeus@51 305 %but which occur at irregular times. A certain symbol might tell us about which one of the characteristic patterns will appear next. Each symbol tell a us little bit about the future; in order to make good predictions,
hekeus@51 306 %the listener must continually pay attention, building up expectations on the basis of each new observation.
hekeus@51 307 %% but only a limited amount about the infinite future, we only learn about that as time goes on; there is continual building of prediction.
hekeus@51 308 Processes with high PIR maintain a certain kind of balance between
hekeus@51 309 predictability and unpredictability in such a way that the observer must continually
hekeus@51 310 pay attention to each new observation as it occurs in order to make the best
hekeus@51 311 possible predictions about the evolution of the sequence. This balance between predictability
hekeus@51 312 and unpredictability is reminiscent of the inverted `U' shape of the Wundt curve (see \figrf{wundt}),
hekeus@51 313 which summarises the observations of Wundt \cite{Wundt1897} that stimuli are most
hekeus@51 314 pleasing at intermediate levels of novelty or disorder, where there is a balance between
hekeus@51 315 `order' and `chaos'.
hekeus@51 316
hekeus@51 317 \begin{fig}{wundt}
hekeus@51 318 \raisebox{-4em}{\colfig[0.43]{wundt}}
hekeus@51 319 % {\ \shortstack{{\Large$\longrightarrow$}\\ {\scriptsize\emph{exposure}}}\ }
hekeus@51 320 {\ {\large$\longrightarrow$}\ }
hekeus@51 321 \raisebox{-4em}{\colfig[0.43]{wundt2}}
hekeus@51 322 \caption{
hekeus@51 323 The Wundt curve relating randomness/complexity with
hekeus@51 324 perceived value. Repeated exposure sometimes results
hekeus@51 325 in a move to the left along the curve \cite{Berlyne71}.
hekeus@51 326 }
hekeus@51 327 \end{fig}
hekeus@51 328
hekeus@51 329
hekeus@51 330 \begin{figure}
hekeus@51 331 \centering
hekeus@51 332 \includegraphics[width=0.2\textwidth]{figures/PeriodicMatrix.png}
hekeus@51 333 \includegraphics[width=0.2\textwidth]{figures/NonDeterministicMatrix_bw.png}
hekeus@51 334 \caption{Two transition matrices. The shade of grey represents the probabilities of transition from one symbol to the next (black=0, white=1). The current symbol is along the bottom, and in this case there are twelve possibilities (mapped to a chromatic scale). The left hand matrix has no uncertainty; it represents a periodic pattern. The right hand matrix contains unpredictability but nonetheless is not completely without perceivable structure; it has a higher entropy rate. \label{TransitionMatrixes}}
hekeus@51 335 \end{figure}
hekeus@51 336
hekeus@51 337
hekeus@51 338
hekeus@51 339 \begin{fig}{mtriscat}
hekeus@51 340 \colfig[0.9]{mtriscat}
hekeus@51 341 \caption{The population of transition matrices in the 3D space of
hekeus@51 342 entropy rate, redundancy and PIR,
hekeus@51 343 all in bits.
hekeus@51 344 The concentrations of points along the redundancy axis correspond
hekeus@51 345 to Markov chains which are roughly periodic with periods of 2 (redundancy 1 bit),
hekeus@51 346 3, 4, \etc all the way to period 7 (redundancy 2.8 bits). The colour of each point
hekeus@51 347 represents its PIR---note that the highest values are found at intermediate entropy
hekeus@51 348 and redundancy, and that the distribution as a whole makes a curved triangle. Although
hekeus@51 349 not visible in this plot, it is largely hollow in the middle. \label{InfoDynEngine}}
hekeus@51 350 \end{fig}
hekeus@51 351
hekeus@51 352
hekeus@51 353
hekeus@51 354 %\begin{figure}
hekeus@51 355 %\centering
hekeus@51 356 %\includegraphics[width=0.5\textwidth]{MatrixDistribution.png}
hekeus@51 357 %\caption{The population of transition matrixes distributed along three axes of redundancy, entropy rate and predictive information rate. Note how the distribution makes a curved triangle-like plane floating in 3d space. \label{InfoDynEngine}}
hekeus@51 358 %\end{figure}
hekeus@51 359 \begin{figure}[h]
hekeus@51 360 \centering
hekeus@51 361 \includegraphics[width=0.5\textwidth]{figures/TheTriangle.pdf}
hekeus@51 362 \caption{The Melody Triangle\label{TheTriangle}}
hekeus@51 363 \end{figure}
hekeus@51 364
hekeus@51 365 \subsection{Populating the triangle}\label{makingthetriangle}
hekeus@51 366
hekeus@51 367
hekeus@51 368
hekeus@51 369 Before the Melody Triangle can be used, it has to be `populated' with possible parameter values for the melody generators. These are then plotted in a 3d statistical space of redundancy, entropy rate and predictive information rate. In our case we generated thousands of transition matrices, representing first-order Markov chains, by a random sampling method. In Figure~\ref{InfoDynEngine} we see a representation of how these matrices are distributed in the 3d statistical space; each one of these points corresponds to a transition matrix.
hekeus@51 370
hekeus@51 371
hekeus@51 372
hekeus@51 373 When we look at the distribution of transition matrixes plotted in this space, we see that it forms an arch shape that is fairly thin. It thus becomes a reasonable approximation to pretend that it is just a sheet in two dimensions; and so we stretch out this curved arc into a flat triangle. It is this triangular sheet that is our `Melody Triangle' and forms the interface by which the system is controlled.
hekeus@51 374
hekeus@51 375 Though the interface is 2D, the third dimension (PIR) is implicitly present, as
hekeus@51 376 transition matrices retrieved from
hekeus@51 377 along the centre line of the triangle will tend to have higher PIR.
hekeus@51 378 We hypothesise that, under
hekeus@51 379 the appropriate conditions, these will be perceived as more `interesting' or
hekeus@51 380 `melodic.'
hekeus@51 381
hekeus@51 382 When the Melody Triangle is used, regardless of whether it is as a screen based system, or as an interactive installation, it involves a mapping to this statistical space. When the user, through the interface, selects a position within the triangle, the corresponding transition matrix is returned. Figure \ref{TheTriangle} shows how the triangle maps to different measures of redundancy, entropy rate and predictive information rate.
hekeus@51 383
hekeus@51 384 %%%paragraph explaining what the different parts of the triangle are like.
hekeus@51 385 The three corners correspond to three different extremes of predictability and unpredictability, which could be loosely characterised as `periodicity', `noise' and `repetition'. %Melodies from the `noise' corner have no discernible pattern; they have high entropy rate, low predictive information rate and low redundancy. These melodies are essentially totally random. A melody along the `periodicity' to `repetition' edge are all deterministic loops that get shorter as we approach the `repetition' corner, until it becomes just one repeating note. It is the areas in between the extremes that provide the more `interesting' melodies. That is, those that have some level of unpredictability, but are not completely random. Or, conversely, that are predictable, but not entirely so. This triangular space allows for an intuitive exploration of expectation and surprise in temporal sequences based on a simple model of how one might guess the next event given the previous one.
hekeus@51 386 In our experiments with visualising and sonifying sequences sampled from
hekeus@51 387 first order Markov chains \cite{Abdallah:2009p4089}, we found that
hekeus@51 388 the measures of redundancy rate, entropy rate and predictive information rate correspond to perceptible
hekeus@51 389 characteristics, and that the transition matrices maximising or minimising
hekeus@51 390 each of these quantities are quite distinct. High entropy rates are associated
hekeus@51 391 with completely uncorrelated sequences with no recognisable temporal structure.
hekeus@51 392 High values of redundancy rate are associated with long periodic cycles (and low PIR
hekeus@51 393 and entropy rate). High values of predictive information rate are associated with intermediate values
hekeus@51 394 of redundancy rate and entropy rate, and recognisable, but not completely predictable,
hekeus@51 395 temporal structures.
hekeus@51 396
hekeus@51 397
hekeus@51 398 \section{User Interfaces}
hekeus@51 399 Any number of interfaces could be developed for the Melody Triangle\footnote{The Melody Triangle was developed in Prolog and MATLAB. It can be controlled with OpenSoundControl messages, and thus is independent of any specific interface implementation.}. We have developed two: a standard screen based interface where a user moves tokens with a mouse in and around a triangle on screen, and a multi-user interactive installation where a Kinect\footnote{\url{http://www.xbox.com/en-GB/Kinect}} camera tracks individuals in a space and maps their positions in the space to the triangle.
hekeus@51 400
hekeus@51 401 \subsection{The Multi-User Installation}
hekeus@51 402
hekeus@51 403 \begin{figure}
hekeus@51 404 \centering
hekeus@51 405 \includegraphics[width=0.5\textwidth]{figures/kinnect.pdf}
hekeus@51 406 \caption{The depth map as seen by the Kinect; the bounding boxes outline the blobs detected by OpenNI.\label{Kinect}}
hekeus@51 407 \end{figure}
hekeus@51 408
hekeus@51 409 As a Kinect camera overlooks a space, its range naturally forms a triangle. As visitors/users come into the range of the camera, they start generating a melody, with the statistical properties of this melody determined by the mapping of physical space to statistical space as discussed above. Thus by exploring the physical space the participant changes the predictability of the generated melodic content. When multiple people are in the space they can cooperate to create interweaving melodies, forming intricate polyphonic textures.
hekeus@51 410
hekeus@51 411 The streams of symbols are mapped to MIDI and then played with software instruments in Logic. The tracking system was capable of detecting gestures, and these were mapped to different musical effects such as tempo changes, periodicity changes (going to the off-beat), instrument/register changes and volume (see Table \ref{gestures}, Figure \ref{gestures2}).
hekeus@51 412
hekeus@51 413 \subsubsection{Tracking and Control}
hekeus@51 414
hekeus@51 415 Tracking and control were done using the OpenNI libraries' API\footnote{\url{http://OpenNi.org/}} and high level middleware for tracking with Kinect. This provided reliable blob tracking of humanoid forms in 2d space. By triangulating this to the Kinect's depth map it became possible to get reliable coordinates of visitors' positions in the space.
hekeus@51 416
hekeus@51 417 By detecting the bounding box of the 2d blobs of individuals in the space, and then normalising these based on the distance of the depth map it became possible to work out if an individual had an arm stretched out or if they were crouching. With this it was possible to define a series of gestures for controlling the system without the use of any controllers (see Table~\ref{gestures}). Thus for instance by sticking out one's left arm quickly, the melody doubles in tempo. By pulling one's left arm in at the same time as sticking the right arm out the melody would shift onto the offbeat. Sending out both arms would change the instrument being `played'.
hekeus@51 418
hekeus@51 419 \begin{table}
hekeus@51 420 \centering
hekeus@51 421 %\includegraphics[width=0.5\textwidth]{InstructionsText.pdf}
hekeus@51 422 \caption{Gestures and their resulting effect\label{gestures}}
hekeus@51 423 \begin{tabular}{ l c l }
hekeus@51 424 left arm & right arm & meaning\\
hekeus@51 425 \hline
hekeus@51 426 out & static & double tempo \\
hekeus@51 427 in & static & halve tempo \\
hekeus@51 428 static & out & triple tempo \\
hekeus@51 429 static & in & one-third tempo\\
hekeus@51 430 out & in & shift to off-beat \\
hekeus@51 431 out & out & change instrument\\
hekeus@51 432 in & in & reset tempo\\
hekeus@51 433 \end{tabular}
hekeus@51 434 \end{table}
hekeus@51 435
hekeus@51 436 \begin{figure}
hekeus@51 437 \centering
hekeus@51 438 \includegraphics[width=0.5\textwidth]{figures/InstructionsImage2.pdf}
hekeus@51 439 \caption{Gestures and their resulting effect \label{gestures2}}
hekeus@51 440 \end{figure}
hekeus@51 441
hekeus@51 442
hekeus@51 443 \subsubsection{Observations}
hekeus@51 444 Although visitors would need an initial bit of training, they would then quickly be able to collaboratively design musical textures. For example, one person could lay down a predictable repeating bass line by keeping themselves to the periodicity/repetition side of the room, while a companion could generate a freer melodic line by being nearer the `noise' part of the space.
hekeus@51 445
hekeus@51 446
hekeus@51 447 The collaborative nature of this installation is an area that merits attention. Because no single user was able to control the whole narrative, the participants would communicate verbally and direct each other in the goals of learning to use the system and finding interesting musical textures. This collaboration added an element of playfulness and enjoyment that was clearly apparent.
hekeus@51 448
hekeus@51 449 As an artefact this installation is an exploratory prototype and occupies an ambiguous role in terms of purpose; it is in a nebulous middle ground between instrument, art installation and technical demonstration. It is clear, however, that as a vehicle for communicating ideas related to expectation, pattern and predictability in music to the public, it is very effective.
hekeus@51 450
hekeus@51 451 \subsection{The Screen Based Interface}
hekeus@51 452
hekeus@51 453 \begin{figure}
hekeus@51 454 \centering
hekeus@51 455 \includegraphics[width=0.3\textwidth]{figures/UIscreenshot.png}
hekeus@51 456 \caption{Screen shot of the screen based interface for the Melody Triangle\label{UIScreenShot}}
hekeus@51 457 \end{figure}
hekeus@51 458
hekeus@51 459 %The Melody Triangle can also be explored with a standard screen, keyboard and mouse interface. A triangle is drawn on the screen, screen space thus mapped to the statistical space of the Melody Triangle. A number of round tokens, each representing a melody can be dragged in and around the triangle. When a token is dragged into the triangle, the system will start generating the sequence of notes with statistical properties that correspond to its position in the triangle.
hekeus@51 460 %
hekeus@51 461 %Additionally there are a number of keyboard controls. These include controls for changing the overall tempo, for enabling and disabling individual voices, changing registers, going to off-beats and changing the speed of individual voices. The system gives visual feedback to indicate when a token has locked on to a new melody, and contains a buffer zone for allowing tokens to be pushed right to the edges of the triangle without falling out.
hekeus@51 462 %
hekeus@51 463 %In this mode, the Melody Triangle can be used as a kind of composition assistant for the generation of interesting musical textures and melodies. However unlike other computer aided composition tools or programming environments, here the composer engages with music on the high and abstract level of expectation, randomness and predictability.
hekeus@51 464
hekeus@51 465 The screen based interface can serve as a compositional tool.
hekeus@51 466 %%A triangle is drawn on the screen, screen space thus mapped to the statistical
hekeus@51 467 %space of the Melody Triangle.
hekeus@51 468 A number of tokens, each representing a
hekeus@51 469 sonification stream or `voice', can be dragged in and around the triangle.
hekeus@51 470 For each token, a sequence of symbols is sampled using the corresponding
hekeus@51 471 transition matrix, which
hekeus@51 472 %statistical properties that correspond to the token's position is generated. These
hekeus@51 473 %symbols
hekeus@51 474 are then mapped to notes of a scale or percussive sounds%
hekeus@51 475 \footnote{The sampled sequence could easily be mapped to other musical processes, possibly over
hekeus@51 476 different time scales, such as chords, dynamics and timbres. It would also be possible
hekeus@51 477 to map the symbols to visual or other outputs.}%
hekeus@51 478 . Keyboard commands give control over other musical parameters such
hekeus@51 479 as pitch register, inter-onset interval, tempo and dynamics. The system is capable of generating intricate musical textures when multiple tokens are in the triangle.
hekeus@51 480
hekeus@51 481 In this mode the Melody Triangle is a cognitively-informed compositional aid; unlike other computer aided composition tools or programming environments, here the composer exercises control at the abstract level of information-dynamic
hekeus@51 482 properties. The use of Markov chains for the generation of musical content is not new; rather, the novelty lies in the ability to define criteria in the selection of generated materials that relate to how a listener might perceive the output.
hekeus@51 483
hekeus@51 484
hekeus@51 485
hekeus@51 486
hekeus@51 487
hekeus@51 488
hekeus@51 489 \section{Information Dynamics and Musical Preference Study}
hekeus@51 490
hekeus@51 491 We are currently in the process of using the screen-based
hekeus@51 492 Melody Triangle user interface to investigate the relationship between the information-dynamic
hekeus@51 493 characteristics of sonified Markov chains and subjective musical preference.
hekeus@51 494 We carried out a pilot study with six participants, who were asked
hekeus@51 495 to use a simplified form of the user interface (a single controllable token,
hekeus@51 496 and no rhythmic, registral or timbral controls) under two conditions:
hekeus@51 497 one where a single sequence was sonified under user control, and another
hekeus@51 498 where an additional sequence was sonified in a different register, as if generated
hekeus@51 499 by a fixed invisible token in one of four regions of the triangle. In addition, subjects
hekeus@51 500 were asked to press a key if they `liked' what they were hearing.
hekeus@51 501
hekeus@51 502 After the study the participants were surveyed with the Goldsmiths Musical Sophistication Index~\cite{Mullensiefen:2011ts} to elicit their prior musical experience.
hekeus@51 503
hekeus@51 504 We recorded subjects' behaviour as well as points which they marked
hekeus@51 505 with a key press.
hekeus@51 506 Some results for three of the subjects are shown in \figrf{mtri-results}. Though
hekeus@51 507 we have not been able to detect any systematic across-subjects preference for any particular
hekeus@51 508 region of the triangle, subjects do seem to exhibit distinct kinds of exploratory behaviour.
hekeus@51 509 Our initial hypothesis, that subjects would linger longer in regions of the triangle
hekeus@51 510 that produced aesthetically preferable sequences, and that this would tend to be towards the
hekeus@51 511 centre line of the triangle for all subjects, was not confirmed. However, it is possible
hekeus@51 512 that the design of the experiment encouraged an initial exploration of the space (sometimes
hekeus@51 513 very systematic, as for subject c) aimed at \emph{understanding} %the parameter space and
hekeus@51 514 how the system works, rather than finding musical patterns. It is also possible that the
hekeus@51 515 system encourages users to create musically interesting output by \emph{moving the token},
hekeus@51 516 rather than finding a particular spot in the triangle which produces a musically interesting
hekeus@51 517 sequence by itself.
hekeus@51 518
hekeus@51 519 \begin{fig}{mtri-results}
hekeus@51 520 \def\scat#1{\colfig[0.42]{mtri/#1}}
hekeus@51 521 \def\subj#1{\scat{scat_dwells_subj_#1} & \scat{scat_marks_subj_#1}}
hekeus@51 522 \begin{tabular}{cc}
hekeus@51 523 % \subj{a} \\
hekeus@51 524 \subj{b} \\
hekeus@51 525 \subj{c} \\
hekeus@51 526 \subj{d}
hekeus@51 527 \end{tabular}
hekeus@51 528 \caption{Dwell times and mark positions from user trials with the
hekeus@51 529 on-screen Melody Triangle interface, for three subjects. The left-hand column shows
hekeus@51 530 the positions in a 2D information space (entropy rate vs multi-information rate
hekeus@51 531 in bits) where each subject spent their time; the area of each circle is proportional
hekeus@51 532 to the time spent there. The right-hand column shows the points which subjects
hekeus@51 533 `liked'; the area of the circles here is proportional to the duration spent at
hekeus@51 534 that point before the point was marked.}
hekeus@51 535 \end{fig}
hekeus@51 536
hekeus@51 537 Comments collected from the subjects
hekeus@51 538 %during and after the experiment
hekeus@51 539 suggest that
hekeus@51 540 the information-dynamic characteristics of the patterns were readily apparent
hekeus@51 541 to most: several noticed the main organisation of the triangle,
hekeus@51 542 with repetitive notes at the top, cyclic patterns along one edge, and unpredictable
hekeus@51 543 notes towards the opposite corner. Some described their systematic exploration of the space.
hekeus@51 544 Two felt that the right side was `more controllable' than the left (a consequence
hekeus@51 545 of their ability to return to a particular distinctive pattern and recognise it
hekeus@51 546 as one heard previously). Two reported that they became bored towards the end,
hekeus@51 547 but another felt there wasn't enough time to `hear out' the patterns properly.
hekeus@51 548 One subject did not `enjoy' the patterns in the lower region, but another said the lower
hekeus@51 549 central regions were more `melodic' and `interesting'.
hekeus@51 550
hekeus@51 551 We plan to continue the trials with a slightly less restricted user interface in order to
hekeus@51 552 make the experience more enjoyable and thereby give subjects longer to use the interface;
hekeus@51 553 this may allow them to get beyond the initial exploratory phase and give a clearer
hekeus@51 554 picture of their aesthetic preferences. In addition, we plan to conduct a
hekeus@51 555 study under more restrictive conditions, where subjects will have no control over the patterns
hekeus@51 556 other than to signal (a) which of two alternatives they prefer in a forced
hekeus@51 557 choice paradigm, and (b) when they are bored of listening to a given sequence.
hekeus@51 558
hekeus@51 559
hekeus@51 560
hekeus@51 561
hekeus@51 562
hekeus@51 563 \section{Further Work}
hekeus@51 564 %The Melody Triangle has so far only been used with first-order Markov chains for generating content. This mean that the melodies generated don't have any long term structure or form and hence don't seem to `go anywhere'. As such the system in its current form is better suited to creating textures and short phrases as oppose to composing over-arching musical structures.
hekeus@51 565
hekeus@51 566 We are currently investigating how higher-order Markov models can be mapped to information theoretic measures and adapting the Melody Triangle to those models. This would generate higher level patterns and provide more long-term structures. Furthermore, more sophisticated listener models~\cite{Pearce:2005wr,Potter:2007tt} could be used for computing information measures for more conventional or ecologically valid music.
hekeus@51 567
hekeus@51 568 As it stands, the streams of symbols generated are only mapped to note values. However, they could just as well be applied to any other musical property, such as intervals, chords, dynamics, timbres, structures and key changes. The possibilities for the Melody Triangle to be a compositional guide in these other domains remain to be investigated.
hekeus@51 569
hekeus@51 570 We are investigating the possibility of turning the Melody Triangle into a mobile-phone-based music-making application. It is hoped that by collecting usage statistics we may have a rich source of data that can help determine any relationship between the information dynamics measures and aesthetic preference.
hekeus@51 571 %The Melody Triangle in its current form however forms an ideal tool for investigations into musical preference and their relationship to the information dynamics models, and as such more detailed studies under wider experimental conditions and with more participants will be carried out.
hekeus@51 572 Although our initial data on aesthetic preference are inconclusive, there is still
hekeus@51 573 plenty of work to be done in this area: wherever there are probabilistic models,
hekeus@51 574 information dynamics can shed light on their behaviour.
hekeus@51 575
hekeus@51 576 \section{Acknowledgments}
hekeus@51 577 This work is supported by an EPSRC Doctoral Training Centre EP/G03723X/1 (HE), GR/S82213/01 and \\EP/E045235/1 (SA), an EPSRC Leadership Fellowship, \\EP/G007144/1 (MDP) and EPSRC IDyOM2 EP/H013059/1. Thanks to Louie McCallum and Davie Smith from QMUL EECS for Kinect programming support.
hekeus@51 578
hekeus@51 579 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
hekeus@51 580 %bibliography here
hekeus@51 581 \bibliography{smc2012template,nime,all,c4dm}
hekeus@51 582
hekeus@51 583
hekeus@51 584
hekeus@51 585
hekeus@51 586 \end{document}