annotate docs/spec/spec.tex @ 770:c54bc2ffbf92 tip

update tags
author convert-repo
date Fri, 16 Dec 2011 11:34:01 +0000
parents d6ca94333e2b
children
rev   line source
mas02md@547 1 \documentclass[11pt]{article}
mas02md@547 2 \usepackage{oz,times}
mas02md@547 3
mas02md@547 4 \usepackage{tikz}
mas02md@547 5 \usetikzlibrary{arrows}
mas02md@547 6 \usetikzlibrary{calc}
mas02md@547 7
mas02md@547 8 \title{A Formal Framework for Content-based Retrieval}
mas02md@618 9 \author{Mark d'Inverno and Christophe Rhodes and Michael Jewell}
mas02md@547 10
mas02md@547 11
mas02md@547 12 \begin{document}
mas02md@547 13 \maketitle
mas02md@547 14
mas02md@547 15 %%\begin{gendef}[X]
mas02md@547 16 %% concat : (\seq (\seq X)) \fun (\seq X)
mas02md@547 17 %%\where
mas02md@547 18 %% \forall xs : \seq X; xss : \seq (\seq X) @ \\
mas02md@547 19 %%\t2 concat~ ((\langle xs \rangle) \cat xss) = xs \cat
mas02md@547 20 %%(concat~xss)
mas02md@547 21 %%\end{gendef}
mas02md@547 22
mas02md@547 23 %% \begin{gendef}[X]
mas02md@547 24 %% nfront : \nat \fun (\seq X) \fun \seq (\seq X) \\
mas02md@547 25 %% \where
mas02md@547 26 %% \forall n : \nat ; xs : \seq X @ \\
mas02md@547 27 %% n > \# xs \implies nfront ~n~xs = \langle \rangle \land \\
mas02md@547 28 %% n \leq \# xs \implies nfront~n~xs = \\
mas02md@547 29 %% \t2 (\langle (0 \upto n-1) \dres xs \rangle) \cat (nfront~n~(tail~xs))
mas02md@547 30 %% \end{gendef}
mas02md@547 31
mas02md@547 32 \section{The System Instance}
mas02md@547 33
mas02md@562 34 \newcommand{\LET}{\mathrel{\sf Let}}
mas02md@562 35
mas02md@547 36 \newcommand{\mylet}{\methrel{\sf Let}}
mas02md@547 37 \newcommand{\FV}{\mathrel{~FV}}
mas02md@547 38 \newcommand{\V}{\mathrel{~FV^{d}}}
mas02md@547 39 \newcommand{\U}{\mathrel{~FV^{1}}}
mas02md@547 40 \newcommand{\R}{\mathrel{~R}}
mas02md@547 41
mas02md@547 42 \newcommand{\mylog}{\mathrel{ln}}
mas02md@547 43
mas02md@547 44 \begin{enumerate}
mas02md@547 45
mas02md@547 46 \item \textsf{Track} - original media.
mas02md@547 47
mas02md@547 48 We define the set of all such tracks.
mas02md@547 49
mas02md@547 50 \begin{flushright}
mas02md@547 51 \begin{tikzpicture}
mas02md@547 52 \input track.tikz
mas02md@547 53 \end{tikzpicture}
mas02md@547 54 \end{flushright}
mas02md@547 55
mas02md@547 56 \begin{zed}
mas02md@547 57 [Track]
mas02md@547 58 \end{zed}
mas02md@547 59
mas02md@547 60 %% \begin{zed}
mas02md@547 61 %%\R == \nat
mas02md@547 62 %% \end{zed}
mas02md@547 63
mas02md@547 64 %% \begin{axdef}
mas02md@547 65 %% \mylog : \R \fun \R
mas02md@547 66 %% \end{axdef}
mas02md@547 67
mas02md@547 68 % For every track it is possible to determine the lenth.
mas02md@547 69
mas02md@547 70 % \begin{axdef}
mas02md@547 71 % lenth : Track \fun Time
mas02md@547 72 % \end{axdef}
mas02md@547 73
mas02md@547 74 % For every track it is possible to determine the lenth.
mas02md@547 75
mas02md@547 76 % \begin{axdef}
mas02md@547 77 % lenth : Track \fun Time
mas02md@547 78 % \end{axdef}
mas02md@547 79
mas02md@547 80 \item \textsf{Catalogue} - a non-empty list of tracks.
mas02md@547 81
mas02md@547 82 % A catalogue is typically generated for a specific \emph{use case}.
mas02md@547 83
mas02md@547 84 \begin{flushright}
mas02md@547 85 \begin{tikzpicture}
mas02md@547 86 \draw (-3,-3) rectangle (3,3);
mas02md@547 87 \foreach \y/\xscale in {2/1,1/1.02,0/0.87,-1/0.95} {
mas02md@547 88 \begin{scope}[yshift=\y cm]
mas02md@547 89 \begin{scope}[xscale=\xscale]
mas02md@547 90 \input track.tikz
mas02md@547 91 \end{scope}
mas02md@547 92 \end{scope}
mas02md@547 93 }
mas02md@547 94 \draw node at (0,-2) {\vdots};
mas02md@547 95 \end{tikzpicture}
mas02md@547 96 \end{flushright}
mas02md@547 97
mas02md@547 98 \begin{zed}
mas02md@547 99 Catalogue == \seq_1 Track
mas02md@547 100 \end{zed}
mas02md@547 101
mas02md@547 102 \item \textsf{Collection} - a set of catalogues where each track in each catalogue is associated with a unique name.
mas02md@547 103
mas02md@547 104 In a collection, each track has a unique \textsf{TrackName}. We define the injective function $name$ to do this.
mas02md@547 105
mas02md@547 106 \begin{zed}
mas02md@547 107 [TrackName]
mas02md@547 108 \end{zed}
mas02md@547 109
mas02md@547 110 \begin{schema}{Collection}
mas02md@547 111 collection : \power Catalogue \\
mas02md@547 112 tracks : \power Track \\
mas02md@547 113 name : Track \inj TrackName
mas02md@547 114 \where
mas02md@547 115 tracks = \{ cat : collection; t : Track | t \in (\ran cat) @ t \} \\
mas02md@547 116 \dom name = tracks
mas02md@547 117 \end{schema}
mas02md@547 118
mas02md@547 119 \item \textsf{Time} - the set of non-negative reals.
mas02md@547 120
mas02md@547 121
mas02md@547 122 \begin{zed}
mas02md@547 123 Time == \R
mas02md@547 124 \end{zed}
mas02md@547 125
mas02md@547 126
mas02md@547 127 \item \textsf{Interval} - defined as a continuous set of real numbers, represented as an ordered pair of reals with the second of the pair strictly greater.
mas02md@547 128
mas02md@547 129 \begin{flushright}
mas02md@547 130 \begin{tikzpicture}[>=stealth]
mas02md@547 131 \draw (0,0.8) rectangle (0.4,1);
mas02md@547 132 \fill (0.2,0.9) ellipse (0.05);
mas02md@547 133 \draw[->] (0.2,0.9) -- (0.1,0.2);
mas02md@547 134 \draw[->] (0.2,0.9) -- (0.55,0.2);
mas02md@547 135 \draw[very thick] (0.1,0.2) -- (0.55,0.2);
mas02md@547 136 \draw[o->] (-0.5,0.2) -- (1,0.2) node[anchor=north west] {\small $\mathbb{R}$};
mas02md@547 137 \end{tikzpicture}
mas02md@547 138 \end{flushright}
mas02md@547 139
mas02md@547 140 \begin{zed}
mas02md@547 141 Interval == \{ t_1, t_2 : Time | t_2 > t_1 @ (t_1, t_2) \}
mas02md@547 142 \end{zed}
mas02md@547 143
mas02md@547 144 \item \textsf{Interval Index} - list of intervals such that the start of each successive interval strictly increases. Intervals may be overlapping.
mas02md@547 145
mas02md@547 146 The predicate below states that for any consecutive intervals in an interval index $i_j$ and $i_{j+1}$, that $i_j$ starts before $i_{j+1}$.
mas02md@547 147
mas02md@547 148 \begin{flushright}
mas02md@547 149 \begin{tikzpicture}[>=stealth]
mas02md@547 150 \draw (0,0.8) rectangle (1.6,1);
mas02md@547 151 \draw (0.4,0.8) -- (0.4,1);
mas02md@547 152 \draw (0.8,0.8) -- (0.8,1);
mas02md@547 153 \draw (1.2,0.8) -- (1.2,1);
mas02md@547 154 \fill (0.2,0.9) ellipse (0.05);
mas02md@547 155 \fill (0.6,0.9) ellipse (0.05);
mas02md@547 156 \fill (1.0,0.9) ellipse (0.05);
mas02md@547 157 \fill (1.4,0.9) ellipse (0.05);
mas02md@547 158 \draw[->] (0.2,0.9) -- (0.1,0.2);
mas02md@547 159 \draw[->] (0.2,0.9) -- (0.55,0.2);
mas02md@547 160 \draw[->] (0.6,0.9) -- (0.55,0.2);
mas02md@547 161 \draw[->] (0.6,0.9) -- (0.7,0.2);
mas02md@547 162 \draw[->] (1.0,0.9) -- (0.9,0.2);
mas02md@547 163 \draw[->] (1.0,0.9) -- (1.3,0.2);
mas02md@547 164 \draw[->] (1.4,0.9) -- (1.2,0.2);
mas02md@547 165 \draw[->] (1.4,0.9) -- (1.5,0.2);
mas02md@547 166 \draw[o->] (-0.5,0.2) -- (2,0.2) node[anchor=north west] {\small $\mathbb{R}$};
mas02md@547 167 \end{tikzpicture}
mas02md@547 168 \end{flushright}
mas02md@547 169
mas02md@547 170 \begin{zed}
mas02md@547 171 IntervalIndex == \iseq Interval \\ \\ \\
mas02md@547 172 \end{zed}
mas02md@547 173
mas02md@547 174 \[ \forall i : IntervalIndex; i_j, i_{j+1} : Interval @ \\
mas02md@547 175 \t3 \langle i_j, i_{j+1} \rangle \inseq i \implies first~ i_j \leq first ~ i_{j+1}
mas02md@547 176 \]
mas02md@547 177
mas02md@547 178 \item \textsf{Continuous Interval Index - CII} - there are no consecutive intervals $i_j$ and $i_{j+1}$ in the index where the second element of $i_j$ is strictly less than the first element of $i_{j+1}$.
mas02md@547 179
mas02md@547 180 \begin{flushright}
mas02md@547 181 \begin{tikzpicture}[>=stealth]
mas02md@547 182 \draw (0,0.8) rectangle (1.6,1);
mas02md@547 183 \draw (0.4,0.8) -- (0.4,1);
mas02md@547 184 \draw (0.8,0.8) -- (0.8,1);
mas02md@547 185 \draw (1.2,0.8) -- (1.2,1);
mas02md@547 186 \fill (0.2,0.9) ellipse (0.05);
mas02md@547 187 \fill (0.6,0.9) ellipse (0.05);
mas02md@547 188 \fill (1.0,0.9) ellipse (0.05);
mas02md@547 189 \fill (1.4,0.9) ellipse (0.05);
mas02md@547 190 \draw[->] (0.2,0.9) -- (0.1,0.2);
mas02md@547 191 \draw[->] (0.2,0.9) -- (0.55,0.2);
mas02md@547 192 \draw[->] (0.6,0.9) -- (0.55,0.2);
mas02md@547 193 \draw[->] (0.6,0.9) -- (0.9,0.2);
mas02md@547 194 \draw[->] (1.0,0.9) -- (0.7,0.2);
mas02md@547 195 \draw[->] (1.0,0.9) -- (1.3,0.2);
mas02md@547 196 \draw[->] (1.4,0.9) -- (1.2,0.2);
mas02md@547 197 \draw[->] (1.4,0.9) -- (1.5,0.2);
mas02md@547 198 \draw[o->] (-0.5,0.2) -- (2,0.2) node[anchor=north west] {\small $\mathbb{R}$};
mas02md@547 199 \end{tikzpicture}
mas02md@547 200 \end{flushright}
mas02md@547 201
mas02md@547 202 %%unchecked
mas02md@547 203 \begin{zed}
mas02md@562 204 ContIntIndex == \\
mas02md@562 205 \t1 \{ ii : IntervalIndex; i_j, i_{j+1}: Interval | \\
mas02md@562 206 \t3 \langle i_j, i_{j+1} \rangle \inseq ii \implies second ~ i_j \geq first ~ i_{j+1}@ ii \}
mas02md@562 207 \end{zed}
mas02md@547 208
mas01mj@619 209 \item \textsf{Standard Interval Index - SII} - a Continual Interval Index which has no overlapping intervals and begins at 0.
mas01mj@619 210
mas01mj@619 211 %%unchecked
mas01mj@619 212 \begin{zed}
mas01mj@619 213 StandardIntIndex == \\
mas01mj@619 214 \t1 \{ cii : ContIntIndex; i_j, i_{j+1}: Interval | \\
mas01mj@619 215 \t3 first (head~cii) = 0 \land \\
mas01mj@619 216 \t4 second ~ i_j = first ~ i_{j+1}
mas01mj@619 217 \end{zed}
mas02md@562 218 % In fact this enables us to specify such a data structure simply as a sequence of increasing times as we can assume the track starts at 0.
mas02md@562 219
mas02md@562 220 % \begin{zed}
mas02md@562 221 % ContIntIndex == \iseq Time \\ \\ \\
mas02md@562 222 % \end{zed}
mas02md@562 223
mas02md@562 224 %% \begin{zed}
mas02md@562 225 %% ContIntIndex == IntervalIndex
mas02md@562 226 %% \end{zed}
mas02md@547 227
mas02md@547 228 \item A \textsf{Duration} is an amount of time.
mas02md@547 229
mas02md@547 230 \begin{zed}
mas02md@547 231 Duration == Time
mas02md@547 232 \end{zed}
mas02md@547 233
mas02md@547 234 \item The \textsf{Track Duration} is the duration of a track. It is the difference between the \emph{end} time and \emph{start} time of the track.
mas02md@547 235
mas02md@547 236 \begin{axdef}
mas02md@547 237 trackduration : Track \fun Duration
mas02md@547 238 \end{axdef}
mas02md@547 239
mas02md@547 240 \item \textsf{Interval Duration} is the duration of an interval. It is calculated by subtracting the first element of the interval from the second element.
mas02md@547 241
mas02md@547 242 \begin{axdef}
mas02md@547 243 intervalduration : Interval \fun Duration
mas02md@547 244 \where
mas02md@547 245 \forall i : Interval @ intervalduration ~ i = second ~ i - first ~ i
mas02md@547 246 \end{axdef}
mas02md@547 247
mas02md@547 248
mas02md@562 249 \item \textsf{Index Duration} is the duration of a Continuous Interval Index which is simply the second element of the last interval pair.
mas02md@547 250
mas02md@547 251 \begin{axdef}
mas02md@562 252 indexduration : ContIntIndex \fun Time
mas02md@547 253 \where
mas01mj@619 254 \forall cii : ContIntIndex @ indexduration~cii = second (last~cii) - first (head~cii)
mas02md@547 255 \end{axdef}
mas02md@547 256
mas02md@547 257 \item \textsf{Segmenter} - a process which computes an interval index for any track, represented as a function which maps any track to an interval index.
mas02md@547 258
mas02md@547 259 \begin{flushright}
mas02md@547 260 \begin{tikzpicture}
mas02md@547 261 \input interval-index.tikz
mas02md@547 262 \input segmented-track.tikz
mas02md@547 263 \end{tikzpicture}
mas02md@547 264 \end{flushright}
mas02md@547 265
mas02md@562 266
mas01mj@619 267 In this specification we will consider segmenters that produce a standard interval index.
mas02md@562 268
mas02md@547 269 \begin{zed}
mas01mj@619 270 Segmenter == Track \fun StandardIntIndex \\
mas02md@547 271 \end{zed}
mas02md@547 272
mas02md@547 273 \item The \textsf{Length} of an interval index is the number of intervals contained within it.
mas02md@547 274
mas02md@547 275 \begin{axdef}
mas02md@547 276 length : IntervalIndex \fun \nat
mas02md@547 277 \where
mas02md@547 278 \forall ii : IntervalIndex @ length~ii = \# ii
mas02md@547 279 \end{axdef}
mas02md@547 280
mas02md@547 281 % \section{Features}
mas02md@547 282
mas02md@547 283 \item \textsf{Feature Name} - a name describing a feature.
mas02md@547 284
mas02md@547 285 \begin{zed}
mas02md@547 286 [FeatureName]
mas02md@547 287 \end{zed}
mas02md@547 288
mas02md@547 289 \item \textsf{Dimension} - a natural number.
mas02md@547 290
mas02md@547 291 \begin{zed}
mas02md@547 292 Dimension == \nat
mas02md@547 293 \end{zed}
mas02md@547 294
mas02md@547 295 \item \textsf{Feature Kind} - a feature name and associated set of parameters.
mas02md@547 296
mas02md@547 297 \begin{zed}
mas02md@547 298 [Const, Var]
mas02md@547 299 \end{zed}
mas02md@547 300
mas02md@547 301 We define the binding type as the set of functions between variables and constants.
mas02md@547 302
mas02md@547 303 \begin{zed}
mas02md@547 304 Binding == \power (Var \cross Const)
mas02md@547 305 \end{zed}
mas02md@547 306
mas02md@547 307 Every feature kind has a dimension (not necessarily fixed).
mas02md@547 308
mas02md@547 309 \begin{schema}{FeatureKind}
mas02md@547 310 name : FeatureName \\
mas02md@547 311 parameters : \power Var \\
mas02md@547 312 dparameters : \power Var \\
mas02md@547 313 binding : Binding \\
mas01mj@619 314 ddimension : Binding \pfun Dimension \\
mas02md@547 315 \where
mas02md@547 316 dparameters \subseteq parameters \\
mas02md@547 317 \forall b : Binding | b \in (\dom ddimension) @ dparameters \subseteq (\dom b)
mas02md@547 318 \end{schema}
mas02md@547 319
mas02md@547 320 \item A \textsf{Feature} - sometimes referred to as a Fully Qualified Feature - is a feature kind with all parameters bound.
mas02md@547 321
mas02md@547 322 \item \textsf{Feature Dimension} - every feature has a fixed dimension.
mas02md@547 323
mas02md@547 324 \begin{schema}{Feature}
mas02md@547 325 FeatureKind \\
mas01mj@619 326 fdimension : Dimension \\
mas02md@547 327 \where
mas02md@547 328 \dom binding = parameters \\
mas02md@547 329 fdimension = ddimension~binding
mas02md@547 330 \end{schema}
mas02md@547 331
mas02md@547 332 \item \textsf{Unit Feature} - any feature with a dimension of 1.
mas02md@547 333
mas02md@547 334 \begin{schema}{UnitFeature}
mas02md@547 335 Feature \\
mas02md@547 336 \where
mas02md@547 337 fdimension = 1
mas02md@547 338 \end{schema}
mas02md@547 339
mas02md@547 340 \item \textsf{Feature Vector} - a non-empty sequence of real numbers.
mas02md@547 341
mas02md@547 342 \begin{zed}
mas02md@547 343 \FV == \seq_1 \R
mas02md@547 344 \end{zed}
mas02md@547 345
mas02md@547 346 % DEFINE ALL FEATURE VECTORS HERE
mas02md@547 347
mas02md@547 348 %% \begin{zed}
mas02md@547 349 %% \V == \FV \\
mas02md@547 350 %% \U == \FV
mas02md@547 351 %% \end{zed}
mas02md@547 352
mas02md@547 353 \newcommand{\Vdsl}{\mathrel{~FV^{d * sl}}}
mas02md@547 354
mas02md@547 355 %% \begin{zed}
mas02md@547 356 %% \Vdsl == \FV
mas02md@547 357 %% \end{zed}
mas02md@547 358
mas02md@547 359 \newcommand{\Vd}{\mathrel{~FV^{d}}}
mas02md@547 360
mas02md@547 361 %% \begin{zed}
mas02md@547 362 %% \Vd == \FV
mas02md@547 363 %% \end{zed}
mas02md@547 364
mas02md@547 365 \newcommand{\Z}{\mathrel{~FV^{1}}}
mas02md@547 366
mas02md@547 367 %% \begin{zed}
mas02md@547 368 %% \Z == \FV
mas02md@547 369 %% \end{zed}
mas02md@547 370
mas02md@547 371 \newcommand{\Vsl}{\mathrel{~FV^{sl}}}
mas02md@547 372
mas02md@547 373 %% \begin{zed}
mas02md@547 374 %% \Vsl == \FV
mas02md@547 375 %% \end{zed}
mas02md@547 376
mas02md@547 377
mas02md@547 378 \item \textsf{Feature Vector Dimension} - the length of a feature vector.
mas02md@547 379
mas02md@547 380 \begin{axdef}
mas02md@547 381 dimension : \FV \fun \nat
mas02md@547 382 \where
mas02md@547 383 \forall fv : \FV @ dimension~fv = \# fv
mas02md@547 384 \end{axdef}
mas02md@547 385
mas02md@547 386 \item \textsf{Unit Vector} - any feature vector with a dimension of 1.
mas02md@547 387
mas02md@547 388 \begin{zed}
mas02md@547 389 UnitVector == \{ fv : \FV | dimension~fv = 1 \}
mas02md@547 390 \end{zed}
mas02md@547 391
mas02md@547 392 \item \textsf{Extractor} - a process which computes a feature vector of fixed dimension for any interval of any track, represented formally as a function that takes a track and an interval and returns a feature vector.
mas02md@547 393
mas02md@547 394 \begin{zed}
mas02md@547 395 Extractor == Track \fun Interval \fun \FV
mas02md@547 396 \end{zed}
mas02md@547 397
mas02md@547 398 \item \textsf{Unit Extractor} - a subclass of Extractor where the feature vector dimension is equal to 1.
mas02md@547 399
mas02md@547 400 \begin{zed}
mas02md@547 401 UnitExtractor == \\
mas02md@547 402 \t1 \{ e : Extractor | \forall t : Track; i : Interval @ dimension (e~t~i) = 1 \}
mas02md@547 403 \end{zed}
mas02md@547 404
mas02md@547 405 \item \textsf{Bel Unit Extractor} - a subclass of Unit Extractor where the function outputs on the bel scale (between minus infinity and zero).
mas02md@547 406
mas02md@547 407 \begin{zed}
mas02md@547 408 BelUnitExtractor == UnitExtractor
mas02md@547 409 \end{zed}
mas02md@547 410
mas01mj@619 411 The range of the real contained in the bel unit vector is less than 0.
mas01mj@619 412
mas01mj@619 413 \begin{zed}
mas01mj@619 414 \forall t : Track; int : Interval; ux : BelUnitExtractor @ \\
mas01mj@619 415 \t5 head (ux~t~int) \leq 0
mas01mj@619 416 \end{zed}
mas01mj@619 417
mas01mj@619 418
mas02md@547 419 \item \textsf{Feature Extractors} - the set of all extractors associated with a feature.
mas02md@547 420
mas02md@547 421 As this data structure can be updated over time - by adding and removing new features, and adding and removing extractors for a feature as new ones arise and old ones are not used - we use a schema. The predicate states that the feature vector dimension computed by an extractor must be equal to the associated feature dimension. Every feature has a unique associated unit feature and every extractor has a unique associated unit extractor. Once a user defines a feature, and one its associated extractors, the unit feature and extractor are generated automatically.
mas02md@547 422
mas02md@547 423 \begin{schema}{FeatureExtractors}
mas02md@547 424 featureextractors : Feature \pfun (\power Extractor) \\
mas02md@547 425 features : \power Feature \\
mas02md@547 426 extractors : \power Extractor \\
mas02md@547 427 \where
mas02md@547 428 \dom featureextractors = features \\
mas02md@547 429 \bigcup (\ran featureextractors) = extractors \\
mas02md@547 430 \forall f : Feature ; e : Extractor ; t : Track; i : Interval \\
mas02md@547 431 \t1 @ e \in featureextractors(f) \implies \\
mas02md@547 432 \t2 f.fdimension = dimension (e~ t ~ i) \\
mas02md@547 433 \end{schema}
mas02md@547 434
mas02md@547 435 \item \textsf{Track Feature Vectors} - given a segmenter and an extractor the sequence of feature vectors for each interval of the track
mas02md@547 436
mas02md@547 437 \begin{flushright}
mas02md@547 438 \begin{tikzpicture}
mas02md@547 439 \input track-feature-vectors.tikz
mas02md@547 440 \end{tikzpicture}
mas02md@547 441 \end{flushright}
mas02md@547 442
mas02md@547 443 The function $extract$ applies an extractor to each interval of a track to return a list of feature vectors. This requires the generic function map, which takes a function and applies it to every element of a list. A definition can be found in the appendix.
mas02md@547 444
mas02md@547 445 %%\begin{gendef}[X,Y]
mas02md@547 446 %% map : (X \fun Y) \fun (\seq X) \fun (\seq Y) \\
mas02md@547 447 %%\where
mas02md@547 448 %% \forall f : X \fun Y; x : X; xs,ys : \seq X @ \\
mas02md@547 449 %%\t1 map~f~\langle \rangle = \langle \rangle \land \\
mas02md@547 450 %%\t1 map~f~(xs \cat ys) = map~f~xs \cat map~f~ys
mas02md@547 451 %%\end{gendef}
mas02md@547 452
mas02md@547 453 \begin{axdef}
mas02md@547 454 extract : Segmenter \fun Extractor \fun Track \fun \seq \FV
mas02md@547 455 \where
mas02md@547 456 \forall seg : Segmenter; fx : Extractor; t : Track @ \\
mas02md@547 457 \t5 extract~seg~fx~t = map ~ (fx~t) (seg~t)
mas02md@547 458 \end{axdef}
mas02md@547 459
mas02md@547 460
mas02md@547 461 \item \textsf{Track Unit Vectors} - defined as above but specifically for unit extractors
mas02md@547 462
mas02md@547 463 \begin{flushright}
mas02md@547 464 \begin{tikzpicture}
mas02md@547 465 \input interval-index.tikz
mas02md@547 466 \input segmented-track.tikz
mas02md@547 467 % \foreach \x in {-2.5,-2,-1.4,-0.5,0,0.8,1.3,1.8} {
mas02md@547 468 % \begin{scope}[xshift=\x cm,yshift=-0.4 cm]
mas02md@547 469 \foreach \x in {-1.8,-1.4,...,1.1} {
mas02md@547 470 \begin{scope}[xshift=\x cm,yshift=1.1 cm]
mas02md@547 471 \draw (-0.1,0) rectangle (0.1,0.2);
mas02md@547 472 \end{scope}
mas02md@547 473 }
mas02md@547 474 \begin{scope}[yshift=1.1 cm]
mas02md@547 475 \draw[|-|,>=stealth] (-2.3,0) -- (-2.3,0.2);
mas02md@547 476 \draw node[anchor=south east] at (-2.4,0.1) {\small $d=1$};
mas02md@547 477 \end{scope}
mas02md@547 478 \end{tikzpicture}
mas02md@547 479 \end{flushright}
mas02md@547 480
mas01mj@619 481 \item \textsf{Catalogue Feature Vectors} - a sequence of track feature vectors for each track in the catalogue ($features$)
mas02md@547 482
mas01mj@619 483 \item \textsf{Catalogue Unit Vectors} - a sequence of track unit vectors for each track in the catalogue ($unitfeatures$)
mas02md@547 484
mas02md@562 485 \item \textsf{Catalogue Index} - a sequence of continuous interval indexes (one for each track).
mas02md@562 486
mas02md@562 487 An example can be seen below
mas02md@562 488
mas02md@562 489 \[ \langle
mas01mj@619 490 \langle (0,r_{11}), (r_{11},r_{12}), (r_{12}, r_{13}) \rangle, \\
mas01mj@619 491 \langle (0,r_{21}), (r_{21},r_{22}), (r_{22}, r_{23}, (r_{23}, r_{24}) \rangle, \\
mas01mj@619 492 \langle (0,r_{31}), (r_{31},r_{32}), (r_{32}, r_{33}) \rangle, \\
mas02md@562 493 \rangle \]
mas02md@562 494
mas02md@562 495 \item \textsf{Instance} - a catalogue, segmenter, feature, extractor, unit feature, unit extractor, dimension, feature vectors, catal
mas02md@547 496
mas02md@547 497 % \emph{In our usage (see \textsf{Absolute} \and \textsf{Relative} in section \ref{s:refining}), we require certain semantics of the \textsf{Unit Feature} (and so of the values returned by its \textit{Unit Extractors}. Specifically, the values returned must be interpretable on a logarithmic scale in Bels, with the maximum possible value being the reference threshold at 0B.}
mas02md@547 498
mas02md@547 499 \begin{schema}{Instance}
mas02md@547 500 cat : Catalogue \\
mas02md@547 501 % tracks : \power Track \\
mas02md@547 502 seg : Segmenter \\
mas02md@547 503 f : Feature \\
mas02md@547 504 x : Extractor \\
mas02md@547 505 uf : UnitFeature \\
mas02md@547 506 ux : UnitExtractor \\
mas02md@562 507 d : Dimension \\
mas02md@562 508 index : \seq ContIntIndex \\
mas02md@562 509 features : \seq ~ ( \seq \V) \\
mas02md@562 510 unitfeatures : \seq ~ (\seq \U) \\
mas02md@547 511 \where
mas02md@547 512 d = f.fdimension \\
mas02md@562 513 index = map~seg~cat \\
mas02md@562 514 features = map ~ (extract~seg~x) ~ cat \\
mas02md@562 515 unitfeatures = map ~ (extract~seg~ux) ~ cat \\
mas02md@547 516 \end{schema}
mas02md@547 517
mas02md@562 518 \item \textsf{Singleton Instances} - the set of System instances which contain only one track
mas02md@547 519
mas02md@562 520 \begin{schema}{SingletonInstance}
mas02md@562 521 Instance \\
mas02md@562 522 \where
mas02md@562 523 \# cat = 1
mas02md@562 524 \end{schema}
mas02md@562 525
mas02md@562 526 \item \textsf{System Instances} - the set of system instances.
mas02md@562 527
mas02md@562 528 Note that every instance must contain a catalogue in the music collection but not all catalogues in the collection are necessarily part of an instance.
mas02md@547 529
mas02md@547 530 \begin{schema}{SystemInstances}
mas02md@547 531 Collection \\
mas02md@547 532 instances : \power Instance
mas02md@547 533 \where
mas02md@547 534 \{ i : instances @ i.cat \} \subseteq collection
mas02md@547 535 \end{schema}
mas02md@547 536
mas02md@562 537 \section{Search Vectors}
mas01mj@619 538 Searching takes place using concatenations of feature vectors to build \emph{search} vectors. For a particular query all search vectors will have a fixed length equal to some multiple (which we will refer to as $sl$, the search length) of the dimension of the orignal feature vectors ($d$).
mas02md@547 539
mas02md@547 540 In the schema below, the function $makesearchvs$ takes a natual number $sl$ and a sequence of feature vectors (associated with a track) to create a sequence of search vectors. The first element of the returned sequence is the concatenation of the first $sl$ feature vectors, the second element is also the concatenation the next $sl$ feature vectors but starting from the second element, and so on until all such sequences are formed. It should be clear that if the original sequence contains $n$ feature vectors then the output will contain $n-sl+1$ vectors.
mas02md@547 541
mas02md@547 542 \begin{flushright}
mas02md@547 543 \begin{tikzpicture}[>=stealth]
mas02md@547 544 \begin{scope}[xshift=-6 cm]
mas02md@547 545 \input track-feature-vectors.tikz
mas02md@547 546 \begin{scope}[yshift=1.1 cm]
mas02md@547 547 \begin{scope}[yshift=1.1 cm,xshift=-1.8cm]
mas02md@547 548 \input fv1.tikz
mas02md@547 549 \draw (-0.1,0) rectangle (0.1,1);
mas02md@547 550 \draw (-0.1,0.2) -- (0.1,0.2);
mas02md@547 551 \draw (-0.1,0.4) -- (0.1,0.4);
mas02md@547 552 \draw (-0.1,0.6) -- (0.1,0.6);
mas02md@547 553 \draw (-0.1,0.8) -- (0.1,0.8);
mas02md@547 554 \end{scope}
mas02md@547 555 \begin{scope}[yshift=2.2 cm,xshift=-1.8cm]
mas02md@547 556 \input fv2.tikz
mas02md@547 557 \draw (-0.1,0) rectangle (0.1,1);
mas02md@547 558 \draw (-0.1,0.2) -- (0.1,0.2);
mas02md@547 559 \draw (-0.1,0.4) -- (0.1,0.4);
mas02md@547 560 \draw (-0.1,0.6) -- (0.1,0.6);
mas02md@547 561 \draw (-0.1,0.8) -- (0.1,0.8);
mas02md@547 562 \end{scope}
mas02md@547 563 \draw[->] (-1.4,1) arc(0:90:0.4 and 0.6);
mas02md@547 564 \draw[->] (-1.0,1) arc(0:90:0.8 and 1.7);
mas02md@547 565 \draw[->] (-1.0,1) arc(0:90:0.4 and 0.6);
mas02md@547 566 \draw[->] (-0.6,1) arc(0:90:0.8 and 1.7);
mas02md@547 567 \end{scope}
mas02md@547 568 \end{scope}
mas02md@547 569 \input interval-index.tikz
mas02md@547 570 \input segmented-track.tikz
mas02md@547 571 % \foreach \x in {-2.5,-2,-1.4,-0.5,0,0.8,1.3,1.8} {
mas02md@547 572 % \begin{scope}[xshift=\x cm,yshift=-0.4 cm]
mas02md@547 573 \foreach \x/\f in {-1.8/fv0,-1.4/fv1,-1.0/fv2,-0.6/fv3,-0.2/fv4,0.2/fv5,0.6/fv6,1.0/fv7} {
mas02md@547 574 \begin{scope}[yshift=1.1 cm,xshift=\x cm]
mas02md@547 575 \input \f.tikz
mas02md@547 576 \end{scope}
mas02md@547 577 }
mas02md@547 578 \foreach \x/\f in {-1.8/fv1,-1.4/fv2,-1.0/fv3,-0.6/fv4,-0.2/fv5,0.2/fv6,0.6/fv7} {
mas02md@547 579 \begin{scope}[yshift=2.1 cm,xshift=\x cm]
mas02md@547 580 \input \f.tikz
mas02md@547 581 \end{scope}
mas02md@547 582 }
mas02md@547 583 \foreach \x/\f in {-1.8/fv2,-1.4/fv3,-1.0/fv4,-0.6/fv5,-0.2/fv6,0.2/fv7} {
mas02md@547 584 \begin{scope}[yshift=3.1 cm,xshift=\x cm]
mas02md@547 585 \input \f.tikz
mas02md@547 586 \end{scope}
mas02md@547 587 }
mas02md@547 588 \foreach \x in {-1.8,-1.4,...,0.3} {
mas02md@547 589 \begin{scope}[xshift=\x cm,yshift=1.1 cm]
mas02md@547 590 \draw (-0.1,0) rectangle (0.1,3);
mas02md@547 591 \foreach \y in {0.2,0.4,...,2.9} {
mas02md@547 592 \draw (-0.1,\y) -- (0.1,\y);
mas02md@547 593 }
mas02md@547 594 \end{scope}
mas02md@547 595 }
mas02md@547 596 \begin{scope}[yshift=1.1 cm]
mas02md@547 597 \draw[<->,>=stealth] (-2.1,0) -- (-2.1,3);
mas02md@547 598 \draw node[anchor=east] at (-2.3,1.5) {$d \times sl$};
mas02md@547 599 \end{scope}
mas02md@547 600 \begin{scope}[yshift=1.1 cm]
mas02md@547 601 \draw[<->,>=stealth] (-2,3.2) -- (0.4,3.2);
mas02md@547 602 \draw node at (-0.8,3.6) {$n - sl + 1$};
mas02md@547 603 \end{scope}
mas02md@547 604 \begin{scope}[xshift=0.6 cm,yshift=1.1 cm]
mas02md@547 605 \draw (-0.1,0) rectangle (0.1,2);
mas02md@547 606 \foreach \y in {0.2,0.4,...,1.9} {
mas02md@547 607 \draw (-0.1,\y) -- (0.1,\y);
mas02md@547 608 }
mas02md@547 609 \draw[dashed] (-0.1,0) -- (-0.3,1);
mas02md@547 610 \draw[dashed] (-0.1,1) -- (-0.3,2);
mas02md@547 611 \draw[dashed] (-0.1,2) -- (-0.3,3);
mas02md@547 612 \end{scope}
mas02md@547 613 \begin{scope}[xshift=1.0 cm,yshift=1.1 cm]
mas02md@547 614 \draw (-0.1,0) rectangle (0.1,1);
mas02md@547 615 \foreach \y in {0.2,0.4,...,0.9} {
mas02md@547 616 \draw (-0.1,\y) -- (0.1,\y);
mas02md@547 617 }
mas02md@547 618 \draw[dashed] (-0.1,0) -- (-0.3,1);
mas02md@547 619 \draw[dashed] (-0.1,1) -- (-0.3,2);
mas02md@547 620 \end{scope}
mas02md@547 621 \foreach \x in {-1.4,-1.0,...,0.3} {
mas02md@547 622 \begin{scope}[xshift=\x cm,yshift=1.1 cm]
mas02md@547 623 \draw[dashed] (-0.1,0) -- (-0.3,1);
mas02md@547 624 \draw[dashed] (-0.1,1) -- (-0.3,2);
mas02md@547 625 \draw[dashed] (-0.1,2) -- (-0.3,3);
mas02md@547 626 \end{scope}
mas02md@547 627 }
mas02md@547 628 \end{tikzpicture}
mas02md@547 629 \end{flushright}
mas02md@547 630
mas02md@547 631 \begin{axdef}
mas02md@547 632 makesearchvs : \nat \fun \seq \Vd \fun \seq \Vdsl
mas02md@547 633 \where
mas02md@547 634 \forall xs : \seq \FV ; sl : \nat @ \\
mas02md@547 635 \t1 sl > \# xs \implies makesearchvs~sl~xs = \langle \rangle \land \\
mas02md@547 636 \t1 sl \leq \# xs \implies makesearchvs~sl~xs = \\
mas01mj@619 637 \t2 concat (\langle (1 \upto sl) \dres xs \rangle) \cat makesearchvs~sl (tail~xs)
mas02md@547 638 \end{axdef}
mas02md@547 639
mas02md@547 640
mas02md@562 641 \item \textsf{Instance Search Vectors}
mas02md@547 642
mas02md@562 643 For any natural number less than the length of feature vectors for each track we can calculate the search vectors for an given by applying the $makesearchvs$ function to each of the sequences of feature vectors. We call this natural number \emph{search length} ($sl$) and we can use $map$ to apply $makesearchvs ~ sl$ to each of these sequences.
mas02md@547 644
mas02md@562 645 \begin{schema}{SearchVectors}
mas02md@562 646 i? : Instance \\
mas02md@562 647 sl? : \nat \\
mas02md@562 648 searchvs! : \seq (\seq \Vdsl) \\
mas02md@562 649 unitvs! : \seq (\seq \Vsl) \\
mas02md@547 650 \where
mas02md@562 651 searchvs! = map ~ (makesearchvs ~ sl?) ~ i?.features \\
mas01mj@619 652 unitvs! = map ~ (makesearchvs ~ sl?) ~ i?.unit \\
mas02md@547 653 \end{schema}
mas02md@547 654
mas02md@547 655 \item \textsf{Hopped Search Vectors}
mas02md@547 656
mas01mj@619 657 Rather then generating all possible search vectors we may wish to general vectors starting at equally distanced intervals in the interval index of the tracks in an instance. We refer to the size of this interval as $hop$.
mas01mj@619 658
mas01mj@619 659 \begin{gendef}[X]
mas01mj@619 660 strip : \nat \fun (\seq X) \fun \seq X
mas01mj@619 661 \where
mas01mj@619 662 \forall x : X; xs : \seq X @ \\
mas01mj@619 663 \t1 strip ~ 0 ~ xs = xs \land \\
mas01mj@619 664 \t1 strip ~ (n+1) ~ \langle \rangle = \langle \rangle \\
mas01mj@619 665 \t1 strip ~ (n+1) ~ \langle x \rangle \cat xs = strip ~ n ~ xs
mas01mj@619 666 \end{gendef}
mas02md@547 667
mas02md@547 668 \begin{axdef}
mas02md@547 669 makehopsearchvs : \nat \fun \nat \fun \seq \Vd \fun \seq \Vdsl
mas02md@547 670 \where
mas02md@547 671 \forall xs : \seq \FV ; sl : \nat ; hop : \nat @ \\
mas02md@547 672 \t1 sl > \# xs \implies makehopsearchvs~sl~hop~xs = \langle \rangle \land \\
mas02md@547 673 \t1 sl \leq \# xs \implies makehopsearchvs~sl~hop~xs = \\
mas01mj@619 674 \t2 concat (\langle (1 \upto sl) \dres xs \rangle) \cat \\
mas01mj@619 675 \t2 makehopsearchvs~sl~hop~ (strip~hop~xs)
mas02md@547 676 \end{axdef}
mas02md@547 677
mas01mj@619 678 This enables us to define a hop size when generating search vectors
mas02md@547 679
mas02md@562 680 \begin{schema}{HopSearchVectors}
mas02md@562 681 SearchVectors \\
mas02md@547 682 hop? : \nat \\
mas02md@562 683 hopsearchvs! : \seq (\seq \Vdsl) \\
mas02md@562 684 hopunitvs! : \seq (\seq \Vsl) \\
mas02md@547 685 \where
mas02md@562 686 hopsearchvs! = map ~ (makehopsearchvs ~i?.d ~ hop?) ~ i?.features \\
mas01mj@619 687 hopunitvs! = map ~ (makehopsearchvs ~ i?.d ~ hop?) ~ i?.unit \\
mas02md@547 688 \end{schema}
mas02md@547 689
mas02md@562 690 \item \textsf{Search Vector Power}
mas02md@562 691
mas01mj@619 692 For each Search Vector in an instance's search vectors we take the associated unit values and take the arithmetic mean.
mas02md@562 693
mas02md@562 694 \begin{axdef}
mas02md@562 695 average : \V \fun \R
mas02md@562 696 \end{axdef}
mas02md@562 697
mas02md@562 698 \begin{schema}{Powers}
mas02md@562 699 SearchVectors \\
mas02md@562 700 powers : \seq (\seq \R)
mas02md@562 701 \where
mas02md@562 702 powers = map~(map ~ average) ~ unitvs! \\
mas02md@562 703 \end{schema}
mas02md@562 704
mas02md@562 705 \item \textsf{Search Vector Duration}
mas02md@562 706
mas01mj@619 707 The duration of the sequence of the original feature vectors from which the search vector is composed.
mas02md@562 708
mas02md@562 709 \begin{enumerate}
mas02md@562 710
mas02md@562 711 \item \textsf{Single Search Vector Duration}
mas02md@562 712
mas02md@562 713 This is defined as the duration of the sequence for each search vector. So for example if a search vector was made from the combination of 3 feature vectors and those 3 feature vectors came from the following intervals $ (3,8), (8,11), (11,13) $ then the duration of the search vector is $13-3=10$.
mas02md@562 714
mas02md@562 715 \item \textsf{Track Search Vector Durations}
mas02md@562 716
mas01mj@619 717 Next we define a function at the level of track search vectors. We define a function which takes a sequence of search vectors for that track, the search length that was used to create them, and the track interval index and returns a sequence of natural numbers each of which is the duration of the corresponding search vector
mas02md@562 718
mas02md@562 719 \begin{axdef}
mas02md@562 720 durationsTsv : \seq \Vdsl \fun ContIntIndex \fun \nat \fun \seq \R
mas02md@562 721 \where
mas02md@562 722 \forall svs : \seq \Vdsl; cii : ContIntIndex; sl : \nat @ \\
mas02md@562 723 \t1 durationsTsv~svs~cii~sl = \\
mas02md@562 724 \t2 \{ n : \nat | n \in 1 \upto n - sl + 1 @ \\
mas02md@562 725 \t3 (n, first (cii (n + sl)) - first (cii (n)) ) \}
mas02md@562 726 \end{axdef}
mas02md@562 727
mas02md@562 728 \item \textsf{Instance Search Vector Durations}
mas02md@562 729
mas01mj@619 730 Finally, we define a function called svdurations which takes a sequence of search vectors and calculates the track search vector durations for each sequence.
mas02md@562 731
mas02md@562 732 \begin{axdef}
mas02md@562 733 durationsIsv : \seq (\seq \Vdsl) \fun (\seq ContIntIndex) \fun \nat \fun \seq (\seq \R) \\
mas02md@562 734 \where
mas02md@562 735 \forall svss : \seq (\seq \Vdsl); sl : \nat; ciis : \seq ContIntIndex @ \\
mas02md@562 736 \t1 durationsIsv ~ svss ~ ciis ~ sl = \\
mas02md@562 737 \t2 \langle durationsTsv ~ (head ~ svss) ~ (head ~ ciis) ~ sl \rangle \cat \\
mas02md@562 738 \t3 durationsIsv ~ (tail ~ svss) ~ (tail ~ ciis) ~ sl \land \\
mas02md@562 739 \t1 durationsIsv ~ \langle \rangle~ \langle \rangle ~ sl = \langle \rangle
mas02md@562 740 \end{axdef}
mas02md@562 741
mas02md@547 742 \end{enumerate}
mas02md@547 743
mas02md@562 744 \begin{schema}{Durations}
mas02md@562 745 SearchVectors \\
mas01mj@619 746 svdurations! : \seq (\seq \R) \\
mas02md@562 747 \where
mas01mj@619 748 svdurations! = durationsIsv ~ searchvs! ~ i?.index ~ sl?
mas02md@562 749 \end{schema}
mas02md@547 750
mas02md@562 751 % \end{enumerate}
mas02md@562 752
mas02md@562 753 \section{Making a Query}
mas02md@562 754
mas02md@562 755 \item \textsf{General Instance Query}
mas02md@562 756
mas02md@562 757 Two instances, a source and a target are identified by the user for comparison as well as the search length from which the search vectors are defined for both the source and the sequence. Once the query has been defined we can generate the \emph{source search vectors}, \emph{source unit vectors}, \emph{target search vectors} and \emph{target unit vectors} as specified earlier. We also generate \emph{source powers}, \emph{target powers}, \emph{source search vector durations} and \emph{target search vector durations}.
mas02md@562 758
mas02md@562 759 \emph{What are the pre-conditions regarding the search length? That it is less than the length of any sequence of feature vectors for every track?}
mas02md@562 760
mas02md@562 761 \begin{schema}{GeneralQuery}
mas02md@562 762 SystemInstances \\
mas02md@562 763 src?, tgt? : Instance \\
mas02md@562 764 sl?: \nat \\
mas02md@562 765 srcsearchvs!, tgtsearchvs! : \seq (\seq \Vdsl) \\
mas02md@562 766 srcunitvs!, tgtunitvs! : \seq (\seq \Vsl) \\
mas02md@562 767 sourcepowers, targetpowers : \seq (\seq \R) \\
mas02md@562 768 sourcedurations, targetdurations : \seq (\seq \R) \\
mas02md@562 769 \where
mas02md@562 770 \{ src?, tgt? \} \subseteq instances \\
mas02md@562 771 \end{schema}
mas02md@547 772
mas02md@562 773 \item \textsf{General Instance Self Query}
mas02md@562 774
mas02md@562 775 When an instance is identified as both the source and target we state that it is a self query.
mas02md@562 776
mas02md@562 777 \begin{schema}{GeneralSelfQuery}
mas02md@562 778 GeneralQuery
mas02md@562 779 \where
mas02md@562 780 src? = tgt?
mas02md@562 781 \end{schema}
mas02md@562 782
mas02md@562 783 \item Singleton (Track) Query
mas02md@562 784
mas02md@562 785 A user defines an instance with exactly one track as the source.
mas02md@562 786
mas02md@562 787 \begin{schema}{SingletonQuery}
mas02md@562 788 GeneralQuery
mas02md@562 789 \where
mas02md@562 790 src? \in SingletonInstance
mas02md@562 791 \end{schema}
mas02md@562 792
mas02md@562 793 \item Point (Sequence) Query
mas02md@562 794
mas02md@562 795 The user identifies a specific part of the track in a singleton instance to be used as the source known as a \emph{sequence}. A user defines the sequence by specifying the start and end points of the sequence index. Once these have been defined the sequence, sequence index and sequence length are easily identified.
mas02md@562 796
mas02md@562 797 \begin{enumerate}
mas02md@562 798
mas02md@562 799 \item \textsf{Source track} - the single track contained in the instance.
mas02md@562 800
mas02md@562 801 \item \textsf{Sequence} - a continuous sub-section of the point query track used to make a query.
mas02md@562 802
mas02md@562 803 \begin{zed}
mas02md@562 804 Sequence == Track
mas02md@562 805 \end{zed}
mas02md@562 806
mas02md@562 807 \item \textsf{Sequence Index} - a continuous interval index that defines the sequence according to the segmenter of the instance.
mas02md@562 808
mas02md@562 809 % \emph{Can we discuss this - is it reset so that the first element is indexed by 1 and so on?}
mas02md@562 810
mas02md@562 811 \item \textsf{Sequence Length} - the number of intervals in the sequence index.
mas02md@562 812
mas02md@562 813 \end{enumerate}
mas02md@562 814
mas02md@562 815 \begin{schema}{PointQuery}
mas01mj@619 816 GeneralQuery \\
mas02md@562 817 start?, end? : \nat \\
mas01mj@619 818 sourcetrack? : Track \\
mas02md@562 819 sequence! : Sequence \\
mas02md@562 820 seqindex! : ContIntIndex \\
mas02md@562 821 sl! : \nat \\
mas02md@562 822 \where
mas01mj@619 823 sourcetrack? \in \ran cat \\
mas01mj@619 824 start? \in \dom (src?.seg (sourcetrack?)) \\
mas01mj@619 825 end? \in \dom (src?.seg (sourcetrack?)) \\
mas01mj@619 826 seqindex! = (start? \upto end?) \dres (src?.seg~sourcetrack?) \\
mas02md@562 827 sl! = end? - start? \\
mas02md@562 828 \end{schema}
mas02md@562 829
mas01mj@619 830 In this case the search vectors will be equal to a sequence containing only one sequence. That sequence will be a search vector of dimension $d * sl$ where $d$ is the dimension of the sequence and $sl$ the length of the sequence.
mas02md@562 831
mas02md@562 832 \[ \langle \langle \Vdsl \rangle \rangle \]
mas02md@562 833
mas02md@562 834 \section{Distance between Search Vectors}
mas02md@547 835
mas02md@547 836 We define the scalar product and distance between vectors of equal dimension.
mas02md@547 837
mas02md@547 838 \begin{axdef}
mas02md@547 839 scalarproduct : \V \fun \V \fun \R \\
mas02md@547 840 distance : \V \fun \V \fun \R \\
mas02md@547 841 \end{axdef}
mas02md@547 842
mas02md@547 843 \begin{zed}
mas02md@547 844 \forall v_1, v_2 : \V @ distance~v_1~v_2 = 2 - scalarproduct~v_1~v_2
mas02md@547 845 \end{zed}
mas02md@547 846
mas02md@562 847 Then we can define a variable which maintains a list of all distances between the source and target search vectors. If we have a sequence of sequence of search vectors in the both the source and the target we will end up with a sequence of sequence of sequence of sequences.
mas02md@547 848
mas02md@562 849 For the purposes of illumination imagine a source and target each with two simple search vectors. Again, we will replace the reals with naturals here.
mas02md@562 850
mas02md@562 851 Let us assume we have the following source search vectors for an instance of two tracks
mas02md@562 852
mas02md@562 853 \[ \langle \langle a_1, a_2, a_3 \rangle, \langle b_1, b_2 \rangle \rangle \]
mas02md@562 854
mas02md@562 855 And the following target search vectors also for an instance of two tracks
mas02md@562 856
mas02md@562 857 \[ \langle \langle x_1, x_2 \rangle, \langle y_1, y_2, y_3 \rangle \rangle \]
mas02md@562 858
mas02md@562 859 Then we need to generate the following data structure
mas02md@562 860
mas02md@562 861 \[ \langle \\ % First track in source with target instance \\
mas02md@562 862 \t1 \langle \\ % First Search Vector with whole of target \\
mas02md@562 863 \t2 \langle \\ % First Search Vector with first track
mas02md@562 864 \t3 \langle a_1 \dot x_1, a_1 \dot x_2 \rangle, \\
mas02md@562 865 \t3 \langle a_1 \dot y_1, a_1 \dot y_2, a_1 \dot y_3 \rangle \\
mas02md@562 866 \t2 \rangle, \\
mas02md@562 867 % Second search vector
mas02md@562 868 \t2 \langle \\
mas02md@562 869 \t3 \langle a_2 \dot x_1, a_2 \dot x_2 \rangle, \\
mas02md@562 870 \t3 \langle a_2 \dot y_1, a_2 \dot y_2, a_2 \dot y_3 \rangle \\
mas02md@562 871 \t2 \rangle, \\
mas02md@562 872 \t2 \langle \\
mas02md@562 873 \t3 \langle a_3 \dot x_1, a_3 \dot x_2 \rangle, \\
mas02md@562 874 \t3 \langle a_3 \dot y_1, a_3 \dot y_2, a_3 \dot y_3 \rangle \\
mas02md@562 875 \t2 \rangle \\
mas02md@562 876 \t1 \rangle, \\
mas02md@562 877 % Second track in source with target instance
mas02md@562 878 \t1 \langle, \\
mas02md@562 879 \t2 \langle \\
mas02md@562 880 \t3 \langle b_1 \dot x_1, b_1 \dot x_2 \rangle, \\
mas02md@562 881 \t3 \langle b_1 \dot y_1, b_1 \dot y_2, b_1 \dot y_3 \rangle \\
mas02md@562 882 \t2 \rangle \\
mas02md@562 883 \t2 \langle \\
mas02md@562 884 \t3 \langle b_2 \dot x_1, b_2 \dot x_2 \rangle, \\
mas02md@562 885 \t3 \langle b_2 \dot y_1, b_2 \dot y_2, b_2 \dot y_3 \rangle \\
mas02md@562 886 \t2 \rangle \\
mas02md@562 887 \t1 \rangle \\
mas02md@562 888 \rangle \\
mas02md@562 889 \]
mas02md@562 890
mas02md@562 891
mas01mj@619 892 First let us define the function to calculate the distances for a single search vector.
mas02md@562 893
mas02md@562 894 \begin{axdef}
mas02md@562 895 vectordistances : \Vdsl \fun \seq (\seq \Vdsl) \fun \seq (\seq \R)
mas02md@547 896 \where
mas02md@562 897 \forall sv : \Vdsl; targetsv : \seq (\seq \Vdsl) @ \\
mas02md@562 898 \t1 vectordistances~sv~targetsv = map (map (distance~sv)) targetsv
mas02md@562 899 \end{axdef}
mas02md@562 900
mas02md@562 901 Next we define the function for calculating the distances for the search vectors of a track with a target instance.
mas02md@562 902
mas02md@562 903 \begin{axdef}
mas02md@562 904 trackdistances : (\seq \Vdsl) \fun \seq (\seq \Vdsl) \fun \seq (\seq (\seq \R))
mas02md@562 905 \where
mas02md@562 906 \forall tracksv : \seq \Vdsl; targetsv : \seq (\seq \Vdsl) @ \\
mas02md@562 907 \t1 trackdistances~tracksv~targetsv = \\
mas02md@562 908 \t3 \langle vectordistances~(head ~ tracksv)~targetsv \rangle \cat \\
mas02md@562 909 \t4 trackdistances~(tail ~ tracksv) ~ targetsv \land \\
mas02md@562 910 \t1 trackdistances~\langle \rangle ~targetsv = \langle \langle \langle \rangle \rangle \rangle
mas02md@562 911 \end{axdef}
mas02md@562 912
mas02md@562 913 Then we can define a function which calculating the distances between a source and target instance.
mas02md@562 914
mas02md@562 915 \begin{axdef}
mas02md@562 916 instancedistances : \seq (\seq \Vdsl) \fun \seq (\seq \Vdsl) \fun \seq (\seq (\seq (\seq \R)))
mas02md@562 917 \where
mas02md@562 918 \forall sourcesv, targetsv: \seq (\seq \Vdsl) @ \\
mas02md@562 919 \t1 instancedistances~sourcesv~targetsv = \\
mas02md@562 920 \t3 \langle trackdistances~(head ~ sourcesv)~targetsv \rangle \cat \\
mas02md@562 921 \t4 instancedistances~(tail ~ sourcesv) ~ targetsv \land \\
mas02md@562 922 \t1 instancedistances~\langle \langle \rangle \rangle ~targetsv = \langle \langle \langle \langle \rangle \rangle \rangle \rangle
mas02md@562 923 \end{axdef}
mas02md@562 924
mas02md@562 925 The next scheme takes a general query and calculates all the distances.
mas02md@562 926
mas02md@562 927 \begin{schema}{CalculateDistances}
mas02md@562 928 GeneralQuery \\
mas02md@562 929 distances : \seq (\seq (\seq (\seq \R)))
mas02md@562 930 \where
mas02md@562 931 distances = instancedistances~srcsearchvs!~tgtsearchvs!
mas02md@547 932 \end{schema}
mas02md@547 933
mas02md@562 934 The list can be sorted into ascending order and some threshold set (as we shall see) where we believe it is sensible to expect some acoustic or cognitive similarity.
mas02md@547 935
mas01mj@619 936 Along with the distance between the two search vectors, we also output the source track and index, the target track and index, the duration of the source and target search vectors, and the powers of the associated unit vectors.
mas02md@547 937
mas02md@562 938 \begin{schema}{Output}
mas02md@562 939 srctrack, srcindex, tgttrack, tgtindex : \nat \\
mas02md@562 940 distance : \R \\
mas02md@562 941 sourceduration, targetduration : \R \\
mas02md@562 942 sourcepower, targetpower : \R \\
mas02md@562 943 \end{schema}
mas02md@547 944
mas02md@562 945 The System outputs are a sequence of outputs ordered according to distance. We define a new variable which specifies the modified output that the user can make which we discuss in the next section.
mas02md@547 946
mas02md@547 947 \begin{schema}{SystemOutput}
mas02md@562 948 CalculateDistances \\
mas02md@547 949 output! : \seq Output \\
mas02md@547 950 modifiedoutput! : \seq Output
mas02md@547 951 \where
mas02md@547 952 \forall o_1, o_2 : Output @ \langle o_1, o_2 \rangle \inseq output! \\
mas02md@547 953 \t3 \implies o_1.distance \leq o_2.distance
mas02md@547 954 \end{schema}
mas02md@547 955
mas02md@562 956 \section{Refining a Search}
mas02md@562 957 \label{s:refining}
mas02md@562 958
mas02md@562 959 There are ways of refining the query.
mas02md@562 960
mas02md@562 961 \begin{enumerate}
mas02md@562 962
mas02md@562 963 \item \textsf{Key List} - specify specific tracks to search over. (Either source or target or both.)
mas02md@562 964
mas02md@562 965 \item \textsf{Radius} - reject distances which are greater than a given real number radius. (Either source or target or both.)
mas02md@562 966
mas02md@562 967 \item \textsf{Absolute} - reject any search vectors where the `power' average is less than a specific absolute value. (Either source or target or either or both.)
mas02md@562 968
mas02md@562 969 \item \textsf{Relative} - reject any search vectors where the `power' average is not within + or - a relative value. (Either source or target or either or both.)
mas02md@562 970
mas02md@562 971 \item \textsf{Duration Ratio} - remove any outputs where the relative durations of the search vectors are not within a specified range.
mas02md@562 972
mas02md@562 973 \item \textsf{Hop Size} - Rather than making search vectors which start with the first feature vectors and then the second feature vector and so on, make sparser search vectors by starting with fv at 1, then fv at $1 + h$, then fv at $1 + 2h$ and so on where h is the hop size. (Either source or target or both with either equal or separate values of hop.)
mas02md@562 974
mas02md@562 975 \end{enumerate}
mas02md@562 976
mas02md@547 977 \begin{enumerate}
mas02md@547 978
mas02md@547 979 \item Keylist
mas02md@547 980
mas02md@562 981 \begin{schema}{KeyListSource}
mas02md@547 982 SystemOutput \\
mas02md@562 983 k? : \power \nat
mas02md@547 984 \where
mas02md@562 985 modifiedoutput! = output! \filter \{ o : Output | o.srctrack \in k? \}
mas02md@547 986 \end{schema}
mas02md@547 987
mas02md@547 988 \item Radius
mas02md@547 989
mas02md@547 990 The system removes any elements which have a distance greater than the radius.
mas02md@547 991
mas02md@547 992 \begin{schema}{Radius}
mas02md@547 993 SystemOutput \\
mas02md@547 994 r? : \R \\
mas02md@547 995 \where
mas02md@547 996 modifiedoutput! = \\
mas02md@547 997 \t1 output! \filter \{ o : Output | o.distance \leq r? \}
mas02md@547 998 \end{schema}
mas02md@547 999
mas02md@562 1000 \item Absolute Source
mas02md@547 1001
mas02md@547 1002 \begin{schema}{Absolute}
mas02md@547 1003 SystemOutput \\
mas02md@547 1004 a? : \R \\
mas02md@547 1005 \where
mas02md@547 1006 modifiedoutput! = \\
mas02md@562 1007 \t1 output! \filter \{ o : Output | o.sourcepower \geq a? \}
mas02md@547 1008 \end{schema}
mas02md@547 1009
mas02md@547 1010 \item Relative
mas02md@547 1011
mas02md@547 1012 %% \begin{axdef}
mas02md@547 1013 %% abs : \R \fun \R
mas02md@547 1014 %% \end{axdef}
mas02md@547 1015
mas02md@547 1016
mas02md@562 1017
mas02md@547 1018 \begin{schema}{Relative}
mas02md@547 1019 SystemOutput \\
mas02md@547 1020 rel? : \R \\
mas02md@562 1021 % \where
mas02md@562 1022 % modifiedoutput! = \\
mas02md@562 1023 % \t1 output! \filter \{ o : Output | abs (o.power - sourcepower) \leq rel? \}
mas02md@547 1024 \end{schema}
mas02md@547 1025
mas02md@547 1026 \item Duration Ratio
mas02md@547 1027
mas02md@547 1028 %%unchecked
mas02md@547 1029 \begin{schema}{DurationRatio}
mas02md@547 1030 SystemOutput \\
mas02md@547 1031 d? : \R \\
mas02md@547 1032 \where
mas02md@547 1033 modifiedoutput! = \\
mas02md@562 1034 \t1 output! \filter \{ o : Output | \exp ^ {abs \ln (\frac{o.duration}{srcduration})} \leq d? \}
mas02md@547 1035 \end{schema}
mas02md@547 1036
mas02md@547 1037 \item Hop Size
mas02md@547 1038
mas02md@547 1039 \begin{schema}{Hop}
mas02md@547 1040 hop? : \nat \\
mas02md@547 1041 SystemOutput \\
mas02md@562 1042 HopSearchVectors \\
mas02md@547 1043 \end{schema}
mas02md@547 1044
mas02md@547 1045 \end{enumerate}
mas02md@562 1046 \end{enumerate}
mas02md@562 1047 \end{document}
mas02md@547 1048
mas02md@547 1049 \section{Setting Thresholds}
mas02md@547 1050
mas02md@547 1051 \subsection{For an Instance}
mas02md@547 1052
mas02md@547 1053 \begin{enumerate}
mas02md@547 1054 \item Guess several lengths based on understanding of the instance (beat, bar, phrase, section). ($sl$)
mas02md@547 1055 \item Generate all sequences of length ($sl$) from the tracks in the instance
mas02md@547 1056 \item Select pairs of sequences of length ($sl$) which may or may not be from the same track randomly from the instance, compute distance between these pairs, and repeat a fixed number of times.
mas02md@547 1057 \item Tabulate distances
mas02md@547 1058 \item Scary maths
mas02md@547 1059 \item Obtain a global threshold
mas02md@547 1060 \item Set the radius threshold equal to this value
mas02md@547 1061 \end{enumerate}
mas02md@547 1062
mas02md@547 1063 \subsection{For a Source Sequence and a Target Instance (I)}
mas02md@547 1064
mas02md@547 1065 \begin{enumerate}
mas02md@547 1066 \item Take the length of the source sequence ($sl$)
mas02md@547 1067 \item Generate all sequences of length ($sl$) from the tracks in the instance
mas02md@547 1068 \item Select a sequence randomly, calculate the distance form the source sequence, and repeat a fixed number of times.
mas02md@547 1069 \item Tabulate distances
mas02md@547 1070 \item Scary maths
mas02md@547 1071 \item Obtain a threshold for this sequence/instance pair
mas02md@547 1072 \item Set the radius threshold equal to this value
mas02md@547 1073 \end{enumerate}
mas02md@547 1074
mas02md@547 1075 \subsection{For a Source Track and a Target Instance (II)}
mas02md@547 1076
mas02md@547 1077 \begin{enumerate}
mas02md@547 1078 \item Choose a length $sl$ which is less than the track length
mas02md@547 1079 \item Generate all sequences of length ($sl$) from the tracks in the instance
mas02md@547 1080 \item Generate all sequences of length ($sl$) from the source sequence
mas02md@547 1081 \item Select one of these source sequences randomly and one of the sequences from one of the tracks in the instance randomly, calculate the distance form the source sequence, and repeat a fixed number of times.
mas02md@547 1082 \item Tabulate distances
mas02md@547 1083 \item Scary maths
mas02md@547 1084 \item Obtain a threshold for this track/instance pair
mas02md@547 1085 \item Set the radius threshold equal to this value
mas02md@547 1086 \end{enumerate}
mas02md@547 1087
mas02md@547 1088 \appendix
mas02md@547 1089 \section{Auxillary Definitions}
mas02md@547 1090
mas02md@547 1091 \subsection{The function $map$}
mas02md@547 1092
mas02md@547 1093 This requires need the generic function map, which takes a function and applies it to every element of a list.
mas02md@547 1094
mas02md@547 1095 %%unchecked
mas02md@547 1096 \begin{gendef}[X,Y]
mas02md@547 1097 map : (X \fun Y) \fun (\seq X) \fun (\seq Y) \\
mas02md@547 1098 \where
mas02md@547 1099 \forall f : X \fun Y; x : X; xs,ys : \seq X @ \\
mas02md@547 1100 \t1 map~f~\langle \rangle = \langle \rangle \land \\
mas02md@547 1101 \t1 map~f~(xs \cat ys) = map~f~xs \cat map~f~ys
mas02md@547 1102 \end{gendef}
mas02md@547 1103
mas02md@547 1104
mas02md@547 1105 \subsection{The function $concat$}
mas02md@547 1106
mas02md@547 1107 We can do this by making use of the following generic function that takes a list of lists of lists, and concatenates each of the lists together.
mas02md@547 1108
mas02md@547 1109 %%unchecked
mas02md@547 1110 \begin{gendef}[X]
mas02md@547 1111 concat : (\seq (\seq X)) \fun (\seq X)
mas02md@547 1112 \where
mas02md@547 1113 \forall xs : \seq X; xss : \seq (\seq X) @ \\
mas02md@547 1114 \t2 concat~ ((\langle xs \rangle) \cat xss) = xs \cat
mas02md@547 1115 (concat~xss)
mas02md@547 1116 \end{gendef}
mas02md@547 1117
mas02md@547 1118 \section{Probabilty of chosing a track}
mas02md@547 1119
mas02md@547 1120 The longer the track in a target the more sequences it will contain with the same number of intervals with length ($sl$). Therefore the probability of selecting a track is weighted. Once a track has been selected then a sequence within the track is chosen uniformly.
mas02md@547 1121
mas02md@547 1122 %% \begin{axdef}
mas02md@547 1123 %% sum : \power \R \fun \R
mas02md@547 1124 %% \end{axdef}
mas02md@547 1125
mas02md@547 1126 %% \begin{axdef}
mas02md@547 1127 %% pairmax : \R \cross \R \fun \R
mas02md@547 1128 %% \end{axdef}
mas02md@547 1129
mas02md@547 1130 %%unchecked
mas02md@547 1131 \begin{schema}{CalculateProbabilities}
mas02md@547 1132 i : Instance \\
mas02md@547 1133 sl : \nat \\
mas02md@547 1134 allslsequences : \nat \\
mas02md@547 1135 SystemInstances \\
mas02md@547 1136 prob : Track \fun \R \also
mas02md@547 1137 \where
mas02md@547 1138 allslsequences = \\
mas02md@547 1139 \t1 sum \{ t : Track @ pairmax ( (length (i.seg~t) - sl + 1), 0 ) \} \also
mas02md@547 1140 \forall t : Track @ prob~t = \frac{length (i.seg~t) - sl + 1}{ allslsequences}
mas02md@547 1141 \end{schema}
mas02md@547 1142
mas02md@547 1143 This uses a function $subsequences$ which takes a sequence of elements $s$ and a natural number $n$ and returns the number of contiguous sequences of length $n$ and a function $sumseq$ which sums the elements in a list.
mas02md@547 1144
mas02md@547 1145 \section{Examples illustrating functions}
mas02md@547 1146
mas02md@547 1147 \subsection{Track Extract}
mas02md@547 1148
mas02md@547 1149 Given a segmenter, an extractor, and a track, the function $extract$ returns a list of feature vectors for that track. Each feature vector has equal dimension, and each vector corresponds to the equivalent interval in the interval index. As an example, if the feature vectors have dimension $d$, and if there are $m$ intervals defined by the segmenter, and if we adopt the convention that $\V$ represents a vector of length d, this function would return something with the following form:
mas02md@547 1150
mas02md@547 1151 \[ \langle V_{1}^{d}, V_{2}^{d} \dots V_{m}^{d} \rangle \]
mas02md@547 1152
mas02md@547 1153 When we apply this function to every track in a catalogue (which is a list of tracks) we simply determine a list of such lists. If there are $n$ tracks in a catalogue then our data would look something like the following.
mas02md@547 1154
mas02md@562 1155 \[ features = \langle ~~ \langle V_{11}^{d}, V_{12}^{d} \dots V_{1m_{1}}^{d} \rangle \\
mas02md@547 1156 \t3 ~~~ \langle V_{21}^{d}, V_{22}^{d} \dots V_{2m_{2}}^{d} \rangle \\
mas02md@547 1157 \t3 \dots \\
mas02md@547 1158 \t3 \dots \\
mas02md@547 1159 \t3 ~~~ \langle V_{n1}^{d}, V_{n2}^{d} \dots V_{nm_{n}}^{d} \rangle
mas02md@547 1160 ~~ \rangle
mas02md@547 1161 ~~ \]
mas02md@547 1162
mas02md@547 1163 The unit vector data has exactly the same structure but with different vectors all of dimension 1.
mas02md@547 1164
mas02md@562 1165 \[ unitfeatures = \langle ~~ \langle V_{11}^{1}, V_{12}^{1} \dots V_{1m_{1}}^{1} \rangle \\
mas02md@547 1166 \t4 ~~ \langle V_{21}^{1}, V_{22}^{1} \dots V_{2m_{2}}^{1} \rangle \\
mas02md@547 1167 \t4 \dots \\
mas02md@547 1168 \t4 \dots \\
mas02md@547 1169 \t4 ~~~ \langle V_{n1}^{1}, V_{n2}^{1} \dots V_{nm_{n}}^{1} \rangle
mas02md@547 1170 ~~ \rangle
mas02md@547 1171 ~~ \]
mas02md@547 1172
mas02md@547 1173 \subsection{Making Search Vectors}
mas02md@547 1174
mas02md@547 1175 Suppose we had the following feature vectors for a track.
mas02md@547 1176
mas02md@547 1177 \[ \langle V_{1}^{d}, V_{2}^{d} , V_{3}^{d} \rangle \]
mas02md@547 1178
mas02md@547 1179 Also suppose that our sequence length $sl$ had a value of 2 and we applied the function $makesearchvs$. We would then make search vectors as follows.
mas02md@547 1180
mas02md@547 1181 \[ \t3 \langle V_{1}^{d} \cat V_{2}^{d} , V_{2}^{d} \cat V_{3}^{d} \rangle \]
mas02md@547 1182
mas02md@562 1183
mas02md@562 1184 \end{enumerate}
mas02md@547 1185 \end{document}
mas02md@547 1186