comparison docs/spec/spec.tex @ 562:dfeb5ef768da

Usually a commitment phobe but everybody has they're first time. Include mant structural changes and additional definition in line with what Christophe has asked for. Some of the schemas are more complicated than Id like - I will try to think of more elegant ways of specification.
author mas02md
date Fri, 17 Apr 2009 16:52:15 +0000
parents 5a248cedd3e9
children 365d3c500ef3
comparison
equal deleted inserted replaced
557:d3c96cbb91e3 562:dfeb5ef768da
9 \author{Mark d'Inverno and Christophe Rhodes} 9 \author{Mark d'Inverno and Christophe Rhodes}
10 10
11 11
12 \begin{document} 12 \begin{document}
13 \maketitle 13 \maketitle
14
15 14
16 %%\begin{gendef}[X] 15 %%\begin{gendef}[X]
17 %% concat : (\seq (\seq X)) \fun (\seq X) 16 %% concat : (\seq (\seq X)) \fun (\seq X)
18 %%\where 17 %%\where
19 %% \forall xs : \seq X; xss : \seq (\seq X) @ \\ 18 %% \forall xs : \seq X; xss : \seq (\seq X) @ \\
30 %% \t2 (\langle (0 \upto n-1) \dres xs \rangle) \cat (nfront~n~(tail~xs)) 29 %% \t2 (\langle (0 \upto n-1) \dres xs \rangle) \cat (nfront~n~(tail~xs))
31 %% \end{gendef} 30 %% \end{gendef}
32 31
33 \section{The System Instance} 32 \section{The System Instance}
34 33
34 \newcommand{\LET}{\mathrel{\sf Let}}
35
35 \newcommand{\mylet}{\methrel{\sf Let}} 36 \newcommand{\mylet}{\methrel{\sf Let}}
36 \newcommand{\FV}{\mathrel{~FV}} 37 \newcommand{\FV}{\mathrel{~FV}}
37 \newcommand{\V}{\mathrel{~FV^{d}}} 38 \newcommand{\V}{\mathrel{~FV^{d}}}
38 \newcommand{\U}{\mathrel{~FV^{1}}} 39 \newcommand{\U}{\mathrel{~FV^{1}}}
39 \newcommand{\R}{\mathrel{~R}} 40 \newcommand{\R}{\mathrel{~R}}
61 %% \end{zed} 62 %% \end{zed}
62 63
63 %% \begin{axdef} 64 %% \begin{axdef}
64 %% \mylog : \R \fun \R 65 %% \mylog : \R \fun \R
65 %% \end{axdef} 66 %% \end{axdef}
66
67
68
69 67
70 % For every track it is possible to determine the lenth. 68 % For every track it is possible to determine the lenth.
71 69
72 % \begin{axdef} 70 % \begin{axdef}
73 % lenth : Track \fun Time 71 % lenth : Track \fun Time
116 \where 114 \where
117 tracks = \{ cat : collection; t : Track | t \in (\ran cat) @ t \} \\ 115 tracks = \{ cat : collection; t : Track | t \in (\ran cat) @ t \} \\
118 \dom name = tracks 116 \dom name = tracks
119 \end{schema} 117 \end{schema}
120 118
121
122
123
124 \item \textsf{Time} - the set of non-negative reals. 119 \item \textsf{Time} - the set of non-negative reals.
125 120
126 121
127 \begin{zed} 122 \begin{zed}
128 Time == \R 123 Time == \R
129 \end{zed} 124 \end{zed}
130
131
132
133 125
134 126
135 \item \textsf{Interval} - defined as a continuous set of real numbers, represented as an ordered pair of reals with the second of the pair strictly greater. 127 \item \textsf{Interval} - defined as a continuous set of real numbers, represented as an ordered pair of reals with the second of the pair strictly greater.
136 128
137 \begin{flushright} 129 \begin{flushright}
205 \draw[->] (1.4,0.9) -- (1.5,0.2); 197 \draw[->] (1.4,0.9) -- (1.5,0.2);
206 \draw[o->] (-0.5,0.2) -- (2,0.2) node[anchor=north west] {\small $\mathbb{R}$}; 198 \draw[o->] (-0.5,0.2) -- (2,0.2) node[anchor=north west] {\small $\mathbb{R}$};
207 \end{tikzpicture} 199 \end{tikzpicture}
208 \end{flushright} 200 \end{flushright}
209 201
210 %% \begin{zed} 202 %%unchecked
211 %% ContinuousIntervalIndex == IntervalIndex 203 \begin{zed}
204 ContIntIndex == \\
205 \t1 \{ ii : IntervalIndex; i_j, i_{j+1}: Interval | \\
206 \t3 \langle i_j, i_{j+1} \rangle \inseq ii \implies second ~ i_j \geq first ~ i_{j+1}@ ii \}
207 \end{zed}
208
209 % In fact this enables us to specify such a data structure simply as a sequence of increasing times as we can assume the track starts at 0.
210
211 % \begin{zed}
212 % ContIntIndex == \iseq Time \\ \\ \\
213 % \end{zed}
214
215 %% \begin{zed}
216 %% ContIntIndex == IntervalIndex
212 %% \end{zed} 217 %% \end{zed}
213
214 %%unchecked
215 \begin{zed}
216 ContinuousIntervalIndex == \\
217 \t1 \{ ii : IntervalIndex; i_j,
218 i_{j+1}: Interval | \\
219 \t3 \langle i_j, i_{j+1} \rangle \inseq ii \implies second ~ i_j \geq first ~ i_{j+1}@ ii \}
220 \end{zed}
221
222 218
223 \item A \textsf{Duration} is an amount of time. 219 \item A \textsf{Duration} is an amount of time.
224 220
225 \begin{zed} 221 \begin{zed}
226 Duration == Time 222 Duration == Time
239 \where 235 \where
240 \forall i : Interval @ intervalduration ~ i = second ~ i - first ~ i 236 \forall i : Interval @ intervalduration ~ i = second ~ i - first ~ i
241 \end{axdef} 237 \end{axdef}
242 238
243 239
244 \item \textsf{Index Duration} is the duration of a Continuous Interval Index. It is calculated by subtracting the first element of the first interval from the second element of the last interval. 240 \item \textsf{Index Duration} is the duration of a Continuous Interval Index which is simply the second element of the last interval pair.
245
246 241
247 \begin{axdef} 242 \begin{axdef}
248 indexduration : ContinuousIntervalIndex \fun Time 243 indexduration : ContIntIndex \fun Time
249 \where 244 \where
250 \forall cii : ContinuousIntervalIndex @ \\ 245 \forall cii : ContIntIndex @ indexduration~cii = first (last~cii)
251 \t1 indexduration~cii = second (last~cii) - first (head~cii)
252 \end{axdef} 246 \end{axdef}
253 247
254 \item \textsf{Segmenter} - a process which computes an interval index for any track, represented as a function which maps any track to an interval index. 248 \item \textsf{Segmenter} - a process which computes an interval index for any track, represented as a function which maps any track to an interval index.
255 249
256 \begin{flushright} 250 \begin{flushright}
258 \input interval-index.tikz 252 \input interval-index.tikz
259 \input segmented-track.tikz 253 \input segmented-track.tikz
260 \end{tikzpicture} 254 \end{tikzpicture}
261 \end{flushright} 255 \end{flushright}
262 256
263 \begin{zed} 257
264 Segmenter == Track \fun IntervalIndex \\ 258 In this specification we will consider segmenters that produce a continuous interval index.
259
260 \begin{zed}
261 Segmenter == Track \fun ContIntIndex \\
265 \end{zed} 262 \end{zed}
266 263
267 \item The \textsf{Length} of an interval index is the number of intervals contained within it. 264 \item The \textsf{Length} of an interval index is the number of intervals contained within it.
268 265
269 \begin{axdef} 266 \begin{axdef}
469 \draw node[anchor=south east] at (-2.4,0.1) {\small $d=1$}; 466 \draw node[anchor=south east] at (-2.4,0.1) {\small $d=1$};
470 \end{scope} 467 \end{scope}
471 \end{tikzpicture} 468 \end{tikzpicture}
472 \end{flushright} 469 \end{flushright}
473 470
474 \item \textsf{Catalogue Feature Vectors} - a sequence of track feature vectors for each track in the target instance ($catfeatures$) 471 \item \textsf{Catalogue Feature Vectors} - a sequence of track feature vectors for each track in the instance ($features$)
475 472
476 \item \textsf{Catalogue Unit Vectors} - a sequence of track unit vectors for each track in the target instance ($catunits$) 473 \item \textsf{Catalogue Unit Vectors} - a sequence of track unit vectors for each track in the instance ($unitfeatures$)
477 474
478 \item \textsf{Instance} - a catalogue, segmenter, feature, extractor, unit feature, unit extractor, dimension, catalogue feature vectors, 475 \item \textsf{Catalogue Index} - a sequence of continuous interval indexes (one for each track).
476
477 An example can be seen below
478
479 \[ \langle
480 \langle (0,r_{11}), (r_{11},r_{12}), (r_{12}, r_{13}) \rangle,
481 \langle (0,r_{21}), (r_{21},r_{22}), (r_{22}, r_{23}, (r_{23}, r_{24}) \rangle,
482 \langle (0,r_{31}), (r_{31},r_{32}), (r_{32}, r_{33}) \rangle,
483 \rangle \]
484
485 \item \textsf{Instance} - a catalogue, segmenter, feature, extractor, unit feature, unit extractor, dimension, feature vectors, catal
479 486
480 % \emph{In our usage (see \textsf{Absolute} \and \textsf{Relative} in section \ref{s:refining}), we require certain semantics of the \textsf{Unit Feature} (and so of the values returned by its \textit{Unit Extractors}. Specifically, the values returned must be interpretable on a logarithmic scale in Bels, with the maximum possible value being the reference threshold at 0B.} 487 % \emph{In our usage (see \textsf{Absolute} \and \textsf{Relative} in section \ref{s:refining}), we require certain semantics of the \textsf{Unit Feature} (and so of the values returned by its \textit{Unit Extractors}. Specifically, the values returned must be interpretable on a logarithmic scale in Bels, with the maximum possible value being the reference threshold at 0B.}
481 488
482 \begin{schema}{Instance} 489 \begin{schema}{Instance}
483 cat : Catalogue \\ 490 cat : Catalogue \\
485 seg : Segmenter \\ 492 seg : Segmenter \\
486 f : Feature \\ 493 f : Feature \\
487 x : Extractor \\ 494 x : Extractor \\
488 uf : UnitFeature \\ 495 uf : UnitFeature \\
489 ux : UnitExtractor \\ 496 ux : UnitExtractor \\
490 d : Dimension \\ 497 d : Dimension \\
491 catfeatures : \seq ~ ( \seq \V) \\ 498 index : \seq ContIntIndex \\
492 catunits : \seq ~ (\seq \U) 499 features : \seq ~ ( \seq \V) \\
500 unitfeatures : \seq ~ (\seq \U) \\
493 \where 501 \where
494 d = f.fdimension \\ 502 d = f.fdimension \\
495 catfeatures = map ~ (extract~seg~x) ~ cat \\ 503 index = map~seg~cat \\
496 catunits = map ~ (extract~seg~ux) ~ cat \\ 504 features = map ~ (extract~seg~x) ~ cat \\
505 unitfeatures = map ~ (extract~seg~ux) ~ cat \\
497 \end{schema} 506 \end{schema}
498 507
499 \item \textsf{System Instances} - the set of System instances. 508 \item \textsf{Singleton Instances} - the set of System instances which contain only one track
500 509
501 Note that every instance must contain a catalogue in the music collection but not all catalogues in the collection will be part of an instance. 510 \begin{schema}{SingletonInstance}
511 Instance \\
512 \where
513 \# cat = 1
514 \end{schema}
515
516 \item \textsf{System Instances} - the set of system instances.
517
518 Note that every instance must contain a catalogue in the music collection but not all catalogues in the collection are necessarily part of an instance.
502 519
503 \begin{schema}{SystemInstances} 520 \begin{schema}{SystemInstances}
504 Collection \\ 521 Collection \\
505 instances : \power Instance 522 instances : \power Instance
506 \where 523 \where
507 \{ i : instances @ i.cat \} \subseteq collection 524 \{ i : instances @ i.cat \} \subseteq collection
508 \end{schema} 525 \end{schema}
509 526
510 \section{Search} 527 \section{Search Vectors}
511
512 \item \textsf{Search Vectors}
513
514 Searching takes place using concatenations of feature vectors to build \emph{search} vectors. For a particular query all search vectors will have a fixed length equal to some multiple (which we will refer to as $sl$) of the dimension of the orignal feature vectors ($d$). 528 Searching takes place using concatenations of feature vectors to build \emph{search} vectors. For a particular query all search vectors will have a fixed length equal to some multiple (which we will refer to as $sl$) of the dimension of the orignal feature vectors ($d$).
515 529
516 In the schema below, the function $makesearchvs$ takes a natual number $sl$ and a sequence of feature vectors (associated with a track) to create a sequence of search vectors. The first element of the returned sequence is the concatenation of the first $sl$ feature vectors, the second element is also the concatenation the next $sl$ feature vectors but starting from the second element, and so on until all such sequences are formed. It should be clear that if the original sequence contains $n$ feature vectors then the output will contain $n-sl+1$ vectors. 530 In the schema below, the function $makesearchvs$ takes a natual number $sl$ and a sequence of feature vectors (associated with a track) to create a sequence of search vectors. The first element of the returned sequence is the concatenation of the first $sl$ feature vectors, the second element is also the concatenation the next $sl$ feature vectors but starting from the second element, and so on until all such sequences are formed. It should be clear that if the original sequence contains $n$ feature vectors then the output will contain $n-sl+1$ vectors.
517 531
518 \begin{flushright} 532 \begin{flushright}
611 \t1 sl > \# xs \implies makesearchvs~sl~xs = \langle \rangle \land \\ 625 \t1 sl > \# xs \implies makesearchvs~sl~xs = \langle \rangle \land \\
612 \t1 sl \leq \# xs \implies makesearchvs~sl~xs = \\ 626 \t1 sl \leq \# xs \implies makesearchvs~sl~xs = \\
613 \t2 concat (\langle (0 \upto sl-1) \dres xs \rangle) \cat makesearchvs~sl (tail~xs) 627 \t2 concat (\langle (0 \upto sl-1) \dres xs \rangle) \cat makesearchvs~sl (tail~xs)
614 \end{axdef} 628 \end{axdef}
615 629
616 \subsection{Identifying Source and Target} 630
617 631 \item \textsf{Instance Search Vectors}
618 A search vector is made from a source (sequence) and used to match against a user-defined target (instance). 632
619 633 For any natural number less than the length of feature vectors for each track we can calculate the search vectors for an given by applying the $makesearchvs$ function to each of the sequences of feature vectors. We call this natural number \emph{search length} ($sl$) and we can use $map$ to apply $makesearchvs ~ sl$ to each of these sequences.
620 \item \textsf{Source} - a track identified by the user in order to define a search. 634
621 635 \begin{schema}{SearchVectors}
622 \item \textsf{Target} - the instance used to match the source against. 636 i? : Instance \\
623 637 sl? : \nat \\
624 \begin{schema}{IdentifySourceTarget} 638 searchvs! : \seq (\seq \Vdsl) \\
625 source? : Track \\ 639 unitvs! : \seq (\seq \Vsl) \\
626 tgt? : Instance \\ 640 \where
627 SystemInstances 641 searchvs! = map ~ (makesearchvs ~ sl?) ~ i?.features \\
628 \where 642 unitvs! = map ~ (makesearchvs ~ sl?) ~ i?.features \\
629 source? \in tracks \\
630 tgt? \in instances
631 \end{schema}
632
633 In addition the user can define the part of the source track to be used in the query. This is known as a sequence. A user defines the sequence by specifying the start and end points of the sequence index. (Note that there is an underlying assumption that the segmenter of the target returns a continuous interval index when applied to the source.)
634
635 \item \textsf{Track Part} - a continuous sub-section of a track.
636
637 \begin{zed}
638 TrackPart == Track
639 \end{zed}
640
641 \item \textsf{Sequence} - the track part required for the search.
642
643 \item \textsf{Sequence Index} - a continuous interval index that defines the sequence.
644
645 \item \textsf{Sequence Length} - the number of intervals in the sequence index.
646
647 \begin{schema}{IdentifySequence}
648 IdentifySourceTarget \\
649 start?, end? : \nat \\
650 seqindex : ContinuousIntervalIndex \\
651 sl : \nat \\
652 sequence : TrackPart
653 \where
654 start? \in \dom (tgt?.seg (source?)) \\
655 end? \in \dom (tgt?.seg (source?)) \\
656 seqindex = (start? \upto end?) \dres (tgt?.seg~source?) \\
657 sl = end? - start? \\
658 \end{schema}
659
660 \subsection{Source Search and Feature Vectors}
661
662 \item The \textsf{Source Feature Vectors} are the set of feature vectors of the sequence.
663
664 \item The \textsf{Source Unit Vectors} are the set of unit vectors of the sequence.
665
666 \begin{schema}{SourceFeatureVectors}
667 IdentifySequence \\
668 sourcefeatures : \seq \V \\
669 sourceunits : \seq \U \\
670 \where
671 sourcefeatures = extract ~tgt?.seg~tgt?.x~sequence \\
672 sourceunits = extract ~ tgt?.seg~tgt?.ux~sequence \\
673 \end{schema}
674
675 \item \textsf{Source Search Vector} - the concatenation of the feature vectors of the source sequence.
676
677 \item \textsf{Source Unit Search Vector} - the concatenation of the unit vectors of the source sequence.
678
679 \begin{schema}{SourceSearchVectors}
680 SourceFeatureVectors \\
681 sourcesearch : \Vdsl \\
682 sourcesearchunits : \Vsl \\
683 \where
684 sourcesearch = (head~(makesearchvs~sl~sourcefeatures)) \\
685 sourcesearchunits = (head~(makesearchvs~sl~sourcefeatures)) \\
686 \end{schema}
687
688 \subsection{Target Search Vectors}
689
690 \item \textsf{Target Search Vectors}
691
692 We can define these in exactly the same way as for the source, but as we now have a sequence of sequences of search vectors we have to use the map function to apply the $makesearchveors$ to each list
693
694 \begin{schema}{TargetSearchVectors}
695 IdentifySourceTarget \\
696 IdentifySequence\\
697 targetsearchvs : \seq (\seq \Vdsl) \\
698 targetunitsearchvs : \seq (\seq \Vsl) \\
699 \where
700 targetsearchvs = \\
701 \t1 map ~ (makesearchvs ~ sl) ~ tgt?.catfeatures \\
702 targetunitsearchvs = \\
703 \t1 map ~ (makesearchvs ~ sl) ~ tgt?.catfeatures \\
704 \end{schema} 643 \end{schema}
705 644
706 \item \textsf{Hopped Search Vectors} 645 \item \textsf{Hopped Search Vectors}
707 646
708 Rather then generating all possible search vectors we may wish to general vectors starting at equally distanced intervals in the interval index of the tracks in an instance. Re refer to the size of this interval as $hop$. 647 Rather then generating all possible search vectors we may wish to general vectors starting at equally distanced intervals in the interval index of the tracks in an instance. Re refer to the size of this interval as $hop$.
709
710 648
711 \begin{axdef} 649 \begin{axdef}
712 makehopsearchvs : \nat \fun \nat \fun \seq \Vd \fun \seq \Vdsl 650 makehopsearchvs : \nat \fun \nat \fun \seq \Vd \fun \seq \Vdsl
713 \where 651 \where
714 \forall xs : \seq \FV ; sl : \nat ; hop : \nat @ \\ 652 \forall xs : \seq \FV ; sl : \nat ; hop : \nat @ \\
716 \t1 sl \leq \# xs \implies makehopsearchvs~sl~hop~xs = \\ 654 \t1 sl \leq \# xs \implies makehopsearchvs~sl~hop~xs = \\
717 \t2 concat (\langle (0 \upto sl-1) \dres xs \rangle) \cat \\ 655 \t2 concat (\langle (0 \upto sl-1) \dres xs \rangle) \cat \\
718 \t2 makehopsearchvs~sl~hop~ ((0 \upto (hop -1)) \ndres xs) 656 \t2 makehopsearchvs~sl~hop~ ((0 \upto (hop -1)) \ndres xs)
719 \end{axdef} 657 \end{axdef}
720 658
721 This enables us to define a hop size when generating target search vectors 659 This enables us to define a hop size when generating search vectors
722 660
723 \begin{schema}{TargetSearchHopVectors} 661 \begin{schema}{HopSearchVectors}
662 SearchVectors \\
724 hop? : \nat \\ 663 hop? : \nat \\
725 TargetSearchVectors \\ 664 hopsearchvs! : \seq (\seq \Vdsl) \\
726 targethopsearchvs : \seq (\seq \Vdsl) \\ 665 hopunitvs! : \seq (\seq \Vsl) \\
727 targethopunitsearchvs : \seq (\seq \Vsl) \\ 666 \where
728 \where 667 hopsearchvs! = map ~ (makehopsearchvs ~i?.d ~ hop?) ~ i?.features \\
729 targethopsearchvs = \\ 668 hopunitvs! = map ~ (makehopsearchvs ~ i?.d ~ hop?) ~ i?.features \\
730 \t1 map ~ (makehopsearchvs ~sl ~ hop?) ~ tgt?.catfeatures \\ 669 \end{schema}
731 targethopunitsearchvs = \\ 670
732 \t1 map ~ (makehopsearchvs ~ sl ~ hop?) ~ tgt?.catfeatures \\ 671 \item \textsf{Search Vector Power}
733 \end{schema} 672
673 For each Search Vector in an instances search vectors we take the associated unit values and take the arithmetic mean.
674
675 \begin{axdef}
676 average : \V \fun \R
677 \end{axdef}
678
679 \begin{schema}{Powers}
680 SearchVectors \\
681 powers : \seq (\seq \R)
682 \where
683 powers = map~(map ~ average) ~ unitvs! \\
684 \end{schema}
685
686 \item \textsf{Search Vector Duration}
687
688 The duration of the sequence of a the original feature vectors from which the search vector is composed.
689
690 \begin{enumerate}
691
692 \item \textsf{Single Search Vector Duration}
693
694 This is defined as the duration of the sequence for each search vector. So for example if a search vector was made from the combination of 3 feature vectors and those 3 feature vectors came from the following intervals $ (3,8), (8,11), (11,13) $ then the duration of the search vector is $13-3=10$.
695
696 \item \textsf{Track Search Vector Durations}
697
698 Next we define a function at the level of track search vectors. We define a function which takes a sequence of search vectors for that track, the search length that was used to create them, and the track interval index and returns a sequence of natural numbers each which is the duration of the corresponding search vector
699
700 \begin{axdef}
701 durationsTsv : \seq \Vdsl \fun ContIntIndex \fun \nat \fun \seq \R
702 \where
703 \forall svs : \seq \Vdsl; cii : ContIntIndex; sl : \nat @ \\
704 \t1 durationsTsv~svs~cii~sl = \\
705 \t2 \{ n : \nat | n \in 1 \upto n - sl + 1 @ \\
706 \t3 (n, first (cii (n + sl)) - first (cii (n)) ) \}
707 \end{axdef}
708
709 \item \textsf{Instance Search Vector Durations}
710
711 Finally, we define a function called svdurations which takes a sequence of search vectors and calculate the track search vector durations for each sequence.
712
713 \begin{axdef}
714 durationsIsv : \seq (\seq \Vdsl) \fun (\seq ContIntIndex) \fun \nat \fun \seq (\seq \R) \\
715 \where
716 \forall svss : \seq (\seq \Vdsl); sl : \nat; ciis : \seq ContIntIndex @ \\
717 \t1 durationsIsv ~ svss ~ ciis ~ sl = \\
718 \t2 \langle durationsTsv ~ (head ~ svss) ~ (head ~ ciis) ~ sl \rangle \cat \\
719 \t3 durationsIsv ~ (tail ~ svss) ~ (tail ~ ciis) ~ sl \land \\
720 \t1 durationsIsv ~ \langle \rangle~ \langle \rangle ~ sl = \langle \rangle
721 \end{axdef}
734 722
735 \end{enumerate} 723 \end{enumerate}
736 724
737 725 \begin{schema}{Durations}
738 \section{Distance, Power and Duration} 726 SearchVectors \\
727 svdurations : \seq (\seq \R) \\
728 \where
729 svdurations = durationsIsv ~ searchvs! ~ i?.index ~ sl?
730 \end{schema}
731
732 % \end{enumerate}
733
734 \section{Making a Query}
735
736 \item \textsf{General Instance Query}
737
738 Two instances, a source and a target are identified by the user for comparison as well as the search length from which the search vectors are defined for both the source and the sequence. Once the query has been defined we can generate the \emph{source search vectors}, \emph{source unit vectors}, \emph{target search vectors} and \emph{target unit vectors} as specified earlier. We also generate \emph{source powers}, \emph{target powers}, \emph{source search vector durations} and \emph{target search vector durations}.
739
740 \emph{What are the pre-conditions regarding the search length? That it is less than the length of any sequence of feature vectors for every track?}
741
742 \begin{schema}{GeneralQuery}
743 SystemInstances \\
744 src?, tgt? : Instance \\
745 sl?: \nat \\
746 srcsearchvs!, tgtsearchvs! : \seq (\seq \Vdsl) \\
747 srcunitvs!, tgtunitvs! : \seq (\seq \Vsl) \\
748 sourcepowers, targetpowers : \seq (\seq \R) \\
749 sourcedurations, targetdurations : \seq (\seq \R) \\
750 \where
751 \{ src?, tgt? \} \subseteq instances \\
752 \end{schema}
739 753
740 \subsection{Distance of Source Search Vector from Target Search Vector} 754 \item \textsf{General Instance Self Query}
755
756 When an instance is identified as both the source and target we state that it is a self query.
757
758 \begin{schema}{GeneralSelfQuery}
759 GeneralQuery
760 \where
761 src? = tgt?
762 \end{schema}
763
764 \item Singleton (Track) Query
765
766 A user defines an instance with exactly one track as the source.
767
768 \begin{schema}{SingletonQuery}
769 GeneralQuery
770 \where
771 src? \in SingletonInstance
772 \end{schema}
773
774 \item Point (Sequence) Query
775
776 The user identifies a specific part of the track in a singleton instance to be used as the source known as a \emph{sequence}. A user defines the sequence by specifying the start and end points of the sequence index. Once these have been defined the sequence, sequence index and sequence length are easily identified.
777
778 \begin{enumerate}
779
780 \item \textsf{Source track} - the single track contained in the instance.
781
782 \item \textsf{Sequence} - a continuous sub-section of the point query track used to make a query.
783
784 \begin{zed}
785 Sequence == Track
786 \end{zed}
787
788 \item \textsf{Sequence Index} - a continuous interval index that defines the sequence according to the segmenter of the instance.
789
790 % \emph{Can we discuss this - is it reset so that the first element is indexed by 1 and so on?}
791
792 \item \textsf{Sequence Length} - the number of intervals in the sequence index.
793
794 \end{enumerate}
795
796 \begin{schema}{PointQuery}
797 SingletonQuery \\
798 start?, end? : \nat \\
799 sourcetrack! : Track \\
800 sequence! : Sequence \\
801 seqindex! : ContIntIndex \\
802 sl! : \nat \\
803 \where
804 sourcetrack! = head (src?.cat) \\
805 start? \in \dom (src?.seg (sourcetrack!)) \\
806 end? \in \dom (src?.seg (sourcetrack!)) \\
807 seqindex! = (start? \upto end?) \dres (src?.seg~sourcetrack!) \\
808 sl! = end? - start? \\
809 \end{schema}
810
811 In this case the search vectors will be equal to a sequence containing only one sequence. That sequence will itself consist of only one sequence which will be a search vector of dimension $d * sl$ where $d$ is the dimension of the sequence and $sl$ the length of the sequence.
812
813 \[ \langle \langle \Vdsl \rangle \rangle \]
814
815 \section{Distance between Search Vectors}
741 816
742 We define the scalar product and distance between vectors of equal dimension. 817 We define the scalar product and distance between vectors of equal dimension.
743 818
744 \begin{axdef} 819 \begin{axdef}
745 scalarproduct : \V \fun \V \fun \R \\ 820 scalarproduct : \V \fun \V \fun \R \\
748 823
749 \begin{zed} 824 \begin{zed}
750 \forall v_1, v_2 : \V @ distance~v_1~v_2 = 2 - scalarproduct~v_1~v_2 825 \forall v_1, v_2 : \V @ distance~v_1~v_2 = 2 - scalarproduct~v_1~v_2
751 \end{zed} 826 \end{zed}
752 827
753 Then we can define a variable which maintains a list of all distances between the source search vector and the target search vectors. 828 Then we can define a variable which maintains a list of all distances between the source and target search vectors. If we have a sequence of sequence of search vectors in the both the source and the target we will end up with a sequence of sequence of sequence of sequences.
754 829
755 \begin{schema}{Distances} 830 For the purposes of illumination imagine a source and target each with two simple search vectors. Again, we will replace the reals with naturals here.
756 distances : \seq (\seq \R) \\ 831
757 SourceSearchVectors \\ 832 Let us assume we have the following source search vectors for an instance of two tracks
758 TargetSearchVectors \\ 833
759 \where 834 \[ \langle \langle a_1, a_2, a_3 \rangle, \langle b_1, b_2 \rangle \rangle \]
760 distances = map (map (distance~sourcesearch)) targetsearchvs 835
761 \end{schema} 836 And the following target search vectors also for an instance of two tracks
762 837
763 \subsection{Average `power' of a search feature vector} 838 \[ \langle \langle x_1, x_2 \rangle, \langle y_1, y_2, y_3 \rangle \rangle \]
764 839
765 For each Search Feature Vector in the target, we take the associated unit values and take the arithmetic mean. 840 Then we need to generate the following data structure
766 841
767 \begin{axdef} 842 \[ \langle \\ % First track in source with target instance \\
768 average : \V \fun \R 843 \t1 \langle \\ % First Search Vector with whole of target \\
844 \t2 \langle \\ % First Search Vector with first track
845 \t3 \langle a_1 \dot x_1, a_1 \dot x_2 \rangle, \\
846 \t3 \langle a_1 \dot y_1, a_1 \dot y_2, a_1 \dot y_3 \rangle \\
847 \t2 \rangle, \\
848 % Second search vector
849 \t2 \langle \\
850 \t3 \langle a_2 \dot x_1, a_2 \dot x_2 \rangle, \\
851 \t3 \langle a_2 \dot y_1, a_2 \dot y_2, a_2 \dot y_3 \rangle \\
852 \t2 \rangle, \\
853 \t2 \langle \\
854 \t3 \langle a_3 \dot x_1, a_3 \dot x_2 \rangle, \\
855 \t3 \langle a_3 \dot y_1, a_3 \dot y_2, a_3 \dot y_3 \rangle \\
856 \t2 \rangle \\
857 \t1 \rangle, \\
858 % Second track in source with target instance
859 \t1 \langle, \\
860 \t2 \langle \\
861 \t3 \langle b_1 \dot x_1, b_1 \dot x_2 \rangle, \\
862 \t3 \langle b_1 \dot y_1, b_1 \dot y_2, b_1 \dot y_3 \rangle \\
863 \t2 \rangle \\
864 \t2 \langle \\
865 \t3 \langle b_2 \dot x_1, b_2 \dot x_2 \rangle, \\
866 \t3 \langle b_2 \dot y_1, b_2 \dot y_2, b_2 \dot y_3 \rangle \\
867 \t2 \rangle \\
868 \t1 \rangle \\
869 \rangle \\
870 \]
871
872
873 First let us define the function for a calculating the distances for a single search vector and general target search vectors
874
875 \emph{I couldn't do this on one go first time round - too much nesting for me!}
876
877 \begin{axdef}
878 vectordistances : \Vdsl \fun \seq (\seq \Vdsl) \fun \seq (\seq \R)
879 \where
880 \forall sv : \Vdsl; targetsv : \seq (\seq \Vdsl) @ \\
881 \t1 vectordistances~sv~targetsv = map (map (distance~sv)) targetsv
769 \end{axdef} 882 \end{axdef}
770 883
771 \begin{schema}{Powers} 884 Next we define the function for calculating the distances for the search vectors of a track with a target instance.
772 targetpowers : \seq (\seq \R) \\ 885
773 sourcepower : \R \\ 886 \begin{axdef}
774 SourceSearchVectors \\ 887 trackdistances : (\seq \Vdsl) \fun \seq (\seq \Vdsl) \fun \seq (\seq (\seq \R))
775 TargetSearchVectors \\ 888 \where
776 \where 889 \forall tracksv : \seq \Vdsl; targetsv : \seq (\seq \Vdsl) @ \\
777 targetpowers = map~(map ~ average) ~ targetsearchvs\\ 890 \t1 trackdistances~tracksv~targetsv = \\
778 sourcepower = head (map~average ~ sourceunits) 891 \t3 \langle vectordistances~(head ~ tracksv)~targetsv \rangle \cat \\
779 \end{schema} 892 \t4 trackdistances~(tail ~ tracksv) ~ targetsv \land \\
780 893 \t1 trackdistances~\langle \rangle ~targetsv = \langle \langle \langle \rangle \rangle \rangle
781 \subsection{Duration of a search feature vector} 894 \end{axdef}
782 895
783 Calculate the duration of the sequence for each search vector. (Made from the concatenation of feature vectors each associated with an interval. Add the intervals together to get the duration of the search vector.) 896 Then we can define a function which calculating the distances between a source and target instance.
784 897
785 \begin{schema}{Durations} 898 \begin{axdef}
786 targetdurations : \seq (\seq \R) \\ 899 instancedistances : \seq (\seq \Vdsl) \fun \seq (\seq \Vdsl) \fun \seq (\seq (\seq (\seq \R)))
787 sourceduration : \R \\ 900 \where
788 TargetSearchVectors \\ 901 \forall sourcesv, targetsv: \seq (\seq \Vdsl) @ \\
789 SourceSearchVectors \\ 902 \t1 instancedistances~sourcesv~targetsv = \\
790 \end{schema} 903 \t3 \langle trackdistances~(head ~ sourcesv)~targetsv \rangle \cat \\
791 904 \t4 instancedistances~(tail ~ sourcesv) ~ targetsv \land \\
792 \subsection{Compiled Data} 905 \t1 instancedistances~\langle \langle \rangle \rangle ~targetsv = \langle \langle \langle \langle \rangle \rangle \rangle \rangle
793 906 \end{axdef}
794 The we have a set of compiled data as follows. 907
795 908 The next scheme takes a general query and calculates all the distances.
796 \begin{schema}{CompiledData} 909
797 Distances \\ 910 \begin{schema}{CalculateDistances}
798 Powers \\ 911 GeneralQuery \\
799 Durations \\ 912 distances : \seq (\seq (\seq (\seq \R)))
913 \where
914 distances = instancedistances~srcsearchvs!~tgtsearchvs!
915 \end{schema}
916
917 The list can be sorted into ascending order and some threshold set (as we shall see) where we believe it is sensible to expect some acoustic or cognitive similarity.
918
919 Along with the distance between two search vectors, we also output the source track and index, the target track and index, as well as the duration the source search vector and power of the associated and corresponding unit vectors.
920
921 \begin{schema}{Output}
922 srctrack, srcindex, tgttrack, tgtindex : \nat \\
923 distance : \R \\
924 sourceduration, targetduration : \R \\
925 sourcepower, targetpower : \R \\
926 \end{schema}
927
928 The System outputs are a sequence of outputs ordered according to distance. We define a new variable which specifies the modified output that the user can make which we discuss in the next section.
929
930 \begin{schema}{SystemOutput}
931 CalculateDistances \\
932 output! : \seq Output \\
933 modifiedoutput! : \seq Output
934 \where
935 \forall o_1, o_2 : Output @ \langle o_1, o_2 \rangle \inseq output! \\
936 \t3 \implies o_1.distance \leq o_2.distance
800 \end{schema} 937 \end{schema}
801 938
802 \section{Refining a Search} 939 \section{Refining a Search}
803 \label{s:refining} 940 \label{s:refining}
804 941
805 System will first return the index of the track which gives the smallest distance from the source sequence search vector. Suppose this is at (3,4) in the list of lists. This signifies that the closest match is the 3rd track in the catalogue starting at the 4th interval. There are ways of refining the query. 942 There are ways of refining the query.
806 943
807 \begin{enumerate} 944 \begin{enumerate}
808 945
809 \item \textsf{Key List} - specify specific tracks within a catalogue to search over. 946 \item \textsf{Key List} - specify specific tracks to search over. (Either source or target or both.)
810 947
811 \item \textsf{Radius} - reject distances which are greater than a given real number radius. 948 \item \textsf{Radius} - reject distances which are greater than a given real number radius. (Either source or target or both.)
812 949
813 \item \textsf{Absolute} - reject any target search vectors where the `power' average is less than a specific absolute value. 950 \item \textsf{Absolute} - reject any search vectors where the `power' average is less than a specific absolute value. (Either source or target or either or both.)
814 951
815 \item \textsf{Relative} - reject any target search vectors where the `power' average is not within + or - a relative value. 952 \item \textsf{Relative} - reject any search vectors where the `power' average is not within + or - a relative value. (Either source or target or either or both.)
816 953
817 \item \textsf{Duration Ratio} - remove any search vectors whose total interval (duration) is sufficiently different from the duration of the source. 954 \item \textsf{Duration Ratio} - remove any outputs where the relative durations of the search vectors are not within a specified range.
818 955
819 \item \textsf{Hop Size} - Rather than making search vectors which start with the first feature vectors and then the second feature vector and so on, make sparser search vectors by starting with fv at 1, then fv at $1 + h$, then fv at $1 + 2h$ and so on where h is the hop size. 956 \item \textsf{Hop Size} - Rather than making search vectors which start with the first feature vectors and then the second feature vector and so on, make sparser search vectors by starting with fv at 1, then fv at $1 + h$, then fv at $1 + 2h$ and so on where h is the hop size. (Either source or target or both with either equal or separate values of hop.)
820 957
821 \end{enumerate} 958 \end{enumerate}
822 959
823 We specify what happens when values are given for any of these parameters.
824
825 First, we specify a single output as signifiying a track, a starting interval, a distance, a power and a duration
826
827 \begin{schema}{Output}
828 track : Track \\
829 index : \nat \\
830 distance : \R \\
831 duration : \R \\
832 power : \R \\
833 \end{schema}
834
835 The System outputs are a sequence of such outputs ordered according to distance. We define a new variable which specifies the modified output that the user can make.
836
837 \begin{schema}{SystemOutput}
838 CompiledData \\
839 output! : \seq Output \\
840 modifiedoutput! : \seq Output
841 \where
842 \forall o_1, o_2 : Output @ \langle o_1, o_2 \rangle \inseq output! \\
843 \t3 \implies o_1.distance \leq o_2.distance
844 \end{schema}
845
846 \begin{enumerate} 960 \begin{enumerate}
847 961
848 \item Keylist 962 \item Keylist
849 963
850 \begin{schema}{KeyList} 964 \begin{schema}{KeyListSource}
851 SystemOutput \\ 965 SystemOutput \\
852 k? : \power Track 966 k? : \power \nat
853 \where 967 \where
854 modifiedoutput! = output! \filter \{ o : Output | o.track \in k? \} 968 modifiedoutput! = output! \filter \{ o : Output | o.srctrack \in k? \}
855 \end{schema} 969 \end{schema}
856 970
857 \item Radius 971 \item Radius
858 972
859 The system removes any elements which have a distance greater than the radius. 973 The system removes any elements which have a distance greater than the radius.
864 \where 978 \where
865 modifiedoutput! = \\ 979 modifiedoutput! = \\
866 \t1 output! \filter \{ o : Output | o.distance \leq r? \} 980 \t1 output! \filter \{ o : Output | o.distance \leq r? \}
867 \end{schema} 981 \end{schema}
868 982
869 \item Absolute 983 \item Absolute Source
870 984
871 \begin{schema}{Absolute} 985 \begin{schema}{Absolute}
872 SystemOutput \\ 986 SystemOutput \\
873 a? : \R \\ 987 a? : \R \\
874 \where 988 \where
875 modifiedoutput! = \\ 989 modifiedoutput! = \\
876 \t1 output! \filter \{ o : Output | o.power \geq a? \} 990 \t1 output! \filter \{ o : Output | o.sourcepower \geq a? \}
877 \end{schema} 991 \end{schema}
878 992
879 \item Relative 993 \item Relative
880 994
881 %% \begin{axdef} 995 %% \begin{axdef}
882 %% abs : \R \fun \R 996 %% abs : \R \fun \R
883 %% \end{axdef} 997 %% \end{axdef}
884 998
885 999
1000
886 \begin{schema}{Relative} 1001 \begin{schema}{Relative}
887 SystemOutput \\ 1002 SystemOutput \\
888 rel? : \R \\ 1003 rel? : \R \\
889 \where 1004 % \where
890 modifiedoutput! = \\ 1005 % modifiedoutput! = \\
891 \t1 output! \filter \{ o : Output | abs (o.power - sourcepower) \leq rel? \} 1006 % \t1 output! \filter \{ o : Output | abs (o.power - sourcepower) \leq rel? \}
892 \end{schema} 1007 \end{schema}
893 1008
894 \item Duration Ratio 1009 \item Duration Ratio
895 1010
896 %%unchecked 1011 %%unchecked
897 \begin{schema}{DurationRatio} 1012 \begin{schema}{DurationRatio}
898 SystemOutput \\ 1013 SystemOutput \\
899 d? : \R \\ 1014 d? : \R \\
900 \where 1015 \where
901 modifiedoutput! = \\ 1016 modifiedoutput! = \\
902 \t1 output! \filter \{ o : Output | \exp ^ {abs \ln (\frac{o.duration}{sourceduration})} \leq d? \} 1017 \t1 output! \filter \{ o : Output | \exp ^ {abs \ln (\frac{o.duration}{srcduration})} \leq d? \}
903 \end{schema} 1018 \end{schema}
904 1019
905 \item Hop Size 1020 \item Hop Size
906 1021
907 \begin{schema}{Hop} 1022 \begin{schema}{Hop}
908 hop? : \nat \\ 1023 hop? : \nat \\
909 SystemOutput \\ 1024 SystemOutput \\
910 TargetSearchHopVectors \\ 1025 HopSearchVectors \\
911 \end{schema} 1026 \end{schema}
912 1027
913 \end{enumerate} 1028 \end{enumerate}
1029 \end{enumerate}
1030 \end{document}
914 1031
915 \section{Setting Thresholds} 1032 \section{Setting Thresholds}
916 1033
917 \subsection{For an Instance} 1034 \subsection{For an Instance}
918 1035
1016 1133
1017 \[ \langle V_{1}^{d}, V_{2}^{d} \dots V_{m}^{d} \rangle \] 1134 \[ \langle V_{1}^{d}, V_{2}^{d} \dots V_{m}^{d} \rangle \]
1018 1135
1019 When we apply this function to every track in a catalogue (which is a list of tracks) we simply determine a list of such lists. If there are $n$ tracks in a catalogue then our data would look something like the following. 1136 When we apply this function to every track in a catalogue (which is a list of tracks) we simply determine a list of such lists. If there are $n$ tracks in a catalogue then our data would look something like the following.
1020 1137
1021 \[ catfeatures = \langle ~~ \langle V_{11}^{d}, V_{12}^{d} \dots V_{1m_{1}}^{d} \rangle \\ 1138 \[ features = \langle ~~ \langle V_{11}^{d}, V_{12}^{d} \dots V_{1m_{1}}^{d} \rangle \\
1022 \t3 ~~~ \langle V_{21}^{d}, V_{22}^{d} \dots V_{2m_{2}}^{d} \rangle \\ 1139 \t3 ~~~ \langle V_{21}^{d}, V_{22}^{d} \dots V_{2m_{2}}^{d} \rangle \\
1023 \t3 \dots \\ 1140 \t3 \dots \\
1024 \t3 \dots \\ 1141 \t3 \dots \\
1025 \t3 ~~~ \langle V_{n1}^{d}, V_{n2}^{d} \dots V_{nm_{n}}^{d} \rangle 1142 \t3 ~~~ \langle V_{n1}^{d}, V_{n2}^{d} \dots V_{nm_{n}}^{d} \rangle
1026 ~~ \rangle 1143 ~~ \rangle
1027 ~~ \] 1144 ~~ \]
1028 1145
1029 The unit vector data has exactly the same structure but with different vectors all of dimension 1. 1146 The unit vector data has exactly the same structure but with different vectors all of dimension 1.
1030 1147
1031 \[ catunits = \langle ~~ \langle V_{11}^{1}, V_{12}^{1} \dots V_{1m_{1}}^{1} \rangle \\ 1148 \[ unitfeatures = \langle ~~ \langle V_{11}^{1}, V_{12}^{1} \dots V_{1m_{1}}^{1} \rangle \\
1032 \t4 ~~ \langle V_{21}^{1}, V_{22}^{1} \dots V_{2m_{2}}^{1} \rangle \\ 1149 \t4 ~~ \langle V_{21}^{1}, V_{22}^{1} \dots V_{2m_{2}}^{1} \rangle \\
1033 \t4 \dots \\ 1150 \t4 \dots \\
1034 \t4 \dots \\ 1151 \t4 \dots \\
1035 \t4 ~~~ \langle V_{n1}^{1}, V_{n2}^{1} \dots V_{nm_{n}}^{1} \rangle 1152 \t4 ~~~ \langle V_{n1}^{1}, V_{n2}^{1} \dots V_{nm_{n}}^{1} \rangle
1036 ~~ \rangle 1153 ~~ \rangle
1044 1161
1045 Also suppose that our sequence length $sl$ had a value of 2 and we applied the function $makesearchvs$. We would then make search vectors as follows. 1162 Also suppose that our sequence length $sl$ had a value of 2 and we applied the function $makesearchvs$. We would then make search vectors as follows.
1046 1163
1047 \[ \t3 \langle V_{1}^{d} \cat V_{2}^{d} , V_{2}^{d} \cat V_{3}^{d} \rangle \] 1164 \[ \t3 \langle V_{1}^{d} \cat V_{2}^{d} , V_{2}^{d} \cat V_{3}^{d} \rangle \]
1048 1165
1166
1167 \end{enumerate}
1049 \end{document} 1168 \end{document}
1050 1169