changeset 19:f1f8c84339d0

Starting to revisit this EM logic and test in a single-column world
author Chris Cannam
date Wed, 26 Mar 2014 18:49:12 +0000
parents bbc8185ba511
children 982aa1197a7e
files notes/em.txt notes/hnmf.m yeti/em_onecolumn.yeti yeti/run.sh yeti/silvet.yeti yeti/test.wav
diffstat 6 files changed, 177 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/notes/em.txt	Wed Mar 26 18:49:12 2014 +0000
@@ -0,0 +1,63 @@
+
+I agree with you - having a look at a model that does not support 
+convolution would help. You'll find attached 'hnmf.m', which is 
+essentially the same model without convolution. So the simplified model 
+is: P(w,t) = P(t) \sum_{s,p} P(w|p,s)P(p|t)P(s|p,t)
+
+Also, a more recent (and much more efficient) version of the CMJ system 
+converts the model from a convolutive to a linear one, but still keeping 
+the shift-invariance support. That is achieved by having a pre-extracted 
+4-D dictionary that also supports templates that are pre-shifted across 
+log-frequency (so that the system would not need to compute the 
+convolutions during the EM step). I have uploaded the source code on 
+SoundSoftware [i], and you can find the related paper in [ii]. This 
+system has the exact same performance with the CMJ one, but is much 
+easier to understand/implement, and is over 50 times faster. 
+
+[i] https://code.soundsoftware.ac.uk/projects/amt_mssiplca_fast
+[ii] http://www.ecmlpkdd2013.org/wp-content/uploads/2013/09/MLMU_benetos.pdf
+
+> In eqn 12,
+>    Pt(p) =
+>        sum[w,f,s] ( P(p,f,s|w,t) Vw,t ) /
+>        sum[p,w,f,s] ( P(p,f,s|w,t) Vw,t )
+>
+> P(p,f,s|w,t) is the result of the E-step (and a time-frequency
+> distribution), and Vw,t is the input spectrogram (also a
+> time-frequency distribution), right?
+
+Right! Basically, P(p,f,s|w,t) is a 5-dimensional matrix, essentially 
+the model without the sums (the sums convert P(p,f,s|w,t) into a 2-D 
+matrix P(w,t)).
+
+> So I read this as something like: update the pitch probability
+> distribution for time t so that its value for a pitch p is the ratio
+> of the sum of the expression P(p,f,s|w,t) Vw,t for *that* pitch
+> variable to the sum of the same expression across *all* pitch
+> variables.
+
+The equation you put essentially takes the 5-dimensional quantity 
+P(p,f,s|w,t) Vw,t and marginalises it to P(p,t), i.e. it sums over all 
+other dimensions. All these 'unknown' parameters, e.g. P(s|p,t), are 
+generated from this 5-dimensional posterior distribution.
+
+> But what does it mean to refer to P(p,f,s|w,t) for a single pitch
+> variable, given that P(p,f,s|w,t) is just a time-frequency
+> distribution? There doesn't seem to be any dependence on p in it. I
+> think this is where I'm missing the (hopefully obvious) fundamental
+> thing.
+
+P(p,f,s|w,t) is not a time-frequency distribution; it is a 5-dimensional 
+posterior distribution of the 3 unknown model parameters given time and 
+frequency.
+
+The basic concept of EM is that you have a latent variable in your 
+model, e.g. p; in the E-step, you compute the posterior given the 
+known/input data (e.g. P(p|w,t)). For the M-step, you compute the 
+complete likelihood given the original input, P(p|w,t)Vwt; and you 
+marginalise over the variables that you don't care about, e.g. if you 
+want to find P(p|t), you compute \sum_w P(p|w,t)V_wt; finally, you 
+normalise that result according to your model, so if your model has a 
+P(p|t) component, you normalise so that P(p) for a given timeframe sums 
+to one (this is the denominator in the equation you showed).
+
--- a/notes/hnmf.m	Wed Mar 26 11:44:51 2014 +0000
+++ b/notes/hnmf.m	Wed Mar 26 18:49:12 2014 +0000
@@ -68,7 +68,7 @@
 for it = 1:iter
     
     % E-step
-    zh = z .* permute(repmat(h,[1 1 R]),[3 1 2]);
+    zh = z .* permute(repmat(h,[1 1 R]),[3 1 2]); %% z is the source activation distribution, h the component (pitch) activation
     xa=eps;
     for r=1:R
         for k=1:K
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/yeti/em_onecolumn.yeti	Wed Mar 26 18:49:12 2014 +0000
@@ -0,0 +1,89 @@
+
+module em_onecolumn;
+
+mm = load may.mathmisc;
+vec = load may.vector;
+mat = load may.matrix;
+
+inRange ranges instrument note =
+    note >= ranges[instrument].lowest and note <= ranges[instrument].highest;
+
+initialise ranges templates notes =
+   (instruments = keys ranges;
+    {
+        pitches = // z in the original. 1 per note
+            vec.randoms notes,
+        sources = // u in the original. 1 per note-instrument
+            mapIntoHash id
+                do instrument:
+                    vec.fromList
+                       (map do note:
+                            if inRange ranges instrument note then 1 else 0 fi
+                        done [0..notes-1])
+                done instruments,
+        instruments,
+        templates,
+        ranges,
+        lowest = head
+           (sort (map do i: ranges[i].lowest done instruments)),
+        highest = head (reverse
+           (sort (map do i: ranges[i].highest done instruments))),
+    });
+
+epsilon = 1e-16;
+
+select predicate = concatMap do v: if predicate v then [v] else [] fi done;
+
+distributionsFor data instrument note =
+    {
+        w = mat.getColumn note data.templates[instrument],
+        p = vec.at data.pitches note,
+        s = vec.at data.sources[instrument] note,
+    };
+
+performExpectation data column =
+   (estimate = 
+        fold do acc instrument:
+            fold do acc note:
+                { w, p, s } = distributionsFor data instrument note;
+                vec.add [acc, vec.scaled (p * s) w];
+            done acc [data.ranges[instrument].lowest .. 
+                      data.ranges[instrument].highest]
+        done (vec.consts epsilon (vec.length column)) data.instruments;
+    vec.divide column estimate);
+
+performMaximisation data column q =
+   (pitchTop = vec.fromList
+       (map do note:
+            fold do acc instrument:
+                { w, p, s } = distributionsFor data instrument note;
+                fold do acc bin:
+                    acc + s * (vec.at w bin) * (vec.at column bin);
+                done acc [0..vec.length column - 1]
+            done epsilon data.instruments;
+        done [data.lowest .. data.highest]);
+    pitchBottom = vec.sum pitchTop;
+
+/*    sourceTop = vec.fromList
+       (map do instrument:
+            fold do acc note:
+                { w, p, s } = distributionsFor data instrument note;
+                fold do acc bin:
+                    acc + p * (vec.at w bin) * (vec.at column bin);
+                done acc [0..vec.length column - 1]
+            done epsilon [data.lowest .. data.highest]
+        done data.instruments);
+    sourceBottom = vec.sum sourceTop;
+*/
+    data with { 
+        pitches = vec.divideBy pitchBottom pitchTop,
+//        sources = vec.divideBy sourceBottom sourceTop
+    });
+
+{
+    initialise,
+    performExpectation,
+    performMaximisation,
+}
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/yeti/run.sh	Wed Mar 26 18:49:12 2014 +0000
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+CLASSPATH=../../constant-q-cpp/yeti/cqt.jar ../../may/bin/yc ./silvet.yeti
--- a/yeti/silvet.yeti	Wed Mar 26 11:44:51 2014 +0000
+++ b/yeti/silvet.yeti	Wed Mar 26 18:49:12 2014 +0000
@@ -5,6 +5,7 @@
 { loadTemplates, extractRanges } = load templates;
 
 em = load em;
+em1 = load em_onecolumn;
 
 mat = load may.matrix;
 vec = load may.vector;
@@ -22,6 +23,7 @@
 
 columns = prepareTimeFrequency "test.wav";
 
+/*
 height = if empty? columns then 0 else vec.length (head columns) fi;
 
 chunkSize = { rows = height, columns = 100 };
@@ -58,6 +60,25 @@
 \() (plot.plot [ Grid error ]);
 
 \() (plot.plot [ Grid newP ]);
+*/
+
+em1data = em1.initialise ranges templates 88;
+
+col = head (drop 35 columns);
+
+\() (plot.plot [ Vector col ]);
+
+oneIteration em1data col =
+   (q = em1.performExpectation em1data col;
+    \() (plot.plot [ Vector col, Vector q ]);
+    newdata = em1.performMaximisation em1data col q;
+    \() (plot.plot [ Vector (em1data.pitches), Vector (newdata.pitches) ]);
+    newdata);
+
+em1data = oneIteration em1data col;
+em1data = oneIteration em1data col;
+em1data = oneIteration em1data col;
+
 
 ();
 
Binary file yeti/test.wav has changed