annotate yeti/timefreq.yeti @ 325:4cf4313d7e30 livemode

Always use q=0.8 and accept the hit on speed -- the templates are made for that configuration and it does work better. Also some adjustments to thresholding and peak picking for live mode in particular.
author Chris Cannam
date Mon, 18 May 2015 13:58:27 +0100
parents 0f6db1895e1c
children
rev   line source
Chris@12 1
Chris@12 2 module timefreq;
Chris@12 3
Chris@12 4 // Obtain the time-frequency representation (based on constant-Q
Chris@12 5 // transform) as transcription input
Chris@12 6
Chris@12 7 af = load may.stream.audiofile;
Chris@12 8 mat = load may.matrix;
Chris@12 9 plot = load may.plot;
Chris@12 10 vec = load may.vector;
Chris@12 11
Chris@12 12 { pow } = load may.mathmisc;
Chris@12 13
Chris@12 14 { resampledTo } = load may.stream.resample;
Chris@12 15
Chris@12 16 { cqt } = load cqt;
Chris@12 17
Chris@12 18 prepareTimeFrequency wavfile = // Returns a list of spectrogram column vectors computed from the audio file named by wavfile.
Chris@12 19 (stream = resampledTo 44100 (af.openMono wavfile);
Chris@12 20 // Reject streams whose length is not Known; streamLength is otherwise only used in the diagnostic message below.
Chris@12 21 streamLength =
Chris@12 22 case stream.available of
Chris@12 23 Known n: n;
Chris@12 24 _: failWith "Audio file length unknown?!";
Chris@12 25 esac;
Chris@12 26
Chris@12 27 eprintln "streamLength = \(streamLength)";
Chris@12 28
Chris@12 29 //!!! original also scales to peak = 0.5
Chris@12 30 // Constant-Q transform of the stream: 60 bins per octave, from 27.5 up to one third of the stream's sample rate.
Chris@12 31 cq = cqt {
Chris@12 32 maxFreq = stream.sampleRate / 3,
Chris@12 33 minFreq = 27.5,
Chris@12 34 binsPerOctave = 60
Chris@12 35 } stream;
Chris@12 36
Chris@12 37 //!!! note: original also modifies the Q and atomHopFactor
Chris@12 38 eprintln "atomSpacing = \(cq.kernel.atomSpacing)";
Chris@12 39 // Request the Spectrogram output; anything other than a Real list of matrices is treated as an error.
Chris@12 40 matrices = case cq.output (Spectrogram ()) of
Chris@12 41 Real mm: mm;
Chris@12 42 _: failWith "Expected real";
Chris@12 43 esac;
Chris@12 44 // NOTE(review): head matrices in the message below presumably fails if matrices is empty -- confirm.
Chris@12 45 eprintln "have \(length matrices) matrices of size \(mat.size (head matrices)), isRowMajor? = \(mat.isRowMajor? (head matrices))";
Chris@12 46 // Sum each column of every matrix, giving one level per column across the whole output.
Chris@12 47 levels = concatMap do m:
Chris@12 48 map do c: vec.sum c done (mat.asColumns m);
Chris@12 49 done matrices;
Chris@12 50 // Trim to the span between the first and the last column whose level exceeds 0.1.
Chris@12 51 nztail = find (> 0.1) levels;
Chris@12 52 nzonly = reverse (find (> 0.1) (reverse nztail));
Chris@12 53
Chris@12 54 eprintln "non-zero columns start at \(length levels - length nztail), go on for \(length nzonly) [of \(length levels)]";
Chris@12 55 // Scale the column counts by atomSpacing (presumably giving offsets in samples -- TODO confirm).
Chris@12 56 nzstart = (length levels - length nztail) * cq.kernel.atomSpacing;
Chris@12 57 nzduration = (length nzonly) * cq.kernel.atomSpacing;
Chris@12 58
Chris@12 59 // Get a stream of columns at 25 per second.
Chris@12 60 //
Chris@12 61 // The original picks samples at a rate of 100-per-second then
Chris@12 62 // median filters to reduce noise then picks samples again at
Chris@12 63 // 25-per-second. We don't do that (yet)
Chris@12 64 // sample gains atomSpacing per input column; starting at samplesPerCol - nzstart means the first pick happens once the accumulated spacing reaches nzstart.
Chris@12 65 samplesPerCol = stream.sampleRate / 25;
Chris@12 66 var sample = samplesPerCol - nzstart;
Chris@12 67 // NOTE(review): the closures below mutate sample, so the result depends on every column being visited once, in order.
Chris@12 68 columns = take (nzduration / samplesPerCol)
Chris@12 69 (concatMap do m:
Chris@12 70 concatMap do col:
Chris@12 71 sample := sample + cq.kernel.atomSpacing;
Chris@12 72 if sample >= samplesPerCol then
Chris@12 73 sample := sample - samplesPerCol;
Chris@12 74 [col]
Chris@12 75 else
Chris@12 76 []
Chris@12 77 fi;
Chris@12 78 done (mat.asColumns m);
Chris@12 79 done matrices);
Chris@12 80 // NOTE(review): head columns in the messages below presumably fails if no column was selected (e.g. no level above 0.1) -- confirm.
Chris@12 81 eprintln "have \(length columns) columns of \(vec.length (head columns)) values each";
Chris@12 82
Chris@12 83 // drop the lowest 55 of the 600 bins
Chris@12 84 columns = map do c: // rebinds columns, shadowing the list built above
Chris@12 85 vec.slice c 55 (vec.length c);
Chris@12 86 done columns;
Chris@12 87
Chris@12 88 eprintln "now have \(length columns) columns of \(vec.length (head columns))";
Chris@12 89
Chris@12 90 // plot.plot [ Grid (mat.fromColumns columns) ];
Chris@12 91
Chris@12 92 columns);
Chris@12 93
Chris@12 94
Chris@12 95 {
Chris@12 96 prepareTimeFrequency // sole field of the record that forms the final expression of this module
Chris@12 97 }
Chris@12 98