annotate yeti/timefreq.yeti @ 135:8db5e4ab56ce

Ground-truth data in CSV and lab format, converted from the MIDI using Sonic Visualiser and then to lab using the script here
author Chris Cannam
date Thu, 08 May 2014 12:59:09 +0100
parents 0f6db1895e1c
children
rev   line source
Chris@12 1
Chris@12 2 module timefreq;
Chris@12 3
Chris@12 4 // Obtain the time-frequency representation (based on constant-Q
Chris@12 5 // transform) as transcription input
Chris@12 6
Chris@12 7 af = load may.stream.audiofile;
Chris@12 8 mat = load may.matrix;
Chris@12 9 plot = load may.plot;
Chris@12 10 vec = load may.vector;
Chris@12 11
Chris@12 12 { pow } = load may.mathmisc;
Chris@12 13
Chris@12 14 { resampledTo } = load may.stream.resample;
Chris@12 15
Chris@12 16 { cqt } = load cqt;
Chris@12 17
// prepareTimeFrequency wavfile
//
// Opens wavfile through af.openMono, resamples it to 44100 Hz, runs the
// constant-Q transform (cqt) over it and returns a list of spectrogram
// columns picked at 25 columns per second. Only the span between the
// first and last CQ columns whose summed level exceeds 0.1 is covered,
// and the first 55 bins of every returned column are removed.
//
// Fails (failWith) if the stream length is not known or if the
// transform's Spectrogram output is not Real. Diagnostics are written
// with eprintln along the way.
Chris@12 18 prepareTimeFrequency wavfile =
Chris@12 19 (stream = resampledTo 44100 (af.openMono wavfile);
Chris@12 20
// The length is only used for the diagnostic print below, but an
// unknown length is treated as a fatal error.
Chris@12 21 streamLength =
Chris@12 22 case stream.available of
Chris@12 23 Known n: n;
Chris@12 24 _: failWith "Audio file length unknown?!";
Chris@12 25 esac;
Chris@12 26
Chris@12 27 eprintln "streamLength = \(streamLength)";
Chris@12 28
Chris@12 29 //!!! TODO: the original also scales the signal to peak = 0.5; that is not done here
Chris@12 30
Chris@12 31 cq = cqt {
Chris@12 32 maxFreq = stream.sampleRate / 3,
Chris@12 33 minFreq = 27.5,
Chris@12 34 binsPerOctave = 60
Chris@12 35 } stream;
Chris@12 36
Chris@12 37 //!!! NOTE: the original also modifies the Q and atomHopFactor; they are not set here
Chris@12 38 eprintln "atomSpacing = \(cq.kernel.atomSpacing)";
Chris@12 39
Chris@12 40 matrices = case cq.output (Spectrogram ()) of
Chris@12 41 Real mm: mm;
Chris@12 42 _: failWith "Expected real";
Chris@12 43 esac;
Chris@12 44
Chris@12 45 eprintln "have \(length matrices) matrices of size \(mat.size (head matrices)), isRowMajor? = \(mat.isRowMajor? (head matrices))";
Chris@12 46
// One level per CQ column across all matrices: the sum of that
// column's values.
Chris@12 47 levels = concatMap do m:
Chris@12 48 map do c: vec.sum c done (mat.asColumns m);
Chris@12 49 done matrices;
Chris@12 50
// nztail: levels from the first value above 0.1 onwards.
// nzonly: nztail with the trailing run of values not above 0.1 removed
// (same search applied to the reversed list).
Chris@12 51 nztail = find (> 0.1) levels;
Chris@12 52 nzonly = reverse (find (> 0.1) (reverse nztail));
Chris@12 53
Chris@12 54 eprintln "non-zero columns start at \(length levels - length nztail), go on for \(length nzonly) [of \(length levels)]";
Chris@12 55
// Convert the column counts into the same unit as samplesPerCol below
// by multiplying by the spacing between CQ columns.
Chris@12 56 nzstart = (length levels - length nztail) * cq.kernel.atomSpacing;
Chris@12 57 nzduration = (length nzonly) * cq.kernel.atomSpacing;
Chris@12 58
Chris@12 59 // Pick a list of columns at a rate of 25 per second.
Chris@12 60 //
Chris@12 61 // The original picks samples at a rate of 100 per second, then
Chris@12 62 // median-filters them to reduce noise, then picks samples again at
Chris@12 63 // 25 per second. We don't do that (yet).
Chris@12 64
// "sample" accumulates atomSpacing for every CQ column visited; each
// time it reaches samplesPerCol a column is emitted and samplesPerCol
// is subtracted again. Starting it at samplesPerCol - nzstart delays
// the first emitted column until about nzstart has been accumulated.
// NOTE(review): the increment happens before the test, so when
// nzstart > 0 the first emitted column appears to be the one just
// before the first column above the 0.1 threshold -- confirm whether
// that is intended.
// NOTE(review): this relies on the closures being applied to the
// columns in order, since they mutate "sample" -- confirm against the
// evaluation behaviour of concatMap/take.
Chris@12 65 samplesPerCol = stream.sampleRate / 25;
Chris@12 66 var sample = samplesPerCol - nzstart;
Chris@12 67
Chris@12 68 columns = take (nzduration / samplesPerCol)
Chris@12 69 (concatMap do m:
Chris@12 70 concatMap do col:
Chris@12 71 sample := sample + cq.kernel.atomSpacing;
Chris@12 72 if sample >= samplesPerCol then
Chris@12 73 sample := sample - samplesPerCol;
Chris@12 74 [col]
Chris@12 75 else
Chris@12 76 []
Chris@12 77 fi;
Chris@12 78 done (mat.asColumns m);
Chris@12 79 done matrices);
Chris@12 80
Chris@12 81 eprintln "have \(length columns) columns of \(vec.length (head columns)) values each";
Chris@12 82
Chris@12 83 // Drop the lowest 55 of the 600 bins (rebinds "columns" to the trimmed list)
Chris@12 84 columns = map do c:
Chris@12 85 vec.slice c 55 (vec.length c);
Chris@12 86 done columns;
Chris@12 87
Chris@12 88 eprintln "now have \(length columns) columns of \(vec.length (head columns))";
Chris@12 89
Chris@12 90 // plot.plot [ Grid (mat.fromColumns columns) ];
Chris@12 91
Chris@12 92 columns);
Chris@12 93
Chris@12 94
// Module value: a record exposing prepareTimeFrequency as the only
// binding made available by this module.
Chris@12 95 {
Chris@12 96 prepareTimeFrequency
Chris@12 97 }
Chris@12 98