annotate yeti/timefreq.yeti @ 309:07ee4ebea57c

Add chromagram output
author Chris Cannam
date Mon, 19 Jan 2015 11:23:07 +0000
parents 0f6db1895e1c
children
rev   line source
Chris@12 1
Chris@12 2 module timefreq;
Chris@12 3
Chris@12 4 // Obtain the time-frequency representation (based on constant-Q
Chris@12 5 // transform) as transcription input
Chris@12 6
Chris@12 7 af = load may.stream.audiofile;
Chris@12 8 mat = load may.matrix;
Chris@12 9 plot = load may.plot;
Chris@12 10 vec = load may.vector;
Chris@12 11
Chris@12 12 { pow } = load may.mathmisc;
Chris@12 13
Chris@12 14 { resampledTo } = load may.stream.resample;
Chris@12 15
Chris@12 16 { cqt } = load cqt;
Chris@12 17
// Build the time-frequency input for transcription from an audio file.
//
// wavfile is handed to af.openMono and the resulting stream is passed
// through resampledTo 44100.  A constant-Q transform (27.5 Hz up to
// one third of the stream sample rate, 60 bins per octave) is run over
// the stream, and its real-valued spectrogram columns are then
//
//  1. scanned for the first and last column whose summed value
//     exceeds 0.1, to locate the non-silent extent of the signal;
//  2. decimated to 25 columns per second, starting from the first
//     non-silent column and covering only the non-silent duration;
//  3. cut down by discarding the first 55 values of each column.
//
// Returns the resulting list of column vectors.  The list is empty
// when no column sum exceeds the 0.1 threshold (e.g. silent input).
//
// Fails (via failWith) if the stream length is not known in advance
// or if the constant-Q output is not real-valued.  Progress
// diagnostics are written with eprintln.
prepareTimeFrequency wavfile =
   (stream = resampledTo 44100 (af.openMono wavfile);

    // Only streams of known length are accepted; the length itself is
    // used for the diagnostic below and nothing else.
    streamLength =
        case stream.available of
        Known n: n;
        _: failWith "Audio file length unknown?!";
        esac;

    eprintln "streamLength = \(streamLength)";

    //!!! original also scales to peak = 0.5

    cq = cqt {
        maxFreq = stream.sampleRate / 3,
        minFreq = 27.5,
        binsPerOctave = 60
    } stream;

    //!!! note: original also modifies the Q and atomHopFactor
    atomSpacing = cq.kernel.atomSpacing;
    eprintln "atomSpacing = \(atomSpacing)";

    matrices =
        case cq.output (Spectrogram ()) of
        Real mm: mm;
        _: failWith "Expected real";
        esac;

    // The size report needs a first matrix to inspect; do not let a
    // diagnostic bring the whole function down when there is none.
    if empty? matrices then
        eprintln "have 0 matrices"
    else
        eprintln "have \(length matrices) matrices of size \(mat.size (head matrices)), isRowMajor? = \(mat.isRowMajor? (head matrices))"
    fi;

    // One level per spectrogram column: the sum of that column's values.
    levels =
        concatMap do m:
            map vec.sum (mat.asColumns m)
        done matrices;

    // Columns whose level does not exceed this are treated as silent.
    silenceThreshold = 0.1;

    // find yields the list from the first matching element onward, so
    // nztail has leading silence removed and nzonly has trailing
    // silence removed as well.
    nztail = find (> silenceThreshold) levels;
    nzonly = reverse (find (> silenceThreshold) (reverse nztail));

    leadingColumns = length levels - length nztail;

    eprintln "non-zero columns start at \(leadingColumns), go on for \(length nzonly) [of \(length levels)]";

    // Non-silent region expressed in audio samples rather than columns.
    nzstart = leadingColumns * atomSpacing;
    nzduration = (length nzonly) * atomSpacing;

    // Get a stream of columns at 25 per second.
    //
    // The original picks samples at a rate of 100-per-second then
    // median filters to reduce noise then picks samples again at
    // 25-per-second. We don't do that (yet)

    columnsPerSecond = 25;
    samplesPerCol = stream.sampleRate / columnsPerSecond;

    // Running sample position relative to the next output column.  It
    // starts nzstart samples "behind", so no column is emitted until
    // the leading silence has been passed.  The closure below mutates
    // it and therefore depends on columns being visited in order.
    var sample = samplesPerCol - nzstart;

    sampled =
        take (nzduration / samplesPerCol)
           (concatMap do m:
                concatMap do col:
                    sample := sample + atomSpacing;
                    if sample >= samplesPerCol then
                        sample := sample - samplesPerCol;
                        [col]
                    else
                        []
                    fi
                done (mat.asColumns m)
            done matrices);

    // As above: only look at the first column when there is one.
    if empty? sampled then
        eprintln "have 0 columns"
    else
        eprintln "have \(length sampled) columns of \(vec.length (head sampled)) values each"
    fi;

    // drop the lowest 55 of the 600 bins
    discardBins = 55;
    trimmed =
        map do c:
            vec.slice c discardBins (vec.length c)
        done sampled;

    if empty? trimmed then
        eprintln "now have 0 columns"
    else
        eprintln "now have \(length trimmed) columns of \(vec.length (head trimmed))"
    fi;

//    plot.plot [ Grid (mat.fromColumns trimmed) ];

    trimmed);
Chris@12 93
Chris@12 94
Chris@12 95 {
Chris@12 96 prepareTimeFrequency
Chris@12 97 }
Chris@12 98