Mercurial > hg > silvet
changeset 12:0f6db1895e1c
Prepare the ground for cqt and templates
author | Chris Cannam |
---|---|
date | Fri, 21 Mar 2014 17:14:44 +0000 |
parents | f268212ac499 |
children | e15bc63cb146 |
files | notes/cplcaMT-annotated.m yeti/scratch/convert.m yeti/silvet.yeti yeti/templates.yeti yeti/timefreq.yeti |
diffstat | 5 files changed, 188 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
--- a/notes/cplcaMT-annotated.m Fri Mar 21 17:05:26 2014 +0000 +++ b/notes/cplcaMT-annotated.m Fri Mar 21 17:14:44 2014 +0000 @@ -226,7 +226,7 @@ xa = eps; %% tiny non-zero initialiser for k = 16:73 %% overall note range found in instrument set fh{k} = fftn( h{k}, wc); %% this and the subsequent ifftn are for the pitch-shift convolution step I think - for r=1:R + for r=1:R %% instruments if( (pa(r,1) <= k && k <= pa(r,2)) ) xa1 = abs( real( ifftn( fw{r,k} .* fh{k}))); xa = xa + xa1(1:size(x,1),1:size(x,2)) .*repmat(z{k},1,size(x,1))'.*repmat(u{r,k},1,size(x,1))';
% yeti/scratch/convert.m (new file in this changeset)
%
% Export the note-template matrices stored in the noteTemplates*.mat
% files as CSV files, one CSV per instrument, in the current directory.

% Column 1: name of the .mat file, which is also the name of the
%           variable that file is expected to define.
% Column 2: base name of the CSV file to write. These match the entries
%           of the `instruments` list in yeti/templates.yeti, which
%           reads "<name>.csv" for each of them.
templateTable = {
    'noteTemplatesBassoon',   'bassoon'
    'noteTemplatesCello',     'cello'
    'noteTemplatesClarinet',  'clarinet'
    'noteTemplatesFlute',     'flute'
    'noteTemplatesGuitar',    'guitar'
    'noteTemplatesHorn',      'horn'
    'noteTemplatesOboe',      'oboe'
    'noteTemplatesTenorSax',  'tenorsax'
    'noteTemplatesViolin',    'violin'
    'noteTemplatesSptkBGCl',  'piano-maps-SptkBGCl'
};

nTemplates = size(templateTable, 1);

% Load everything before writing anything, so that a missing or
% malformed .mat file aborts the script before any CSV is produced.
templateData = cell(nTemplates, 1);
for i = 1:nTemplates
    varName = templateTable{i, 1};
    loaded = load(varName);
    templateData{i} = loaded.(varName);
end

% Write with an explicit precision: MATLAB documents dlmwrite's default
% as only 5 significant digits, which would truncate the template
% values. '%.17g' is enough for a double to survive the round trip.
for i = 1:nTemplates
    dlmwrite([templateTable{i, 2} '.csv'], templateData{i}, ...
             'precision', '%.17g');
end
// yeti/silvet.yeti (new file in this changeset)
//
// Driver program: loads the instrument templates, obtains the
// time-frequency columns for "test.wav", groups those columns into
// fixed-width matrices ("chunks") and reports what it found via
// eprintln.

program silvet;

{ prepareTimeFrequency } = load timefreq;
{ loadTemplates } = load templates;

mat = load may.matrix;
vec = load may.vector;

templates = loadTemplates ();

eprintln "we have \(length (keys templates)) instruments:";
for (sort (keys templates)) eprintln;
eprintln "";

columns = prepareTimeFrequency "test.wav";

// Number of time-frequency columns gathered into each chunk.
chunkSize = 100;

// Number of values in each column; 0 when there are no columns at all.
height = if empty? columns then 0 else vec.length (head columns) fi;

// Split a list of column vectors into a lazy list of matrices, each
// built from up to chunkSize consecutive columns.
//
// A matrix made from columns of `height` values has `height` rows and
// one matrix column per input column, so the target size is
// height x chunkSize (not chunkSize x height).
// NOTE(review): this relies on mat.resizedTo padding the final,
// shorter chunk up to chunkSize columns -- confirm against may.matrix.
chunkify cols =
    if empty? cols then []
    else
        (mat.resizedTo { rows = height, columns = chunkSize }
            (mat.fromColumns (take chunkSize cols)))
        :. \(chunkify (drop chunkSize cols));
    fi;

chunks = chunkify columns;

// `head` cannot be applied to an empty list, so only report a chunk
// size when there is at least one chunk.
if empty? chunks then
    eprintln "we have 0 chunks"
else
    eprintln "we have \(length chunks) chunks of size \(mat.size (head chunks))"
fi;
// yeti/templates.yeti (new file in this changeset)

module templates;

vec = load may.vector;

// Load instrument templates

// Instruments for which a template file is read. Each name is also
// the base name of that instrument's CSV file.
instruments = [
    "bassoon",
    "cello",
    "clarinet",
    "flute",
    "guitar",
    "horn",
    "oboe",
    "tenorsax",
    "violin",
    "piano-maps-SptkBGCl"
];

// Turn one line of comma-separated numbers into a vector.
parseRow line =
    vec.fromList (map number (strSplit "," line));

// Read "../data/<instrument>.csv" (as UTF-8) and return an array
// holding one vector per line of the file.
readTemplate instrument =
    readFile "../data/\(instrument).csv" "UTF-8"
        do handle:
            array (map parseRow (handle.lines ()))
        done;

// Build a hash with one entry per name in `instruments`: the key is
// the instrument name and the value is the array of vectors read from
// that instrument's CSV file.
loadTemplates () = mapIntoHash id readTemplate instruments;

{
    loadTemplates
}
// yeti/timefreq.yeti (new file in this changeset)

module timefreq;

// Obtain the time-frequency representation (based on constant-Q
// transform) as transcription input

af = load may.stream.audiofile;
mat = load may.matrix;
plot = load may.plot;
vec = load may.vector;

{ pow } = load may.mathmisc;

{ resampledTo } = load may.stream.resample;

{ cqt } = load cqt;

// prepareTimeFrequency wavfile
//
// Open `wavfile` as mono audio resampled to 44100, run the constant-Q
// transform over it, and return a list of column vectors picked at 25
// per second from the part of the signal whose column sums exceed 0.1,
// each with its first 55 values removed.
//
// Fails (failWith) if the stream length is not known or if the
// transform output is not real-valued. Progress is reported through
// eprintln. If no columns are selected (for example, no column sum
// exceeds 0.1) the result is the empty list.
prepareTimeFrequency wavfile =
   (stream = resampledTo 44100 (af.openMono wavfile);

    streamLength =
        case stream.available of
        Known n: n;
        _: failWith "Audio file length unknown?!";
        esac;

    eprintln "streamLength = \(streamLength)";

    //!!! original also scales to peak = 0.5

    cq = cqt {
        maxFreq = stream.sampleRate / 3,
        minFreq = 27.5,
        binsPerOctave = 60
    } stream;

    //!!! note: original also modifies the Q and atomHopFactor
    eprintln "atomSpacing = \(cq.kernel.atomSpacing)";

    matrices = case cq.output (Spectrogram ()) of
        Real mm: mm;
        _: failWith "Expected real";
        esac;

    // `head` cannot be applied to an empty list, so the per-matrix
    // details are only reported when there is at least one matrix.
    if empty? matrices then
        eprintln "have 0 matrices"
    else
        eprintln "have \(length matrices) matrices of size \(mat.size (head matrices)), isRowMajor? = \(mat.isRowMajor? (head matrices))"
    fi;

    // One level per column across all matrices: the sum of the
    // column's values.
    levels = concatMap do m:
        map do c: vec.sum c done (mat.asColumns m);
    done matrices;

    // nztail: levels from the first one above 0.1 onwards.
    // nzonly: nztail with everything after its last level above 0.1
    // removed as well.
    nztail = find (> 0.1) levels;
    nzonly = reverse (find (> 0.1) (reverse nztail));

    eprintln "non-zero columns start at \(length levels - length nztail), go on for \(length nzonly) [of \(length levels)]";

    // Start and extent of the non-zero region, converted from column
    // counts by multiplying by atomSpacing.
    // NOTE(review): presumably atomSpacing is the number of audio
    // samples between successive columns -- confirm in the cqt module.
    nzstart = (length levels - length nztail) * cq.kernel.atomSpacing;
    nzduration = (length nzonly) * cq.kernel.atomSpacing;

    // Get a stream of columns at 25 per second.
    //
    // The original picks samples at a rate of 100-per-second then
    // median filters to reduce noise then picks samples again at
    // 25-per-second. We don't do that (yet)

    samplesPerCol = stream.sampleRate / 25;

    // Running counter advanced by atomSpacing for every column seen;
    // a column is kept each time the counter reaches samplesPerCol.
    // Starting at samplesPerCol - nzstart delays the first kept
    // column until nzstart has been covered.
    var sample = samplesPerCol - nzstart;

    // NOTE(review): the closure below mutates `sample` while the list
    // is being generated, so it relies on each element being generated
    // only once even though `columns` is inspected several times
    // afterwards -- confirm the evaluation behaviour of concatMap.
    columns = take (nzduration / samplesPerCol)
       (concatMap do m:
            concatMap do col:
                sample := sample + cq.kernel.atomSpacing;
                if sample >= samplesPerCol then
                    sample := sample - samplesPerCol;
                    [col]
                else
                    []
                fi;
            done (mat.asColumns m);
        done matrices);

    // Guarded for the same reason as above: there may be no columns.
    if empty? columns then
        eprintln "have 0 columns"
    else
        eprintln "have \(length columns) columns of \(vec.length (head columns)) values each"
    fi;

    // drop the lowest 55 of the 600 bins
    columns = map do c:
        vec.slice c 55 (vec.length c);
    done columns;

    if empty? columns then
        eprintln "now have 0 columns"
    else
        eprintln "now have \(length columns) columns of \(vec.length (head columns))"
    fi;

//    plot.plot [ Grid (mat.fromColumns columns) ];

    columns);


{
    prepareTimeFrequency
}