c@116
|
1 /*
|
c@116
|
2 Constant-Q library
|
c@116
|
3 Copyright (c) 2013-2014 Queen Mary, University of London
|
c@116
|
4
|
c@116
|
5 Permission is hereby granted, free of charge, to any person
|
c@116
|
6 obtaining a copy of this software and associated documentation
|
c@116
|
7 files (the "Software"), to deal in the Software without
|
c@116
|
8 restriction, including without limitation the rights to use, copy,
|
c@116
|
9 modify, merge, publish, distribute, sublicense, and/or sell copies
|
c@116
|
10 of the Software, and to permit persons to whom the Software is
|
c@116
|
11 furnished to do so, subject to the following conditions:
|
c@116
|
12
|
c@116
|
13 The above copyright notice and this permission notice shall be
|
c@116
|
14 included in all copies or substantial portions of the Software.
|
c@116
|
15
|
c@116
|
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
c@116
|
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
c@116
|
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
c@116
|
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
c@116
|
20 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
|
c@116
|
21 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
c@116
|
22 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
c@116
|
23
|
c@116
|
24 Except as contained in this notice, the names of the Centre for
|
c@116
|
25 Digital Music; Queen Mary, University of London; and Chris Cannam
|
c@116
|
26 shall not be used in advertising or otherwise to promote the sale,
|
c@116
|
27 use or other dealings in this Software without prior written
|
c@116
|
28 authorization.
|
c@116
|
29 */
|
c@116
|
30
|
c@116
|
31 module cqt;
|
c@116
|
32
|
c@116
|
33 cqtkernel = load cqtkernel;
|
c@116
|
34 resample = load may.stream.resample;
|
c@116
|
35 manipulate = load may.stream.manipulate;
|
c@116
|
36 mat = load may.matrix;
|
c@116
|
37 cm = load may.matrix.complex;
|
c@116
|
38 framer = load may.stream.framer;
|
c@116
|
39 cplx = load may.complex;
|
c@116
|
40 fft = load may.transform.fft;
|
c@116
|
41 vec = load may.vector;
|
c@116
|
42 ch = load may.stream.channels;
|
c@116
|
43
|
c@116
|
44 { pow, round, floor, ceil, log2, nextPowerOfTwo } = load may.mathmisc;
|
c@116
|
45
|
c@116
|
46 cqt { maxFreq, minFreq, binsPerOctave } str =
|
c@116
|
47 (sampleRate = str.sampleRate;
|
c@116
|
48 octaves = ceil (log2 (maxFreq / minFreq));
|
c@116
|
49 // actualMinFreq = (maxFreq / (pow 2 octaves)) * (pow 2 (1/binsPerOctave));
|
c@116
|
50
|
c@116
|
51 kdata = cqtkernel.makeKernel { sampleRate, maxFreq, binsPerOctave };
|
c@116
|
52
|
c@116
|
53 // eprintln "sampleRate = \(sampleRate), maxFreq = \(maxFreq), minFreq = \(minFreq), actualMinFreq = \(actualMinFreq), octaves = \(octaves), binsPerOctave = \(binsPerOctave), fftSize = \(kdata.fftSize), hop = \(kdata.fftHop)";
|
c@116
|
54
|
c@116
|
55 // eprintln "atomsPerFrame = \(kdata.atomsPerFrame)";
|
c@116
|
56
|
c@116
|
57 padding = (kdata.fftSize * (pow 2 (octaves-1)));
|
c@116
|
58
|
c@116
|
59 // eprintln "padding = \(padding)";
|
c@116
|
60
|
c@116
|
61 str = manipulate.paddedBy padding str;
|
c@116
|
62
|
c@116
|
63 streams = manipulate.duplicated octaves str;
|
c@116
|
64
|
c@116
|
65 // forward transform uses the conjugate-transposed kernel, inverse
|
c@116
|
66 // uses the original
|
c@116
|
67 kernel = cm.transposed (cm.conjugateTransposed kdata.kernel);
|
c@116
|
68
|
c@116
|
69 // eprintln "have kernel";
|
c@116
|
70
|
c@116
|
71 fftFunc = fft.forward kdata.fftSize;
|
c@116
|
72
|
c@116
|
73 cqblocks =
|
c@116
|
74 map do octave:
|
c@116
|
75 frames = map ch.mixedDown //!!! mono for now
|
c@116
|
76 (framer.frames kdata.fftSize [ Hop kdata.fftHop, Padded false ]
|
c@116
|
77 (resample.decimated (pow 2 octave) streams[octave]));
|
c@116
|
78 map do frame:
|
c@116
|
79 freq = fftFunc (cplx.complexArray frame (vec.zeros kdata.fftSize));
|
c@116
|
80 // eprintln "octave = \(octave), frame = \(vec.list frame)";
|
c@116
|
81 // eprintln "octave = \(octave), freq = \(freq)";
|
c@116
|
82 cm.product kernel (cm.newComplexColumnVector freq);
|
c@116
|
83 done frames;
|
c@116
|
84 done [0..octaves-1];
|
c@116
|
85
|
c@116
|
86 // The cqblocks list is a list<list<matrix>>. Each top-level list
|
c@116
|
87 // corresponds to an octave, from highest to lowest, each having
|
c@116
|
88 // twice as many elements in its list as the next octave. The
|
c@116
|
89 // sub-lists are sampled in time with an effective spacing of
|
c@116
|
90 // fftSize * 2^(octave-1) audio frames, and the matrices are row
|
c@116
|
91 // vectors with atomsPerFrame * binsPerOctave complex elements.
|
c@116
|
92 //
|
c@116
|
93 // ***
|
c@116
|
94 //
|
c@116
|
95 // In a typical constant-Q structure, each (2^(octaves-1) *
|
c@116
|
96 // fftHop) input frames gives us an output structure conceptually
|
c@116
|
97 // like this:
|
c@116
|
98 //
|
c@116
|
99 // [][][][][][][][] <- fftHop frames per highest-octave output value
|
c@116
|
100 // [][][][][][][][] layered as many times as binsPerOctave (here 2)
|
c@116
|
101 // [--][--][--][--] <- fftHop*2 frames for the next lower octave
|
c@116
|
102 // [--][--][--][--] etc
|
c@116
|
103 // [------][------]
|
c@116
|
104 // [------][------]
|
c@116
|
105 // [--------------]
|
c@116
|
106 // [--------------]
|
c@116
|
107 //
|
c@116
|
108 // ***
|
c@116
|
109 //
|
c@116
|
110 // But the kernel we're using here has more than one temporally
|
c@116
|
111 // spaced atom; each individual cell is a row vector with
|
c@116
|
112 // atomsPerFrame * binsPerOctave elements, but that actually
|
c@116
|
113 // represents a rectangular matrix of result cells with width
|
c@116
|
114 // atomsPerFrame and height binsPerOctave. The columns of this
|
c@116
|
115 // matrix (the atoms) then need to be spaced by 2^(octave-1)
|
c@116
|
116 // relative to those from the highest octave.
|
c@116
|
117
|
c@116
|
118 // Reshape each row vector into the appropriate rectangular matrix
|
c@116
|
119 // and split into single-atom columns
|
c@116
|
120
|
c@116
|
121 emptyHops = kdata.firstCentre / kdata.atomSpacing; //!!! int? round?
|
c@116
|
122 // maxDrop = emptyHops * (pow 2 (octaves-1)) - emptyHops;
|
c@116
|
123 // eprintln "maxDrop = \(maxDrop)";
|
c@116
|
124
|
c@116
|
125 cqblocks =
|
c@116
|
126 map do octlist:
|
c@116
|
127 concatMap do rv:
|
c@116
|
128 cm.asColumns
|
c@116
|
129 (cm.generate do row col:
|
c@116
|
130 cm.at rv ((row * kdata.atomsPerFrame) + col) 0
|
c@116
|
131 done {
|
c@116
|
132 rows = kdata.binsPerOctave,
|
c@116
|
133 columns = kdata.atomsPerFrame
|
c@116
|
134 })
|
c@116
|
135 done octlist
|
c@116
|
136 done cqblocks;
|
c@116
|
137
|
c@116
|
138 cqblocks = array (map2 do octlist octave:
|
c@116
|
139 d = emptyHops * (pow 2 (octaves-octave)) - emptyHops;
|
c@116
|
140 // eprintln "dropping \(d)";
|
c@116
|
141 drop d octlist;
|
c@116
|
142 done cqblocks [1..octaves]);
|
c@116
|
143
|
c@116
|
144 assembleBlock bits =
|
c@116
|
145 (//eprintln "assembleBlock: structure of bits is:";
|
c@116
|
146 //eprintln (map length bits);
|
c@116
|
147
|
c@116
|
148 rows = octaves * kdata.binsPerOctave;
|
c@116
|
149 columns = (pow 2 (octaves - 1)) * kdata.atomsPerFrame;
|
c@116
|
150
|
c@116
|
151 cm.generate do row col:
|
c@116
|
152
|
c@116
|
153 // bits structure: [1,2,4,8,...]
|
c@116
|
154
|
c@116
|
155 // each elt of bits is a list of the chunks that should
|
c@116
|
156 // make up this block in that octave (lowest octave first)
|
c@116
|
157
|
c@116
|
158 // each chunk has atomsPerFrame * binsPerOctave elts in it
|
c@116
|
159
|
c@116
|
160 // row is disposed with 0 at the top, highest octave (in
|
c@116
|
161 // both pitch and index into bits structure)
|
c@116
|
162
|
c@116
|
163 oct = int (row / binsPerOctave);
|
c@116
|
164 binNo = row % kdata.binsPerOctave;
|
c@116
|
165
|
c@116
|
166 chunks = pow 2 oct;
|
c@116
|
167 colsPerAtom = int (columns / (chunks * kdata.atomsPerFrame));
|
c@116
|
168 atomNo = int (col / colsPerAtom);
|
c@116
|
169 atomOffset = col % colsPerAtom;
|
c@116
|
170
|
c@116
|
171 if atomOffset == 0 and atomNo < length bits[oct] then
|
c@116
|
172 bits[oct][atomNo][binNo];
|
c@116
|
173 else
|
c@116
|
174 cplx.zero
|
c@116
|
175 fi;
|
c@116
|
176
|
c@116
|
177 done { rows, columns };
|
c@116
|
178 );
|
c@116
|
179
|
c@116
|
180 assembleBlockSpectrogram bits =
|
c@116
|
181 (// As assembleBlock, but producing a dense magnitude
|
c@116
|
182 // spectrogram (rather than a complex output). (todo:
|
c@116
|
183 // interpolation, smoothing)
|
c@116
|
184
|
c@116
|
185 //eprintln "assembleBlockSpectrogram: structure of bits is:";
|
c@116
|
186 //eprintln (map length bits);
|
c@116
|
187
|
c@116
|
188 rows = octaves * kdata.binsPerOctave;
|
c@116
|
189 columns = (pow 2 (octaves - 1)) * kdata.atomsPerFrame;
|
c@116
|
190
|
c@116
|
191 mat.generate do row col:
|
c@116
|
192
|
c@116
|
193 oct = int (row / binsPerOctave);
|
c@116
|
194 binNo = row % kdata.binsPerOctave;
|
c@116
|
195
|
c@116
|
196 chunks = pow 2 oct;
|
c@116
|
197 colsPerAtom = int (columns / (chunks * kdata.atomsPerFrame));
|
c@116
|
198 atomNo = int (col / colsPerAtom);
|
c@116
|
199
|
c@116
|
200 if atomNo < length bits[oct] then
|
c@116
|
201 cplx.magnitude bits[oct][atomNo][binNo];
|
c@116
|
202 else
|
c@116
|
203 0
|
c@116
|
204 fi;
|
c@116
|
205
|
c@116
|
206 done { rows, columns };
|
c@116
|
207 );
|
c@116
|
208
|
c@116
|
209 processOctaveLists assembler octs =
|
c@116
|
210 case octs[0] of
|
c@116
|
211 block::rest:
|
c@116
|
212 (toAssemble = array
|
c@116
|
213 (map do oct:
|
c@116
|
214 n = kdata.atomsPerFrame * pow 2 oct;
|
c@116
|
215 if not empty? octs[oct] then
|
c@116
|
216 forBlock = array (take n octs[oct]);
|
c@116
|
217 octs[oct] := drop n octs[oct];
|
c@116
|
218 forBlock
|
c@116
|
219 else
|
c@116
|
220 array []
|
c@116
|
221 fi
|
c@116
|
222 done (keys octs));
|
c@116
|
223 assembler toAssemble :. \(processOctaveLists assembler octs));
|
c@116
|
224 _: []
|
c@116
|
225 esac;
|
c@116
|
226
|
c@116
|
227 //eprintln "cqblocks has \(length cqblocks) entries";
|
c@116
|
228
|
c@116
|
229 octaveLists = [:];
|
c@116
|
230
|
c@116
|
231 cqblocks = array cqblocks;
|
c@116
|
232 for [1..octaves] do oct:
|
c@116
|
233 octaveLists[octaves - oct] := cqblocks[oct-1];
|
c@116
|
234 done;
|
c@116
|
235 /*
|
c@116
|
236 \() (map2 do octlist octave:
|
c@116
|
237 println "oct \(octaves) - \(octave) = \(octaves - octave)";
|
c@116
|
238 octaveLists[octaves - octave] := octlist
|
c@116
|
239 done cqblocks [1..octaves]);
|
c@116
|
240 */
|
c@116
|
241 //eprintln "octaveLists keys are: \(keys octaveLists)";
|
c@116
|
242
|
c@116
|
243 {
|
c@116
|
244 kernel = kdata with {
|
c@116
|
245 binFrequencies = array
|
c@116
|
246 (concatMap do octave:
|
c@116
|
247 map do freq:
|
c@116
|
248 freq / (pow 2 octave);
|
c@116
|
249 done (reverse (list kdata.binFrequencies))
|
c@116
|
250 done [0..octaves-1])
|
c@116
|
251 },
|
c@116
|
252 sampleRate,
|
c@116
|
253 octaves,
|
c@116
|
254 get cqComplex () = processOctaveLists assembleBlock octaveLists,
|
c@116
|
255 get cqSpectrogram () = processOctaveLists assembleBlockSpectrogram octaveLists,
|
c@116
|
256 }
|
c@116
|
257 );
|
c@116
|
258
|
c@116
|
259 { cqt }
|
c@116
|
260
|