c@69
|
1 /*
|
c@69
|
2 Constant-Q library
|
c@69
|
3 Copyright (c) 2013-2014 Queen Mary, University of London
|
c@69
|
4
|
c@69
|
5 Permission is hereby granted, free of charge, to any person
|
c@69
|
6 obtaining a copy of this software and associated documentation
|
c@69
|
7 files (the "Software"), to deal in the Software without
|
c@69
|
8 restriction, including without limitation the rights to use, copy,
|
c@69
|
9 modify, merge, publish, distribute, sublicense, and/or sell copies
|
c@69
|
10 of the Software, and to permit persons to whom the Software is
|
c@69
|
11 furnished to do so, subject to the following conditions:
|
c@69
|
12
|
c@69
|
13 The above copyright notice and this permission notice shall be
|
c@69
|
14 included in all copies or substantial portions of the Software.
|
c@69
|
15
|
c@69
|
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
c@69
|
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
c@69
|
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
c@69
|
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
c@69
|
20 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
|
c@69
|
21 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
c@69
|
22 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
c@69
|
23
|
c@69
|
24 Except as contained in this notice, the names of the Centre for
|
c@69
|
25 Digital Music; Queen Mary, University of London; and Chris Cannam
|
c@69
|
26 shall not be used in advertising or otherwise to promote the sale,
|
c@69
|
27 use or other dealings in this Software without prior written
|
c@69
|
28 authorization.
|
c@69
|
29 */
|
c@10
|
30
|
c@37
|
31 module cqt;
|
c@10
|
32
|
c@10
|
33 cqtkernel = load cqtkernel;
|
c@10
|
34 resample = load may.stream.resample;
|
c@10
|
35 manipulate = load may.stream.manipulate;
|
c@72
|
36 mat = load may.matrix;
|
c@10
|
37 cm = load may.matrix.complex;
|
c@10
|
38 framer = load may.stream.framer;
|
c@10
|
39 cplx = load may.complex;
|
c@10
|
40 fft = load may.transform.fft;
|
c@10
|
41 vec = load may.vector;
|
c@42
|
42 ch = load may.stream.channels;
|
c@10
|
43
|
c@10
|
44 { pow, round, floor, ceil, log2, nextPowerOfTwo } = load may.mathmisc;
|
c@10
|
45
|
c@37
|
46 cqt { maxFreq, minFreq, binsPerOctave } str =
|
c@10
|
47 (sampleRate = str.sampleRate;
|
c@10
|
48 octaves = ceil (log2 (maxFreq / minFreq));
|
c@65
|
49 // actualMinFreq = (maxFreq / (pow 2 octaves)) * (pow 2 (1/binsPerOctave));
|
c@10
|
50
|
c@41
|
51 kdata = cqtkernel.makeKernel { sampleRate, maxFreq, binsPerOctave };
|
c@10
|
52
|
c@63
|
53 // eprintln "sampleRate = \(sampleRate), maxFreq = \(maxFreq), minFreq = \(minFreq), actualMinFreq = \(actualMinFreq), octaves = \(octaves), binsPerOctave = \(binsPerOctave), fftSize = \(kdata.fftSize), hop = \(kdata.fftHop)";
|
c@10
|
54
|
c@63
|
55 // eprintln "atomsPerFrame = \(kdata.atomsPerFrame)";
|
c@11
|
56
|
c@41
|
57 padding = (kdata.fftSize * (pow 2 (octaves-1)));
|
c@40
|
58
|
c@63
|
59 // eprintln "padding = \(padding)";
|
c@40
|
60
|
c@40
|
61 str = manipulate.paddedBy padding str;
|
c@40
|
62
|
c@10
|
63 streams = manipulate.duplicated octaves str;
|
c@10
|
64
|
c@10
|
65 //!!! can't be right!
|
c@10
|
66 kernel = cm.transposed (cm.conjugateTransposed kdata.kernel);
|
c@10
|
67
|
c@63
|
68 // eprintln "have kernel";
|
c@10
|
69
|
c@10
|
70 fftFunc = fft.forward kdata.fftSize;
|
c@10
|
71
|
c@10
|
72 cqblocks =
|
c@10
|
73 map do octave:
|
c@42
|
74 frames = map ch.mixedDown //!!! mono for now
|
c@42
|
75 (framer.frames kdata.fftSize [ Hop kdata.fftHop, Padded false ]
|
c@42
|
76 (resample.decimated (pow 2 octave) streams[octave]));
|
c@10
|
77 map do frame:
|
c@10
|
78 freq = fftFunc (cplx.complexArray frame (vec.zeros kdata.fftSize));
|
c@43
|
79 // eprintln "octave = \(octave), frame = \(vec.list frame)";
|
c@43
|
80 // eprintln "octave = \(octave), freq = \(freq)";
|
c@10
|
81 cm.product kernel (cm.newComplexColumnVector freq);
|
c@10
|
82 done frames;
|
c@10
|
83 done [0..octaves-1];
|
c@10
|
84
|
c@13
|
85 // The cqblocks list is a list<list<matrix>>. Each top-level list
|
c@11
|
86 // corresponds to an octave, from highest to lowest, each having
|
c@11
|
87 // twice as many elements in its list as the next octave. The
|
c@11
|
88 // sub-lists are sampled in time with an effective spacing of
|
c@11
|
89 // fftSize * 2^(octave-1) audio frames, and the matrices are row
|
c@11
|
90 // vectors with atomsPerFrame * binsPerOctave complex elements.
|
c@13
|
91 //
|
c@13
|
92 // ***
|
c@13
|
93 //
|
c@13
|
94 // In a typical constant-Q structure, each (2^(octaves-1) *
|
c@13
|
95 // fftHop) input frames gives us an output structure conceptually
|
c@13
|
96 // like this:
|
c@10
|
97 //
|
c@10
|
98 // [][][][][][][][] <- fftHop frames per highest-octave output value
|
c@10
|
99 // [][][][][][][][] layered as many times as binsPerOctave (here 2)
|
c@10
|
100 // [--][--][--][--] <- fftHop*2 frames for the next lower octave
|
c@10
|
101 // [--][--][--][--] etc
|
c@10
|
102 // [------][------]
|
c@10
|
103 // [------][------]
|
c@10
|
104 // [--------------]
|
c@10
|
105 // [--------------]
|
c@10
|
106 //
|
c@13
|
107 // ***
|
c@13
|
108 //
|
c@13
|
109 // But the kernel we're using here has more than one temporally
|
c@13
|
110 // spaced atom; each individual cell is a row vector with
|
c@13
|
111 // atomsPerFrame * binsPerOctave elements, but that actually
|
c@13
|
112 // represents a rectangular matrix of result cells with width
|
c@13
|
113 // atomsPerFrame and height binsPerOctave. The columns of this
|
c@13
|
114 // matrix (the atoms) then need to be spaced by 2^(octave-1)
|
c@13
|
115 // relative to those from the highest octave.
|
c@10
|
116
|
c@15
|
117 // Reshape each row vector into the appropriate rectangular matrix
|
c@21
|
118 // and split into single-atom columns
|
c@19
|
119
|
c@44
|
120 emptyHops = kdata.firstCentre / kdata.atomSpacing; //!!! int? round?
|
c@65
|
121 // maxDrop = emptyHops * (pow 2 (octaves-1)) - emptyHops;
|
c@63
|
122 // eprintln "maxDrop = \(maxDrop)";
|
c@21
|
123
|
c@47
|
124 cqblocks =
|
c@47
|
125 map do octlist:
|
c@47
|
126 concatMap do rv:
|
c@21
|
127 cm.asColumns
|
c@21
|
128 (cm.generate do row col:
|
c@21
|
129 cm.at rv ((row * kdata.atomsPerFrame) + col) 0
|
c@21
|
130 done {
|
c@21
|
131 rows = kdata.binsPerOctave,
|
c@21
|
132 columns = kdata.atomsPerFrame
|
c@21
|
133 })
|
c@47
|
134 done octlist
|
c@47
|
135 done cqblocks;
|
c@21
|
136
|
c@21
|
137 cqblocks = array (map2 do octlist octave:
|
c@21
|
138 d = emptyHops * (pow 2 (octaves-octave)) - emptyHops;
|
c@63
|
139 // eprintln "dropping \(d)";
|
c@21
|
140 drop d octlist;
|
c@21
|
141 done cqblocks [1..octaves]);
|
c@14
|
142
|
c@17
|
143 assembleBlock bits =
|
c@59
|
144 (//eprintln "assembleBlock: structure of bits is:";
|
c@59
|
145 //eprintln (map length bits);
|
c@19
|
146
|
c@19
|
147 rows = octaves * kdata.binsPerOctave;
|
c@19
|
148 columns = (pow 2 (octaves - 1)) * kdata.atomsPerFrame;
|
c@19
|
149
|
c@18
|
150 cm.generate do row col:
|
c@19
|
151
|
c@19
|
152 // bits structure: [1,2,4,8,...]
|
c@19
|
153
|
c@19
|
154 // each elt of bits is a list of the chunks that should
|
c@19
|
155 // make up this block in that octave (lowest octave first)
|
c@19
|
156
|
c@19
|
157 // each chunk has atomsPerFrame * binsPerOctave elts in it
|
c@19
|
158
|
c@19
|
159 // row is disposed with 0 at the top, highest octave (in
|
c@19
|
160 // both pitch and index into bits structure)
|
c@19
|
161
|
c@18
|
162 oct = int (row / binsPerOctave);
|
c@19
|
163 binNo = row % kdata.binsPerOctave;
|
c@21
|
164
|
c@19
|
165 chunks = pow 2 oct;
|
c@21
|
166 colsPerAtom = int (columns / (chunks * kdata.atomsPerFrame));
|
c@21
|
167 atomNo = int (col / colsPerAtom);
|
c@21
|
168 atomOffset = col % colsPerAtom;
|
c@18
|
169
|
c@40
|
170 if atomOffset == 0 and atomNo < length bits[oct] then
|
c@21
|
171 bits[oct][atomNo][binNo];
|
c@20
|
172 else
|
c@20
|
173 cplx.zero
|
c@20
|
174 fi;
|
c@19
|
175
|
c@19
|
176 done { rows, columns };
|
c@19
|
177 );
|
c@15
|
178
|
c@72
|
179 assembleBlockSpectrogram bits =
|
c@72
|
180 (// As assembleBlock, but producing a dense magnitude
|
c@72
|
181 // spectrogram (rather than a complex output with zeros
|
c@72
|
182 // between the cell values in lower octaves). (todo: smoothing)
|
c@72
|
183
|
c@72
|
184 //eprintln "assembleBlockSpectrogram: structure of bits is:";
|
c@72
|
185 //eprintln (map length bits);
|
c@72
|
186
|
c@72
|
187 rows = octaves * kdata.binsPerOctave;
|
c@72
|
188 columns = (pow 2 (octaves - 1)) * kdata.atomsPerFrame;
|
c@72
|
189
|
c@72
|
190 mat.generate do row col:
|
c@72
|
191
|
c@72
|
192 oct = int (row / binsPerOctave);
|
c@72
|
193 binNo = row % kdata.binsPerOctave;
|
c@72
|
194
|
c@72
|
195 chunks = pow 2 oct;
|
c@72
|
196 colsPerAtom = int (columns / (chunks * kdata.atomsPerFrame));
|
c@72
|
197 atomNo = int (col / colsPerAtom);
|
c@72
|
198
|
c@72
|
199 if atomNo < length bits[oct] then
|
c@72
|
200 cplx.magnitude bits[oct][atomNo][binNo];
|
c@72
|
201 else
|
c@72
|
202 0
|
c@72
|
203 fi;
|
c@72
|
204
|
c@72
|
205 done { rows, columns };
|
c@72
|
206 );
|
c@72
|
207
|
c@72
|
208 processOctaveLists assembler octs =
|
c@17
|
209 case octs[0] of
|
c@17
|
210 block::rest:
|
c@19
|
211 (toAssemble = array
|
c@19
|
212 (map do oct:
|
c@21
|
213 n = kdata.atomsPerFrame * pow 2 oct;
|
c@17
|
214 if not empty? octs[oct] then
|
c@19
|
215 forBlock = array (take n octs[oct]);
|
c@17
|
216 octs[oct] := drop n octs[oct];
|
c@17
|
217 forBlock
|
c@17
|
218 else
|
c@19
|
219 array []
|
c@17
|
220 fi
|
c@19
|
221 done (keys octs));
|
c@72
|
222 assembler toAssemble :. \(processOctaveLists assembler octs));
|
c@17
|
223 _: []
|
c@15
|
224 esac;
|
c@15
|
225
|
c@63
|
226 //eprintln "cqblocks has \(length cqblocks) entries";
|
c@15
|
227
|
c@17
|
228 octaveLists = [:];
|
c@19
|
229
|
c@19
|
230 cqblocks = array cqblocks;
|
c@17
|
231 for [1..octaves] do oct:
|
c@17
|
232 octaveLists[octaves - oct] := cqblocks[oct-1];
|
c@17
|
233 done;
|
c@17
|
234 /*
|
c@17
|
235 \() (map2 do octlist octave:
|
c@17
|
236 println "oct \(octaves) - \(octave) = \(octaves - octave)";
|
c@17
|
237 octaveLists[octaves - octave] := octlist
|
c@17
|
238 done cqblocks [1..octaves]);
|
c@17
|
239 */
|
c@63
|
240 //eprintln "octaveLists keys are: \(keys octaveLists)";
|
c@15
|
241
|
c@40
|
242 {
|
c@40
|
243 kernel = kdata with {
|
c@47
|
244 binFrequencies = array
|
c@47
|
245 (concatMap do octave:
|
c@40
|
246 map do freq:
|
c@40
|
247 freq / (pow 2 octave);
|
c@40
|
248 done (reverse (list kdata.binFrequencies))
|
c@47
|
249 done [0..octaves-1])
|
c@40
|
250 },
|
c@72
|
251 octaves,
|
c@72
|
252 output type =
|
c@72
|
253 case type of
|
c@72
|
254 ComplexCQ ():
|
c@72
|
255 Complex (processOctaveLists assembleBlock octaveLists);
|
c@72
|
256 Spectrogram ():
|
c@72
|
257 Real (processOctaveLists assembleBlockSpectrogram octaveLists);
|
c@72
|
258 esac
|
c@40
|
259 }
|
c@10
|
260 );
|
c@10
|
261
|
c@37
|
262 { cqt }
|
c@10
|
263
|