Chris@366: /* Chris@366: Constant-Q library Chris@366: Copyright (c) 2013-2014 Queen Mary, University of London Chris@366: Chris@366: Permission is hereby granted, free of charge, to any person Chris@366: obtaining a copy of this software and associated documentation Chris@366: files (the "Software"), to deal in the Software without Chris@366: restriction, including without limitation the rights to use, copy, Chris@366: modify, merge, publish, distribute, sublicense, and/or sell copies Chris@366: of the Software, and to permit persons to whom the Software is Chris@366: furnished to do so, subject to the following conditions: Chris@366: Chris@366: The above copyright notice and this permission notice shall be Chris@366: included in all copies or substantial portions of the Software. Chris@366: Chris@366: THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, Chris@366: EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF Chris@366: MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND Chris@366: NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY Chris@366: CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF Chris@366: CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION Chris@366: WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. Chris@366: Chris@366: Except as contained in this notice, the names of the Centre for Chris@366: Digital Music; Queen Mary, University of London; and Chris Cannam Chris@366: shall not be used in advertising or otherwise to promote the sale, Chris@366: use or other dealings in this Software without prior written Chris@366: authorization. Chris@366: */ Chris@366: Chris@366: module cqt; Chris@366: Chris@366: cqtkernel = load cqtkernel; Chris@366: resample = load may.stream.resample; Chris@366: manipulate = load may.stream.manipulate; Chris@366: mat = load may.matrix; Chris@366: cm = load may.matrix.complex; Chris@366: framer = load may.stream.framer; Chris@366: cplx = load may.complex; Chris@366: fft = load may.transform.fft; Chris@366: vec = load may.vector; Chris@366: ch = load may.stream.channels; Chris@366: Chris@366: { pow, round, floor, ceil, log2, nextPowerOfTwo } = load may.mathmisc; Chris@366: Chris@366: cqt { maxFreq, minFreq, binsPerOctave } str = Chris@366: (sampleRate = str.sampleRate; Chris@366: octaves = ceil (log2 (maxFreq / minFreq)); Chris@366: // actualMinFreq = (maxFreq / (pow 2 octaves)) * (pow 2 (1/binsPerOctave)); Chris@366: Chris@366: kdata = cqtkernel.makeKernel { sampleRate, maxFreq, binsPerOctave }; Chris@366: Chris@366: // eprintln "sampleRate = \(sampleRate), maxFreq = \(maxFreq), minFreq = \(minFreq), actualMinFreq = \(actualMinFreq), octaves = \(octaves), binsPerOctave = \(binsPerOctave), fftSize = \(kdata.fftSize), hop = \(kdata.fftHop)"; Chris@366: Chris@366: // eprintln "atomsPerFrame = \(kdata.atomsPerFrame)"; Chris@366: Chris@366: padding = (kdata.fftSize * (pow 2 (octaves-1))); Chris@366: Chris@366: // eprintln "padding = \(padding)"; Chris@366: Chris@366: str = manipulate.paddedBy padding str; Chris@366: Chris@366: streams = manipulate.duplicated octaves str; Chris@366: Chris@366: // forward transform uses the conjugate-transposed kernel, inverse Chris@366: // uses the original Chris@366: kernel = cm.transposed (cm.conjugateTransposed kdata.kernel); Chris@366: Chris@366: // eprintln "have kernel"; Chris@366: Chris@366: fftFunc = fft.forward kdata.fftSize; Chris@366: Chris@366: cqblocks = Chris@366: map do octave: Chris@366: frames = map ch.mixedDown //!!! mono for now Chris@366: (framer.frames kdata.fftSize [ Hop kdata.fftHop, Padded false ] Chris@366: (resample.decimated (pow 2 octave) streams[octave])); Chris@366: map do frame: Chris@366: freq = fftFunc (cplx.complexArray frame (vec.zeros kdata.fftSize)); Chris@366: // eprintln "octave = \(octave), frame = \(vec.list frame)"; Chris@366: // eprintln "octave = \(octave), freq = \(freq)"; Chris@366: cm.product kernel (cm.newComplexColumnVector freq); Chris@366: done frames; Chris@366: done [0..octaves-1]; Chris@366: Chris@366: // The cqblocks list is a list>. Each top-level list Chris@366: // corresponds to an octave, from highest to lowest, each having Chris@366: // twice as many elements in its list as the next octave. The Chris@366: // sub-lists are sampled in time with an effective spacing of Chris@366: // fftSize * 2^(octave-1) audio frames, and the matrices are row Chris@366: // vectors with atomsPerFrame * binsPerOctave complex elements. Chris@366: // Chris@366: // *** Chris@366: // Chris@366: // In a typical constant-Q structure, each (2^(octaves-1) * Chris@366: // fftHop) input frames gives us an output structure conceptually Chris@366: // like this: Chris@366: // Chris@366: // [][][][][][][][] <- fftHop frames per highest-octave output value Chris@366: // [][][][][][][][] layered as many times as binsPerOctave (here 2) Chris@366: // [--][--][--][--] <- fftHop*2 frames for the next lower octave Chris@366: // [--][--][--][--] etc Chris@366: // [------][------] Chris@366: // [------][------] Chris@366: // [--------------] Chris@366: // [--------------] Chris@366: // Chris@366: // *** Chris@366: // Chris@366: // But the kernel we're using here has more than one temporally Chris@366: // spaced atom; each individual cell is a row vector with Chris@366: // atomsPerFrame * binsPerOctave elements, but that actually Chris@366: // represents a rectangular matrix of result cells with width Chris@366: // atomsPerFrame and height binsPerOctave. The columns of this Chris@366: // matrix (the atoms) then need to be spaced by 2^(octave-1) Chris@366: // relative to those from the highest octave. Chris@366: Chris@366: // Reshape each row vector into the appropriate rectangular matrix Chris@366: // and split into single-atom columns Chris@366: Chris@366: emptyHops = kdata.firstCentre / kdata.atomSpacing; //!!! int? round? Chris@366: // maxDrop = emptyHops * (pow 2 (octaves-1)) - emptyHops; Chris@366: // eprintln "maxDrop = \(maxDrop)"; Chris@366: Chris@366: cqblocks = Chris@366: map do octlist: Chris@366: concatMap do rv: Chris@366: cm.asColumns Chris@366: (cm.generate do row col: Chris@366: cm.at rv ((row * kdata.atomsPerFrame) + col) 0 Chris@366: done { Chris@366: rows = kdata.binsPerOctave, Chris@366: columns = kdata.atomsPerFrame Chris@366: }) Chris@366: done octlist Chris@366: done cqblocks; Chris@366: Chris@366: cqblocks = array (map2 do octlist octave: Chris@366: d = emptyHops * (pow 2 (octaves-octave)) - emptyHops; Chris@366: // eprintln "dropping \(d)"; Chris@366: drop d octlist; Chris@366: done cqblocks [1..octaves]); Chris@366: Chris@366: assembleBlock bits = Chris@366: (//eprintln "assembleBlock: structure of bits is:"; Chris@366: //eprintln (map length bits); Chris@366: Chris@366: rows = octaves * kdata.binsPerOctave; Chris@366: columns = (pow 2 (octaves - 1)) * kdata.atomsPerFrame; Chris@366: Chris@366: cm.generate do row col: Chris@366: Chris@366: // bits structure: [1,2,4,8,...] Chris@366: Chris@366: // each elt of bits is a list of the chunks that should Chris@366: // make up this block in that octave (lowest octave first) Chris@366: Chris@366: // each chunk has atomsPerFrame * binsPerOctave elts in it Chris@366: Chris@366: // row is disposed with 0 at the top, highest octave (in Chris@366: // both pitch and index into bits structure) Chris@366: Chris@366: oct = int (row / binsPerOctave); Chris@366: binNo = row % kdata.binsPerOctave; Chris@366: Chris@366: chunks = pow 2 oct; Chris@366: colsPerAtom = int (columns / (chunks * kdata.atomsPerFrame)); Chris@366: atomNo = int (col / colsPerAtom); Chris@366: atomOffset = col % colsPerAtom; Chris@366: Chris@366: if atomOffset == 0 and atomNo < length bits[oct] then Chris@366: bits[oct][atomNo][binNo]; Chris@366: else Chris@366: cplx.zero Chris@366: fi; Chris@366: Chris@366: done { rows, columns }; Chris@366: ); Chris@366: Chris@366: assembleBlockSpectrogram bits = Chris@366: (// As assembleBlock, but producing a dense magnitude Chris@366: // spectrogram (rather than a complex output). (todo: Chris@366: // interpolation, smoothing) Chris@366: Chris@366: //eprintln "assembleBlockSpectrogram: structure of bits is:"; Chris@366: //eprintln (map length bits); Chris@366: Chris@366: rows = octaves * kdata.binsPerOctave; Chris@366: columns = (pow 2 (octaves - 1)) * kdata.atomsPerFrame; Chris@366: Chris@366: mat.generate do row col: Chris@366: Chris@366: oct = int (row / binsPerOctave); Chris@366: binNo = row % kdata.binsPerOctave; Chris@366: Chris@366: chunks = pow 2 oct; Chris@366: colsPerAtom = int (columns / (chunks * kdata.atomsPerFrame)); Chris@366: atomNo = int (col / colsPerAtom); Chris@366: Chris@366: if atomNo < length bits[oct] then Chris@366: cplx.magnitude bits[oct][atomNo][binNo]; Chris@366: else Chris@366: 0 Chris@366: fi; Chris@366: Chris@366: done { rows, columns }; Chris@366: ); Chris@366: Chris@366: processOctaveLists assembler octs = Chris@366: case octs[0] of Chris@366: block::rest: Chris@366: (toAssemble = array Chris@366: (map do oct: Chris@366: n = kdata.atomsPerFrame * pow 2 oct; Chris@366: if not empty? octs[oct] then Chris@366: forBlock = array (take n octs[oct]); Chris@366: octs[oct] := drop n octs[oct]; Chris@366: forBlock Chris@366: else Chris@366: array [] Chris@366: fi Chris@366: done (keys octs)); Chris@366: assembler toAssemble :. \(processOctaveLists assembler octs)); Chris@366: _: [] Chris@366: esac; Chris@366: Chris@366: //eprintln "cqblocks has \(length cqblocks) entries"; Chris@366: Chris@366: octaveLists = [:]; Chris@366: Chris@366: cqblocks = array cqblocks; Chris@366: for [1..octaves] do oct: Chris@366: octaveLists[octaves - oct] := cqblocks[oct-1]; Chris@366: done; Chris@366: /* Chris@366: \() (map2 do octlist octave: Chris@366: println "oct \(octaves) - \(octave) = \(octaves - octave)"; Chris@366: octaveLists[octaves - octave] := octlist Chris@366: done cqblocks [1..octaves]); Chris@366: */ Chris@366: //eprintln "octaveLists keys are: \(keys octaveLists)"; Chris@366: Chris@366: { Chris@366: kernel = kdata with { Chris@366: binFrequencies = array Chris@366: (concatMap do octave: Chris@366: map do freq: Chris@366: freq / (pow 2 octave); Chris@366: done (reverse (list kdata.binFrequencies)) Chris@366: done [0..octaves-1]) Chris@366: }, Chris@366: sampleRate, Chris@366: octaves, Chris@366: get cqComplex () = processOctaveLists assembleBlock octaveLists, Chris@366: get cqSpectrogram () = processOctaveLists assembleBlockSpectrogram octaveLists, Chris@366: } Chris@366: ); Chris@366: Chris@366: { cqt } Chris@366: