/*
    Constant-Q library
    Copyright (c) 2013-2014 Queen Mary, University of London

    Permission is hereby granted, free of charge, to any person
    obtaining a copy of this software and associated documentation
    files (the "Software"), to deal in the Software without
    restriction, including without limitation the rights to use, copy,
    modify, merge, publish, distribute, sublicense, and/or sell copies
    of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

    The above copyright notice and this permission notice shall be
    included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
    CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
    CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

    Except as contained in this notice, the names of the Centre for
    Digital Music; Queen Mary, University of London; and Chris Cannam
    shall not be used in advertising or otherwise to promote the sale,
    use or other dealings in this Software without prior written
    authorization.
*/

module cqt;

cqtkernel = load cqtkernel;
resample = load may.stream.resample;
manipulate = load may.stream.manipulate;
mat = load may.matrix;
cm = load may.matrix.complex;
framer = load may.stream.framer;
cplx = load may.complex;
fft = load may.transform.fft;
vec = load may.vector;
ch = load may.stream.channels;

{ pow, round, floor, ceil, log2, nextPowerOfTwo } = load may.mathmisc;

// Constant-Q transform of an audio stream.
//
// Arguments:
//   { maxFreq, minFreq, binsPerOctave } -- analysis range and resolution.
//       minFreq is used only to derive the octave count,
//       octaves = ceil (log2 (maxFreq / minFreq)); the kernel itself is
//       built from sampleRate, maxFreq and binsPerOctave.
//   str -- the input stream; its sampleRate field is read, and it is
//       padded by fftSize * 2^(octaves-1) before being duplicated once
//       per octave.
//
// Processing, per octave index k in [0..octaves-1]: the k-th duplicate of
// the stream is decimated by 2^k, mixed down to mono, cut into frames of
// kdata.fftSize with hop kdata.fftHop, transformed with a forward FFT and
// multiplied by the kernel matrix.
//
// Returns a structure with:
//   kernel  -- kdata with binFrequencies replaced by the per-octave
//              frequencies (reversed kernel frequencies divided by 2^k,
//              for k = 0 first).
//   octaves -- the octave count computed above.
//   output  -- function taking ComplexCQ () or Spectrogram () and
//              returning Complex blocks or Real blocks respectively,
//              where blocks is a lazily assembled list of matrices with
//              octaves * binsPerOctave rows and
//              2^(octaves-1) * atomsPerFrame columns.
//
// NOTE: output consumes a mutable hash of per-octave lists that is shared
// by every call to output on the same result; calling output more than
// once (or walking two results concurrently) will therefore not yield
// the same blocks twice.
cqt { maxFreq, minFreq, binsPerOctave } str =
   (sampleRate = str.sampleRate;
    octaves = ceil (log2 (maxFreq / minFreq));
//    actualMinFreq = (maxFreq / (pow 2 octaves)) * (pow 2 (1/binsPerOctave));

    kdata = cqtkernel.makeKernel { sampleRate, maxFreq, binsPerOctave };

//    eprintln "sampleRate = \(sampleRate), maxFreq = \(maxFreq), minFreq = \(minFreq), actualMinFreq = \(actualMinFreq), octaves = \(octaves), binsPerOctave = \(binsPerOctave), fftSize = \(kdata.fftSize), hop = \(kdata.fftHop)";

//    eprintln "atomsPerFrame = \(kdata.atomsPerFrame)";

    padding = (kdata.fftSize * (pow 2 (octaves-1)));

//    eprintln "padding = \(padding)";

    // Rebinds str to the padded stream; everything below sees the padded one.
    str = manipulate.paddedBy padding str;

    streams = manipulate.duplicated octaves str;

    //!!! can't be right!
    // NOTE(review): presumably the two transposes cancel, leaving the
    // element-wise complex conjugate of kdata.kernel -- confirm intent.
    kernel = cm.transposed (cm.conjugateTransposed kdata.kernel);

//    eprintln "have kernel";

    fftFunc = fft.forward kdata.fftSize;

    cqblocks =
        map do octave:
            frames = map ch.mixedDown //!!! mono for now
               (framer.frames kdata.fftSize [ Hop kdata.fftHop, Padded false ]
                    (resample.decimated (pow 2 octave) streams[octave]));
            map do frame:
                freq = fftFunc (cplx.complexArray frame (vec.zeros kdata.fftSize));
//                eprintln "octave = \(octave), frame = \(vec.list frame)";
//                eprintln "octave = \(octave), freq = \(freq)";
                cm.product kernel (cm.newComplexColumnVector freq);
            done frames;
        done [0..octaves-1];

    // The cqblocks list is a list<list<matrix>>. Each top-level list
    // corresponds to an octave, from highest to lowest, each having
    // twice as many elements in its list as the next octave. The
    // sub-lists are sampled in time with an effective spacing of
    // fftSize * 2^(octave-1) audio frames, and the matrices are
    // single-column vectors (the product of the kernel with a column
    // vector) with atomsPerFrame * binsPerOctave complex elements.
    //
    // ***
    //
    // In a typical constant-Q structure, each (2^(octaves-1) *
    // fftHop) input frames gives us an output structure conceptually
    // like this:
    //
    // [][][][][][][][]   <- fftHop frames per highest-octave output value
    // [][][][][][][][]      layered as many times as binsPerOctave (here 2)
    // [--][--][--][--]   <- fftHop*2 frames for the next lower octave
    // [--][--][--][--]      etc
    // [------][------]
    // [------][------]
    // [--------------]
    // [--------------]
    //
    // ***
    //
    // But the kernel we're using here has more than one temporally
    // spaced atom; each individual cell is a vector with
    // atomsPerFrame * binsPerOctave elements, but that actually
    // represents a rectangular matrix of result cells with width
    // atomsPerFrame and height binsPerOctave. The columns of this
    // matrix (the atoms) then need to be spaced by 2^(octave-1)
    // relative to those from the highest octave.

    // Reshape each vector into the appropriate rectangular matrix
    // and split into single-atom columns

    emptyHops = kdata.firstCentre / kdata.atomSpacing; //!!! int? round?
//    maxDrop = emptyHops * (pow 2 (octaves-1)) - emptyHops;
//    eprintln "maxDrop = \(maxDrop)";

    cqblocks =
        map do octlist:
            concatMap do rv:
                cm.asColumns
                   (cm.generate do row col:
                        // element (row, col) of the reshaped matrix lives at
                        // index row * atomsPerFrame + col of the vector
                        cm.at rv ((row * kdata.atomsPerFrame) + col) 0
                    done {
                        rows = kdata.binsPerOctave,
                        columns = kdata.atomsPerFrame
                    })
            done octlist
        done cqblocks;

    // Drop leading columns from each octave's list; the count grows with
    // 2^(octaves-octave), so the first (highest) octave drops the most.
    cqblocks = array (map2 do octlist octave:
        d = emptyHops * (pow 2 (octaves-octave)) - emptyHops;
//        eprintln "dropping \(d)";
        drop d octlist;
    done cqblocks [1..octaves]);

    // Build one complex output block from bits, where bits[oct] is the
    // array of single-atom columns taken for octave index oct. Cells
    // between the atoms of sparser octaves are filled with cplx.zero.
    assembleBlock bits =
       (//eprintln "assembleBlock: structure of bits is:";
        //eprintln (map length bits);

        rows = octaves * kdata.binsPerOctave;
        columns = (pow 2 (octaves - 1)) * kdata.atomsPerFrame;

        cm.generate do row col:

            // bits structure: [1,2,4,8,...]

            // each elt of bits is a list of the chunks that should
            // make up this block in that octave (lowest octave first)

            // each chunk has atomsPerFrame * binsPerOctave elts in it

            // row is disposed with 0 at the top, highest octave (in
            // both pitch and index into bits structure)

            // Use the kernel's binsPerOctave here, as for rows and binNo,
            // so that the octave index and the bin index always agree.
            oct = int (row / kdata.binsPerOctave);
            binNo = row % kdata.binsPerOctave;

            chunks = pow 2 oct;
            colsPerAtom = int (columns / (chunks * kdata.atomsPerFrame));
            atomNo = int (col / colsPerAtom);
            atomOffset = col % colsPerAtom;

            // Only the first column of each atom's span carries a value.
            if atomOffset == 0 and atomNo < length bits[oct] then
                bits[oct][atomNo][binNo];
            else
                cplx.zero
            fi;

        done { rows, columns };
        );

    assembleBlockSpectrogram bits =
       (// As assembleBlock, but producing a dense magnitude
        // spectrogram (rather than a complex output with zeros
        // between the cell values in lower octaves). (todo: smoothing)

        //eprintln "assembleBlockSpectrogram: structure of bits is:";
        //eprintln (map length bits);

        rows = octaves * kdata.binsPerOctave;
        columns = (pow 2 (octaves - 1)) * kdata.atomsPerFrame;

        mat.generate do row col:

            // Use the kernel's binsPerOctave here, as for rows and binNo,
            // so that the octave index and the bin index always agree.
            oct = int (row / kdata.binsPerOctave);
            binNo = row % kdata.binsPerOctave;

            chunks = pow 2 oct;
            colsPerAtom = int (columns / (chunks * kdata.atomsPerFrame));
            atomNo = int (col / colsPerAtom);

            // Every column within an atom's span repeats that atom's
            // magnitude; columns past the available atoms are zero.
            if atomNo < length bits[oct] then
                cplx.magnitude bits[oct][atomNo][binNo];
            else
                0
            fi;

        done { rows, columns };
        );

    // Lazily turn the per-octave column lists in the mutable hash octs
    // into a list of assembled blocks. For each block, atomsPerFrame *
    // 2^oct columns are taken from (and removed from) octs[oct]. The
    // list ends once octs[0] is empty.
    processOctaveLists assembler octs =
        case octs[0] of
        block::rest:
           (toAssemble = array
               (map do oct:
                    n = kdata.atomsPerFrame * pow 2 oct;
                    if not empty? octs[oct] then
                        forBlock = array (take n octs[oct]);
                        octs[oct] := drop n octs[oct];
                        forBlock
                    else
                        array []
                    fi
                // Iterate octave indices in explicit ascending order: the
                // assemblers index toAssemble by position, so this must
                // not depend on the iteration order of the hash's keys.
                done [0..octaves-1]);
            // toAssemble is forced (via array) before the lazy tail, so
            // the removals above happen before the next block is built.
            assembler toAssemble :. \(processOctaveLists assembler octs));
         _: []
        esac;

    //eprintln "cqblocks has \(length cqblocks) entries";

    octaveLists = [:];

    // Store the octave lists in reverse: key 0 holds the last entry of
    // cqblocks (the most heavily decimated stream), key octaves-1 the first.
    cqblocks = array cqblocks;
    for [1..octaves] do oct:
        octaveLists[octaves - oct] := cqblocks[oct-1];
    done;
    /*
    \() (map2 do octlist octave:
             println "oct \(octaves) - \(octave) = \(octaves - octave)";
             octaveLists[octaves - octave] := octlist
         done cqblocks [1..octaves]);
    */
    //eprintln "octaveLists keys are: \(keys octaveLists)";

    {
        kernel = kdata with {
            binFrequencies = array
               (concatMap do octave:
                    map do freq:
                        freq / (pow 2 octave);
                    done (reverse (list kdata.binFrequencies))
                done [0..octaves-1])
        },
        octaves,
        output type =
            case type of
            ComplexCQ ():
                Complex (processOctaveLists assembleBlock octaveLists);
            Spectrogram ():
                Real (processOctaveLists assembleBlockSpectrogram octaveLists);
            esac
    }
    );

{ cqt }