module timefreq;

// Obtain the time-frequency representation (based on constant-Q
// transform) as transcription input

af = load may.stream.audiofile;
mat = load may.matrix;
plot = load may.plot;
vec = load may.vector;

{ pow } = load may.mathmisc;

{ resampledTo } = load may.stream.resample;

{ cqt } = load cqt;

// prepareTimeFrequency wavfile
//
// Open wavfile through af.openMono, pass it through resampledTo 44100,
// run the constant-Q transform over it and return a list of column
// vectors picked from the spectrogram output:
//
//  - columns before the first, and after the last, column whose sum
//    exceeds 0.1 are excluded from the returned range;
//  - columns are picked at a spacing of (sampleRate / 25) samples;
//  - each returned column is sliced from index 55 to its end.
//
// Fails (via failWith) if the stream length is not Known, or if the
// spectrogram output is not Real. Progress diagnostics are written
// with eprintln. If nothing exceeds the 0.1 threshold the result is
// an empty list.
prepareTimeFrequency wavfile =
   (stream = resampledTo 44100 (af.openMono wavfile);

    // Only used for the diagnostic below, but also enforces that the
    // stream has a known length.
    streamLength =
        case stream.available of
        Known n: n;
        _: failWith "Audio file length unknown?!";
        esac;

    eprintln "streamLength = \(streamLength)";

    //!!! original also scales to peak = 0.5

    cq = cqt {
        maxFreq = stream.sampleRate / 3,
        minFreq = 27.5,
        binsPerOctave = 60
        } stream;

    //!!! note: original also modifies the Q and atomHopFactor
    eprintln "atomSpacing = \(cq.kernel.atomSpacing)";

    matrices = case cq.output (Spectrogram ()) of
        Real mm: mm;
        _: failWith "Expected real";
        esac;

    // Guarded: head of an empty list would throw, and a diagnostic
    // message should not be the reason this function fails.
    if empty? matrices then
        eprintln "have 0 matrices"
    else
        eprintln "have \(length matrices) matrices of size \(mat.size (head matrices)), isRowMajor? = \(mat.isRowMajor? (head matrices))"
    fi;

    // One level (sum of the column's values) per spectrogram column,
    // across all matrices in order.
    levels = concatMap do m:
        map do c: vec.sum c done (mat.asColumns m);
    done matrices;

    // nztail: levels from the first value > 0.1 onward.
    // nzonly: nztail with everything after its last value > 0.1 removed.
    nztail = find (> 0.1) levels;
    nzonly = reverse (find (> 0.1) (reverse nztail));

    eprintln "non-zero columns start at \(length levels - length nztail), go on for \(length nzonly) [of \(length levels)]";

    // Convert column counts into sample counts.
    nzstart = (length levels - length nztail) * cq.kernel.atomSpacing;
    nzduration = (length nzonly) * cq.kernel.atomSpacing;

    // Get a stream of columns at 25 per second.
    //
    // The original picks samples at a rate of 100-per-second then
    // median filters to reduce noise then picks samples again at
    // 25-per-second. We don't do that (yet)

    samplesPerCol = stream.sampleRate / 25;

    // Running sample counter: advanced by atomSpacing for every source
    // column; a column is emitted each time it reaches samplesPerCol.
    // NOTE(review): with this initial value the first emitted column
    // appears to be the one just before the first above-threshold
    // column (when there is one) -- confirm that is intended.
    var sample = samplesPerCol - nzstart;

    columns = take (nzduration / samplesPerCol)
       (concatMap do m:
            concatMap do col:
                sample := sample + cq.kernel.atomSpacing;
                if sample >= samplesPerCol then
                    sample := sample - samplesPerCol;
                    [col]
                else
                    []
                fi;
            done (mat.asColumns m);
        done matrices);

    // Guarded for the same reason as above: columns is empty when no
    // level exceeded the threshold.
    if empty? columns then
        eprintln "have 0 columns"
    else
        eprintln "have \(length columns) columns of \(vec.length (head columns)) values each"
    fi;

    // drop the lowest 55 of the 600 bins
    columns = map do c:
        vec.slice c 55 (vec.length c);
    done columns;

    if empty? columns then
        eprintln "now have 0 columns"
    else
        eprintln "now have \(length columns) columns of \(vec.length (head columns))"
    fi;

//    plot.plot [ Grid (mat.fromColumns columns) ];

    columns);


{
    prepareTimeFrequency
}