Mercurial > hg > plosone_underreview
view scripts/PitchBihist.py @ 36:3b67cd634b9a branch-tests
tests for pitch bihist, before refactoring
author | Maria Panteli |
---|---|
date | Thu, 14 Sep 2017 14:16:29 +0100 |
parents | e50c63cf96be |
children | 2cc444441f42 |
line wrap: on
line source
# -*- coding: utf-8 -*- """ Created on Tue Feb 2 22:26:10 2016 @author: mariapanteli """ import numpy as np import os import scipy.signal import smoothiecore as s class PitchBihist: def __init__(self, win2sec=8): self.y = None self.sr = None self.chroma = None self.chromasr = None self.melodiasr = 44100. / 128. self.bihist = None self.win2sec = win2sec def hz_to_cents(self, freq_Hz, ref_Hz=32.703, n_cents=1200): """ convert frequency values from Hz to cents reference frequency at C1 """ freq_cents = np.round(n_cents * np.log2(freq_Hz/ref_Hz)) return freq_cents def wrap_to_octave(self, cents, octave_length=1200): """ wrap to a single octave 0-1200 """ octave_cents = cents % octave_length return octave_cents def get_melody_from_file(self, melodia_file, stop_sec=None): if not os.path.exists(melodia_file): return [] data = np.loadtxt(melodia_file, delimiter=',') times, freqs = (data[:, 0], data[:, 1]) self.chromasr = 1. / (times[1] - times[0]) if stop_sec is not None: stop_idx = np.where(times < stop_sec)[0] times, freqs = times[stop_idx], freqs[stop_idx] freqs[freqs<=0] = np.nan melody = freqs return melody def get_melody_matrix(self, melody): n_bins = 60 n_frames = len(melody) melody_cents = self.hz_to_cents(melody, n_cents=n_bins) melody_octave = self.wrap_to_octave(melody_cents, octave_length=n_bins) melody_matrix = np.zeros((n_bins, n_frames)) for time, pitch in enumerate(melody_octave): if not np.isnan(pitch): melody_matrix[int(pitch), time] = 1 return melody_matrix def bihist_from_melodia(self, filename='sample_melodia.csv', secondframedecomp=True, stop_sec=None): melody = self.get_melody_from_file(filename, stop_sec=stop_sec) if len(melody) == 0: self.bihist = [] return self.bihist melody_matrix = self.get_melody_matrix(melody) if secondframedecomp: nbins, norigframes = melody_matrix.shape win2 = int(round(self.win2sec*self.chromasr)) hop2 = int(round(0.5*self.chromasr)) if norigframes<=win2: nframes = 1 win2 = norigframes else: nframes = int(np.ceil((norigframes-win2)/float(hop2))) bihistframes = np.empty((nbins*nbins, nframes)) for i in range(nframes): # loop over all 8-sec frames frame = melody_matrix[:, (i*hop2):(i*hop2+win2)] bihist = self.bihistogram(frame) bihist = np.reshape(bihist, -1) bihistframes[:, i] = bihist self.bihist = bihistframes else: self.bihist = self.bihistogram(melody_matrix) return self.bihist def bihistogram(self, spec, winsec=0.5, align=True): win = int(round(winsec*self.chromasr)) ker = np.concatenate([np.zeros((win, 1)), np.ones((win+1, 1))], axis=0) spec = spec.T # transpose to have franes as rows in convolution # energy threshold thr = 0.3*np.max(spec) spec[spec < max(thr, 0)] = 0 # transitions via convolution tra = scipy.signal.convolve2d(spec, ker, mode='same') tra[spec > 0] = 0 # multiply with original B = np.dot(tra.T, spec) # normalize to [0, 1] mxB = np.max(B) mnB = np.min(B) if mxB != mnB: B = (B - mnB)/float(mxB-mnB) # circshift to highest? if align: ref = np.argmax(np.sum(spec, axis=0)) B = np.roll(B, -ref, axis=0) B = np.roll(B, -ref, axis=1) return B def bihist_from_chroma(self, filename='test.wav', secondframedecomp=True): self.chroma, self.chromasr = s.get_smoothie_for_bihist(filename=filename, hopinsec=0.005) if secondframedecomp: win2 = int(round(8*self.chromasr)) hop2 = int(round(0.5*self.chromasr)) nbins, norigframes = self.chroma.shape if norigframes<win2: nframes = 1 else: nframes = int(1+np.floor((norigframes-win2)/float(hop2))) bihistframes = np.empty((nbins*nbins, nframes)) for i in range(nframes): # loop over all 8-sec frames frame = self.chroma[:, (i*hop2):min((i*hop2+win2),norigframes)] bihist = self.bihistogram(frame) bihist = np.reshape(bihist, -1) bihistframes[:, i] = bihist self.bihist = bihistframes else: self.bihist = np.reshape(self.bihistogram(), -1) def bihist_from_precomp_chroma(self, align=False): win2 = int(round(self.win2sec*self.chromasr)) hop2 = int(round(0.5*self.chromasr)) nbins, norigframes = self.chroma.shape if norigframes<win2: nframes = 1 else: nframes = int(1+np.floor((norigframes-win2)/float(hop2))) bihistframes = np.empty((nbins*nbins, nframes)) for i in range(nframes): # loop over all 8-sec frames frame = self.chroma[:, (i*hop2):min((i*hop2+win2),norigframes)] bihist = self.bihistogram(frame, align=align) bihist = np.reshape(bihist, -1) bihistframes[:, i] = bihist self.bihist = bihistframes def get_pitchbihist(self, filename='test.wav'): self.bihist_from_chroma(filename=filename) return self.bihist def get_pitchbihist_from_chroma(self, chroma=[], chromasr=[]): self.chroma = chroma self.chromasr = chromasr self.bihist_from_precomp_chroma(align=False) return self.bihist if __name__ == '__main__': pb = PitchBihist() pb.get_pitchbihist()