# HG changeset patch # User Maria Panteli # Date 1505394989 -3600 # Node ID 3b67cd634b9a365453265c6d2d1a1c70c37cea64 # Parent c4428589b82b07d444655d1de3e51c19447c204b tests for pitch bihist, before refactoring diff -r c4428589b82b -r 3b67cd634b9a scripts/PitchBihist.py --- a/scripts/PitchBihist.py Thu Sep 14 13:07:19 2017 +0100 +++ b/scripts/PitchBihist.py Thu Sep 14 14:16:29 2017 +0100 @@ -4,10 +4,11 @@ @author: mariapanteli """ +import numpy as np +import os +import scipy.signal + import smoothiecore as s -import numpy -import scipy.signal -# import librosa class PitchBihist: @@ -16,9 +17,109 @@ self.sr = None self.chroma = None self.chromasr = None + self.melodiasr = 44100. / 128. self.bihist = None self.win2sec = win2sec + + def hz_to_cents(self, freq_Hz, ref_Hz=32.703, n_cents=1200): + """ convert frequency values from Hz to cents + reference frequency at C1 + """ + freq_cents = np.round(n_cents * np.log2(freq_Hz/ref_Hz)) + return freq_cents + + + def wrap_to_octave(self, cents, octave_length=1200): + """ wrap to a single octave 0-1200 + """ + octave_cents = cents % octave_length + return octave_cents + + + def get_melody_from_file(self, melodia_file, stop_sec=None): + if not os.path.exists(melodia_file): + return [] + data = np.loadtxt(melodia_file, delimiter=',') + times, freqs = (data[:, 0], data[:, 1]) + self.chromasr = 1. / (times[1] - times[0]) + if stop_sec is not None: + stop_idx = np.where(times < stop_sec)[0] + times, freqs = times[stop_idx], freqs[stop_idx] + freqs[freqs<=0] = np.nan + melody = freqs + return melody + + + def get_melody_matrix(self, melody): + n_bins = 60 + n_frames = len(melody) + melody_cents = self.hz_to_cents(melody, n_cents=n_bins) + melody_octave = self.wrap_to_octave(melody_cents, octave_length=n_bins) + melody_matrix = np.zeros((n_bins, n_frames)) + for time, pitch in enumerate(melody_octave): + if not np.isnan(pitch): + melody_matrix[int(pitch), time] = 1 + return melody_matrix + + + def bihist_from_melodia(self, filename='sample_melodia.csv', secondframedecomp=True, stop_sec=None): + melody = self.get_melody_from_file(filename, stop_sec=stop_sec) + if len(melody) == 0: + self.bihist = [] + return self.bihist + melody_matrix = self.get_melody_matrix(melody) + if secondframedecomp: + nbins, norigframes = melody_matrix.shape + win2 = int(round(self.win2sec*self.chromasr)) + hop2 = int(round(0.5*self.chromasr)) + if norigframes<=win2: + nframes = 1 + win2 = norigframes + else: + nframes = int(np.ceil((norigframes-win2)/float(hop2))) + bihistframes = np.empty((nbins*nbins, nframes)) + for i in range(nframes): # loop over all 8-sec frames + frame = melody_matrix[:, (i*hop2):(i*hop2+win2)] + bihist = self.bihistogram(frame) + bihist = np.reshape(bihist, -1) + bihistframes[:, i] = bihist + self.bihist = bihistframes + else: + self.bihist = self.bihistogram(melody_matrix) + return self.bihist + + + def bihistogram(self, spec, winsec=0.5, align=True): + win = int(round(winsec*self.chromasr)) + ker = np.concatenate([np.zeros((win, 1)), np.ones((win+1, 1))], axis=0) + spec = spec.T # transpose to have franes as rows in convolution + + # energy threshold + thr = 0.3*np.max(spec) + spec[spec < max(thr, 0)] = 0 + + # transitions via convolution + tra = scipy.signal.convolve2d(spec, ker, mode='same') + tra[spec > 0] = 0 + + # multiply with original + B = np.dot(tra.T, spec) + + # normalize to [0, 1] + mxB = np.max(B) + mnB = np.min(B) + if mxB != mnB: + B = (B - mnB)/float(mxB-mnB) + + # circshift to highest? + if align: + ref = np.argmax(np.sum(spec, axis=0)) + B = np.roll(B, -ref, axis=0) + B = np.roll(B, -ref, axis=1) + return B + + def bihist_from_chroma(self, filename='test.wav', secondframedecomp=True): self.chroma, self.chromasr = s.get_smoothie_for_bihist(filename=filename, hopinsec=0.005) if secondframedecomp: @@ -28,96 +129,17 @@ if norigframes0] - melody = freqs - n_frames = len(melody) - melody_cents = hz_to_cents(melody, n_cents=n_bins) - melody_octave = wrap_to_octave(melody_cents, octave_length=n_bins) - melody_matrix = numpy.zeros((n_bins, n_frames)) - for time, pitch in enumerate(melody_octave): - if not numpy.isnan(pitch): - melody_matrix[int(pitch), time] = 1 - if secondframedecomp: - win2 = int(round(self.win2sec*self.chromasr)) - hop2 = int(round(0.5*self.chromasr)) - nbins, norigframes = melody_matrix.shape - if norigframes 0] = 0 - - # multiply with original - B = numpy.dot(tra.T, spec) - - # normalize - mxB = numpy.max(B) - mnB = numpy.min(B) - if mxB != mnB: - B = (B - mnB)/float(mxB-mnB) - - # circshift to highest? - if align: - ref = numpy.argmax(numpy.sum(spec, axis=0)) - B = numpy.roll(B, -ref, axis=0) - B = numpy.roll(B, -ref, axis=1) - return B def bihist_from_precomp_chroma(self, align=False): win2 = int(round(self.win2sec*self.chromasr)) @@ -126,12 +148,12 @@ if norigframes 11 seconds + n_frames_true = np.round((dur_sec - pbi.win2sec) * 2) # for .5 sec hop size + assert bihist.shape[1] == n_frames_true + + +def test_bihistogram(): + melody = 440 * np.ones(1000) + melody_matrix = pbi.get_melody_matrix(melody) + bihist = pbi.bihistogram(melody_matrix, align=False) + assert np.array_equal(bihist, np.zeros((60, 60))) + + +def test_bihistogram_values(): + melody = np.concatenate([440 * np.ones(500), 32.703 * np.ones(500)]) + melody_matrix = pbi.get_melody_matrix(melody) + # melody transitions from A to C (bin 45/60 to bin 0/60) + bihist = pbi.bihistogram(melody_matrix, align=False) + # expect only element [45, 0] to be non-zero + assert bihist[45, 0] > 0 and (np.sum(bihist) - bihist[45, 0]) == 0 + + +