Mercurial > hg > plosone_underreview
changeset 36:3b67cd634b9a branch-tests
tests for pitch bihist, before refactoring
author | Maria Panteli |
---|---|
date | Thu, 14 Sep 2017 14:16:29 +0100 |
parents | c4428589b82b |
children | 2cc444441f42 |
files | scripts/PitchBihist.py scripts/load_features.py tests/test_PitchBihist.py |
diffstat | 3 files changed, 186 insertions(+), 132 deletions(-) [+] |
line wrap: on
line diff
--- a/scripts/PitchBihist.py Thu Sep 14 13:07:19 2017 +0100 +++ b/scripts/PitchBihist.py Thu Sep 14 14:16:29 2017 +0100 @@ -4,10 +4,11 @@ @author: mariapanteli """ +import numpy as np +import os +import scipy.signal + import smoothiecore as s -import numpy -import scipy.signal -# import librosa class PitchBihist: @@ -16,9 +17,109 @@ self.sr = None self.chroma = None self.chromasr = None + self.melodiasr = 44100. / 128. self.bihist = None self.win2sec = win2sec + + def hz_to_cents(self, freq_Hz, ref_Hz=32.703, n_cents=1200): + """ convert frequency values from Hz to cents + reference frequency at C1 + """ + freq_cents = np.round(n_cents * np.log2(freq_Hz/ref_Hz)) + return freq_cents + + + def wrap_to_octave(self, cents, octave_length=1200): + """ wrap to a single octave 0-1200 + """ + octave_cents = cents % octave_length + return octave_cents + + + def get_melody_from_file(self, melodia_file, stop_sec=None): + if not os.path.exists(melodia_file): + return [] + data = np.loadtxt(melodia_file, delimiter=',') + times, freqs = (data[:, 0], data[:, 1]) + self.chromasr = 1. / (times[1] - times[0]) + if stop_sec is not None: + stop_idx = np.where(times < stop_sec)[0] + times, freqs = times[stop_idx], freqs[stop_idx] + freqs[freqs<=0] = np.nan + melody = freqs + return melody + + + def get_melody_matrix(self, melody): + n_bins = 60 + n_frames = len(melody) + melody_cents = self.hz_to_cents(melody, n_cents=n_bins) + melody_octave = self.wrap_to_octave(melody_cents, octave_length=n_bins) + melody_matrix = np.zeros((n_bins, n_frames)) + for time, pitch in enumerate(melody_octave): + if not np.isnan(pitch): + melody_matrix[int(pitch), time] = 1 + return melody_matrix + + + def bihist_from_melodia(self, filename='sample_melodia.csv', secondframedecomp=True, stop_sec=None): + melody = self.get_melody_from_file(filename, stop_sec=stop_sec) + if len(melody) == 0: + self.bihist = [] + return self.bihist + melody_matrix = self.get_melody_matrix(melody) + if secondframedecomp: + nbins, norigframes = melody_matrix.shape + win2 = int(round(self.win2sec*self.chromasr)) + hop2 = int(round(0.5*self.chromasr)) + if norigframes<=win2: + nframes = 1 + win2 = norigframes + else: + nframes = int(np.ceil((norigframes-win2)/float(hop2))) + bihistframes = np.empty((nbins*nbins, nframes)) + for i in range(nframes): # loop over all 8-sec frames + frame = melody_matrix[:, (i*hop2):(i*hop2+win2)] + bihist = self.bihistogram(frame) + bihist = np.reshape(bihist, -1) + bihistframes[:, i] = bihist + self.bihist = bihistframes + else: + self.bihist = self.bihistogram(melody_matrix) + return self.bihist + + + def bihistogram(self, spec, winsec=0.5, align=True): + win = int(round(winsec*self.chromasr)) + ker = np.concatenate([np.zeros((win, 1)), np.ones((win+1, 1))], axis=0) + spec = spec.T # transpose to have franes as rows in convolution + + # energy threshold + thr = 0.3*np.max(spec) + spec[spec < max(thr, 0)] = 0 + + # transitions via convolution + tra = scipy.signal.convolve2d(spec, ker, mode='same') + tra[spec > 0] = 0 + + # multiply with original + B = np.dot(tra.T, spec) + + # normalize to [0, 1] + mxB = np.max(B) + mnB = np.min(B) + if mxB != mnB: + B = (B - mnB)/float(mxB-mnB) + + # circshift to highest? + if align: + ref = np.argmax(np.sum(spec, axis=0)) + B = np.roll(B, -ref, axis=0) + B = np.roll(B, -ref, axis=1) + return B + + def bihist_from_chroma(self, filename='test.wav', secondframedecomp=True): self.chroma, self.chromasr = s.get_smoothie_for_bihist(filename=filename, hopinsec=0.005) if secondframedecomp: @@ -28,96 +129,17 @@ if norigframes<win2: nframes = 1 else: - nframes = int(1+numpy.floor((norigframes-win2)/float(hop2))) - bihistframes = numpy.empty((nbins*nbins, nframes)) + nframes = int(1+np.floor((norigframes-win2)/float(hop2))) + bihistframes = np.empty((nbins*nbins, nframes)) for i in range(nframes): # loop over all 8-sec frames frame = self.chroma[:, (i*hop2):min((i*hop2+win2),norigframes)] bihist = self.bihistogram(frame) - bihist = numpy.reshape(bihist, -1) + bihist = np.reshape(bihist, -1) bihistframes[:, i] = bihist self.bihist = bihistframes else: - self.bihist = numpy.reshape(self.bihistogram(), -1) + self.bihist = np.reshape(self.bihistogram(), -1) - def bihist_from_melodia(self, filename='sample_melodia.csv', secondframedecomp=True, stop_sec=None): - def hz_to_cents(freq_Hz, ref_Hz=32.703, n_cents=1200): - """ convert frequency values from Hz to cents - reference frequency at C1 - """ - freq_cents = numpy.round(n_cents * numpy.log2(freq_Hz/ref_Hz)) - return freq_cents - def wrap_to_octave(cents, octave_length=1200): - """ wrap to a single octave 0-1200 - """ - octave_cents = cents % octave_length - return octave_cents - - n_bins = 60 - data = numpy.loadtxt(filename, delimiter=',') - times, freqs = (data[:, 0], data[:, 1]) - self.chromasr = 1. / (times[1] - times[0]) - #self.chromasr = 128. - if stop_sec is not None: - stop_idx = numpy.where(times < stop_sec)[0] - times, freqs = times[stop_idx], freqs[stop_idx] - freqs[freqs<=0] = numpy.nan - #melody = freqs[freqs>0] - melody = freqs - n_frames = len(melody) - melody_cents = hz_to_cents(melody, n_cents=n_bins) - melody_octave = wrap_to_octave(melody_cents, octave_length=n_bins) - melody_matrix = numpy.zeros((n_bins, n_frames)) - for time, pitch in enumerate(melody_octave): - if not numpy.isnan(pitch): - melody_matrix[int(pitch), time] = 1 - if secondframedecomp: - win2 = int(round(self.win2sec*self.chromasr)) - hop2 = int(round(0.5*self.chromasr)) - nbins, norigframes = melody_matrix.shape - if norigframes<win2: - nframes = 1 - win2 = norigframes - else: - nframes = int(1+numpy.floor((norigframes-win2)/float(hop2))) - bihistframes = numpy.empty((nbins*nbins, nframes)) - for i in range(nframes): # loop over all 8-sec frames - frame = melody_matrix[:, (i*hop2):(i*hop2+win2)] - bihist = self.bihistogram(frame) - bihist = numpy.reshape(bihist, -1) - bihistframes[:, i] = bihist - self.bihist = bihistframes - else: - self.bihist = self.bihistogram(melody_matrix) - return self.bihist - - def bihistogram(self, spec, winsec=0.5, align=True): - win = int(round(winsec*self.chromasr)) - ker = numpy.concatenate([numpy.zeros((win, 1)), numpy.ones((win+1, 1))], axis=0) - spec = spec.T # transpose to have franes as rows in convolution - - # energy threshold - thr = 0.3*numpy.max(spec) - spec[spec < max(thr, 0)] = 0 - - # transitions via convolution - tra = scipy.signal.convolve2d(spec, ker, mode='same') - tra[spec > 0] = 0 - - # multiply with original - B = numpy.dot(tra.T, spec) - - # normalize - mxB = numpy.max(B) - mnB = numpy.min(B) - if mxB != mnB: - B = (B - mnB)/float(mxB-mnB) - - # circshift to highest? - if align: - ref = numpy.argmax(numpy.sum(spec, axis=0)) - B = numpy.roll(B, -ref, axis=0) - B = numpy.roll(B, -ref, axis=1) - return B def bihist_from_precomp_chroma(self, align=False): win2 = int(round(self.win2sec*self.chromasr)) @@ -126,12 +148,12 @@ if norigframes<win2: nframes = 1 else: - nframes = int(1+numpy.floor((norigframes-win2)/float(hop2))) - bihistframes = numpy.empty((nbins*nbins, nframes)) + nframes = int(1+np.floor((norigframes-win2)/float(hop2))) + bihistframes = np.empty((nbins*nbins, nframes)) for i in range(nframes): # loop over all 8-sec frames frame = self.chroma[:, (i*hop2):min((i*hop2+win2),norigframes)] bihist = self.bihistogram(frame, align=align) - bihist = numpy.reshape(bihist, -1) + bihist = np.reshape(bihist, -1) bihistframes[:, i] = bihist self.bihist = bihistframes
--- a/scripts/load_features.py Thu Sep 14 13:07:19 2017 +0100 +++ b/scripts/load_features.py Thu Sep 14 14:16:29 2017 +0100 @@ -276,47 +276,4 @@ n_stop = np.int(np.ceil(stop_sec * self.framessr2)) pbihist = pbihist[:, :np.min([pbihist.shape[0], n_stop])] return pbihist - - - # def get_pb_from_melodia(self, melodia_file=None, nmfpb=True, scale=True, stop_sec=30.0): - # if not os.path.exists(melodia_file): - # return [] - # print 'extracting pitch bihist from melodia...' - # pb = pbi.PitchBihist(win2sec=self.win2sec) - # pbihist = pb.bihist_from_melodia(filename=melodia_file, stop_sec=stop_sec) - # if nmfpb is True: - # pbihist = self.nmfpitchbihist(pbihist) - # pbihist = pd.DataFrame(pbihist.T) - # if scale: - # # scale all frames by mean and std of recording - # pbihist = (pbihist - np.nanmean(pbihist)) / np.nanstd(pbihist) - # return pbihist - - - # def load_precomputed_pb_from_melodia(self, melodia_file=None, nmfpb=True, scale=True, stop_sec=30.0): - # base = os.path.basename(melodia_file) - # root = '/import/c4dm-05/mariap/Melodia-melody-'+str(int(self.win2sec))+'sec/' - # root_BL = '/import/c4dm-04/mariap/FeatureCsvs_BL_old/PB-melodia/' - # root_SM = '/import/c4dm-04/mariap/FeatureCsvs/PB-melodia/' - # if 'SampleAudio' in base: - # root = root_SM - # else: - # root = root_BL - # base = base.split('_')[-1].split('.csv')[0]+'_vamp_mtg-melodia_melodia_melody.csv' - # print 'load precomputed pitch bihist', root - # #if self.win2sec == 8: - # # pbihist = pd.read_csv(os.path.join(root, base)) - # #else: - # if 1: - # pbihist = np.loadtxt(os.path.join(root, base), delimiter=',').T - # if nmfpb is True: - # pbihist = self.nmfpitchbihist(pbihist) - # pbihist = pd.DataFrame(pbihist.T) - # n_stop = np.int(np.ceil(stop_sec * self.framessr2)) - # pbihist = pbihist.iloc[:np.min([pbihist.shape[0], n_stop]), :] - # print pbihist.shape - # if scale: - # # scale all frames by mean and std of recording - # pbihist = (pbihist - np.nanmean(pbihist)) / np.nanstd(pbihist) - # return pbihist
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_PitchBihist.py Thu Sep 14 14:16:29 2017 +0100 @@ -0,0 +1,75 @@ +# -*- coding: utf-8 -*- +""" +Created on Fri Sep 1 19:11:52 2017 + +@author: mariapanteli +""" + +import pytest + +import numpy as np + +import scripts.PitchBihist as PitchBihist + + +pbi = PitchBihist.PitchBihist() + + +def test_hz_to_cents(): + freq_Hz = np.array([32.703, 65.406, 55, 110]) + freq_cents = pbi.hz_to_cents(freq_Hz) + freq_cents_true = np.array([0, 1200, 900, 2100]) + assert np.array_equal(freq_cents, freq_cents_true) + + +def test_wrap_to_octave(): + cents = np.array([900, 2100, 1200]) + octave_cents = pbi.wrap_to_octave(cents) + octave_cents_true = np.array([900, 900, 0]) + assert np.array_equal(octave_cents, octave_cents_true) + + +def test_get_melody_from_file(): + melodia_file = 'data/sample_dataset/Melodia/mel_1_2_1.csv' + melody = pbi.get_melody_from_file(melodia_file) + assert len(melody) < 12. * pbi.chromasr + + +def test_get_melody_matrix(): + melody = 440 * np.ones(1000) + melody_matrix = pbi.get_melody_matrix(melody) + n_frames = melody_matrix.shape[1] + assert np.array_equal(melody_matrix[45, :], np.ones(n_frames)) + + +def test_bihist_from_melodia(): + melodia_file = 'data/sample_dataset/Melodia/mel_1_2_1.csv' + bihist = pbi.bihist_from_melodia(melodia_file, secondframedecomp=False) + assert bihist.shape == (60, 60) + + +def test_bihist_from_melodia_n_frames(): + melodia_file = 'data/sample_dataset/Melodia/mel_1_2_1.csv' + bihist = pbi.bihist_from_melodia(melodia_file, secondframedecomp=True) + dur_sec = 11.5 # duration of first file in metadata.csv is > 11 seconds + n_frames_true = np.round((dur_sec - pbi.win2sec) * 2) # for .5 sec hop size + assert bihist.shape[1] == n_frames_true + + +def test_bihistogram(): + melody = 440 * np.ones(1000) + melody_matrix = pbi.get_melody_matrix(melody) + bihist = pbi.bihistogram(melody_matrix, align=False) + assert np.array_equal(bihist, np.zeros((60, 60))) + + +def test_bihistogram_values(): + melody = np.concatenate([440 * np.ones(500), 32.703 * np.ones(500)]) + melody_matrix = pbi.get_melody_matrix(melody) + # melody transitions from A to C (bin 45/60 to bin 0/60) + bihist = pbi.bihistogram(melody_matrix, align=False) + # expect only element [45, 0] to be non-zero + assert bihist[45, 0] > 0 and (np.sum(bihist) - bihist[45, 0]) == 0 + + +