Mercurial > hg > plosone_underreview
changeset 35:c4428589b82b branch-tests
more tests in load_features
author | Maria Panteli |
---|---|
date | Thu, 14 Sep 2017 13:07:19 +0100 |
parents | 115774aff442 |
children | 3b67cd634b9a |
files | notebooks/explain_components.ipynb scripts/load_features.py scripts/results.py scripts/results_classification.py tests/test_load_features.py |
diffstat | 5 files changed, 271 insertions(+), 262 deletions(-) [+] |
line wrap: on
line diff
--- a/notebooks/explain_components.ipynb Thu Sep 14 10:16:59 2017 +0100 +++ b/notebooks/explain_components.ipynb Thu Sep 14 13:07:19 2017 +0100 @@ -32,7 +32,9 @@ { "cell_type": "code", "execution_count": 2, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -64,6 +66,7 @@ "cell_type": "code", "execution_count": 14, "metadata": { + "collapsed": false, "scrolled": false }, "outputs": [ @@ -230,7 +233,9 @@ { "cell_type": "code", "execution_count": 5, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -273,7 +278,9 @@ { "cell_type": "code", "execution_count": 7, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -291,7 +298,9 @@ { "cell_type": "code", "execution_count": 8, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "name": "stderr", @@ -369,7 +378,9 @@ { "cell_type": "code", "execution_count": 12, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "data": { @@ -425,7 +436,9 @@ { "cell_type": "code", "execution_count": 26, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "data": { @@ -457,7 +470,9 @@ { "cell_type": "code", "execution_count": 31, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "data": { @@ -490,7 +505,9 @@ { "cell_type": "code", "execution_count": 33, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "data": { @@ -533,7 +550,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", - "version": "2.7.12" + "version": "2.7.11" } }, "nbformat": 4,
--- a/scripts/load_features.py Thu Sep 14 10:16:59 2017 +0100 +++ b/scripts/load_features.py Thu Sep 14 13:07:19 2017 +0100 @@ -34,7 +34,7 @@ print 'extracting onset patterns and mfccs...' songframes = pd.read_csv(melspec_file, engine="c", header=None) songframes.iloc[np.where(np.isnan(songframes))] = 0 - n_stop = np.int(np.ceil(stop_sec * self.framessr)) + n_stop = np.int(np.ceil(stop_sec * self.framessr)) songframes = songframes.iloc[0:min(len(songframes), n_stop), :] melspec = songframes.get_values().T op = self.get_op_from_melspec(melspec, K=2) @@ -54,7 +54,7 @@ songframes = pd.read_csv(chroma_file, engine="c", header=None) songframes.iloc[np.where(np.isnan(songframes))] = 0 n_stop = np.int(np.ceil(stop_sec * self.framessr)) - songframes = songframes.iloc[0:min(len(songframes), n_stop), :] + songframes = songframes.iloc[0:min(len(songframes), n_stop), :] chroma = songframes.get_values().T ch = self.get_ave_chroma(chroma) if scale: @@ -103,7 +103,7 @@ return music_idx - def get_features(self, df, stop_sec=30.0, class_label='Country'): + def get_features(self, df, stop_sec=30.0, class_label='Country', precomp_melody=False): oplist = [] mflist = [] chlist = [] @@ -122,14 +122,16 @@ try: op, mfcc = self.get_op_mfcc_for_file(df['Melspec'].iloc[i], stop_sec=stop_sec) ch = self.get_chroma_for_file(df['Chroma'].iloc[i], stop_sec=stop_sec) - #pb = self.get_pb_from_melodia(df['Melodia'].iloc[i], stop_sec=stop_sec) - pb = self.load_precomputed_pb_from_melodia(df['Melodia'].iloc[i], stop_sec=stop_sec) - #pb = self.get_contour_feat_from_melodia(df['Melodia'].iloc[i]) + pb = self.get_pb_for_file(df['Melodia'].iloc[i], precomp_melody=precomp_melody, stop_sec=stop_sec) + #if precomp_melody: + # pb = self.load_precomputed_pb_from_melodia(df['Melodia'].iloc[i], stop_sec=stop_sec) + #else: + # pb = self.get_pb_from_melodia(df['Melodia'].iloc[i], stop_sec=stop_sec) except: continue n_stop = np.int(np.ceil(stop_sec * self.framessr2)) - print n_stop, len(op), len(mfcc), len(ch), len(pb) - min_n_frames = np.min([n_stop, len(op), len(mfcc), len(ch), len(pb)]) # ideally, features should have the same number of frames + print n_stop, len(op), len(mfcc), len(ch), len(pb) + min_n_frames = np.min([n_stop, len(op), len(mfcc), len(ch), len(pb)]) # ideally, features should have the same number of frames if min_n_frames==0: # no features extracted -> skip this file continue @@ -148,9 +150,10 @@ def get_op_from_melspec(self, melspec, K=None): op = opm.OPMellin(win2sec=self.win2sec) opmellin = op.get_opmellin_from_melspec(melspec=melspec, melsr=self.framessr) + opmel = pd.DataFrame(opmellin.T) if K is not None: opmel = self.mean_K_bands(opmellin.T, K) - opmel = pd.DataFrame(opmel) + opmel = pd.DataFrame(opmel) return opmel @@ -169,10 +172,10 @@ return mfcc - def get_ave_chroma(self, chroma, avelocalframes=True, stdlocalframes=True, alignchroma=True): + def get_ave_chroma(self, chroma, alignchroma=True, avelocalframes=True, stdlocalframes=True): chroma[np.where(np.isnan(chroma))] = 0 if alignchroma: - maxind = np.argmax(np.sum(chroma, axis=1)) + maxind = np.argmax(np.sum(chroma, axis=1)) # bin with max magnitude across time chroma = np.roll(chroma, -maxind, axis=0) if avelocalframes: chroma = self.average_local_frames(chroma, getstd=stdlocalframes) @@ -229,13 +232,15 @@ return newframes - def get_pb_from_melodia(self, melodia_file=None, nmfpb=True, scale=True, stop_sec=30.0): - pb = [] - if not os.path.exists(melodia_file): - return pb - print 'extracting pitch bihist from melodia...' - pb = pbi.PitchBihist(win2sec=self.win2sec) - pbihist = pb.bihist_from_melodia(filename=melodia_file, stop_sec=stop_sec) + def get_pb_for_file(self, melodia_file, precomp_melody=False, nmfpb=True, scale=True, stop_sec=30.0): + pbihist = [] + if precomp_melody: + pbihist = self.load_precomp_pb_from_melodia(melodia_file=melodia_file, stop_sec=stop_sec) + else: + pbihist = self.extract_pb_from_melodia(melodia_file=melodia_file, stop_sec=stop_sec) + if len(pbihist) == 0: + # no file was found + return pbihist if nmfpb is True: pbihist = self.nmfpitchbihist(pbihist) pbihist = pd.DataFrame(pbihist.T) @@ -245,7 +250,18 @@ return pbihist - def load_precomputed_pb_from_melodia(self, melodia_file=None, nmfpb=True, scale=True, stop_sec=30.0): + def extract_pb_from_melodia(self, melodia_file=None, stop_sec=30.0): + pbihist = [] + if not os.path.exists(melodia_file): + return pbihist + print 'extracting pitch bihist from melodia...' + pb = pbi.PitchBihist(win2sec=self.win2sec) + pbihist = pb.bihist_from_melodia(filename=melodia_file, stop_sec=stop_sec) + return pbihist + + + def load_precomp_pb_from_melodia(self, melodia_file=None, stop_sec=30.0): + pbihist = [] base = os.path.basename(melodia_file) root = '/import/c4dm-05/mariap/Melodia-melody-'+str(int(self.win2sec))+'sec/' root_BL = '/import/c4dm-04/mariap/FeatureCsvs_BL_old/PB-melodia/' @@ -254,21 +270,53 @@ root = root_SM else: root = root_BL - base = base.split('_')[-1].split('.csv')[0]+'_vamp_mtg-melodia_melodia_melody.csv' + base = base.split('_')[-1].split('.csv')[0]+'_vamp_mtg-melodia_melodia_melody.csv' print 'load precomputed pitch bihist', root - #if self.win2sec == 8: - # pbihist = pd.read_csv(os.path.join(root, base)) - #else: - if 1: - pbihist = np.loadtxt(os.path.join(root, base), delimiter=',').T - if nmfpb is True: - pbihist = self.nmfpitchbihist(pbihist) - pbihist = pd.DataFrame(pbihist.T) - n_stop = np.int(np.ceil(stop_sec * self.framessr2)) - pbihist = pbihist.iloc[:np.min([pbihist.shape[0], n_stop]), :] - print pbihist.shape - if scale: - # scale all frames by mean and std of recording - pbihist = (pbihist - np.nanmean(pbihist)) / np.nanstd(pbihist) + pbihist = np.loadtxt(os.path.join(root, base), delimiter=',').T + n_stop = np.int(np.ceil(stop_sec * self.framessr2)) + pbihist = pbihist[:, :np.min([pbihist.shape[0], n_stop])] return pbihist + + + # def get_pb_from_melodia(self, melodia_file=None, nmfpb=True, scale=True, stop_sec=30.0): + # if not os.path.exists(melodia_file): + # return [] + # print 'extracting pitch bihist from melodia...' + # pb = pbi.PitchBihist(win2sec=self.win2sec) + # pbihist = pb.bihist_from_melodia(filename=melodia_file, stop_sec=stop_sec) + # if nmfpb is True: + # pbihist = self.nmfpitchbihist(pbihist) + # pbihist = pd.DataFrame(pbihist.T) + # if scale: + # # scale all frames by mean and std of recording + # pbihist = (pbihist - np.nanmean(pbihist)) / np.nanstd(pbihist) + # return pbihist + + + # def load_precomputed_pb_from_melodia(self, melodia_file=None, nmfpb=True, scale=True, stop_sec=30.0): + # base = os.path.basename(melodia_file) + # root = '/import/c4dm-05/mariap/Melodia-melody-'+str(int(self.win2sec))+'sec/' + # root_BL = '/import/c4dm-04/mariap/FeatureCsvs_BL_old/PB-melodia/' + # root_SM = '/import/c4dm-04/mariap/FeatureCsvs/PB-melodia/' + # if 'SampleAudio' in base: + # root = root_SM + # else: + # root = root_BL + # base = base.split('_')[-1].split('.csv')[0]+'_vamp_mtg-melodia_melodia_melody.csv' + # print 'load precomputed pitch bihist', root + # #if self.win2sec == 8: + # # pbihist = pd.read_csv(os.path.join(root, base)) + # #else: + # if 1: + # pbihist = np.loadtxt(os.path.join(root, base), delimiter=',').T + # if nmfpb is True: + # pbihist = self.nmfpitchbihist(pbihist) + # pbihist = pd.DataFrame(pbihist.T) + # n_stop = np.int(np.ceil(stop_sec * self.framessr2)) + # pbihist = pbihist.iloc[:np.min([pbihist.shape[0], n_stop]), :] + # print pbihist.shape + # if scale: + # # scale all frames by mean and std of recording + # pbihist = (pbihist - np.nanmean(pbihist)) / np.nanstd(pbihist) + # return pbihist
--- a/scripts/results.py Thu Sep 14 10:16:59 2017 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,131 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Tue Jul 12 20:49:48 2016 - -@author: mariapanteli -""" - -import numpy as np -import pandas as pd -import pickle -from collections import Counter -from sklearn.cluster import KMeans - -import utils -import utils_spatial - - -def country_outlier_df(counts, labels, out_file=None, normalize=False): - if len(counts.keys()) < len(np.unique(labels)): - for label in np.unique(labels): - if not counts.has_key(label): - counts.update({label:0}) - if normalize is True: - counts = normalize_outlier_counts(counts, Counter(labels)) - df = pd.DataFrame.from_dict(counts, orient='index').reset_index() - df.rename(columns={'index':'Country', 0:'Outliers'}, inplace=True) - if out_file is not None: - df.to_csv(out_file, index=False) - return df - - -def normalize_outlier_counts(outlier_counts, country_counts): - '''Normalize a dictionary of outlier counts per country by - the total number of recordings per country - ''' - for key in outlier_counts.keys(): - # dictionaries should have the same keys - outlier_counts[key] = float(outlier_counts[key]) / float(country_counts[key]) - return outlier_counts - - -def get_outliers_df(X, Y, chi2thr=0.999, out_file=None): - threshold, y_pred, MD = utils.get_outliers_Mahal(X, chi2thr=chi2thr) - global_counts = Counter(Y[y_pred]) - df = country_outlier_df(global_counts, Y, normalize=True) - if out_file is not None: - df.to_csv(out_file, index=False) - return df, threshold, MD - - -def print_most_least_outliers_topN(df, N=10): - sort_inds = df['Outliers'].argsort() # ascending order - df_most = df[['Country', 'Outliers']].iloc[sort_inds[::-1][:N]] - df_least = df[['Country', 'Outliers']].iloc[sort_inds[:N]] - print "most outliers " - print df_most - print "least outliers " - print df_least - - -def load_metadata(Yaudio, metadata_file): - df = pd.read_csv(metadata_file) - df_audio = pd.DataFrame({'Audio':Yaudio}) - ddf = pd.merge(df_audio, df, on='Audio', suffixes=['', '_r']) # in the order of Yaudio - return ddf - - -def clusters_metadata(df, cl_pred, out_file=None): - def get_top_N_counts(labels, N=3): - ulab, ucount = np.unique(labels, return_counts=True) - inds = np.argsort(ucount) - return zip(ulab[inds[-N:]],ucount[inds[-N:]]) - info = np.array([str(df['Country'].iloc[i]) for i in range(len(df))]) - styles_description = [] - uniq_cl = np.unique(cl_pred) - for ccl in uniq_cl: - inds = np.where(cl_pred==ccl)[0] - styles_description.append(get_top_N_counts(info[inds], N=3)) - df_styles = pd.DataFrame(data=styles_description, index=uniq_cl) - print df_styles.to_latex() - if out_file is not None: - df_styles.to_csv(out_file, index=False) - - -if __name__ == '__main__': - # load LDA-transformed frames - X_list, Y, Yaudio = pickle.load(open('data/lda_data_8.pickle','rb')) - ddf = load_metadata(Yaudio, metadata_file='data/metadata.csv') - w, data_countries = utils_spatial.get_neighbors_for_countries_in_dataset(Y) - w_dict = utils_spatial.from_weights_to_dict(w, data_countries) - X = np.concatenate(X_list, axis=1) - - # global outliers - df_global, threshold, MD = get_outliers_df(X, Y, chi2thr=0.999) - print_most_least_outliers_topN(df_global, N=10) - - spatial_outliers = utils.get_local_outliers_from_neighbors_dict(X, Y, w_dict, chi2thr=0.999, do_pca=True) - spatial_counts = Counter(dict([(ll[0],ll[1]) for ll in spatial_outliers])) - df_local = country_outlier_df(spatial_counts, Y, normalize=True) - print_most_least_outliers_topN(df_local, N=10) - - feat = [Xrhy, Xmel, Xmfc, Xchr] - feat_labels = ['rhy', 'mel', 'mfc', 'chr'] - tabs_feat = [] - for i in range(len(feat)): - XX = feat[i] - df_feat, threshold, MD = get_outliers_df(XX, Y, chi2thr=0.999) - print_most_least_outliers_topN(df_feat, N=5) - - # how many styles are there - #bestncl, ave_silh = utils.best_n_clusters_silhouette(X, min_ncl=5, max_ncl=50, metric="cosine") - bestncl = 13 - - # get cluster predictions and metadata for each cluster - cluster_model = KMeans(n_clusters=bestncl, random_state=50).fit(X) - centroids = cluster_model.cluster_centers_ - cl_pred = cluster_model.predict(X) - ddf['Clusters'] = cl_pred - clusters_metadata(ddf, cl_pred) - - # how similar are the cultures and which ones seem to be global outliers - cluster_freq = utils.get_cluster_freq_linear(X, Y, centroids) - - # Moran on Mahalanobis distances - data = cluster_freq.get_values() - data_countries = cluster_freq.index - #threshold, y_pred, MD = utils.get_outliers_Mahal(data, chi2thr=0.999) - threshold, y_pred, MD = utils.get_outliers(data, chi2thr=0.999) - y = np.sqrt(MD) - utils_spatial.print_Moran_outliers(y, w, data_countries) - utils_spatial.plot_Moran_scatterplot(y, w, data_countries)
--- a/scripts/results_classification.py Thu Sep 14 10:16:59 2017 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,87 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Thu Nov 10 15:10:32 2016 - -@author: mariapanteli -""" -import numpy as np -import pandas as pd -from sklearn import metrics - -import map_and_average -import util_feature_learning - - -FILENAMES = map_and_average.OUTPUT_FILES - - -def load_data_from_pickle(filename): - X_list, Y, Yaudio = pickle.load(open(filename,'rb')) - X = np.concatenate(data_list, axis=1) - return X, Y, Yaudio - - -def get_train_test_indices(): - trainset, valset, testset = map_and_average.load_train_val_test_sets() - trainaudiolabels, testaudiolabels = trainset[2], testset[2] - # train, test indices - aa_train = np.unique(trainaudiolabels) - aa_test = np.unique(testaudiolabels) - traininds = np.array([i for i, item in enumerate(audiolabs) if item in aa_train]) - testinds = np.array([i for i, item in enumerate(audiolabs) if item in aa_test]) - return traininds, testinds - - -def get_train_test_sets(X, Y, traininds, testinds): - X_train = X[traininds, :] - Y_train = Y[traininds] - X_test = X[testinds, :] - Y_test = Y[testinds] - return X_train, Y_train, X_test, Y_test - - -def classify_for_filenames(file_list=FILENAMES): - df_results = pd.DataFrame() - feat_learner = util_feature_learning.Transformer() - for filename in file_list: - X, Y, Yaudio = load_data_from_pickle(filename) - traininds, testinds = get_train_test_indices() - X_train, Y_train, X_test, Y_test = get_train_test_sets(X, Y, traininds, testinds) - df_result = feat_learner.classify(X_train, Y_train, X_test, Y_test) - df_results = pd.concat([df_results, df_result], axis=0, ignore_index=True) - return df_results - - -def plot_CF(CF, labels=None, figurename=None): - labels[labels=='United States of America'] = 'United States Amer.' - plt.imshow(CF, cmap="Greys") - plt.xticks(range(len(labels)), labels, rotation='vertical', fontsize=4) - plt.yticks(range(len(labels)), labels, fontsize=4) - if figurename is not None: - plt.savefig(figurename, bbox_inches='tight') - - -def confusion_matrix(X_train, Y_train, X_test, Y_test, saveCF=False, plots=False): - feat_learner = util_feature_learning.Transformer() - accuracy, predictions = util_feature_learning.classification_accuracy(X_train, Y_train, - X_test, Y_test, model=util_feature_learning.modelLDA) - labels = np.unique(Y_test) # TODO: countries in geographical proximity - CF = metrics.confusion_matrix(Y_test, predictions, labels=labels) - if saveCF: - np.savetxt('data/CFlabels.csv', labels, fmt='%s') - np.savetxt('data/CF.csv', CF, fmt='%10.5f') - if plots: - plot_CF(CF, labels=labels, figurename='data/conf_matrix.pdf') - return accuracy, predictions - - -if __name__ == '__main__': - df_results = classify_for_filenames(file_list=FILENAMES) - max_i = np.argmax(df_results[:, 1]) - feat_learning_i = max_i % 4 # 4 classifiers for each feature learning method - filename = FILENAMES[feat_learning_i] - X, Y, Yaudio = load_data_from_pickle(filename) - traininds, testinds = get_train_test_indices() - X_train, Y_train, X_test, Y_test = get_train_test_sets(X, Y, traininds, testinds) - confusion_matrix(X_train, Y_train, X_test, Y_test, saveCF=True, plots=True) -
--- a/tests/test_load_features.py Thu Sep 14 10:16:59 2017 +0100 +++ b/tests/test_load_features.py Thu Sep 14 13:07:19 2017 +0100 @@ -7,8 +7,9 @@ import pytest +import os import numpy as np - +import pandas as pd import scripts.load_features as load_features feat_loader = load_features.FeatureLoader(win2sec=8) @@ -95,3 +96,164 @@ aveframes_true = np.array([[0.5, 0], [1.5, 1], [0.1, 0], [0.1, 0]]) # test only the second frame which contains values 0 or values 1 for all 8-second frame entries assert np.array_equal(aveframes[:, 1], aveframes_true[:, 1]) + + +def test_get_op_from_melspec_n_frames(): + dur_sec = 10.0 + dur_frames = np.int(np.round(dur_sec * feat_loader.framessr)) + np.random.seed(1) + melspec = np.random.randn(40, dur_frames) # melspec with 40 melbands + melspec = melspec - np.min(melspec) # melspec must be positive + opmel = feat_loader.get_op_from_melspec(melspec) + n_frames = opmel.shape[0] + # expect 4 frames for windows not centered and .5 sec hop size + # np.round((dur_sec - feat_loader.win2sec) * feat_loader.framessr2) + assert n_frames == 4 + + +def test_get_op_from_melspec_n_bins(): + dur_sec = 10.0 + dur_frames = np.int(np.round(dur_sec * feat_loader.framessr)) + np.random.seed(1) + melspec = np.random.randn(40, dur_frames) # melspec with 40 melbands + melspec = melspec - np.min(melspec) # melspec must be positive + opmel = feat_loader.get_op_from_melspec(melspec) + n_bins = opmel.shape[1] + assert n_bins == 40 * 200 + + +def test_get_op_from_melspec_K_bands(): + dur_sec = 10.0 + dur_frames = np.int(np.round(dur_sec * feat_loader.framessr)) + np.random.seed(1) + melspec = np.random.randn(40, dur_frames) # melspec with 40 melbands + melspec = melspec - np.min(melspec) # melspec must be positive + K = 2 + opmel = feat_loader.get_op_from_melspec(melspec, K=K) + n_bins = opmel.shape[1] + assert n_bins == K * 200 + + +def test_get_mfcc_from_melspec_n_coef(): + dur_sec = 10.0 + dur_frames = np.int(np.round(dur_sec * feat_loader.framessr)) + np.random.seed(1) + melspec = np.random.randn(40, dur_frames) # melspec with 40 melbands + melspec = melspec - np.min(melspec) # melspec must be positive + mfcc = feat_loader.get_mfcc_from_melspec(melspec, deltamfcc=False, avelocalframes=False) + assert mfcc.shape[1] == 20 + + +def test_get_mfcc_from_melspec_n_coef_delta(): + dur_sec = 10.0 + dur_frames = np.int(np.round(dur_sec * feat_loader.framessr)) + np.random.seed(1) + melspec = np.random.randn(40, dur_frames) # melspec with 40 melbands + melspec = melspec - np.min(melspec) # melspec must be positive + mfcc = feat_loader.get_mfcc_from_melspec(melspec, deltamfcc=True, avelocalframes=False) + assert mfcc.shape[1] == 40 + + +def test_get_mfcc_from_melspec_n_frames(): + dur_sec = 10.0 + dur_frames = np.int(np.round(dur_sec * feat_loader.framessr)) + np.random.seed(1) + melspec = np.random.randn(40, dur_frames) # melspec with 40 melbands + melspec = melspec - np.min(melspec) # melspec must be positive + mfcc = feat_loader.get_mfcc_from_melspec(melspec, deltamfcc=False, avelocalframes=False) + assert mfcc.shape[0] == dur_frames + + +def test_get_mfcc_from_melspec_n_frames_win2(): + dur_sec = 10.0 + dur_frames = np.int(np.round(dur_sec * feat_loader.framessr)) + np.random.seed(1) + melspec = np.random.randn(40, dur_frames) # melspec with 40 melbands + melspec = melspec - np.min(melspec) # melspec must be positive + mfcc = feat_loader.get_mfcc_from_melspec(melspec, deltamfcc=False, avelocalframes=True) + n_frames_true = np.round((dur_sec - feat_loader.win2sec) * feat_loader.framessr2) + assert mfcc.shape[0] == n_frames_true + + +def test_get_ave_chroma_align(): + dur_sec = 10.0 + dur_frames = np.int(np.round(dur_sec * feat_loader.framessr)) + np.random.seed(1) + chroma = np.random.randn(60, dur_frames) # chroma with 60 bins + chroma = chroma - np.min(chroma) # chroma must be positive + ave_chroma = feat_loader.get_ave_chroma(chroma, alignchroma=True, avelocalframes=False) + # the maximum bin across time is the first bin (after alignment) + assert np.argmax(np.sum(ave_chroma, axis=0)) == 0 + + +def test_get_ave_chroma_n_frames(): + dur_sec = 10.0 + dur_frames = np.int(np.round(dur_sec * feat_loader.framessr)) + np.random.seed(1) + chroma = np.random.randn(60, dur_frames) # chroma with 60 bins + chroma = chroma - np.min(chroma) # chroma must be positive + ave_chroma = feat_loader.get_ave_chroma(chroma, avelocalframes=True, stdlocalframes=False) + n_frames_true = np.round((dur_sec - feat_loader.win2sec) * feat_loader.framessr2) + assert ave_chroma.shape[0] == n_frames_true + + +def test_get_ave_chroma_n_bins(): + dur_sec = 10.0 + dur_frames = np.int(np.round(dur_sec * feat_loader.framessr)) + np.random.seed(1) + chroma = np.random.randn(60, dur_frames) # chroma with 60 bins + chroma = chroma - np.min(chroma) # chroma must be positive + ave_chroma = feat_loader.get_ave_chroma(chroma, avelocalframes=True, stdlocalframes=True) + assert ave_chroma.shape[1] == 120 + + +def test_get_pb_for_file_empty(): + pbihist = feat_loader.get_pb_for_file('') + assert np.array_equal(pbihist, []) + + +def test_get_pb_for_file_n_bins(): + pbihist = feat_loader.get_pb_for_file('data/sample_dataset/Melodia/mel_1_2_1.csv', nmfpb=False, scale=False) + assert pbihist.shape[1] == 3600 + + +def test_get_pb_for_file_align(): + pbihist = feat_loader.get_pb_for_file('data/sample_dataset/Melodia/mel_1_2_1.csv', nmfpb=False, scale=False) + pbihist = pbihist.get_values() + assert np.sum(pbihist[:, :60].ravel()) > np.sum(pbihist[:, 60:120].ravel()) + + +def test_get_pb_for_file_nmf(): + pbihist = feat_loader.get_pb_for_file('data/sample_dataset/Melodia/mel_1_2_1.csv', nmfpb=True, scale=False) + assert pbihist.shape[1] == 240 + + +def test_get_features(): + df = pd.read_csv('data/sample_dataset/metadata.csv') + df = df.iloc[:1, :] + os.chdir('data/') + data_list = feat_loader.get_features(df) + os.chdir('..') + assert len(np.unique(data_list[-1])) == 1 + + +def test_get_features_n_files(): + df = pd.read_csv('data/sample_dataset/metadata.csv') + n_files = 3 + df = df.iloc[:n_files, :] + os.chdir('data/') + data_list = feat_loader.get_features(df) + os.chdir('..') + assert len(np.unique(data_list[-1])) == n_files + + +def test_get_features_n_frames(): + df = pd.read_csv('data/sample_dataset/metadata.csv') + df = df.iloc[:1, :] + os.chdir('data/') + data_list = feat_loader.get_features(df) + os.chdir('..') + dur_sec = 11.5 # duration of first file in metadata.csv is > 11 seconds + n_frames_true = np.round((dur_sec - feat_loader.win2sec) * feat_loader.framessr2) + assert len(data_list[0]) == n_frames_true + \ No newline at end of file