Mercurial > hg > plosone_underreview
changeset 42:90f8a2ea6f6f branch-tests
notebook results and load_features minor edits
author | mpanteli <m.x.panteli@gmail.com> |
---|---|
date | Fri, 15 Sep 2017 16:17:17 +0100 |
parents | 57f53b0d1eaa |
children | 06e5711f9f62 |
files | notebooks/sensitivity_experiment.ipynb notebooks/test_hubness.ipynb notebooks/test_music_segments.ipynb scripts/load_features.py |
diffstat | 4 files changed, 507 insertions(+), 81 deletions(-) [+] |
line wrap: on
line diff
--- a/notebooks/sensitivity_experiment.ipynb Fri Sep 15 12:27:11 2017 +0100 +++ b/notebooks/sensitivity_experiment.ipynb Fri Sep 15 16:17:17 2017 +0100 @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 58, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -16,6 +16,7 @@ ], "source": [ "import numpy as np\n", + "import pandas as pd\n", "\n", "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", @@ -27,19 +28,20 @@ "sys.path.append('../')\n", "import scripts.load_dataset as load_dataset\n", "import scripts.map_and_average as mapper\n", - "import scripts.classification\n", + "import scripts.classification as classification\n", "import scripts.outliers as outliers" ] }, { "cell_type": "code", - "execution_count": 46, - "metadata": {}, + "execution_count": 2, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "OUTPUT_FILES = load_dataset.OUTPUT_FILES\n", - "n_iters = 10\n", - "df = load_dataset.sample_dataset(csv_file=load_dataset.METADATA_FILE)" + "n_iters = 10" ] }, { @@ -59,6 +61,7 @@ } ], "source": [ + "df = load_dataset.sample_dataset(csv_file=load_dataset.METADATA_FILE)\n", "df.shape" ] }, @@ -4612,11 +4615,83 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "iteration 0\n", + "mapping...\n", + "/import/c4dm-04/mariap/train_data_melodia_8_0.pickle\n", + "(203219, 840) (68100, 840) (67143, 840)\n", + "mapping rhy\n", + "training with PCA transform...\n", + "variance explained 1.0\n", + "140 400\n", + "training with PCA transform...\n", + "variance explained 0.990203912455\n", + "training with LDA transform...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/homes/mp305/anaconda/lib/python2.7/site-packages/sklearn/utils/validation.py:526: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", + " y = column_or_1d(y, warn=True)\n", + "/homes/mp305/anaconda/lib/python2.7/site-packages/sklearn/discriminant_analysis.py:455: UserWarning: The priors do not sum to 1. Renormalizing\n", + " UserWarning)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "variance explained 1.0\n", + "transform test data...\n", + "mapping mel\n", + "training with PCA transform...\n", + "variance explained 1.0\n", + "214 240\n", + "training with PCA transform...\n", + "variance explained 0.990094273777\n", + "training with LDA transform...\n", + "variance explained 1.0\n", + "transform test data...\n", + "mapping mfc\n", + "training with PCA transform...\n", + "variance explained 1.0\n", + "39 80\n", + "training with PCA transform...\n", + "variance explained 0.9914399357\n", + "training with LDA transform...\n", + "variance explained 0.941390777379\n", + "transform test data...\n", + "mapping chr\n", + "training with PCA transform...\n", + "variance explained 1.0\n", + "70 120\n", + "training with PCA transform...\n", + "variance explained 0.990511935176\n", + "training with LDA transform...\n", + "variance explained 0.953613938607\n", + "transform test data...\n" + ] + }, + { + "ename": "ValueError", + "evalue": "all the input array dimensions except for the concatenation axis must match exactly", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-3-971892d5bd8d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 6\u001b[0m output_file in OUTPUT_FILES]\n\u001b[1;32m 7\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mldadata_list\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mYaudio\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmapper\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlda_map_and_average_frames\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmin_variance\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.99\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcatenate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mldadata_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;31m# classification and confusion\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: all the input array dimensions except for the concatenation axis must match exactly" + ] + } + ], "source": [ "for n in range(n_iters):\n", " print \"iteration %d\" % n\n", @@ -4625,7 +4700,7 @@ " mapper.INPUT_FILES = [output_file.split('.pickle')[0]+'_'+str(n)+'.pickle' for \n", " output_file in OUTPUT_FILES]\n", " _, _, ldadata_list, _, _, Y, Yaudio = mapper.lda_map_and_average_frames(min_variance=0.99)\n", - " X = np.concatenate(ldadata_list)\n", + " X = np.concatenate(ldadata_list, axis=1)\n", " \n", " # classification and confusion\n", " print \"classifying...\"\n", @@ -4636,14 +4711,232 @@ " \n", " # outliers\n", " print \"detecting outliers...\"\n", - " ddf = outliers.load_metadata(Yaudio, metadata_file=load_dataset.METADATA_FILE)\n", - " df_global, threshold, MD = get_outliers_df(X, Y, chi2thr=0.999)\n", - " print_most_least_outliers_topN(df_global, N=10)\n", + " #ddf = outliers.load_metadata(Yaudio, metadata_file=load_dataset.METADATA_FILE)\n", + " df_global, threshold, MD = outliers.get_outliers_df(X, Y, chi2thr=0.999)\n", + " outliers.print_most_least_outliers_topN(df_global, N=10)\n", " \n", " # write output\n", " print \"writing file\"\n", " df_global.to_csv('../data/outliers_'+str(n)+'.csv', index=False)" ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "X = np.concatenate(ldadata_list, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(8089, 381)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.176354062249\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/homes/mp305/anaconda/lib/python2.7/site-packages/sklearn/metrics/classification.py:1113: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.\n", + " 'precision', 'predicted', average, warn_for)\n" + ] + } + ], + "source": [ + "#traininds, testinds = classification.get_train_test_indices()\n", + "traininds = np.arange(5000)\n", + "testinds = np.arange(len(X)-1600, len(X))\n", + "X_train, Y_train, X_test, Y_test = classification.get_train_test_sets(X, Y, traininds, testinds)\n", + "accuracy, _ = classification.confusion_matrix(X_train, Y_train, X_test, Y_test, saveCF=False, plots=False)\n", + "print accuracy" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "detecting outliers...\n", + "most outliers \n", + " Country Outliers\n", + "136 Botswana 0.590909\n", + "71 Ivory Coast 0.571429\n", + "86 Gambia 0.541667\n", + "43 Benin 0.538462\n", + "62 Fiji 0.466667\n", + "20 Pakistan 0.461538\n", + "65 Uganda 0.437500\n", + "14 Liberia 0.425000\n", + "78 El Salvador 0.424242\n", + "51 Western Sahara 0.421687\n", + "least outliers \n", + " Country Outliers\n", + "119 Denmark 0.000000\n", + "30 Afghanistan 0.000000\n", + "113 Iceland 0.000000\n", + "28 Tajikistan 0.000000\n", + "74 Czech Republic 0.000000\n", + "27 South Korea 0.000000\n", + "1 Lithuania 0.000000\n", + "15 Netherlands 0.014925\n", + "121 Poland 0.040000\n", + "134 Paraguay 0.043478\n" + ] + } + ], + "source": [ + "print \"detecting outliers...\"\n", + "df_global, threshold, MD = outliers.get_outliers_df(X, Y, chi2thr=0.999)\n", + "outliers.print_most_least_outliers_topN(df_global, N=10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## correlation of outlier results" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's use Kendal correlation to compare the ranked list of countries sorted by most to least outliers.\n", + "<br> First load the ranked list of outlier countries.\n", + "<br> Sort by outlier percentage in descending order." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "ranked_countries = pd.DataFrame()\n", + "ranked_outliers = pd.DataFrame()\n", + "for n in range(n_iters):\n", + " df_global = pd.read_csv('../data/outliers_'+str(n)+'.csv')\n", + " df_global = df_global.sort_values('Outliers', axis=0, ascending=False, inplace=True)\n", + " ranked_countries = pd.concat([ranked_countries, df_global['Country']], axis=1)\n", + " ranked_outliers = pd.concat([ranked_outliers, df_global['Outliers']], axis=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Remove countries with 0% outliers as these are in random (probably alphabetical) order." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "zero_idx = np.where(np.sum(ranked_outliers, axis=1)==0)[0]\n", + "first_zero_idx = np.min(zero_idx)\n", + "ranked_countries = ranked_countries.iloc[:first_zero_idx, :]\n", + "ranked_outliers = ranked_outliers.iloc[:first_zero_idx, :]\n", + "\n", + "print ranked_countries.head()\n", + "print ranked_outliers.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And now kendalltau correlation" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "KendalltauResult(correlation=0.99999999999999989, pvalue=2.5428927239036995e-67)\n" + ] + } + ], + "source": [ + "from scipy.stats import kendalltau\n", + "for i in range(len(ranked_countries)-1):\n", + " for j in range(i+1, len(ranked_countries)):\n", + " print kendalltau(ranked_countries.iloc[:, i], ranked_countries.iloc[:, j])" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "SpearmanrResult(correlation=1.0, pvalue=0.0)" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from scipy.stats import spearmanr\n", + "spearmanr(ranked_countries)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] } ], "metadata": {
--- a/notebooks/test_hubness.ipynb Fri Sep 15 12:27:11 2017 +0100 +++ b/notebooks/test_hubness.ipynb Fri Sep 15 16:17:17 2017 +0100 @@ -27,57 +27,18 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 5, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/homes/mp305/anaconda/lib/python2.7/site-packages/pysal/weights/weights.py:189: UserWarning: There are 21 disconnected observations\n", - " warnings.warn(\"There are %d disconnected observations\" % ni)\n", - "/homes/mp305/anaconda/lib/python2.7/site-packages/pysal/weights/weights.py:190: UserWarning: Island ids: 3, 6, 26, 35, 39, 45, 52, 61, 62, 66, 77, 85, 94, 97, 98, 102, 103, 107, 110, 120, 121\n", - " warnings.warn(\"Island ids: %s\" % ', '.join(str(island) for island in self.islands))\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Antigua and Barbuda\n", - "Australia\n", - "Cuba\n", - "Fiji\n", - "French Polynesia\n", - "Grenada\n", - "Iceland\n", - "Jamaica\n", - "Japan\n", - "Kiribati\n", - "Malta\n", - "New Zealand\n", - "Philippines\n", - "Puerto Rico\n", - "Republic of Serbia\n", - "Saint Lucia\n", - "Samoa\n", - "Solomon Islands\n", - "South Korea\n", - "The Bahamas\n", - "Trinidad and Tobago\n" - ] - } - ], + "outputs": [], "source": [ "X_list, Y, Yaudio = pickle.load(open('../data/lda_data_melodia_8.pickle','rb'))\n", - "ddf = outliers.load_metadata(Yaudio, metadata_file='../data/metadata.csv')\n", - "w, data_countries = utils_spatial.get_neighbors_for_countries_in_dataset(Y)\n", - "w_dict = utils_spatial.from_weights_to_dict(w, data_countries)\n", - "Xrhy, Xmel, Xmfc, Xchr = X_list\n", - "X = np.concatenate((Xrhy, Xmel, Xmfc, Xchr), axis=1)\n", + "#ddf = outliers.load_metadata(Yaudio, metadata_file='../data/metadata.csv')\n", + "#w, data_countries = utils_spatial.get_neighbors_for_countries_in_dataset(Y)\n", + "#w_dict = utils_spatial.from_weights_to_dict(w, data_countries)\n", + "X = np.concatenate(X_list, axis=1)\n", "\n", "# global outliers\n", - "df_global, threshold, MD = outliers.get_outliers_df(X, Y, chi2thr=0.999)" + "#df_global, threshold, MD = outliers.get_outliers_df(X, Y, chi2thr=0.999)" ] }, { @@ -223,7 +184,9 @@ { "cell_type": "code", "execution_count": 16, - "metadata": {}, + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "sort_idx = np.argsort(D, axis=1)\n", @@ -304,6 +267,186 @@ ] }, { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## let's get the audio url to listen to tracks identified as large hubs" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/homes/mp305/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2822: DtypeWarning: Columns (0,1,2,4,5,6,7,8,10,11,12,13,14,15,16,17,19,21,22,23,24,25,26,27,29,31,35,38,39,40,41,44,45,48,55,56,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,93,95,96) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " if self.run_code(code, result):\n" + ] + } + ], + "source": [ + "ddf = outliers.load_metadata(Yaudio, metadata_file='../data/metadata_BLSM_language_all.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(8200, 108)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ddf.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"songurls_Album\" in ddf.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 'https://sounds.bl.uk/World-and-traditional-music/Decca-West-African-recordings/025A-1CS0043663XX-0100A0.mp3',\n", + " 'https://sounds.bl.uk/World-and-traditional-music/Rycroft/025A-C0811X0005XX-2000A0.mp3',\n", + " 'https://sounds.bl.uk/World-and-traditional-music/Colin-Huehns-Pakistan/025A-C0485X0085XX-3100A0.mp3',\n", + " 'https://sounds.bl.uk/World-and-traditional-music/Rycroft/025A-C0811X0005XX-1300A0.mp3',\n", + " 'https://sounds.bl.uk/World-and-traditional-music/Colin-Huehns-Pakistan/025A-C0485X0031XX-0500A0.mp3'], dtype=object)" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ddf['songurls_Album'].iloc[large_hubs_idx].get_values()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### first, fix the url for BL tracks (because it was changed recently and the metadata.csv file is not updated) " + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/homes/mp305/anaconda/lib/python2.7/site-packages/pandas/core/indexing.py:115: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + " self._setitem_with_indexer(indexer, value)\n" + ] + } + ], + "source": [ + "bl_inds = np.where(np.isnan(ddf['BuyLinkTrackDownload']))[0]\n", + "for bl_ind in bl_inds:\n", + " ddf['songurls_Album'].iloc[bl_ind] = ('https://sounds.bl.uk/World-and-traditional-music/' + \n", + " ddf['Folder'].iloc[bl_ind] + '/' + \n", + " ddf['MetaFile'].iloc[bl_ind].split('.')[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>songurls_Album</th>\n", + " <th>Country</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>515</th>\n", + " <td>https://sounds.bl.uk/World-and-traditional-mus...</td>\n", + " <td>Nigeria</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2549</th>\n", + " <td>https://sounds.bl.uk/World-and-traditional-mus...</td>\n", + " <td>Swaziland</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3486</th>\n", + " <td>https://sounds.bl.uk/World-and-traditional-mus...</td>\n", + " <td>Kazakhstan</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5020</th>\n", + " <td>https://sounds.bl.uk/World-and-traditional-mus...</td>\n", + " <td>Swaziland</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5119</th>\n", + " <td>https://sounds.bl.uk/World-and-traditional-mus...</td>\n", + " <td>Pakistan</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>" + ], + "text/plain": [ + "<IPython.core.display.HTML object>" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from IPython.display import HTML\n", + "HTML(ddf[['songurls_Album', 'Country']].iloc[large_hubs_idx, :].to_html())" + ] + }, + { "cell_type": "code", "execution_count": null, "metadata": {
--- a/notebooks/test_music_segments.ipynb Fri Sep 15 12:27:11 2017 +0100 +++ b/notebooks/test_music_segments.ipynb Fri Sep 15 16:17:17 2017 +0100 @@ -64,9 +64,7 @@ { "cell_type": "code", "execution_count": 8, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -92,9 +90,7 @@ { "cell_type": "code", "execution_count": 17, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -120,9 +116,7 @@ { "cell_type": "code", "execution_count": 18, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -153,9 +147,7 @@ { "cell_type": "code", "execution_count": 23, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -190,9 +182,7 @@ { "cell_type": "code", "execution_count": 22, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -243,7 +233,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", - "version": "2.7.11" + "version": "2.7.12" } }, "nbformat": 4,
--- a/scripts/load_features.py Fri Sep 15 12:27:11 2017 +0100 +++ b/scripts/load_features.py Fri Sep 15 16:17:17 2017 +0100 @@ -103,7 +103,7 @@ return music_idx - def get_features(self, df, stop_sec=30.0, class_label='Country', precomp_melody=False): + def get_features(self, df, stop_sec=30.0, class_label='Country', precomp_melody=True): oplist = [] mflist = [] chlist = [] @@ -119,7 +119,7 @@ if len(music_idx)==0: # no music segments -> skip this file continue - try: + if 1: op, mfcc = self.get_op_mfcc_for_file(df['Melspec'].iloc[i], stop_sec=stop_sec) ch = self.get_chroma_for_file(df['Chroma'].iloc[i], stop_sec=stop_sec) pb = self.get_pb_for_file(df['Melodia'].iloc[i], precomp_melody=precomp_melody, stop_sec=stop_sec) @@ -127,7 +127,7 @@ # pb = self.load_precomputed_pb_from_melodia(df['Melodia'].iloc[i], stop_sec=stop_sec) #else: # pb = self.get_pb_from_melodia(df['Melodia'].iloc[i], stop_sec=stop_sec) - except: + else: continue n_stop = np.int(np.ceil(stop_sec * self.framessr2)) print n_stop, len(op), len(mfcc), len(ch), len(pb)