Mercurial > hg > plosone_underreview

--- a/notebooks/sensitivity_experiment.ipynb	Wed Sep 13 12:55:14 2017 +0100
+++ b/notebooks/sensitivity_experiment.ipynb	Wed Sep 13 13:52:26 2017 +0100
@@ -2,15 +2,15 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
-     "name": "stderr",
+     "name": "stdout",
      "output_type": "stream",
      "text": [
-      "/homes/mp305/anaconda/lib/python2.7/site-packages/librosa/core/audio.py:33: UserWarning: Could not import scikits.samplerate. Falling back to scipy.signal\n",
-      "  warnings.warn('Could not import scikits.samplerate. '\n"
+      "The autoreload extension is already loaded. To reload it, use:\n",
+      "  %reload_ext autoreload\n"
      ]
     }
    ],
@@ -33,15 +33,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/homes/mp305/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2822: DtypeWarning: Columns (0,1,2,4,5,6,7,8,10,11,12,13,14,15,16,17,19,21,22,23,24,25,26,27,29,31,35,38,39,40,41,44,45,48,55,56,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,93,95,96) have mixed types. Specify dtype option on import or set low_memory=False.\n",
-      "  if self.run_code(code, result):\n"
+      "../scripts/util_filter_dataset.py:22: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison\n",
+      "  if len(np.where(bounds[:,2]=='m')[0])==0 or len(np.where(bounds[:,2]=='s')[0])==len(bounds):\n"
      ]
     }
    ],
@@ -53,15 +53,361 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(8396, 108)"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "df.shape"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "file 0 of 6\n",
+      "loading speech/music segments...\n",
+      "extracting onset patterns and mfccs...\n",
+      "extracting chroma...\n",
+      "extracting pitch bihist from melodia...\n",
+      "file 1 of 6\n",
+      "loading speech/music segments...\n",
+      "extracting onset patterns and mfccs...\n",
+      "extracting chroma...\n",
+      "extracting pitch bihist from melodia...\n",
+      "file 2 of 6\n",
+      "loading speech/music segments...\n",
+      "extracting onset patterns and mfccs...\n",
+      "extracting chroma...\n",
+      "extracting pitch bihist from melodia...\n",
+      "file 3 of 6\n",
+      "loading speech/music segments...\n",
+      "extracting onset patterns and mfccs...\n",
+      "extracting chroma...\n",
+      "extracting pitch bihist from melodia...\n",
+      "file 4 of 6\n",
+      "loading speech/music segments...\n",
+      "extracting onset patterns and mfccs...\n",
+      "extracting chroma...\n",
+      "extracting pitch bihist from melodia...\n",
+      "file 5 of 6\n",
+      "loading speech/music segments...\n",
+      "extracting onset patterns and mfccs...\n",
+      "extracting chroma...\n",
+      "extracting pitch bihist from melodia...\n",
+      "6 6 6 6 6 6\n",
+      "(264, 400) (264, 240) (264, 80) (264, 120)\n",
+      "file 0 of 2\n",
+      "loading speech/music segments...\n",
+      "extracting onset patterns and mfccs...\n",
+      "extracting chroma...\n",
+      "extracting pitch bihist from melodia...\n",
+      "file 1 of 2\n",
+      "loading speech/music segments...\n",
+      "extracting onset patterns and mfccs...\n",
+      "extracting chroma...\n",
+      "extracting pitch bihist from melodia...\n",
+      "2 2 2 2 2 2\n",
+      "(88, 400) (88, 240) (88, 80) (88, 120)\n",
+      "file 0 of 2\n",
+      "loading speech/music segments...\n",
+      "extracting onset patterns and mfccs...\n",
+      "extracting chroma...\n",
+      "extracting pitch bihist from melodia...\n",
+      "file 1 of 2\n",
+      "loading speech/music segments...\n",
+      "extracting onset patterns and mfccs...\n",
+      "extracting chroma...\n",
+      "extracting pitch bihist from melodia...\n",
+      "2 2 2 2 2 2\n",
+      "(85, 400) (85, 240) (85, 80) (85, 120)\n"
+     ]
+    },
+    {
+     "ename": "AttributeError",
+     "evalue": "'list' object has no attribute 'shape'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-9-ed1b80dfb251>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0mtrain\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mval\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mload_dataset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfeatures_for_train_test_sets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwrite_output\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mprint\u001b[0m \u001b[0mtrain\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mval\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[0;31mAttributeError\u001b[0m: 'list' object has no attribute 'shape'"
+     ]
+    }
+   ],
+   "source": [
+    "train, val, test = load_dataset.features_for_train_test_sets(df.iloc[:10, :], write_output=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>AlbumTitle</th>\n",
+       "      <th>Artist</th>\n",
+       "      <th>Artist_Album</th>\n",
+       "      <th>BuyLinkTrackDownload</th>\n",
+       "      <th>CatalogNumber</th>\n",
+       "      <th>CatalogNumber_Album</th>\n",
+       "      <th>Collection</th>\n",
+       "      <th>ContentType</th>\n",
+       "      <th>ContentType_Album</th>\n",
+       "      <th>Country</th>\n",
+       "      <th>...</th>\n",
+       "      <th>Language_iso3</th>\n",
+       "      <th>Language_iso1</th>\n",
+       "      <th>Region</th>\n",
+       "      <th>Latitude</th>\n",
+       "      <th>Longitude</th>\n",
+       "      <th>Audio</th>\n",
+       "      <th>Speech</th>\n",
+       "      <th>Melspec</th>\n",
+       "      <th>Chroma</th>\n",
+       "      <th>Melodia</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>29182</th>\n",
+       "      <td>Music of Afghanistan</td>\n",
+       "      <td>Afghan National Orchestra</td>\n",
+       "      <td>Various Artists</td>\n",
+       "      <td>26138</td>\n",
+       "      <td>FW04361_201</td>\n",
+       "      <td>FW04361</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Track</td>\n",
+       "      <td>Album</td>\n",
+       "      <td>Afghanistan</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>South Asia</td>\n",
+       "      <td>33.93911</td>\n",
+       "      <td>67.709953</td>\n",
+       "      <td>/import/c4dm-scratch/matthiasm/data/smithsonia...</td>\n",
+       "      <td>/import/c4dm-02/people/mariap/SpeechMusic/2613...</td>\n",
+       "      <td>/import/c4dm-02/people/mariap/MelSpec/26138_Sa...</td>\n",
+       "      <td>/import/c4dm-04/mariap/FeatureCsvs/Smoothie-py...</td>\n",
+       "      <td>/import/c4dm-02/people/mariap/Melodia/26138_Sa...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>29183</th>\n",
+       "      <td>Music of Afghanistan</td>\n",
+       "      <td>Yaqub Kasimi</td>\n",
+       "      <td>Various Artists</td>\n",
+       "      <td>26142</td>\n",
+       "      <td>FW04361_205</td>\n",
+       "      <td>FW04361</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Track</td>\n",
+       "      <td>Album</td>\n",
+       "      <td>Afghanistan</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>South Asia</td>\n",
+       "      <td>33.93911</td>\n",
+       "      <td>67.709953</td>\n",
+       "      <td>/import/c4dm-scratch/matthiasm/data/smithsonia...</td>\n",
+       "      <td>/import/c4dm-02/people/mariap/SpeechMusic/2614...</td>\n",
+       "      <td>/import/c4dm-02/people/mariap/MelSpec/26142_Sa...</td>\n",
+       "      <td>/import/c4dm-04/mariap/FeatureCsvs/Smoothie-py...</td>\n",
+       "      <td>/import/c4dm-02/people/mariap/Melodia/26142_Sa...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>29184</th>\n",
+       "      <td>Ustad Mohammad Omar: Virtuoso from Afghanistan</td>\n",
+       "      <td>Ustad Mohammad Omar|Zakir Hussain</td>\n",
+       "      <td>Ustad Mohammad Omar</td>\n",
+       "      <td>51931</td>\n",
+       "      <td>SFW40439_102</td>\n",
+       "      <td>SFW40439</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Track</td>\n",
+       "      <td>Album</td>\n",
+       "      <td>Afghanistan</td>\n",
+       "      <td>...</td>\n",
+       "      <td>pbt</td>\n",
+       "      <td>Indo-European</td>\n",
+       "      <td>South Asia</td>\n",
+       "      <td>33.93911</td>\n",
+       "      <td>67.709953</td>\n",
+       "      <td>/import/c4dm-scratch/matthiasm/data/smithsonia...</td>\n",
+       "      <td>/import/c4dm-02/people/mariap/SpeechMusic/5193...</td>\n",
+       "      <td>/import/c4dm-02/people/mariap/MelSpec/51931_Sa...</td>\n",
+       "      <td>/import/c4dm-04/mariap/FeatureCsvs/Smoothie-py...</td>\n",
+       "      <td>/import/c4dm-02/people/mariap/Melodia/51931_Sa...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>29185</th>\n",
+       "      <td>Music of Central Asia Vol. 3: Homayun Sakhi: T...</td>\n",
+       "      <td>Homayun Sakhi</td>\n",
+       "      <td>Homayun Sakhi</td>\n",
+       "      <td>53229</td>\n",
+       "      <td>SFW40522_101</td>\n",
+       "      <td>SFW40522</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Track</td>\n",
+       "      <td>Album</td>\n",
+       "      <td>Afghanistan</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>South Asia</td>\n",
+       "      <td>33.93911</td>\n",
+       "      <td>67.709953</td>\n",
+       "      <td>/import/c4dm-scratch/matthiasm/data/smithsonia...</td>\n",
+       "      <td>/import/c4dm-02/people/mariap/SpeechMusic/5322...</td>\n",
+       "      <td>/import/c4dm-02/people/mariap/MelSpec/53229_Sa...</td>\n",
+       "      <td>/import/c4dm-04/mariap/FeatureCsvs/Smoothie-py...</td>\n",
+       "      <td>/import/c4dm-02/people/mariap/Melodia/53229_Sa...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>29186</th>\n",
+       "      <td>Music of Afghanistan</td>\n",
+       "      <td>Rebab and Dhol Duet</td>\n",
+       "      <td>Various Artists</td>\n",
+       "      <td>26140</td>\n",
+       "      <td>FW04361_203</td>\n",
+       "      <td>FW04361</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>Track</td>\n",
+       "      <td>Album</td>\n",
+       "      <td>Afghanistan</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>South Asia</td>\n",
+       "      <td>33.93911</td>\n",
+       "      <td>67.709953</td>\n",
+       "      <td>/import/c4dm-scratch/matthiasm/data/smithsonia...</td>\n",
+       "      <td>/import/c4dm-02/people/mariap/SpeechMusic/2614...</td>\n",
+       "      <td>/import/c4dm-02/people/mariap/MelSpec/26140_Sa...</td>\n",
+       "      <td>/import/c4dm-04/mariap/FeatureCsvs/Smoothie-py...</td>\n",
+       "      <td>/import/c4dm-02/people/mariap/Melodia/26140_Sa...</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 108 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                              AlbumTitle  \\\n",
+       "29182                               Music of Afghanistan   \n",
+       "29183                               Music of Afghanistan   \n",
+       "29184     Ustad Mohammad Omar: Virtuoso from Afghanistan   \n",
+       "29185  Music of Central Asia Vol. 3: Homayun Sakhi: T...   \n",
+       "29186                               Music of Afghanistan   \n",
+       "\n",
+       "                                  Artist         Artist_Album  \\\n",
+       "29182          Afghan National Orchestra      Various Artists   \n",
+       "29183                       Yaqub Kasimi      Various Artists   \n",
+       "29184  Ustad Mohammad Omar|Zakir Hussain  Ustad Mohammad Omar   \n",
+       "29185                      Homayun Sakhi        Homayun Sakhi   \n",
+       "29186                Rebab and Dhol Duet      Various Artists   \n",
+       "\n",
+       "       BuyLinkTrackDownload CatalogNumber CatalogNumber_Album Collection  \\\n",
+       "29182                 26138   FW04361_201             FW04361        NaN   \n",
+       "29183                 26142   FW04361_205             FW04361        NaN   \n",
+       "29184                 51931  SFW40439_102            SFW40439        NaN   \n",
+       "29185                 53229  SFW40522_101            SFW40522        NaN   \n",
+       "29186                 26140   FW04361_203             FW04361        NaN   \n",
+       "\n",
+       "      ContentType ContentType_Album      Country  \\\n",
+       "29182       Track             Album  Afghanistan   \n",
+       "29183       Track             Album  Afghanistan   \n",
+       "29184       Track             Album  Afghanistan   \n",
+       "29185       Track             Album  Afghanistan   \n",
+       "29186       Track             Album  Afghanistan   \n",
+       "\n",
+       "                             ...                         Language_iso3  \\\n",
+       "29182                        ...                                   NaN   \n",
+       "29183                        ...                                   NaN   \n",
+       "29184                        ...                                   pbt   \n",
+       "29185                        ...                                   NaN   \n",
+       "29186                        ...                                   NaN   \n",
+       "\n",
+       "       Language_iso1      Region  Latitude  Longitude  \\\n",
+       "29182            NaN  South Asia  33.93911  67.709953   \n",
+       "29183            NaN  South Asia  33.93911  67.709953   \n",
+       "29184  Indo-European  South Asia  33.93911  67.709953   \n",
+       "29185            NaN  South Asia  33.93911  67.709953   \n",
+       "29186            NaN  South Asia  33.93911  67.709953   \n",
+       "\n",
+       "                                                   Audio  \\\n",
+       "29182  /import/c4dm-scratch/matthiasm/data/smithsonia...   \n",
+       "29183  /import/c4dm-scratch/matthiasm/data/smithsonia...   \n",
+       "29184  /import/c4dm-scratch/matthiasm/data/smithsonia...   \n",
+       "29185  /import/c4dm-scratch/matthiasm/data/smithsonia...   \n",
+       "29186  /import/c4dm-scratch/matthiasm/data/smithsonia...   \n",
+       "\n",
+       "                                                  Speech  \\\n",
+       "29182  /import/c4dm-02/people/mariap/SpeechMusic/2613...   \n",
+       "29183  /import/c4dm-02/people/mariap/SpeechMusic/2614...   \n",
+       "29184  /import/c4dm-02/people/mariap/SpeechMusic/5193...   \n",
+       "29185  /import/c4dm-02/people/mariap/SpeechMusic/5322...   \n",
+       "29186  /import/c4dm-02/people/mariap/SpeechMusic/2614...   \n",
+       "\n",
+       "                                                 Melspec  \\\n",
+       "29182  /import/c4dm-02/people/mariap/MelSpec/26138_Sa...   \n",
+       "29183  /import/c4dm-02/people/mariap/MelSpec/26142_Sa...   \n",
+       "29184  /import/c4dm-02/people/mariap/MelSpec/51931_Sa...   \n",
+       "29185  /import/c4dm-02/people/mariap/MelSpec/53229_Sa...   \n",
+       "29186  /import/c4dm-02/people/mariap/MelSpec/26140_Sa...   \n",
+       "\n",
+       "                                                  Chroma  \\\n",
+       "29182  /import/c4dm-04/mariap/FeatureCsvs/Smoothie-py...   \n",
+       "29183  /import/c4dm-04/mariap/FeatureCsvs/Smoothie-py...   \n",
+       "29184  /import/c4dm-04/mariap/FeatureCsvs/Smoothie-py...   \n",
+       "29185  /import/c4dm-04/mariap/FeatureCsvs/Smoothie-py...   \n",
+       "29186  /import/c4dm-04/mariap/FeatureCsvs/Smoothie-py...   \n",
+       "\n",
+       "                                                 Melodia  \n",
+       "29182  /import/c4dm-02/people/mariap/Melodia/26138_Sa...  \n",
+       "29183  /import/c4dm-02/people/mariap/Melodia/26142_Sa...  \n",
+       "29184  /import/c4dm-02/people/mariap/Melodia/51931_Sa...  \n",
+       "29185  /import/c4dm-02/people/mariap/Melodia/53229_Sa...  \n",
+       "29186  /import/c4dm-02/people/mariap/Melodia/26140_Sa...  \n",
+       "\n",
+       "[5 rows x 108 columns]"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [
--- a/scripts/load_features.py	Wed Sep 13 12:55:14 2017 +0100
+++ b/scripts/load_features.py	Wed Sep 13 13:52:26 2017 +0100
@@ -117,8 +117,8 @@
             try:
                 op, mfcc = self.get_op_mfcc_for_file(df['Melspec'].iloc[i])
                 ch = self.get_chroma_for_file(df['Chroma'].iloc[i])
-                #pb = self.get_pb_from_melodia(df['Melodia'].iloc[i])
-                pb = self.load_precomputed_pb_from_melodia(df['Melodia'].iloc[i])
+                pb = self.get_pb_from_melodia(df['Melodia'].iloc[i])
+                #pb = self.load_precomputed_pb_from_melodia(df['Melodia'].iloc[i])
                 #pb = self.get_contour_feat_from_melodia(df['Melodia'].iloc[i])
             except:
                 continue