changeset 6:a35bd818d8e9 branch-tests

notebook to test music segments
author Maria Panteli <m.x.panteli@gmail.com>
date Mon, 11 Sep 2017 14:22:17 +0100
parents 543744ed1ae7
children 46b2c713cc73
files notebooks/test_music_segments.ipynb scripts/load_features.py tests/test_load_features.py
diffstat 3 files changed, 227 insertions(+), 39 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/notebooks/test_music_segments.ipynb	Mon Sep 11 14:22:17 2017 +0100
@@ -0,0 +1,196 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pickle"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "filenames = ['/import/c4dm-04/mariap/train_data_melodia_8.pickle', \n",
+    "             '/import/c4dm-04/mariap/val_data_melodia_8.pickle', \n",
+    "             '/import/c4dm-04/mariap/test_data_melodia_8.pickle']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "all_Yaudio = []\n",
+    "for filename in filenames:\n",
+    "    _, Y, Yaudio = pickle.load(open(filename, 'rb'))\n",
+    "    all_Yaudio.append(Yaudio)\n",
+    "all_Yaudio = np.concatenate(all_Yaudio)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "ename": "NameError",
+     "evalue": "name 'all_Yaudio' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-3-4107ada442c0>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0muniq_audio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muniq_counts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munique\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mall_Yaudio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreturn_counts\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[0;31mNameError\u001b[0m: name 'all_Yaudio' is not defined"
+     ]
+    }
+   ],
+   "source": [
+    "uniq_audio, uniq_counts = np.unique(all_Yaudio, return_counts=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Stats on audio files with very few music frames (after the speech/music discrimination)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "ename": "NameError",
+     "evalue": "name 'uniq_counts' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-4-700ed156399c>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0mmin_n_frames\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mn_short_files\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwhere\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muniq_counts\u001b[0m\u001b[0;34m<\u001b[0m\u001b[0mmin_n_frames\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      3\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0;34m'%d files out of %d have less than %d frames'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mn_short_files\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muniq_counts\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmin_n_frames\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mNameError\u001b[0m: name 'uniq_counts' is not defined"
+     ]
+    }
+   ],
+   "source": [
+    "min_n_frames = 10\n",
+    "n_short_files = np.where(uniq_counts<min_n_frames)[0].shape[0]\n",
+    "print '%d files out of %d have less than %d frames' % (n_short_files, len(uniq_counts), min_n_frames)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Stats on average duration of the music segments for all tracks"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "ename": "SyntaxError",
+     "evalue": "invalid syntax (<ipython-input-5-2c4ab0e943a6>, line 1)",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;36m  File \u001b[0;32m\"<ipython-input-5-2c4ab0e943a6>\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m    print 'mean %f' np.mean(uniq_counts)\u001b[0m\n\u001b[0m                     ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
+     ]
+    }
+   ],
+   "source": [
+    "sr = 2.0  # with 8-second window and 0.5-second hop size the sampling rate is about 2 samples per second\n",
+    "print 'mean %f' % np.mean(uniq_counts)\n",
+    "print 'median %f' % np.median(uniq_counts)\n",
+    "print 'std %f' % np.std(uniq_counts)\n",
+    "print 'mean duration %f' % (np.mean(uniq_counts) / sr)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Stats on average duration of the music segments for the British Library tracks"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "ename": "NameError",
+     "evalue": "name 'uniq_audio' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-7-4ebf50436e4a>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m#British library tracks start with 'D:/Audio/...'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0midx_BL_tracks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muniq_audio\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muniq_audio\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'D:/'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m>\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      3\u001b[0m \u001b[0msr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m2.0\u001b[0m  \u001b[0;31m# with 8-second window and 0.5-second hop size the sampling rate is 2 about 2 samples per second\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0;34m'mean %f'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muniq_counts\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0midx_BL_tracks\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0;34m'median %f'\u001b[0m \u001b[0;34m%\u001b[0m 
\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmedian\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muniq_counts\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0midx_BL_tracks\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mNameError\u001b[0m: name 'uniq_audio' is not defined"
+     ]
+    }
+   ],
+   "source": [
+    "#British library tracks start with 'D:/Audio/...'\n",
+    "idx_BL_tracks = np.array([i for i in range(len(uniq_audio)) if len(uniq_audio[i].split('D:/'))>1])\n",
+    "sr = 2.0  # with 8-second window and 0.5-second hop size the sampling rate is about 2 samples per second\n",
+    "print 'mean %f' % np.mean(uniq_counts[idx_BL_tracks])\n",
+    "print 'median %f' % np.median(uniq_counts[idx_BL_tracks])\n",
+    "print 'std %f' % np.std(uniq_counts[idx_BL_tracks])\n",
+    "print 'mean duration %f' % (np.mean(uniq_counts[idx_BL_tracks]) / sr)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 2",
+   "language": "python",
+   "name": "python2"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
--- a/scripts/load_features.py	Mon Sep 11 12:01:28 2017 +0100
+++ b/scripts/load_features.py	Mon Sep 11 14:22:17 2017 +0100
@@ -80,12 +80,12 @@
             duration_in_frames = np.round(np.array(bounds[:, 1], dtype=float) * sr)
             for music_bound in music_bounds:
                 lower_bound = np.max([0, bounds_in_frames[music_bound] - half_win_hop])
-                upper_bound = lower_bound + duration_in_frames[music_bound] - half_win_hop
+                upper_bound = bounds_in_frames[music_bound] + duration_in_frames[music_bound] - half_win_hop
                 music_idx.append(np.arange(lower_bound, upper_bound, dtype=int))
             if len(music_idx)>0:
                 music_idx = np.sort(np.concatenate(music_idx))  # it should be sorted, but just in case segments overlap -- remove duplicates if segments overlap
         return music_idx
-    
+
     
     def get_music_idx_for_file(self, segmenter_file=None):
         music_idx = []
@@ -255,33 +255,3 @@
             pbihist = (pbihist - np.nanmean(pbihist)) / np.nanstd(pbihist)
         return pbihist
         
-        
-#    def get_pb_chroma_for_file(self, chroma_file=None, scale=True):
-#        ch = []
-#        pb = []
-#        if not os.path.exists(chroma_file):
-#            return ch, pb
-#        songframes = pd.read_csv(chroma_file, engine="c", header=None)
-#        songframes.iloc[np.where(np.isnan(songframes))] = 0
-#        songframes = songframes.iloc[0:min(len(songframes), 18000), :]  # only first 1.5 minutes
-#        chroma = songframes.get_values().T
-#        ch = self.get_ave_chroma(chroma)
-#        pb = self.get_pbihist_from_chroma(chroma)
-#        if scale:
-#            # scale all frames by mean and std of recording
-#            ch = (ch - np.nanmean(ch)) / np.nanstd(ch)                        
-#            pb = (pb - np.nanmean(pb)) / np.nanstd(pb)
-#        return ch, pb
-
-
-#    def get_pbihist_from_chroma(self, chroma, alignchroma=True, nmfpb=True):
-#        pb = pbi.PitchBihist(win2sec=self.win2sec)        
-#        chroma[np.where(np.isnan(chroma))] = 0
-#        if alignchroma:
-#            maxind = np.argmax(np.sum(chroma, axis=1))
-#            chroma = np.roll(chroma, -maxind, axis=0)
-#        pbihist = pb.get_pitchbihist_from_chroma(chroma=chroma, chromasr=self.framessr)
-#        if nmfpb is True:
-#            pbihist = self.nmfpitchbihist(pbihist)
-#        pbihist = pd.DataFrame(pbihist.T)
-#        return pbihist
\ No newline at end of file
--- a/tests/test_load_features.py	Mon Sep 11 12:01:28 2017 +0100
+++ b/tests/test_load_features.py	Mon Sep 11 14:22:17 2017 +0100
@@ -15,9 +15,7 @@
 
 
 def test_get_music_idx_from_bounds():
-    bounds = np.array([['0', '10.5', 'm']])#, 
-              #['10.5', '12.0', 's'],
-              #['12.0', '30.0', 'm']])
+    bounds = np.array([['0', '10.5', 'm']])
     sr = feat_loader.framessr2            
     music_bounds = feat_loader.get_music_idx_from_bounds(bounds, sr=sr)
     # upper bound minus half window size
@@ -29,19 +27,43 @@
 def test_get_music_idx_from_bounds_short_segment():
     # anything less than half window size is not processed
     bounds = np.array([['0', '3.8', 'm']])
-    sr = feat_loader.framessr2            
+    sr = feat_loader.framessr2
     music_bounds = feat_loader.get_music_idx_from_bounds(bounds, sr=sr)
     music_bounds_true = np.array([])
     assert np.array_equal(music_bounds, music_bounds_true)
 
 
+def test_get_music_idx_from_bounds_single_frame():
+    bounds = np.array([['0', '4.3', 'm']])
+    sr = feat_loader.framessr2         
+    music_bounds = feat_loader.get_music_idx_from_bounds(bounds, sr=sr)
+    music_bounds_true = np.array([0])
+    assert np.array_equal(music_bounds, music_bounds_true)
+
+
 def test_get_music_idx_from_bounds_mix_segments():
     bounds = np.array([['0', '10.5', 'm'], 
               ['10.5', '3.0', 's'],
               ['13.5', '5.0', 'm']])
     sr = feat_loader.framessr2
     music_bounds = feat_loader.get_music_idx_from_bounds(bounds, sr=sr)
-    music_bounds_true = np.concatenate([np.arange(np.round(sr * (10.5-4.0)), dtype=int),
-                                        np.arange(np.round(sr * (13.5-4.0)), np.round(sr * (18.5-4.0)), dtype=int)])
+    half_win_sec = 4.0  # assume 8-second window
+    music_bounds_true = np.concatenate([np.arange(np.round(sr * (10.5 - half_win_sec)), dtype=int),
+                                        np.arange(np.round(sr * (13.5 - half_win_sec)), 
+                                            np.round(sr * (18.5 - half_win_sec)), dtype=int)])
     assert np.array_equal(music_bounds, music_bounds_true)
-    
\ No newline at end of file
+    
+
+def test_get_music_idx_from_bounds_overlap_segments():
+    bounds = np.array([['0', '10.5', 'm'], 
+              ['9.5', '3.0', 's'],
+              ['11.5', '5.0', 'm']])
+    sr = feat_loader.framessr2
+    music_bounds = feat_loader.get_music_idx_from_bounds(bounds, sr=sr)
+    half_win_sec = 4.0  # assume 8-second window
+    music_bounds_true = np.concatenate([np.arange(np.round(sr * (10.5 - half_win_sec)), dtype=int),
+                                        np.arange(np.round(sr * (11.5 - half_win_sec)), 
+                                            np.round(sr * (16.5 - half_win_sec)), dtype=int)])
+    assert np.array_equal(music_bounds, music_bounds_true)
+
+