changeset 6:a35bd818d8e9 branch-tests
notebook to test music segments
author   | Maria Panteli <m.x.panteli@gmail.com>
date     | Mon, 11 Sep 2017 14:22:17 +0100
parents  | 543744ed1ae7
children | 46b2c713cc73
files    | notebooks/test_music_segments.ipynb scripts/load_features.py tests/test_load_features.py
diffstat | 3 files changed, 227 insertions(+), 39 deletions(-)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/notebooks/test_music_segments.ipynb	Mon Sep 11 14:22:17 2017 +0100
@@ -0,0 +1,196 @@
New notebook (Python 2 kernel, nbformat 4). The cell sources added by this changeset are listed below; the notebook's JSON scaffolding and the stale NameError/SyntaxError outputs stored with it are not reproduced.

[code cell]
import numpy as np
import pickle

[code cell]
filenames = ['/import/c4dm-04/mariap/train_data_melodia_8.pickle',
             '/import/c4dm-04/mariap/val_data_melodia_8.pickle',
             '/import/c4dm-04/mariap/test_data_melodia_8.pickle']

[code cell]
all_Yaudio = []
for filename in filenames:
    _, Y, Yaudio = pickle.load(open(filename, 'rb'))
    all_Yaudio.append(Yaudio)
all_Yaudio = np.concatenate(all_Yaudio)

[code cell]
uniq_audio, uniq_counts = np.unique(all_Yaudio, return_counts=True)

[markdown cell]
## Stats on audio files with very few music frames (after the speech/music discrimination)

[code cell]
min_n_frames = 10
n_short_files = np.where(uniq_counts < min_n_frames)[0].shape[0]
print '%d files out of %d have fewer than %d frames' % (n_short_files, len(uniq_counts), min_n_frames)

[markdown cell]
## Stats on the average duration of the music segments for all tracks

[code cell]
sr = 2.0  # with an 8-second window and a 0.5-second hop size, the frame rate is about 2 frames per second
print 'mean %f' % np.mean(uniq_counts)
print 'median %f' % np.median(uniq_counts)
print 'std %f' % np.std(uniq_counts)
print 'mean duration %f' % (np.mean(uniq_counts) / sr)

[markdown cell]
## Stats on the average duration of the music segments for the British Library tracks

[code cell]
# British Library tracks start with 'D:/Audio/...'
idx_BL_tracks = np.array([i for i in range(len(uniq_audio)) if len(uniq_audio[i].split('D:/')) > 1])
sr = 2.0  # with an 8-second window and a 0.5-second hop size, the frame rate is about 2 frames per second
print 'mean %f' % np.mean(uniq_counts[idx_BL_tracks])
print 'median %f' % np.median(uniq_counts[idx_BL_tracks])
print 'std %f' % np.std(uniq_counts[idx_BL_tracks])
print 'mean duration %f' % (np.mean(uniq_counts[idx_BL_tracks]) / sr)
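All of the notebook's duration statistics reduce to the same conversion: per-recording counts of music frames (8-second windows with a 0.5-second hop, so roughly 2 frames per second) divided by that frame rate. Below is a minimal self-contained sketch of that computation; the helper name music_duration_stats and the three-recording example are made up for illustration, not taken from the pickled datasets above.

import numpy as np

def music_duration_stats(frame_counts, frames_per_sec=2.0):
    """Summarise per-recording music-frame counts as durations in seconds.

    frame_counts: 1-D array of music-frame counts per recording
                  (e.g. the uniq_counts array from np.unique above).
    frames_per_sec: ~2.0 for an 8-second window with a 0.5-second hop.
    """
    counts = np.asarray(frame_counts, dtype=float)
    return {
        'mean_frames': counts.mean(),
        'median_frames': np.median(counts),
        'std_frames': counts.std(),
        'mean_duration_sec': counts.mean() / frames_per_sec,
    }

# Example: three recordings with 10, 120, and 240 music frames
stats = music_duration_stats([10, 120, 240])
assert abs(stats['mean_duration_sec'] - (370 / 3.0) / 2.0) < 1e-9

For instance, a recording with 120 detected music frames corresponds to roughly 60 seconds of music under this frame rate.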
--- a/scripts/load_features.py	Mon Sep 11 12:01:28 2017 +0100
+++ b/scripts/load_features.py	Mon Sep 11 14:22:17 2017 +0100
@@ -80,12 +80,12 @@
         duration_in_frames = np.round(np.array(bounds[:, 1], dtype=float) * sr)
         for music_bound in music_bounds:
             lower_bound = np.max([0, bounds_in_frames[music_bound] - half_win_hop])
-            upper_bound = lower_bound + duration_in_frames[music_bound] - half_win_hop
+            upper_bound = bounds_in_frames[music_bound] + duration_in_frames[music_bound] - half_win_hop
             music_idx.append(np.arange(lower_bound, upper_bound, dtype=int))
         if len(music_idx)>0:
             music_idx = np.sort(np.concatenate(music_idx))  # should already be sorted; duplicates would need removing if segments overlap
         return music_idx
-    
+
 
     def get_music_idx_for_file(self, segmenter_file=None):
         music_idx = []
@@ -255,33 +255,3 @@
         pbihist = (pbihist - np.nanmean(pbihist)) / np.nanstd(pbihist)
         return pbihist
-
-# def get_pb_chroma_for_file(self, chroma_file=None, scale=True):
-#     ch = []
-#     pb = []
-#     if not os.path.exists(chroma_file):
-#         return ch, pb
-#     songframes = pd.read_csv(chroma_file, engine="c", header=None)
-#     songframes.iloc[np.where(np.isnan(songframes))] = 0
-#     songframes = songframes.iloc[0:min(len(songframes), 18000), :]  # only first 1.5 minutes
-#     chroma = songframes.get_values().T
-#     ch = self.get_ave_chroma(chroma)
-#     pb = self.get_pbihist_from_chroma(chroma)
-#     if scale:
-#         # scale all frames by mean and std of recording
-#         ch = (ch - np.nanmean(ch)) / np.nanstd(ch)
-#         pb = (pb - np.nanmean(pb)) / np.nanstd(pb)
-#     return ch, pb
-
-
-# def get_pbihist_from_chroma(self, chroma, alignchroma=True, nmfpb=True):
-#     pb = pbi.PitchBihist(win2sec=self.win2sec)
-#     chroma[np.where(np.isnan(chroma))] = 0
-#     if alignchroma:
-#         maxind = np.argmax(np.sum(chroma, axis=1))
-#         chroma = np.roll(chroma, -maxind, axis=0)
-#     pbihist = pb.get_pitchbihist_from_chroma(chroma=chroma, chromasr=self.framessr)
-#     if nmfpb is True:
-#         pbihist = self.nmfpitchbihist(pbihist)
-#     pbihist = pd.DataFrame(pbihist.T)
-#     return pbihist
\ No newline at end of file
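For context, the fix above changes upper_bound in get_music_idx_from_bounds so that a music segment's end is measured from the segment's own start frame (bounds_in_frames[music_bound]) rather than from the clipped lower_bound. The following is a minimal, standalone sketch of the patched logic, not the repository's class: it assumes bounds_in_frames holds the segment start times (column 0 of bounds) in frames, that music rows are labelled 'm', and it takes sr and the half-window length as parameters instead of reading framessr2 and half_win_hop from the loader object.

import numpy as np

def get_music_idx_from_bounds_sketch(bounds, sr=2.0, half_win_sec=4.0):
    """Map annotated segments to music-frame indices (standalone approximation).

    bounds: rows of [start_sec, duration_sec, label], label 'm' = music,
            as in the tests. sr: frames per second (framessr2 in the repo).
    half_win_sec: half the analysis window, i.e. 4 s for an 8-second window.
    """
    half_win_hop = np.round(half_win_sec * sr)
    # assumption: start times are column 0, durations column 1
    bounds_in_frames = np.round(np.array(bounds[:, 0], dtype=float) * sr)
    duration_in_frames = np.round(np.array(bounds[:, 1], dtype=float) * sr)
    music_bounds = np.where(bounds[:, 2] == 'm')[0]
    music_idx = []
    for music_bound in music_bounds:
        lower_bound = np.max([0, bounds_in_frames[music_bound] - half_win_hop])
        # the patched line: offset from the segment start, not from lower_bound
        upper_bound = bounds_in_frames[music_bound] + duration_in_frames[music_bound] - half_win_hop
        music_idx.append(np.arange(lower_bound, upper_bound, dtype=int))
    if len(music_idx) > 0:
        music_idx = np.sort(np.concatenate(music_idx))
    return music_idx

With the old line, any music segment that does not start at 0 s came out half_win_hop frames too short, because lower_bound already has half_win_hop subtracted; that is the behaviour the new mixed- and overlapping-segment tests below would catch.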
--- a/tests/test_load_features.py	Mon Sep 11 12:01:28 2017 +0100
+++ b/tests/test_load_features.py	Mon Sep 11 14:22:17 2017 +0100
@@ -15,9 +15,7 @@
 
 
 def test_get_music_idx_from_bounds():
-    bounds = np.array([['0', '10.5', 'm']])#,
-                       #['10.5', '12.0', 's'],
-                       #['12.0', '30.0', 'm']])
+    bounds = np.array([['0', '10.5', 'm']])
     sr = feat_loader.framessr2
     music_bounds = feat_loader.get_music_idx_from_bounds(bounds, sr=sr)
     # upper bound minus half window size
@@ -29,19 +27,43 @@
 
 def test_get_music_idx_from_bounds_short_segment():
     # anything less than half window size is not processed
     bounds = np.array([['0', '3.8', 'm']])
-    sr = feat_loader.framessr2 
+    sr = feat_loader.framessr2
     music_bounds = feat_loader.get_music_idx_from_bounds(bounds, sr=sr)
     music_bounds_true = np.array([])
     assert np.array_equal(music_bounds, music_bounds_true)
 
 
+def test_get_music_idx_from_bounds_single_frame():
+    bounds = np.array([['0', '4.3', 'm']])
+    sr = feat_loader.framessr2
+    music_bounds = feat_loader.get_music_idx_from_bounds(bounds, sr=sr)
+    music_bounds_true = np.array([0])
+    assert np.array_equal(music_bounds, music_bounds_true)
+
+
 def test_get_music_idx_from_bounds_mix_segments():
     bounds = np.array([['0', '10.5', 'm'],
                        ['10.5', '3.0', 's'],
                        ['13.5', '5.0', 'm']])
     sr = feat_loader.framessr2
     music_bounds = feat_loader.get_music_idx_from_bounds(bounds, sr=sr)
-    music_bounds_true = np.concatenate([np.arange(np.round(sr * (10.5-4.0)), dtype=int),
-                                        np.arange(np.round(sr * (13.5-4.0)), np.round(sr * (18.5-4.0)), dtype=int)])
+    half_win_sec = 4.0  # assume 8-second window
+    music_bounds_true = np.concatenate([np.arange(np.round(sr * (10.5 - half_win_sec)), dtype=int),
+                                        np.arange(np.round(sr * (13.5 - half_win_sec)),
+                                                  np.round(sr * (18.5 - half_win_sec)), dtype=int)])
     assert np.array_equal(music_bounds, music_bounds_true)
-    
\ No newline at end of file
+
+
+def test_get_music_idx_from_bounds_overlap_segments():
+    bounds = np.array([['0', '10.5', 'm'],
+                       ['9.5', '3.0', 's'],
+                       ['11.5', '5.0', 'm']])
+    sr = feat_loader.framessr2
+    music_bounds = feat_loader.get_music_idx_from_bounds(bounds, sr=sr)
+    half_win_sec = 4.0  # assume 8-second window
+    music_bounds_true = np.concatenate([np.arange(np.round(sr * (10.5 - half_win_sec)), dtype=int),
+                                        np.arange(np.round(sr * (11.5 - half_win_sec)),
+                                                  np.round(sr * (16.5 - half_win_sec)), dtype=int)])
+    assert np.array_equal(music_bounds, music_bounds_true)
+
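As a quick cross-check of the arithmetic encoded in the new mixed-segments test: with the 8-second window (half window 4 s) and roughly 2 frames per second assumed there, the music segment 0–10.5 s maps to frame indices 0–12 and the segment 13.5–18.5 s to indices 19–28. The snippet below runs that case through the standalone sketch shown after the load_features.py diff; sr = 2.0 is an assumption standing in for feat_loader.framessr2.

import numpy as np

bounds = np.array([['0', '10.5', 'm'],
                   ['10.5', '3.0', 's'],
                   ['13.5', '5.0', 'm']])
idx = get_music_idx_from_bounds_sketch(bounds, sr=2.0, half_win_sec=4.0)
expected = np.concatenate([np.arange(13), np.arange(19, 29)])
assert np.array_equal(idx, expected)

In the repository itself the new cases run alongside the existing ones in tests/test_load_features.py; the functions follow the plain assert style used by pytest- or nose-style runners.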