comparison notebooks/test_music_segments.ipynb @ 7:46b2c713cc73 branch-tests

stats on speech/music duration
author mpanteli <m.x.panteli@gmail.com>
date Mon, 11 Sep 2017 14:53:13 +0100
parents a35bd818d8e9
children e6e10013e11c
comparison
equal deleted inserted replaced
6:a35bd818d8e9 7:46b2c713cc73
18 "metadata": { 18 "metadata": {
19 "collapsed": true 19 "collapsed": true
20 }, 20 },
21 "outputs": [], 21 "outputs": [],
22 "source": [ 22 "source": [
23 "filenames = ['/import/c4dm-04/mariap/train_data_melodia_8.pickle', \n", 23 "filenames = ['/import/c4dm-04/mariap/train_data_melodia_8.pickle',\n",
24 " '/import/c4dm-04/mariap/val_data_melodia_8.pickle', \n", 24 " '/import/c4dm-04/mariap/val_data_melodia_8.pickle', \n",
25 " '/import/c4dm-04/mariap/test_data_melodia_8.pickle']" 25 " '/import/c4dm-04/mariap/test_data_melodia_8.pickle']"
26 ] 26 ]
27 }, 27 },
28 { 28 {
29 "cell_type": "code", 29 "cell_type": "code",
30 "execution_count": null, 30 "execution_count": 13,
31 "metadata": { 31 "metadata": {},
32 "collapsed": true
33 },
34 "outputs": [], 32 "outputs": [],
35 "source": [ 33 "source": [
36 "all_Yaudio = []\n", 34 "all_Yaudio = []\n",
35 "all_Y = []\n",
37 "for filename in filenames:\n", 36 "for filename in filenames:\n",
38 " _, Y, Yaudio = pickle.load(open(filename), 'rb')\n", 37 " _, Y, Yaudio = pickle.load(open(filename, 'rb'))\n",
39 " all_Yaudio.append(Yaudio)\n", 38 " all_Yaudio.append(Yaudio)\n",
40 "all_Yaudio = np.concatenate(all_Yaudio)" 39 " all_Y.append(Y)\n",
41 ] 40 "all_Yaudio = np.concatenate(all_Yaudio)\n",
42 }, 41 "all_Y = np.concatenate(all_Y)"
43 { 42 ]
44 "cell_type": "code", 43 },
45 "execution_count": 3, 44 {
46 "metadata": { 45 "cell_type": "code",
47 "collapsed": false 46 "execution_count": 5,
48 }, 47 "metadata": {},
49 "outputs": [ 48 "outputs": [],
50 {
51 "ename": "NameError",
52 "evalue": "name 'all_Yaudio' is not defined",
53 "output_type": "error",
54 "traceback": [
55 "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
56 "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
57 "\u001b[0;32m<ipython-input-3-4107ada442c0>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0muniq_audio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muniq_counts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munique\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mall_Yaudio\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreturn_counts\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
58 "\u001b[0;31mNameError\u001b[0m: name 'all_Yaudio' is not defined"
59 ]
60 }
61 ],
62 "source": [ 49 "source": [
63 "uniq_audio, uniq_counts = np.unique(all_Yaudio, return_counts=True)" 50 "uniq_audio, uniq_counts = np.unique(all_Yaudio, return_counts=True)"
64 ] 51 ]
65 }, 52 },
66 { 53 {
70 "## Stats on audio files with very few music frames (after the speech/music discrimination)" 57 "## Stats on audio files with very few music frames (after the speech/music discrimination)"
71 ] 58 ]
72 }, 59 },
73 { 60 {
74 "cell_type": "code", 61 "cell_type": "code",
75 "execution_count": 4, 62 "execution_count": 8,
76 "metadata": { 63 "metadata": {},
77 "collapsed": false 64 "outputs": [
78 }, 65 {
79 "outputs": [ 66 "name": "stdout",
80 { 67 "output_type": "stream",
81 "ename": "NameError", 68 "text": [
82 "evalue": "name 'uniq_counts' is not defined", 69 "63 files out of 8200 have less than 10 frames\n"
83 "output_type": "error",
84 "traceback": [
85 "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
86 "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
87 "\u001b[0;32m<ipython-input-4-700ed156399c>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mmin_n_frames\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mn_short_files\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwhere\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muniq_counts\u001b[0m\u001b[0;34m<\u001b[0m\u001b[0mmin_n_frames\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0;34m'%d files out of %d have less than %d frames'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mn_short_files\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muniq_counts\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmin_n_frames\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
88 "\u001b[0;31mNameError\u001b[0m: name 'uniq_counts' is not defined"
89 ] 70 ]
90 } 71 }
91 ], 72 ],
92 "source": [ 73 "source": [
93 "min_n_frames = 10\n", 74 "min_n_frames = 10\n",
94 "n_short_files = np.where(uniq_counts<min_n_frames)[0].shape\n", 75 "short_files_idx = np.where(uniq_counts<min_n_frames)[0]\n",
95 "print '%d files out of %d have less than %d frames' % (n_short_files, len(uniq_counts), min_n_frames)" 76 "print '%d files out of %d have less than %d frames' % (len(short_files_idx), len(uniq_counts), min_n_frames)"
77 ]
78 },
79 {
80 "cell_type": "markdown",
81 "metadata": {},
82 "source": [
83 "Countries for tracks with less than 10 frames"
84 ]
85 },
86 {
87 "cell_type": "code",
88 "execution_count": 17,
89 "metadata": {},
90 "outputs": [
91 {
92 "name": "stdout",
93 "output_type": "stream",
94 "text": [
95 "{'Italy': 1, 'Peru': 1, 'Solomon Islands': 2, 'France': 2, 'Ethiopia': 1, 'Somalia': 1, 'Ireland': 1, 'Swaziland': 1, 'Argentina': 1, 'Norway': 1, 'Nigeria': 1, 'Algeria': 1, 'Germany': 1, 'Puerto Rico': 1, 'Dominican Republic': 1, 'Poland': 3, 'Spain': 2, 'Netherlands': 1, 'Uganda': 4, 'Western Sahara': 5, 'Gambia': 2, 'Philippines': 2, 'Trinidad and Tobago': 1, 'Latvia': 1, 'South Sudan': 3, 'Mali': 1, 'Russia': 1, 'Romania': 1, 'Portugal': 1, 'South Africa': 3, 'Egypt': 1, 'Sierra Leone': 1, 'United Kingdom': 4, 'Lesotho': 1, 'Senegal': 2, 'Colombia': 2, 'Japan': 2, 'Nicaragua': 1, 'Botswana': 1}\n"
96 ]
97 }
98 ],
99 "source": [
100 "countries = np.array([all_Y[np.where(all_Yaudio==uniq_audio[audio_idx])[0][0]][0] for audio_idx in short_files_idx])\n",
101 "unique, counts = np.unique(countries, return_counts=True)\n",
102 "print dict(zip(unique, counts))"
96 ] 103 ]
97 }, 104 },
98 { 105 {
99 "cell_type": "markdown", 106 "cell_type": "markdown",
100 "metadata": {}, 107 "metadata": {},
102 "## Stats on average duration of the music segments for all tracks" 109 "## Stats on average duration of the music segments for all tracks"
103 ] 110 ]
104 }, 111 },
105 { 112 {
106 "cell_type": "code", 113 "cell_type": "code",
107 "execution_count": 5, 114 "execution_count": 18,
108 "metadata": { 115 "metadata": {},
109 "collapsed": false 116 "outputs": [
110 }, 117 {
111 "outputs": [ 118 "name": "stdout",
112 { 119 "output_type": "stream",
113 "ename": "SyntaxError", 120 "text": [
114 "evalue": "invalid syntax (<ipython-input-5-2c4ab0e943a6>, line 1)", 121 "mean 65.750000\n",
115 "output_type": "error", 122 "median 44.000000\n",
116 "traceback": [ 123 "std 45.947865\n",
117 "\u001b[0;36m File \u001b[0;32m\"<ipython-input-5-2c4ab0e943a6>\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m print 'mean %f' np.mean(uniq_counts)\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" 124 "mean duration 32.875000\n"
118 ] 125 ]
119 } 126 }
120 ], 127 ],
121 "source": [ 128 "source": [
122 "sr = 2.0 # with an 8-second window and a 0.5-second hop size, the sampling rate is about 2 samples per second\n", 129 "sr = 2.0 # with an 8-second window and a 0.5-second hop size, the sampling rate is about 2 samples per second\n",
128 }, 135 },
129 { 136 {
130 "cell_type": "markdown", 137 "cell_type": "markdown",
131 "metadata": {}, 138 "metadata": {},
132 "source": [ 139 "source": [
140 "## Stats on average duration of the music segments for the Smithsonian tracks"
141 ]
142 },
143 {
144 "cell_type": "code",
145 "execution_count": 23,
146 "metadata": {},
147 "outputs": [
148 {
149 "name": "stdout",
150 "output_type": "stream",
151 "text": [
152 "n tracks: 6132\n",
153 "mean 42.618885\n",
154 "median 44.000000\n",
155 "std 4.804534\n",
156 "mean duration 21.309442\n"
157 ]
158 }
159 ],
160 "source": [
161 "# British Library tracks start with 'D:/Audio/...'; Smithsonian tracks are the ones whose path does not contain 'D:/'\n",
162 "idx_SM_tracks = np.array([i for i in range(len(uniq_audio)) if len(uniq_audio[i].split('D:/'))==1])\n",
163 "sr = 2.0 # with an 8-second window and a 0.5-second hop size, the sampling rate is about 2 samples per second\n",
164 "print 'n tracks: %d' % len(idx_SM_tracks)\n",
165 "print 'mean %f' % np.mean(uniq_counts[idx_SM_tracks])\n",
166 "print 'median %f' % np.median(uniq_counts[idx_SM_tracks])\n",
167 "print 'std %f' % np.std(uniq_counts[idx_SM_tracks])\n",
168 "print 'mean duration %f' % (np.mean(uniq_counts[idx_SM_tracks]) / sr)"
169 ]
170 },
171 {
172 "cell_type": "markdown",
173 "metadata": {},
174 "source": [
133 "## Stats on average duration of the music segments for the British Library tracks" 175 "## Stats on average duration of the music segments for the British Library tracks"
134 ] 176 ]
135 }, 177 },
136 { 178 {
137 "cell_type": "code", 179 "cell_type": "code",
138 "execution_count": 7, 180 "execution_count": 22,
139 "metadata": { 181 "metadata": {},
140 "collapsed": false 182 "outputs": [
141 }, 183 {
142 "outputs": [ 184 "name": "stdout",
143 { 185 "output_type": "stream",
144 "ename": "NameError", 186 "text": [
145 "evalue": "name 'uniq_audio' is not defined", 187 "n tracks: 2068\n",
146 "output_type": "error", 188 "mean 134.338008\n",
147 "traceback": [ 189 "median 163.000000\n",
148 "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 190 "std 44.855790\n",
149 "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 191 "mean duration 67.169004\n"
150 "\u001b[0;32m<ipython-input-7-4ebf50436e4a>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m#British library tracks start with 'D:/Audio/...'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0midx_BL_tracks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muniq_audio\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muniq_audio\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'D:/'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m>\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0msr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m2.0\u001b[0m \u001b[0;31m# with 8-second window and 0.5-second hop size the sampling rate is 2 about 2 samples per second\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0;34m'mean %f'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muniq_counts\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0midx_BL_tracks\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0;34m'median %f'\u001b[0m \u001b[0;34m%\u001b[0m 
\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmedian\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muniq_counts\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0midx_BL_tracks\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
151 "\u001b[0;31mNameError\u001b[0m: name 'uniq_audio' is not defined"
152 ] 192 ]
153 } 193 }
154 ], 194 ],
155 "source": [ 195 "source": [
156 "#British library tracks start with 'D:/Audio/...'\n", 196 "#British library tracks start with 'D:/Audio/...'\n",
157 "idx_BL_tracks = np.array([i for i in range(len(uniq_audio)) if len(uniq_audio[i].split('D:/'))>1])\n", 197 "idx_BL_tracks = np.array([i for i in range(len(uniq_audio)) if len(uniq_audio[i].split('D:/'))>1])\n",
158 "sr = 2.0 # with an 8-second window and a 0.5-second hop size, the sampling rate is about 2 samples per second\n", 198 "sr = 2.0 # with an 8-second window and a 0.5-second hop size, the sampling rate is about 2 samples per second\n",
199 "print 'n tracks: %d' % len(idx_BL_tracks)\n",
159 "print 'mean %f' % np.mean(uniq_counts[idx_BL_tracks])\n", 200 "print 'mean %f' % np.mean(uniq_counts[idx_BL_tracks])\n",
160 "print 'median %f' % np.median(uniq_counts[idx_BL_tracks])\n", 201 "print 'median %f' % np.median(uniq_counts[idx_BL_tracks])\n",
161 "print 'std %f' % np.std(uniq_counts[idx_BL_tracks])\n", 202 "print 'std %f' % np.std(uniq_counts[idx_BL_tracks])\n",
162 "print 'mean duration %f' % (np.mean(uniq_counts[idx_BL_tracks]) / sr)" 203 "print 'mean duration %f' % (np.mean(uniq_counts[idx_BL_tracks]) / sr)"
163 ] 204 ]
190 "pygments_lexer": "ipython2", 231 "pygments_lexer": "ipython2",
191 "version": "2.7.12" 232 "version": "2.7.12"
192 } 233 }
193 }, 234 },
194 "nbformat": 4, 235 "nbformat": 4,
195 "nbformat_minor": 0 236 "nbformat_minor": 1
196 } 237 }