Mercurial > hg > plosone_underreview
comparison notebooks/sensitivity_experiment.ipynb @ 23:56cbf155680a branch-tests
on melodia
author | mpanteli <m.x.panteli@gmail.com> |
---|---|
date | Wed, 13 Sep 2017 13:52:26 +0100 |
parents | 206fdeff4641 |
children | 29b5ee381305 |
comparison
equal
deleted
inserted
replaced
22:4aa0ce25fabd | 23:56cbf155680a |
---|---|
1 { | 1 { |
2 "cells": [ | 2 "cells": [ |
3 { | 3 { |
4 "cell_type": "code", | 4 "cell_type": "code", |
5 "execution_count": 1, | 5 "execution_count": 3, |
6 "metadata": {}, | 6 "metadata": {}, |
7 "outputs": [ | 7 "outputs": [ |
8 { | 8 { |
9 "name": "stderr", | 9 "name": "stdout", |
10 "output_type": "stream", | 10 "output_type": "stream", |
11 "text": [ | 11 "text": [ |
12 "/homes/mp305/anaconda/lib/python2.7/site-packages/librosa/core/audio.py:33: UserWarning: Could not import scikits.samplerate. Falling back to scipy.signal\n", | 12 "The autoreload extension is already loaded. To reload it, use:\n", |
13 " warnings.warn('Could not import scikits.samplerate. '\n" | 13 " %reload_ext autoreload\n" |
14 ] | 14 ] |
15 } | 15 } |
16 ], | 16 ], |
17 "source": [ | 17 "source": [ |
18 "import numpy as np\n", | 18 "import numpy as np\n", |
31 "import scripts.outliers as outliers" | 31 "import scripts.outliers as outliers" |
32 ] | 32 ] |
33 }, | 33 }, |
34 { | 34 { |
35 "cell_type": "code", | 35 "cell_type": "code", |
36 "execution_count": null, | 36 "execution_count": 4, |
37 "metadata": {}, | 37 "metadata": {}, |
38 "outputs": [ | 38 "outputs": [ |
39 { | 39 { |
40 "name": "stderr", | 40 "name": "stderr", |
41 "output_type": "stream", | 41 "output_type": "stream", |
42 "text": [ | 42 "text": [ |
43 "/homes/mp305/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2822: DtypeWarning: Columns (0,1,2,4,5,6,7,8,10,11,12,13,14,15,16,17,19,21,22,23,24,25,26,27,29,31,35,38,39,40,41,44,45,48,55,56,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,93,95,96) have mixed types. Specify dtype option on import or set low_memory=False.\n", | 43 "../scripts/util_filter_dataset.py:22: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison\n", |
44 " if self.run_code(code, result):\n" | 44 " if len(np.where(bounds[:,2]=='m')[0])==0 or len(np.where(bounds[:,2]=='s')[0])==len(bounds):\n" |
45 ] | 45 ] |
46 } | 46 } |
47 ], | 47 ], |
48 "source": [ | 48 "source": [ |
49 "OUTPUT_FILES = load_dataset.OUTPUT_FILES\n", | 49 "OUTPUT_FILES = load_dataset.OUTPUT_FILES\n", |
51 "df = load_dataset.sample_dataset(csv_file=load_dataset.METADATA_FILE)" | 51 "df = load_dataset.sample_dataset(csv_file=load_dataset.METADATA_FILE)" |
52 ] | 52 ] |
53 }, | 53 }, |
54 { | 54 { |
55 "cell_type": "code", | 55 "cell_type": "code", |
56 "execution_count": null, | 56 "execution_count": 5, |
57 "metadata": {}, | 57 "metadata": {}, |
58 "outputs": [], | 58 "outputs": [ |
59 { | |
60 "data": { | |
61 "text/plain": [ | |
62 "(8396, 108)" | |
63 ] | |
64 }, | |
65 "execution_count": 5, | |
66 "metadata": {}, | |
67 "output_type": "execute_result" | |
68 } | |
69 ], | |
59 "source": [ | 70 "source": [ |
60 "df.shape" | 71 "df.shape" |
72 ] | |
73 }, | |
74 { | |
75 "cell_type": "code", | |
76 "execution_count": 9, | |
77 "metadata": {}, | |
78 "outputs": [ | |
79 { | |
80 "name": "stdout", | |
81 "output_type": "stream", | |
82 "text": [ | |
83 "file 0 of 6\n", | |
84 "loading speech/music segments...\n", | |
85 "extracting onset patterns and mfccs...\n", | |
86 "extracting chroma...\n", | |
87 "extracting pitch bihist from melodia...\n", | |
88 "file 1 of 6\n", | |
89 "loading speech/music segments...\n", | |
90 "extracting onset patterns and mfccs...\n", | |
91 "extracting chroma...\n", | |
92 "extracting pitch bihist from melodia...\n", | |
93 "file 2 of 6\n", | |
94 "loading speech/music segments...\n", | |
95 "extracting onset patterns and mfccs...\n", | |
96 "extracting chroma...\n", | |
97 "extracting pitch bihist from melodia...\n", | |
98 "file 3 of 6\n", | |
99 "loading speech/music segments...\n", | |
100 "extracting onset patterns and mfccs...\n", | |
101 "extracting chroma...\n", | |
102 "extracting pitch bihist from melodia...\n", | |
103 "file 4 of 6\n", | |
104 "loading speech/music segments...\n", | |
105 "extracting onset patterns and mfccs...\n", | |
106 "extracting chroma...\n", | |
107 "extracting pitch bihist from melodia...\n", | |
108 "file 5 of 6\n", | |
109 "loading speech/music segments...\n", | |
110 "extracting onset patterns and mfccs...\n", | |
111 "extracting chroma...\n", | |
112 "extracting pitch bihist from melodia...\n", | |
113 "6 6 6 6 6 6\n", | |
114 "(264, 400) (264, 240) (264, 80) (264, 120)\n", | |
115 "file 0 of 2\n", | |
116 "loading speech/music segments...\n", | |
117 "extracting onset patterns and mfccs...\n", | |
118 "extracting chroma...\n", | |
119 "extracting pitch bihist from melodia...\n", | |
120 "file 1 of 2\n", | |
121 "loading speech/music segments...\n", | |
122 "extracting onset patterns and mfccs...\n", | |
123 "extracting chroma...\n", | |
124 "extracting pitch bihist from melodia...\n", | |
125 "2 2 2 2 2 2\n", | |
126 "(88, 400) (88, 240) (88, 80) (88, 120)\n", | |
127 "file 0 of 2\n", | |
128 "loading speech/music segments...\n", | |
129 "extracting onset patterns and mfccs...\n", | |
130 "extracting chroma...\n", | |
131 "extracting pitch bihist from melodia...\n", | |
132 "file 1 of 2\n", | |
133 "loading speech/music segments...\n", | |
134 "extracting onset patterns and mfccs...\n", | |
135 "extracting chroma...\n", | |
136 "extracting pitch bihist from melodia...\n", | |
137 "2 2 2 2 2 2\n", | |
138 "(85, 400) (85, 240) (85, 80) (85, 120)\n" | |
139 ] | |
140 }, | |
141 { | |
142 "ename": "AttributeError", | |
143 "evalue": "'list' object has no attribute 'shape'", | |
144 "output_type": "error", | |
145 "traceback": [ | |
146 "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
147 "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", | |
148 "\u001b[0;32m<ipython-input-9-ed1b80dfb251>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mtrain\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mval\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mload_dataset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfeatures_for_train_test_sets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwrite_output\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mprint\u001b[0m \u001b[0mtrain\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mval\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
149 "\u001b[0;31mAttributeError\u001b[0m: 'list' object has no attribute 'shape'" | |
150 ] | |
151 } | |
152 ], | |
153 "source": [ | |
154 "train, val, test = load_dataset.features_for_train_test_sets(df.iloc[:10, :], write_output=False)" | |
155 ] | |
156 }, | |
157 { | |
158 "cell_type": "code", | |
159 "execution_count": 8, | |
160 "metadata": {}, | |
161 "outputs": [ | |
162 { | |
163 "data": { | |
164 "text/html": [ | |
165 "<div>\n", | |
166 "<table border=\"1\" class=\"dataframe\">\n", | |
167 " <thead>\n", | |
168 " <tr style=\"text-align: right;\">\n", | |
169 " <th></th>\n", | |
170 " <th>AlbumTitle</th>\n", | |
171 " <th>Artist</th>\n", | |
172 " <th>Artist_Album</th>\n", | |
173 " <th>BuyLinkTrackDownload</th>\n", | |
174 " <th>CatalogNumber</th>\n", | |
175 " <th>CatalogNumber_Album</th>\n", | |
176 " <th>Collection</th>\n", | |
177 " <th>ContentType</th>\n", | |
178 " <th>ContentType_Album</th>\n", | |
179 " <th>Country</th>\n", | |
180 " <th>...</th>\n", | |
181 " <th>Language_iso3</th>\n", | |
182 " <th>Language_iso1</th>\n", | |
183 " <th>Region</th>\n", | |
184 " <th>Latitude</th>\n", | |
185 " <th>Longitude</th>\n", | |
186 " <th>Audio</th>\n", | |
187 " <th>Speech</th>\n", | |
188 " <th>Melspec</th>\n", | |
189 " <th>Chroma</th>\n", | |
190 " <th>Melodia</th>\n", | |
191 " </tr>\n", | |
192 " </thead>\n", | |
193 " <tbody>\n", | |
194 " <tr>\n", | |
195 " <th>29182</th>\n", | |
196 " <td>Music of Afghanistan</td>\n", | |
197 " <td>Afghan National Orchestra</td>\n", | |
198 " <td>Various Artists</td>\n", | |
199 " <td>26138</td>\n", | |
200 " <td>FW04361_201</td>\n", | |
201 " <td>FW04361</td>\n", | |
202 " <td>NaN</td>\n", | |
203 " <td>Track</td>\n", | |
204 " <td>Album</td>\n", | |
205 " <td>Afghanistan</td>\n", | |
206 " <td>...</td>\n", | |
207 " <td>NaN</td>\n", | |
208 " <td>NaN</td>\n", | |
209 " <td>South Asia</td>\n", | |
210 " <td>33.93911</td>\n", | |
211 " <td>67.709953</td>\n", | |
212 " <td>/import/c4dm-scratch/matthiasm/data/smithsonia...</td>\n", | |
213 " <td>/import/c4dm-02/people/mariap/SpeechMusic/2613...</td>\n", | |
214 " <td>/import/c4dm-02/people/mariap/MelSpec/26138_Sa...</td>\n", | |
215 " <td>/import/c4dm-04/mariap/FeatureCsvs/Smoothie-py...</td>\n", | |
216 " <td>/import/c4dm-02/people/mariap/Melodia/26138_Sa...</td>\n", | |
217 " </tr>\n", | |
218 " <tr>\n", | |
219 " <th>29183</th>\n", | |
220 " <td>Music of Afghanistan</td>\n", | |
221 " <td>Yaqub Kasimi</td>\n", | |
222 " <td>Various Artists</td>\n", | |
223 " <td>26142</td>\n", | |
224 " <td>FW04361_205</td>\n", | |
225 " <td>FW04361</td>\n", | |
226 " <td>NaN</td>\n", | |
227 " <td>Track</td>\n", | |
228 " <td>Album</td>\n", | |
229 " <td>Afghanistan</td>\n", | |
230 " <td>...</td>\n", | |
231 " <td>NaN</td>\n", | |
232 " <td>NaN</td>\n", | |
233 " <td>South Asia</td>\n", | |
234 " <td>33.93911</td>\n", | |
235 " <td>67.709953</td>\n", | |
236 " <td>/import/c4dm-scratch/matthiasm/data/smithsonia...</td>\n", | |
237 " <td>/import/c4dm-02/people/mariap/SpeechMusic/2614...</td>\n", | |
238 " <td>/import/c4dm-02/people/mariap/MelSpec/26142_Sa...</td>\n", | |
239 " <td>/import/c4dm-04/mariap/FeatureCsvs/Smoothie-py...</td>\n", | |
240 " <td>/import/c4dm-02/people/mariap/Melodia/26142_Sa...</td>\n", | |
241 " </tr>\n", | |
242 " <tr>\n", | |
243 " <th>29184</th>\n", | |
244 " <td>Ustad Mohammad Omar: Virtuoso from Afghanistan</td>\n", | |
245 " <td>Ustad Mohammad Omar|Zakir Hussain</td>\n", | |
246 " <td>Ustad Mohammad Omar</td>\n", | |
247 " <td>51931</td>\n", | |
248 " <td>SFW40439_102</td>\n", | |
249 " <td>SFW40439</td>\n", | |
250 " <td>NaN</td>\n", | |
251 " <td>Track</td>\n", | |
252 " <td>Album</td>\n", | |
253 " <td>Afghanistan</td>\n", | |
254 " <td>...</td>\n", | |
255 " <td>pbt</td>\n", | |
256 " <td>Indo-European</td>\n", | |
257 " <td>South Asia</td>\n", | |
258 " <td>33.93911</td>\n", | |
259 " <td>67.709953</td>\n", | |
260 " <td>/import/c4dm-scratch/matthiasm/data/smithsonia...</td>\n", | |
261 " <td>/import/c4dm-02/people/mariap/SpeechMusic/5193...</td>\n", | |
262 " <td>/import/c4dm-02/people/mariap/MelSpec/51931_Sa...</td>\n", | |
263 " <td>/import/c4dm-04/mariap/FeatureCsvs/Smoothie-py...</td>\n", | |
264 " <td>/import/c4dm-02/people/mariap/Melodia/51931_Sa...</td>\n", | |
265 " </tr>\n", | |
266 " <tr>\n", | |
267 " <th>29185</th>\n", | |
268 " <td>Music of Central Asia Vol. 3: Homayun Sakhi: T...</td>\n", | |
269 " <td>Homayun Sakhi</td>\n", | |
270 " <td>Homayun Sakhi</td>\n", | |
271 " <td>53229</td>\n", | |
272 " <td>SFW40522_101</td>\n", | |
273 " <td>SFW40522</td>\n", | |
274 " <td>NaN</td>\n", | |
275 " <td>Track</td>\n", | |
276 " <td>Album</td>\n", | |
277 " <td>Afghanistan</td>\n", | |
278 " <td>...</td>\n", | |
279 " <td>NaN</td>\n", | |
280 " <td>NaN</td>\n", | |
281 " <td>South Asia</td>\n", | |
282 " <td>33.93911</td>\n", | |
283 " <td>67.709953</td>\n", | |
284 " <td>/import/c4dm-scratch/matthiasm/data/smithsonia...</td>\n", | |
285 " <td>/import/c4dm-02/people/mariap/SpeechMusic/5322...</td>\n", | |
286 " <td>/import/c4dm-02/people/mariap/MelSpec/53229_Sa...</td>\n", | |
287 " <td>/import/c4dm-04/mariap/FeatureCsvs/Smoothie-py...</td>\n", | |
288 " <td>/import/c4dm-02/people/mariap/Melodia/53229_Sa...</td>\n", | |
289 " </tr>\n", | |
290 " <tr>\n", | |
291 " <th>29186</th>\n", | |
292 " <td>Music of Afghanistan</td>\n", | |
293 " <td>Rebab and Dhol Duet</td>\n", | |
294 " <td>Various Artists</td>\n", | |
295 " <td>26140</td>\n", | |
296 " <td>FW04361_203</td>\n", | |
297 " <td>FW04361</td>\n", | |
298 " <td>NaN</td>\n", | |
299 " <td>Track</td>\n", | |
300 " <td>Album</td>\n", | |
301 " <td>Afghanistan</td>\n", | |
302 " <td>...</td>\n", | |
303 " <td>NaN</td>\n", | |
304 " <td>NaN</td>\n", | |
305 " <td>South Asia</td>\n", | |
306 " <td>33.93911</td>\n", | |
307 " <td>67.709953</td>\n", | |
308 " <td>/import/c4dm-scratch/matthiasm/data/smithsonia...</td>\n", | |
309 " <td>/import/c4dm-02/people/mariap/SpeechMusic/2614...</td>\n", | |
310 " <td>/import/c4dm-02/people/mariap/MelSpec/26140_Sa...</td>\n", | |
311 " <td>/import/c4dm-04/mariap/FeatureCsvs/Smoothie-py...</td>\n", | |
312 " <td>/import/c4dm-02/people/mariap/Melodia/26140_Sa...</td>\n", | |
313 " </tr>\n", | |
314 " </tbody>\n", | |
315 "</table>\n", | |
316 "<p>5 rows × 108 columns</p>\n", | |
317 "</div>" | |
318 ], | |
319 "text/plain": [ | |
320 " AlbumTitle \\\n", | |
321 "29182 Music of Afghanistan \n", | |
322 "29183 Music of Afghanistan \n", | |
323 "29184 Ustad Mohammad Omar: Virtuoso from Afghanistan \n", | |
324 "29185 Music of Central Asia Vol. 3: Homayun Sakhi: T... \n", | |
325 "29186 Music of Afghanistan \n", | |
326 "\n", | |
327 " Artist Artist_Album \\\n", | |
328 "29182 Afghan National Orchestra Various Artists \n", | |
329 "29183 Yaqub Kasimi Various Artists \n", | |
330 "29184 Ustad Mohammad Omar|Zakir Hussain Ustad Mohammad Omar \n", | |
331 "29185 Homayun Sakhi Homayun Sakhi \n", | |
332 "29186 Rebab and Dhol Duet Various Artists \n", | |
333 "\n", | |
334 " BuyLinkTrackDownload CatalogNumber CatalogNumber_Album Collection \\\n", | |
335 "29182 26138 FW04361_201 FW04361 NaN \n", | |
336 "29183 26142 FW04361_205 FW04361 NaN \n", | |
337 "29184 51931 SFW40439_102 SFW40439 NaN \n", | |
338 "29185 53229 SFW40522_101 SFW40522 NaN \n", | |
339 "29186 26140 FW04361_203 FW04361 NaN \n", | |
340 "\n", | |
341 " ContentType ContentType_Album Country \\\n", | |
342 "29182 Track Album Afghanistan \n", | |
343 "29183 Track Album Afghanistan \n", | |
344 "29184 Track Album Afghanistan \n", | |
345 "29185 Track Album Afghanistan \n", | |
346 "29186 Track Album Afghanistan \n", | |
347 "\n", | |
348 " ... Language_iso3 \\\n", | |
349 "29182 ... NaN \n", | |
350 "29183 ... NaN \n", | |
351 "29184 ... pbt \n", | |
352 "29185 ... NaN \n", | |
353 "29186 ... NaN \n", | |
354 "\n", | |
355 " Language_iso1 Region Latitude Longitude \\\n", | |
356 "29182 NaN South Asia 33.93911 67.709953 \n", | |
357 "29183 NaN South Asia 33.93911 67.709953 \n", | |
358 "29184 Indo-European South Asia 33.93911 67.709953 \n", | |
359 "29185 NaN South Asia 33.93911 67.709953 \n", | |
360 "29186 NaN South Asia 33.93911 67.709953 \n", | |
361 "\n", | |
362 " Audio \\\n", | |
363 "29182 /import/c4dm-scratch/matthiasm/data/smithsonia... \n", | |
364 "29183 /import/c4dm-scratch/matthiasm/data/smithsonia... \n", | |
365 "29184 /import/c4dm-scratch/matthiasm/data/smithsonia... \n", | |
366 "29185 /import/c4dm-scratch/matthiasm/data/smithsonia... \n", | |
367 "29186 /import/c4dm-scratch/matthiasm/data/smithsonia... \n", | |
368 "\n", | |
369 " Speech \\\n", | |
370 "29182 /import/c4dm-02/people/mariap/SpeechMusic/2613... \n", | |
371 "29183 /import/c4dm-02/people/mariap/SpeechMusic/2614... \n", | |
372 "29184 /import/c4dm-02/people/mariap/SpeechMusic/5193... \n", | |
373 "29185 /import/c4dm-02/people/mariap/SpeechMusic/5322... \n", | |
374 "29186 /import/c4dm-02/people/mariap/SpeechMusic/2614... \n", | |
375 "\n", | |
376 " Melspec \\\n", | |
377 "29182 /import/c4dm-02/people/mariap/MelSpec/26138_Sa... \n", | |
378 "29183 /import/c4dm-02/people/mariap/MelSpec/26142_Sa... \n", | |
379 "29184 /import/c4dm-02/people/mariap/MelSpec/51931_Sa... \n", | |
380 "29185 /import/c4dm-02/people/mariap/MelSpec/53229_Sa... \n", | |
381 "29186 /import/c4dm-02/people/mariap/MelSpec/26140_Sa... \n", | |
382 "\n", | |
383 " Chroma \\\n", | |
384 "29182 /import/c4dm-04/mariap/FeatureCsvs/Smoothie-py... \n", | |
385 "29183 /import/c4dm-04/mariap/FeatureCsvs/Smoothie-py... \n", | |
386 "29184 /import/c4dm-04/mariap/FeatureCsvs/Smoothie-py... \n", | |
387 "29185 /import/c4dm-04/mariap/FeatureCsvs/Smoothie-py... \n", | |
388 "29186 /import/c4dm-04/mariap/FeatureCsvs/Smoothie-py... \n", | |
389 "\n", | |
390 " Melodia \n", | |
391 "29182 /import/c4dm-02/people/mariap/Melodia/26138_Sa... \n", | |
392 "29183 /import/c4dm-02/people/mariap/Melodia/26142_Sa... \n", | |
393 "29184 /import/c4dm-02/people/mariap/Melodia/51931_Sa... \n", | |
394 "29185 /import/c4dm-02/people/mariap/Melodia/53229_Sa... \n", | |
395 "29186 /import/c4dm-02/people/mariap/Melodia/26140_Sa... \n", | |
396 "\n", | |
397 "[5 rows x 108 columns]" | |
398 ] | |
399 }, | |
400 "execution_count": 8, | |
401 "metadata": {}, | |
402 "output_type": "execute_result" | |
403 } | |
404 ], | |
405 "source": [ | |
406 "df.head()" | |
61 ] | 407 ] |
62 }, | 408 }, |
63 { | 409 { |
64 "cell_type": "code", | 410 "cell_type": "code", |
65 "execution_count": null, | 411 "execution_count": null, |