comparison notebooks/sensitivity_experiment.ipynb @ 23:56cbf155680a branch-tests

on melodia
author mpanteli <m.x.panteli@gmail.com>
date Wed, 13 Sep 2017 13:52:26 +0100
parents 206fdeff4641
children 29b5ee381305
comparison
equal deleted inserted replaced
22:4aa0ce25fabd 23:56cbf155680a
1 { 1 {
2 "cells": [ 2 "cells": [
3 { 3 {
4 "cell_type": "code", 4 "cell_type": "code",
5 "execution_count": 1, 5 "execution_count": 3,
6 "metadata": {}, 6 "metadata": {},
7 "outputs": [ 7 "outputs": [
8 { 8 {
9 "name": "stderr", 9 "name": "stdout",
10 "output_type": "stream", 10 "output_type": "stream",
11 "text": [ 11 "text": [
12 "/homes/mp305/anaconda/lib/python2.7/site-packages/librosa/core/audio.py:33: UserWarning: Could not import scikits.samplerate. Falling back to scipy.signal\n", 12 "The autoreload extension is already loaded. To reload it, use:\n",
13 " warnings.warn('Could not import scikits.samplerate. '\n" 13 " %reload_ext autoreload\n"
14 ] 14 ]
15 } 15 }
16 ], 16 ],
17 "source": [ 17 "source": [
18 "import numpy as np\n", 18 "import numpy as np\n",
31 "import scripts.outliers as outliers" 31 "import scripts.outliers as outliers"
32 ] 32 ]
33 }, 33 },
34 { 34 {
35 "cell_type": "code", 35 "cell_type": "code",
36 "execution_count": null, 36 "execution_count": 4,
37 "metadata": {}, 37 "metadata": {},
38 "outputs": [ 38 "outputs": [
39 { 39 {
40 "name": "stderr", 40 "name": "stderr",
41 "output_type": "stream", 41 "output_type": "stream",
42 "text": [ 42 "text": [
43 "/homes/mp305/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2822: DtypeWarning: Columns (0,1,2,4,5,6,7,8,10,11,12,13,14,15,16,17,19,21,22,23,24,25,26,27,29,31,35,38,39,40,41,44,45,48,55,56,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,93,95,96) have mixed types. Specify dtype option on import or set low_memory=False.\n", 43 "../scripts/util_filter_dataset.py:22: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison\n",
44 " if self.run_code(code, result):\n" 44 " if len(np.where(bounds[:,2]=='m')[0])==0 or len(np.where(bounds[:,2]=='s')[0])==len(bounds):\n"
45 ] 45 ]
46 } 46 }
47 ], 47 ],
48 "source": [ 48 "source": [
49 "OUTPUT_FILES = load_dataset.OUTPUT_FILES\n", 49 "OUTPUT_FILES = load_dataset.OUTPUT_FILES\n",
51 "df = load_dataset.sample_dataset(csv_file=load_dataset.METADATA_FILE)" 51 "df = load_dataset.sample_dataset(csv_file=load_dataset.METADATA_FILE)"
52 ] 52 ]
53 }, 53 },
54 { 54 {
55 "cell_type": "code", 55 "cell_type": "code",
56 "execution_count": null, 56 "execution_count": 5,
57 "metadata": {}, 57 "metadata": {},
58 "outputs": [], 58 "outputs": [
59 {
60 "data": {
61 "text/plain": [
62 "(8396, 108)"
63 ]
64 },
65 "execution_count": 5,
66 "metadata": {},
67 "output_type": "execute_result"
68 }
69 ],
59 "source": [ 70 "source": [
60 "df.shape" 71 "df.shape"
72 ]
73 },
74 {
75 "cell_type": "code",
76 "execution_count": 9,
77 "metadata": {},
78 "outputs": [
79 {
80 "name": "stdout",
81 "output_type": "stream",
82 "text": [
83 "file 0 of 6\n",
84 "loading speech/music segments...\n",
85 "extracting onset patterns and mfccs...\n",
86 "extracting chroma...\n",
87 "extracting pitch bihist from melodia...\n",
88 "file 1 of 6\n",
89 "loading speech/music segments...\n",
90 "extracting onset patterns and mfccs...\n",
91 "extracting chroma...\n",
92 "extracting pitch bihist from melodia...\n",
93 "file 2 of 6\n",
94 "loading speech/music segments...\n",
95 "extracting onset patterns and mfccs...\n",
96 "extracting chroma...\n",
97 "extracting pitch bihist from melodia...\n",
98 "file 3 of 6\n",
99 "loading speech/music segments...\n",
100 "extracting onset patterns and mfccs...\n",
101 "extracting chroma...\n",
102 "extracting pitch bihist from melodia...\n",
103 "file 4 of 6\n",
104 "loading speech/music segments...\n",
105 "extracting onset patterns and mfccs...\n",
106 "extracting chroma...\n",
107 "extracting pitch bihist from melodia...\n",
108 "file 5 of 6\n",
109 "loading speech/music segments...\n",
110 "extracting onset patterns and mfccs...\n",
111 "extracting chroma...\n",
112 "extracting pitch bihist from melodia...\n",
113 "6 6 6 6 6 6\n",
114 "(264, 400) (264, 240) (264, 80) (264, 120)\n",
115 "file 0 of 2\n",
116 "loading speech/music segments...\n",
117 "extracting onset patterns and mfccs...\n",
118 "extracting chroma...\n",
119 "extracting pitch bihist from melodia...\n",
120 "file 1 of 2\n",
121 "loading speech/music segments...\n",
122 "extracting onset patterns and mfccs...\n",
123 "extracting chroma...\n",
124 "extracting pitch bihist from melodia...\n",
125 "2 2 2 2 2 2\n",
126 "(88, 400) (88, 240) (88, 80) (88, 120)\n",
127 "file 0 of 2\n",
128 "loading speech/music segments...\n",
129 "extracting onset patterns and mfccs...\n",
130 "extracting chroma...\n",
131 "extracting pitch bihist from melodia...\n",
132 "file 1 of 2\n",
133 "loading speech/music segments...\n",
134 "extracting onset patterns and mfccs...\n",
135 "extracting chroma...\n",
136 "extracting pitch bihist from melodia...\n",
137 "2 2 2 2 2 2\n",
138 "(85, 400) (85, 240) (85, 80) (85, 120)\n"
139 ]
140 },
141 {
142 "ename": "AttributeError",
143 "evalue": "'list' object has no attribute 'shape'",
144 "output_type": "error",
145 "traceback": [
146 "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
147 "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
148 "\u001b[0;32m<ipython-input-9-ed1b80dfb251>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mtrain\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mval\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mload_dataset\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfeatures_for_train_test_sets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwrite_output\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mprint\u001b[0m \u001b[0mtrain\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mval\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
149 "\u001b[0;31mAttributeError\u001b[0m: 'list' object has no attribute 'shape'"
150 ]
151 }
152 ],
153 "source": [
154 "train, val, test = load_dataset.features_for_train_test_sets(df.iloc[:10, :], write_output=False)"
155 ]
156 },
157 {
158 "cell_type": "code",
159 "execution_count": 8,
160 "metadata": {},
161 "outputs": [
162 {
163 "data": {
164 "text/html": [
165 "<div>\n",
166 "<table border=\"1\" class=\"dataframe\">\n",
167 " <thead>\n",
168 " <tr style=\"text-align: right;\">\n",
169 " <th></th>\n",
170 " <th>AlbumTitle</th>\n",
171 " <th>Artist</th>\n",
172 " <th>Artist_Album</th>\n",
173 " <th>BuyLinkTrackDownload</th>\n",
174 " <th>CatalogNumber</th>\n",
175 " <th>CatalogNumber_Album</th>\n",
176 " <th>Collection</th>\n",
177 " <th>ContentType</th>\n",
178 " <th>ContentType_Album</th>\n",
179 " <th>Country</th>\n",
180 " <th>...</th>\n",
181 " <th>Language_iso3</th>\n",
182 " <th>Language_iso1</th>\n",
183 " <th>Region</th>\n",
184 " <th>Latitude</th>\n",
185 " <th>Longitude</th>\n",
186 " <th>Audio</th>\n",
187 " <th>Speech</th>\n",
188 " <th>Melspec</th>\n",
189 " <th>Chroma</th>\n",
190 " <th>Melodia</th>\n",
191 " </tr>\n",
192 " </thead>\n",
193 " <tbody>\n",
194 " <tr>\n",
195 " <th>29182</th>\n",
196 " <td>Music of Afghanistan</td>\n",
197 " <td>Afghan National Orchestra</td>\n",
198 " <td>Various Artists</td>\n",
199 " <td>26138</td>\n",
200 " <td>FW04361_201</td>\n",
201 " <td>FW04361</td>\n",
202 " <td>NaN</td>\n",
203 " <td>Track</td>\n",
204 " <td>Album</td>\n",
205 " <td>Afghanistan</td>\n",
206 " <td>...</td>\n",
207 " <td>NaN</td>\n",
208 " <td>NaN</td>\n",
209 " <td>South Asia</td>\n",
210 " <td>33.93911</td>\n",
211 " <td>67.709953</td>\n",
212 " <td>/import/c4dm-scratch/matthiasm/data/smithsonia...</td>\n",
213 " <td>/import/c4dm-02/people/mariap/SpeechMusic/2613...</td>\n",
214 " <td>/import/c4dm-02/people/mariap/MelSpec/26138_Sa...</td>\n",
215 " <td>/import/c4dm-04/mariap/FeatureCsvs/Smoothie-py...</td>\n",
216 " <td>/import/c4dm-02/people/mariap/Melodia/26138_Sa...</td>\n",
217 " </tr>\n",
218 " <tr>\n",
219 " <th>29183</th>\n",
220 " <td>Music of Afghanistan</td>\n",
221 " <td>Yaqub Kasimi</td>\n",
222 " <td>Various Artists</td>\n",
223 " <td>26142</td>\n",
224 " <td>FW04361_205</td>\n",
225 " <td>FW04361</td>\n",
226 " <td>NaN</td>\n",
227 " <td>Track</td>\n",
228 " <td>Album</td>\n",
229 " <td>Afghanistan</td>\n",
230 " <td>...</td>\n",
231 " <td>NaN</td>\n",
232 " <td>NaN</td>\n",
233 " <td>South Asia</td>\n",
234 " <td>33.93911</td>\n",
235 " <td>67.709953</td>\n",
236 " <td>/import/c4dm-scratch/matthiasm/data/smithsonia...</td>\n",
237 " <td>/import/c4dm-02/people/mariap/SpeechMusic/2614...</td>\n",
238 " <td>/import/c4dm-02/people/mariap/MelSpec/26142_Sa...</td>\n",
239 " <td>/import/c4dm-04/mariap/FeatureCsvs/Smoothie-py...</td>\n",
240 " <td>/import/c4dm-02/people/mariap/Melodia/26142_Sa...</td>\n",
241 " </tr>\n",
242 " <tr>\n",
243 " <th>29184</th>\n",
244 " <td>Ustad Mohammad Omar: Virtuoso from Afghanistan</td>\n",
245 " <td>Ustad Mohammad Omar|Zakir Hussain</td>\n",
246 " <td>Ustad Mohammad Omar</td>\n",
247 " <td>51931</td>\n",
248 " <td>SFW40439_102</td>\n",
249 " <td>SFW40439</td>\n",
250 " <td>NaN</td>\n",
251 " <td>Track</td>\n",
252 " <td>Album</td>\n",
253 " <td>Afghanistan</td>\n",
254 " <td>...</td>\n",
255 " <td>pbt</td>\n",
256 " <td>Indo-European</td>\n",
257 " <td>South Asia</td>\n",
258 " <td>33.93911</td>\n",
259 " <td>67.709953</td>\n",
260 " <td>/import/c4dm-scratch/matthiasm/data/smithsonia...</td>\n",
261 " <td>/import/c4dm-02/people/mariap/SpeechMusic/5193...</td>\n",
262 " <td>/import/c4dm-02/people/mariap/MelSpec/51931_Sa...</td>\n",
263 " <td>/import/c4dm-04/mariap/FeatureCsvs/Smoothie-py...</td>\n",
264 " <td>/import/c4dm-02/people/mariap/Melodia/51931_Sa...</td>\n",
265 " </tr>\n",
266 " <tr>\n",
267 " <th>29185</th>\n",
268 " <td>Music of Central Asia Vol. 3: Homayun Sakhi: T...</td>\n",
269 " <td>Homayun Sakhi</td>\n",
270 " <td>Homayun Sakhi</td>\n",
271 " <td>53229</td>\n",
272 " <td>SFW40522_101</td>\n",
273 " <td>SFW40522</td>\n",
274 " <td>NaN</td>\n",
275 " <td>Track</td>\n",
276 " <td>Album</td>\n",
277 " <td>Afghanistan</td>\n",
278 " <td>...</td>\n",
279 " <td>NaN</td>\n",
280 " <td>NaN</td>\n",
281 " <td>South Asia</td>\n",
282 " <td>33.93911</td>\n",
283 " <td>67.709953</td>\n",
284 " <td>/import/c4dm-scratch/matthiasm/data/smithsonia...</td>\n",
285 " <td>/import/c4dm-02/people/mariap/SpeechMusic/5322...</td>\n",
286 " <td>/import/c4dm-02/people/mariap/MelSpec/53229_Sa...</td>\n",
287 " <td>/import/c4dm-04/mariap/FeatureCsvs/Smoothie-py...</td>\n",
288 " <td>/import/c4dm-02/people/mariap/Melodia/53229_Sa...</td>\n",
289 " </tr>\n",
290 " <tr>\n",
291 " <th>29186</th>\n",
292 " <td>Music of Afghanistan</td>\n",
293 " <td>Rebab and Dhol Duet</td>\n",
294 " <td>Various Artists</td>\n",
295 " <td>26140</td>\n",
296 " <td>FW04361_203</td>\n",
297 " <td>FW04361</td>\n",
298 " <td>NaN</td>\n",
299 " <td>Track</td>\n",
300 " <td>Album</td>\n",
301 " <td>Afghanistan</td>\n",
302 " <td>...</td>\n",
303 " <td>NaN</td>\n",
304 " <td>NaN</td>\n",
305 " <td>South Asia</td>\n",
306 " <td>33.93911</td>\n",
307 " <td>67.709953</td>\n",
308 " <td>/import/c4dm-scratch/matthiasm/data/smithsonia...</td>\n",
309 " <td>/import/c4dm-02/people/mariap/SpeechMusic/2614...</td>\n",
310 " <td>/import/c4dm-02/people/mariap/MelSpec/26140_Sa...</td>\n",
311 " <td>/import/c4dm-04/mariap/FeatureCsvs/Smoothie-py...</td>\n",
312 " <td>/import/c4dm-02/people/mariap/Melodia/26140_Sa...</td>\n",
313 " </tr>\n",
314 " </tbody>\n",
315 "</table>\n",
316 "<p>5 rows × 108 columns</p>\n",
317 "</div>"
318 ],
319 "text/plain": [
320 " AlbumTitle \\\n",
321 "29182 Music of Afghanistan \n",
322 "29183 Music of Afghanistan \n",
323 "29184 Ustad Mohammad Omar: Virtuoso from Afghanistan \n",
324 "29185 Music of Central Asia Vol. 3: Homayun Sakhi: T... \n",
325 "29186 Music of Afghanistan \n",
326 "\n",
327 " Artist Artist_Album \\\n",
328 "29182 Afghan National Orchestra Various Artists \n",
329 "29183 Yaqub Kasimi Various Artists \n",
330 "29184 Ustad Mohammad Omar|Zakir Hussain Ustad Mohammad Omar \n",
331 "29185 Homayun Sakhi Homayun Sakhi \n",
332 "29186 Rebab and Dhol Duet Various Artists \n",
333 "\n",
334 " BuyLinkTrackDownload CatalogNumber CatalogNumber_Album Collection \\\n",
335 "29182 26138 FW04361_201 FW04361 NaN \n",
336 "29183 26142 FW04361_205 FW04361 NaN \n",
337 "29184 51931 SFW40439_102 SFW40439 NaN \n",
338 "29185 53229 SFW40522_101 SFW40522 NaN \n",
339 "29186 26140 FW04361_203 FW04361 NaN \n",
340 "\n",
341 " ContentType ContentType_Album Country \\\n",
342 "29182 Track Album Afghanistan \n",
343 "29183 Track Album Afghanistan \n",
344 "29184 Track Album Afghanistan \n",
345 "29185 Track Album Afghanistan \n",
346 "29186 Track Album Afghanistan \n",
347 "\n",
348 " ... Language_iso3 \\\n",
349 "29182 ... NaN \n",
350 "29183 ... NaN \n",
351 "29184 ... pbt \n",
352 "29185 ... NaN \n",
353 "29186 ... NaN \n",
354 "\n",
355 " Language_iso1 Region Latitude Longitude \\\n",
356 "29182 NaN South Asia 33.93911 67.709953 \n",
357 "29183 NaN South Asia 33.93911 67.709953 \n",
358 "29184 Indo-European South Asia 33.93911 67.709953 \n",
359 "29185 NaN South Asia 33.93911 67.709953 \n",
360 "29186 NaN South Asia 33.93911 67.709953 \n",
361 "\n",
362 " Audio \\\n",
363 "29182 /import/c4dm-scratch/matthiasm/data/smithsonia... \n",
364 "29183 /import/c4dm-scratch/matthiasm/data/smithsonia... \n",
365 "29184 /import/c4dm-scratch/matthiasm/data/smithsonia... \n",
366 "29185 /import/c4dm-scratch/matthiasm/data/smithsonia... \n",
367 "29186 /import/c4dm-scratch/matthiasm/data/smithsonia... \n",
368 "\n",
369 " Speech \\\n",
370 "29182 /import/c4dm-02/people/mariap/SpeechMusic/2613... \n",
371 "29183 /import/c4dm-02/people/mariap/SpeechMusic/2614... \n",
372 "29184 /import/c4dm-02/people/mariap/SpeechMusic/5193... \n",
373 "29185 /import/c4dm-02/people/mariap/SpeechMusic/5322... \n",
374 "29186 /import/c4dm-02/people/mariap/SpeechMusic/2614... \n",
375 "\n",
376 " Melspec \\\n",
377 "29182 /import/c4dm-02/people/mariap/MelSpec/26138_Sa... \n",
378 "29183 /import/c4dm-02/people/mariap/MelSpec/26142_Sa... \n",
379 "29184 /import/c4dm-02/people/mariap/MelSpec/51931_Sa... \n",
380 "29185 /import/c4dm-02/people/mariap/MelSpec/53229_Sa... \n",
381 "29186 /import/c4dm-02/people/mariap/MelSpec/26140_Sa... \n",
382 "\n",
383 " Chroma \\\n",
384 "29182 /import/c4dm-04/mariap/FeatureCsvs/Smoothie-py... \n",
385 "29183 /import/c4dm-04/mariap/FeatureCsvs/Smoothie-py... \n",
386 "29184 /import/c4dm-04/mariap/FeatureCsvs/Smoothie-py... \n",
387 "29185 /import/c4dm-04/mariap/FeatureCsvs/Smoothie-py... \n",
388 "29186 /import/c4dm-04/mariap/FeatureCsvs/Smoothie-py... \n",
389 "\n",
390 " Melodia \n",
391 "29182 /import/c4dm-02/people/mariap/Melodia/26138_Sa... \n",
392 "29183 /import/c4dm-02/people/mariap/Melodia/26142_Sa... \n",
393 "29184 /import/c4dm-02/people/mariap/Melodia/51931_Sa... \n",
394 "29185 /import/c4dm-02/people/mariap/Melodia/53229_Sa... \n",
395 "29186 /import/c4dm-02/people/mariap/Melodia/26140_Sa... \n",
396 "\n",
397 "[5 rows x 108 columns]"
398 ]
399 },
400 "execution_count": 8,
401 "metadata": {},
402 "output_type": "execute_result"
403 }
404 ],
405 "source": [
406 "df.head()"
61 ] 407 ]
62 }, 408 },
63 { 409 {
64 "cell_type": "code", 410 "cell_type": "code",
65 "execution_count": null, 411 "execution_count": null,