Mercurial > hg > plosone_underreview
comparison scripts/output_metadata.py @ 98:5eba53437755 branch-tests
notebooks for publication
author | Maria Panteli <m.x.panteli@gmail.com> |
---|---|
date | Tue, 03 Oct 2017 15:55:35 +0100 |
parents | 4aa0763bf8d8 |
children | 192259977b50 |
comparison (legend: equal | deleted | inserted | replaced)
97:68ec8699e32a | 98:5eba53437755 |
---|---|
1 import numpy as np | 1 import numpy as np |
2 import pandas as pd | 2 import pandas as pd |
3 import pickle | 3 import pickle |
4 import os | 4 import os |
5 | |
6 %load_ext autoreload | |
7 %autoreload 2 | |
8 | 5 |
9 import sys | 6 import sys |
10 sys.path.append('../') | 7 sys.path.append('../') |
11 import scripts.outliers as outliers | 8 import scripts.outliers as outliers |
12 import scripts.utils as utils | 9 import scripts.utils as utils |
27 X_list, Y, Yaudio = dataset | 24 X_list, Y, Yaudio = dataset |
28 X = np.concatenate(X_list, axis=1) | 25 X = np.concatenate(X_list, axis=1) |
29 | 26 |
30 cols_to_keep = ['Country', 'continent', 'REGION', 'LocDetails', | 27 cols_to_keep = ['Country', 'continent', 'REGION', 'LocDetails', |
31 'Language', 'Language_iso3', 'Culture', 'Genre_Album', 'Year', 'Decade', | 28 'Language', 'Language_iso3', 'Culture', 'Genre_Album', 'Year', 'Decade', |
32 'songurls_Album', 'Speech', 'Melspec', 'Chroma', 'Melodia'] | 29 'songurls_Album', 'Audio', 'Speech', 'Melspec', 'Chroma', 'Melodia'] |
33 cols_rename = ['Country', 'Continent', 'Region', 'Location_details', | 30 cols_rename = ['Country', 'Continent', 'Region', 'Location_details', |
34 'Language', 'Language_iso3', 'Culture', 'Genre', 'Year', 'Decade', | 31 'Language', 'Language_iso3', 'Culture', 'Genre', 'Year', 'Decade', |
35 'Url', 'Speech', 'Melspec', 'Chroma', 'Melodia'] | 32 'Url', 'Audio', 'Speech', 'Melspec', 'Chroma', 'Melodia'] |
36 col_idx = [] | 33 col_idx = [] |
37 for column in cols_to_keep: | 34 for column in cols_to_keep: |
38 if column not in ddf.columns: | 35 if column not in ddf.columns: |
39 print column | 36 print column |
40 col_idx.append(np.where(ddf.columns==column)[0]) | 37 col_idx.append(np.where(ddf.columns==column)[0]) |
43 ddf_new.columns = cols_rename | 40 ddf_new.columns = cols_rename |
44 | 41 |
45 for i, yy in enumerate(ddf_new['Audio']): | 42 for i, yy in enumerate(ddf_new['Audio']): |
46 new_name = os.path.split(os.path.split(yy)[0])[-1] + '_' + os.path.split(yy)[-1] | 43 new_name = os.path.split(os.path.split(yy)[0])[-1] + '_' + os.path.split(yy)[-1] |
47 new_csv_name = new_name.split('.')[0]+'.csv' | 44 new_csv_name = new_name.split('.')[0]+'.csv' |
48 ddf_new['Audio'].iloc[i] = os.path.join('Audio', new_name) | 45 ddf_new.loc[i, 'Audio'] = new_name |
49 ddf_new['Chroma'].iloc[i] = os.path.join('Chroma', new_csv_name) | 46 ddf_new.loc[i, 'Chroma'] = os.path.join('Chroma', new_csv_name) |
50 ddf_new['Melspec'].iloc[i] = os.path.join('Melspec', new_csv_name) | 47 ddf_new.loc[i, 'Melspec'] = os.path.join('Melspec', new_csv_name) |
51 ddf_new['Melodia'].iloc[i] = os.path.join('Melodia', new_csv_name) | 48 ddf_new.loc[i, 'Melodia'] = os.path.join('Melodia', new_csv_name) |
52 ddf_new['Speech'].iloc[i] = os.path.join('Speech', new_csv_name) | 49 ddf_new.loc[i, 'Speech'] = os.path.join('Speech', new_csv_name) |
53 Yaudio[i] = new_name | 50 Yaudio[i] = new_name |
51 | |
52 print ddf_new.head() | |
53 print Yaudio[:20] | |
54 | 54 |
55 ddf_new.to_csv('../data/metadata.csv', index=False) | 55 ddf_new.to_csv('../data/metadata.csv', index=False) |
56 pickle.dump([X_list, Y, Yaudio], open('../data/lda_data_8.pickle', 'wb')) | 56 pickle.dump([X_list, Y, Yaudio], open('../data/lda_data_8.pickle', 'wb')) |