Mercurial > hg > plosone_underreview
comparison scripts/output_metadata.py @ 95:4aa0763bf8d8 branch-tests
trying to clear up pickle and metadata
author | Maria Panteli <m.x.panteli@gmail.com> |
---|---|
date | Mon, 02 Oct 2017 19:00:35 +0100 |
parents | |
children | 5eba53437755 |
comparison
equal
deleted
inserted
replaced
92:ce525367960e | 95:4aa0763bf8d8 |
---|---|
1 import numpy as np | |
2 import pandas as pd | |
3 import pickle | |
4 import os | |
5 | |
6 %load_ext autoreload | |
7 %autoreload 2 | |
8 | |
9 import sys | |
10 sys.path.append('../') | |
11 import scripts.outliers as outliers | |
12 import scripts.utils as utils | |
13 | |
# Inputs: the pickled dataset and the raw metadata table it is joined with.
DATA_FILE = '../data/lda_data_melodia_8_30sec.pickle'
# NOTE(review): absolute path on the original author's machine -- the script
# only runs where this file exists. The commented alternative below is the
# path this script itself writes its cleaned metadata to.
METADATA_FILE = '/Users/mariapanteli/Documents/QMUL/Code/MyPythonCode/MergeBL-Smith/data/metadata_BLSM_language_all.csv'
#METADATA_FILE = '../data/metadata.csv'

dataset, ddf, w_dict = outliers.load_data(DATA_FILE, METADATA_FILE)

# correct BL urls:
# Rows with no 'BuyLinkTrackDownload' value get their album url rebuilt from
# the 'Folder' and 'MetaFile' columns (presumably these are the British
# Library recordings -- confirm against the metadata).
# A single boolean-mask .loc assignment is used instead of
# ddf[col].iloc[i] = ..., because chained indexing may write to a temporary
# copy and leave ddf unchanged. isnull() also copes with object-dtype
# columns, where np.isnan raises TypeError.
bl_rows = ddf['BuyLinkTrackDownload'].isnull()
ddf.loc[bl_rows, 'songurls_Album'] = (
    'https://sounds.bl.uk/World-and-traditional-music/'
    + ddf.loc[bl_rows, 'Folder'] + '/'
    # keep only the part of the metadata file name before the first '.'
    + ddf.loc[bl_rows, 'MetaFile'].str.split('.').str[0])
26 | |
# Unpack the loaded dataset; X is the column-wise concatenation of the
# arrays in X_list (not used again within this script).
X_list, Y, Yaudio = dataset
X = np.concatenate(X_list, axis=1)

# Metadata columns to export, and the public names they are exported under.
# The two lists are positionally aligned: cols_to_keep[k] -> cols_rename[k].
cols_to_keep = ['Country', 'continent', 'REGION', 'LocDetails',
                'Language', 'Language_iso3', 'Culture', 'Genre_Album', 'Year', 'Decade',
                'songurls_Album', 'Speech', 'Melspec', 'Chroma', 'Melodia']
cols_rename = ['Country', 'Continent', 'Region', 'Location_details',
               'Language', 'Language_iso3', 'Culture', 'Genre', 'Year', 'Decade',
               'Url', 'Speech', 'Melspec', 'Chroma', 'Melodia']

# Fail early, naming every absent column. Silently skipping a missing column
# would shift the selection and only surface later as an unrelated
# length-mismatch error when the new column names are assigned.
missing_columns = [column for column in cols_to_keep if column not in ddf.columns]
if missing_columns:
    raise KeyError('metadata is missing expected columns: %s'
                   % ', '.join(missing_columns))

# Select by label in the requested order; copy so that later edits to
# ddf_new never write through to (or get lost in a view of) ddf.
ddf_new = ddf.loc[:, cols_to_keep].copy()
ddf_new.columns = cols_rename
44 | |
# Replace the original audio/feature file paths with short, flat names of the
# form '<parent folder>_<file name>', grouped under one directory per type
# ('Audio', 'Chroma', 'Melspec', 'Melodia', 'Speech').
#
# NOTE(review): ddf_new is built without an 'Audio' column, so reading
# ddf_new['Audio'] raises KeyError. The original paths are taken from Yaudio
# instead -- this assumes Yaudio[i] is the audio path of row i of ddf_new
# (the original loop overwrote Yaudio[i] with row i's new name); confirm
# against outliers.load_data.
audio_names = []
csv_names = []
for old_path in Yaudio:
    folder, filename = os.path.split(old_path)
    new_name = os.path.basename(folder) + '_' + filename
    audio_names.append(new_name)
    # feature files share the audio name up to the first '.', with .csv
    csv_names.append(new_name.split('.')[0] + '.csv')

# Assign whole columns at once: per-cell ddf_new[col].iloc[i] = ... is chained
# indexing and may modify a temporary copy. A length mismatch between Yaudio
# and ddf_new raises ValueError here instead of silently misaligning rows.
ddf_new = ddf_new.copy()
ddf_new['Audio'] = [os.path.join('Audio', name) for name in audio_names]
for feature in ('Chroma', 'Melspec', 'Melodia', 'Speech'):
    ddf_new[feature] = [os.path.join(feature, name) for name in csv_names]

# Update Yaudio in place so it keeps its container type for pickling.
for i, new_name in enumerate(audio_names):
    Yaudio[i] = new_name
54 | |
# Write the cleaned metadata table and the dataset with the renamed audio
# entries. The pickle file is opened in a with-block so the handle is
# flushed and closed even if dumping fails.
ddf_new.to_csv('../data/metadata.csv', index=False)
with open('../data/lda_data_8.pickle', 'wb') as pickle_file:
    pickle.dump([X_list, Y, Yaudio], pickle_file)