Mercurial > hg > plosone_underreview
view scripts/output_metadata.py @ 96:1d9c96974c3e branch-tests
merged
author | Maria Panteli <m.x.panteli@gmail.com> |
---|---|
date | Mon, 02 Oct 2017 19:00:59 +0100 |
parents | 4aa0763bf8d8 |
children | 5eba53437755 |
line wrap: on
line source
"""Export the published metadata table and the matching feature pickle.

Steps performed, in order:

1. Load the feature data and the raw metadata via ``outliers.load_data``.
2. Fill in the URL of every recording that has no ``BuyLinkTrackDownload``
   value with a ``sounds.bl.uk`` link built from ``Folder`` and ``MetaFile``.
3. Keep a fixed subset of metadata columns and rename them.
4. Rewrite the audio / feature file paths to a flat, relative layout
   (``<Kind>/<parent-dir>_<file-name>``) and mirror the new audio names
   into ``Yaudio``.
5. Write ``../data/metadata.csv`` and ``../data/lda_data_8.pickle``.
"""
import numpy as np
import pandas as pd
import pickle
import os
import sys

# Enable module auto-reloading only when running inside IPython/Jupyter.  The
# raw ``%load_ext`` / ``%autoreload`` magics are not Python syntax, so they
# are issued through the shell object, which only exists in such a session.
try:
    _ipython = get_ipython()
except NameError:
    _ipython = None
if _ipython is not None:
    _ipython.run_line_magic('load_ext', 'autoreload')
    _ipython.run_line_magic('autoreload', '2')

# The project package lives one directory up from this script.
sys.path.append('../')
import scripts.outliers as outliers
import scripts.utils as utils

DATA_FILE = '../data/lda_data_melodia_8_30sec.pickle'
# NOTE(review): absolute, machine-specific path -- the script only runs as-is
# on the original author's machine.
METADATA_FILE = '/Users/mariapanteli/Documents/QMUL/Code/MyPythonCode/MergeBL-Smith/data/metadata_BLSM_language_all.csv'
#METADATA_FILE = '../data/metadata.csv'

BL_URL_PREFIX = 'https://sounds.bl.uk/World-and-traditional-music/'

dataset, ddf, w_dict = outliers.load_data(DATA_FILE, METADATA_FILE)

# Correct BL urls: rows without a download link get a sounds.bl.uk address.
# ``isnull`` also copes with object-dtype columns, where ``np.isnan`` raises.
bl_mask = ddf['BuyLinkTrackDownload'].isnull().values
if bl_mask.any():
    bl_urls = [
        BL_URL_PREFIX + folder + '/' + meta_file.split('.')[0]
        for folder, meta_file in zip(ddf['Folder'].values[bl_mask],
                                     ddf['MetaFile'].values[bl_mask])
    ]
    # A single ``.loc`` assignment writes into ``ddf`` itself; the previous
    # chained ``ddf[col].iloc[i] = ...`` form may silently modify a copy.
    ddf.loc[bl_mask, 'songurls_Album'] = bl_urls

X_list, Y, Yaudio = dataset
# Not used below; kept so an interactive session still has ``X`` available.
X = np.concatenate(X_list, axis=1)

# Source column names and, position by position, their published names.
# 'Audio' must be part of the selection because the path rewrite below reads
# and updates it.
# NOTE(review): assumes the raw metadata has an 'Audio' column -- confirm.
cols_to_keep = ['Country', 'continent', 'REGION', 'LocDetails', 'Language',
                'Language_iso3', 'Culture', 'Genre_Album', 'Year', 'Decade',
                'songurls_Album', 'Audio', 'Speech', 'Melspec', 'Chroma',
                'Melodia']
cols_rename = ['Country', 'Continent', 'Region', 'Location_details',
               'Language', 'Language_iso3', 'Culture', 'Genre', 'Year',
               'Decade', 'Url', 'Audio', 'Speech', 'Melspec', 'Chroma',
               'Melodia']

# Fail early, naming the offending columns, instead of dying later with an
# unrelated length-mismatch error when the new column names are assigned.
missing_columns = [column for column in cols_to_keep
                   if column not in ddf.columns]
if missing_columns:
    raise KeyError('Columns missing from metadata: %s'
                   % ', '.join(missing_columns))

# Work on an explicit copy so the edits below never touch (or depend on) ddf.
ddf_new = ddf[cols_to_keep].copy()
ddf_new.columns = cols_rename

# Flatten every audio path to '<parent-dir>_<file-name>'.
new_names = []
for audio_path in ddf_new['Audio']:
    parent_dir, file_name = os.path.split(audio_path)
    new_names.append(os.path.split(parent_dir)[-1] + '_' + file_name)
# Feature files share the audio base name (text before the first '.').
new_csv_names = [name.split('.')[0] + '.csv' for name in new_names]

ddf_new['Audio'] = [os.path.join('Audio', name) for name in new_names]
for feature in ('Chroma', 'Melspec', 'Melodia', 'Speech'):
    ddf_new[feature] = [os.path.join(feature, name) for name in new_csv_names]

# Keep the audio labels stored with the features in step with the new names.
# NOTE(review): relies on row i of the metadata matching Yaudio[i] -- confirm.
for i, name in enumerate(new_names):
    Yaudio[i] = name

ddf_new.to_csv('../data/metadata.csv', index=False)
# The context manager guarantees the pickle is flushed and closed.
with open('../data/lda_data_8.pickle', 'wb') as pickle_file:
    pickle.dump([X_list, Y, Yaudio], pickle_file)