changeset 8:28f15e232028
test to get audio sample URLs from 7digital
author | Paulo Chiliguano <p.e.chiliguano@se14.qmul.ac.uk> |
---|---|
date | Tue, 14 Jul 2015 23:41:55 +0100 |
parents | 4de098e10bbb |
children | 5b45b9f0540e |
files | Code/read_taste_profile.py |
diffstat | 1 files changed, 63 insertions(+), 22 deletions(-) |
--- a/Code/read_taste_profile.py	Sun Jul 12 23:56:25 2015 +0100
+++ b/Code/read_taste_profile.py	Tue Jul 14 23:41:55 2015 +0100
@@ -2,33 +2,74 @@
 import csv
 import pandas as pd
 import numpy as np
+import itertools
+import time
 
 # List of h5 files (audio streams)
-with open('/homes/pchilguano/dataset/cal10kHDF5.csv', 'wb') as out:
+#with open('/homes/pchilguano/dataset/cal10kHDF5.csv', 'wb') as out:
+#    writer = csv.writer(out, delimiter=',')
+#    for root, dirs, files in os.walk("/homes/pchilguano/dataset/cal10k"):
+#        for file in files:
+#            if file.endswith(".h5"):
+#                #print(os.path.join(root, file))
+#                track = ''.join(['SO',str(file)[2:-3]])
+#                print(track)
+#                writer.writerow([track])
+
+#with open('/homes/pchilguano/dataset/cal10kHDF5.csv', 'rb') as f:
+#    reader = csv.reader(f)
+#    your_list = list(reader)
+#    your_list.sort()
+#    chain = list(itertools.chain(*your_list))
+
+
+#store = pd.HDFStore('/homes/pchilguano/dataset/store.h5')
+location = r'~/dataset/train_triplets.txt'
+chunksize = 10000
+count = 0
+frame = pd.DataFrame()
+for chunk in pd.read_csv(location, delim_whitespace=True, header=None, names=['user','song','plays'], chunksize=chunksize):
+    #chunk.sort(columns='song')
+    #frame = chunk.query('song == your_list')
+    frame = frame.append(chunk.pivot(index='user', columns='song', values='plays'), ignore_index=True)
+    count = count + 1
+    print(count)
+    #for item in your_list:
+        #chunk['song'].isin(item)
+        #store.append('df', chunk[chunk['song'].isin(item)])
+#store.close()
+
+
+df = pd.read_csv(location, delim_whitespace=True, header=None, names=['user','song','plays'])
+ddf = df.drop_duplicates(subset = 'song')
+ddf.to_csv('train_triplets_song.csv',columns=['song'], header=False, index=False)
+
+with open('/homes/pchilguano/dataset/sid_mismatches.txt', 'rb') as f, open('/homes/pchilguano/dataset/sid_mismatches_song.txt', 'wb') as out:
     writer = csv.writer(out, delimiter=',')
-    for root, dirs, files in os.walk("/homes/pchilguano/dataset/cal10k"):
-        for file in files:
-            if file.endswith(".h5"):
-                #print(os.path.join(root, file))
-                track = ''.join(['SO',str(file)[2:-3]])
-                print(track)
-                writer.writerow([track])
-
-with open('/homes/pchilguano/dataset/cal10kHDF5.csv', 'rb') as f:
-    reader = csv.reader(f)
-    your_list = list(reader)
+    next = f.readline()
+    while next != "":
+        writer.writerow([next[8:26]])
+        print(next[8:26])
+        next = f.readline()
+#mismatch.to_csv('sid_mismatches_song.csv',columns=1, header=False, index=False)
 
-store = pd.HDFStore('/homes/pchilguano/dataset/store.h5')
-location = r'~/dataset/train_triplets.txt'
-chunksize = 4
-for chunk in pd.read_csv(location, delim_whitespace=True, header=None, names=['user','song','plays'], chunksize=chunksize):
-    #frame = pd.Dataframe()
-    #frame = chunk.query('song == your_list')
-    frame = chunk.pivot(index='user', columns='song', values='plays')
-    store.append('df', frame)
-
-
+from pyechonest import song, config
+config.ECHO_NEST_API_KEY="LINDFDUTQZQ781IE8"
+with open('/homes/pchilguano/dataset/test_echonest.txt', 'rb') as input, open('/homes/pchilguano/dataset/test_echonest_url.txt', 'wb') as output:
+    writer = csv.writer(output, delimiter=',')
+    next = input.readline()
+    while next != "":
+        time.sleep(1)
+        s = song.Song(next[:-2])
+        time.sleep(1)
+        ss_tracks = s.get_tracks('7digital-UK')
+        if len(ss_tracks) != 0:
+            ss_track = ss_tracks[0]
+            preview_url = ss_track.get('preview_url')
+            print(preview_url)
+            writer.writerow([next[:-2], preview_url])
+        next = input.readline()
 df = store['df']
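
Note: the chunked read of train_triplets.txt above pivots each chunk and appends it to a growing DataFrame. A minimal sketch of the same idea, collecting the pivoted chunks and concatenating once at the end; pd.concat and the groupby merge are not part of the changeset, while the path, column names and chunk size are taken from the diff.

import pandas as pd

location = r'~/dataset/train_triplets.txt'   # user, song, play-count triplets
chunksize = 10000

pieces = []
for chunk in pd.read_csv(location, delim_whitespace=True, header=None,
                         names=['user', 'song', 'plays'], chunksize=chunksize):
    # Each chunk becomes a small user-by-song matrix; unseen pairs are NaN.
    pieces.append(chunk.pivot(index='user', columns='song', values='plays'))

# Concatenate once, then merge users that were split across chunk boundaries.
plays = pd.concat(pieces).groupby(level=0).sum()

For the full Taste Profile subset this dense matrix would not fit in memory; the sketch only illustrates the chunked pivot step.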
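Note: the last loop is the actual 7digital test: each Echo Nest song ID is resolved with pyechonest and the preview_url of the first 7digital-UK track is written out. A minimal sketch of that lookup, reading the ID from a stripped line instead of the fixed next[:-2] slice and adding basic error handling; the file paths and the '7digital-UK' catalogue come from the diff, the API key is a placeholder, and the try/except is an assumption of how failures might be skipped, not part of the changeset.

import csv
import time
from pyechonest import config, song

config.ECHO_NEST_API_KEY = "YOUR_ECHO_NEST_API_KEY"  # placeholder key

in_path = '/homes/pchilguano/dataset/test_echonest.txt'       # one Echo Nest song ID per line
out_path = '/homes/pchilguano/dataset/test_echonest_url.txt'  # song ID, preview URL pairs

with open(in_path) as infile, open(out_path, 'w') as outfile:
    writer = csv.writer(outfile, delimiter=',')
    for line in infile:
        song_id = line.strip()
        if not song_id:
            continue
        try:
            tracks = song.Song(song_id).get_tracks('7digital-UK')
        except Exception:      # unknown ID, network error or rate limiting
            tracks = []
        if tracks:
            # The first 7digital track carries the audio preview clip URL.
            writer.writerow([song_id, tracks[0].get('preview_url')])
        time.sleep(1)          # stay under the Echo Nest API rate limit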