# HG changeset patch
# User Paulo Chiliguano
# Date 1436913715 -3600
# Node ID 28f15e23202866e8c1ef84687b80d35c223266ac
# Parent 4de098e10bbb3a1c7d02a21ebf8f3f089be46537
Test fetching audio sample preview URLs from the 7digital API via pyechonest
diff -r 4de098e10bbb -r 28f15e232028 Code/read_taste_profile.py
--- a/Code/read_taste_profile.py Sun Jul 12 23:56:25 2015 +0100
+++ b/Code/read_taste_profile.py Tue Jul 14 23:41:55 2015 +0100
@@ -2,33 +2,74 @@
import csv
import pandas as pd
import numpy as np
+import itertools
+import time
# List of h5 files (audio streams)
-with open('/homes/pchilguano/dataset/cal10kHDF5.csv', 'wb') as out:
+#with open('/homes/pchilguano/dataset/cal10kHDF5.csv', 'wb') as out:
+# writer = csv.writer(out, delimiter=',')
+# for root, dirs, files in os.walk("/homes/pchilguano/dataset/cal10k"):
+# for file in files:
+# if file.endswith(".h5"):
+# #print(os.path.join(root, file))
+# track = ''.join(['SO',str(file)[2:-3]])
+# print(track)
+# writer.writerow([track])
+
+#with open('/homes/pchilguano/dataset/cal10kHDF5.csv', 'rb') as f:
+# reader = csv.reader(f)
+# your_list = list(reader)
+# your_list.sort()
+# chain = list(itertools.chain(*your_list))
+
+
+#store = pd.HDFStore('/homes/pchilguano/dataset/store.h5')
+location = r'~/dataset/train_triplets.txt'
+chunksize = 10000
+count = 0
+frame = pd.DataFrame()
+for chunk in pd.read_csv(location, delim_whitespace=True, header=None, names=['user','song','plays'], chunksize=chunksize):
+ #chunk.sort(columns='song')
+ #frame = chunk.query('song == your_list')
+ frame = frame.append(chunk.pivot(index='user', columns='song', values='plays'), ignore_index=True)
+ count = count + 1
+ print(count)
+ #for item in your_list:
+ #chunk['song'].isin(item)
+ #store.append('df', chunk[chunk['song'].isin(item)])
+#store.close()
+
+
+df = pd.read_csv(location, delim_whitespace=True, header=None, names=['user','song','plays'])
+ddf = df.drop_duplicates(subset = 'song')
+ddf.to_csv('train_triplets_song.csv',columns=['song'], header=False, index=False)
+
+with open('/homes/pchilguano/dataset/sid_mismatches.txt', 'rb') as f, open('/homes/pchilguano/dataset/sid_mismatches_song.txt', 'wb') as out:
writer = csv.writer(out, delimiter=',')
- for root, dirs, files in os.walk("/homes/pchilguano/dataset/cal10k"):
- for file in files:
- if file.endswith(".h5"):
- #print(os.path.join(root, file))
- track = ''.join(['SO',str(file)[2:-3]])
- print(track)
- writer.writerow([track])
-
-with open('/homes/pchilguano/dataset/cal10kHDF5.csv', 'rb') as f:
- reader = csv.reader(f)
- your_list = list(reader)
+ next = f.readline()
+ while next != "":
+ writer.writerow([next[8:26]])
+ print(next[8:26])
+ next = f.readline()
+#mismatch.to_csv('sid_mismatches_song.csv',columns=1, header=False, index=False)
-store = pd.HDFStore('/homes/pchilguano/dataset/store.h5')
-location = r'~/dataset/train_triplets.txt'
-chunksize = 4
-for chunk in pd.read_csv(location, delim_whitespace=True, header=None, names=['user','song','plays'], chunksize=chunksize):
- #frame = pd.Dataframe()
- #frame = chunk.query('song == your_list')
- frame = chunk.pivot(index='user', columns='song', values='plays')
- store.append('df', frame)
-
-
+from pyechonest import song, config
+config.ECHO_NEST_API_KEY="LINDFDUTQZQ781IE8"
+with open('/homes/pchilguano/dataset/test_echonest.txt', 'rb') as input, open('/homes/pchilguano/dataset/test_echonest_url.txt', 'wb') as output:
+ writer = csv.writer(output, delimiter=',')
+ next = input.readline()
+ while next != "":
+ time.sleep(1)
+ s = song.Song(next[:-2])
+ time.sleep(1)
+ ss_tracks = s.get_tracks('7digital-UK')
+ if len(ss_tracks) != 0:
+ ss_track = ss_tracks[0]
+ preview_url = ss_track.get('preview_url')
+ print(preview_url)
+ writer.writerow([next[:-2], preview_url])
+ next = input.readline()
df = store['df']