Mercurial > hg > hybrid-music-recommender-using-content-based-and-social-information
diff Code/content_based.py @ 26:e4bcfe00abf4
Final version of code
author | Paulo Chiliguano <p.e.chiilguano@se14.qmul.ac.uk> |
---|---|
date | Wed, 26 Aug 2015 02:00:48 +0100 |
parents | fafc0b249a73 |
children | ae650489d3a8 |
line wrap: on
line diff
--- a/Code/content_based.py Sun Aug 23 16:47:54 2015 +0100
+++ b/Code/content_based.py Wed Aug 26 02:00:48 2015 +0100
@@ -9,6 +9,7 @@
 from math import sqrt
 import numpy as np
 import pandas as pd
+import time
 
 # Item-vector dictionary
 f = file('/Users/paulochiliguano/Documents/msc-project/dataset/\
@@ -16,6 +17,18 @@
 song_library = pickle.load(f)
 f.close()
 
+# Normalisation
+#test = []
+#for k, v in song_library.iteritems():
+#    test.append(v)
+#test = np.array(test)
+#test_median = np.median(test, axis=0)
+#test_abs = abs(test - test_median)
+#test_asd = test_abs.sum(axis=0) / test.shape[0]
+#for k, v in song_library.iteritems():
+#    modified_standard_score = (np.array(v) - test_median) / test_asd
+#    song_library[k] = modified_standard_score.tolist()
+
 # Load training and test data
 f = file('/Users/paulochiliguano/Documents/msc-project/dataset/\
 cross_validation.pkl', 'rb')
@@ -35,45 +48,26 @@
     dem2 = sum(map(lambda w_j: (w_j - avrg_w_j) ** 2, vector_j))
     return num / (sqrt(dem1) * sqrt(dem2))
 
-def computeNearestNeighbor(itemName, itemVector, items):
-    """creates a sorted list of items based on their distance to item"""
-    similarities = []
-    for otherItem in items:
-        if otherItem != itemName:
-            sim = adj_cos_sim(itemVector, items[otherItem])
-            similarities.append((sim, otherItem))
-    # sort based on distance -- closest first
-    similarities.sort(reverse=True)
-    #if len(similarities) > N:
-        #similarities = similarities[0:N]
-    return similarities
-
-def nearest_neighbours(song, train_songs, N):
-    similarities = []
-    for k in train_songs:
-        sim = adj_cos_sim(song_library[song], song_library[k])
-        similarities.append((sim, k))
-    similarities.sort(reverse=True)
-    #if len(similarities) > N:
-        #similarities = similarities[0:N]
-    return similarities
-    #return {t[1]: t[0] for t in similarities}
-
-def build_model_cb(song_library, k=30):
-    other_songs = song_library.keys()
+def build_model_cb(train_data, k=30):
+    a = []
+    for user, info in train_data.iteritems():
+        a.extend([i for i in info])
+    songIDs = list(set(a))
+    #other_songs = song_library.keys()
+
     similarity_matrix = {}
-    for song in song_library:
+    for song in songIDs:
         similarities = []
-        for other in other_songs:
+        for other in songIDs:
             if other != song:
                 sim = adj_cos_sim(song_library[song], song_library[other])
                 similarities.append((sim, other))
         similarities.sort(reverse=True)
         similarity_matrix[song] = similarities[0:k]
+
     return similarity_matrix
     #similarity_rows[song] = {t[1]: t[0] for t in similarities}
 
-
 def top_n(sim_matrix, user, song_rating, rating_threshold=2, N=10):
     candidate = pd.DataFrame()
     entries = song_rating.keys()
@@ -99,7 +93,7 @@
     return list(topN.head(N).keys())
 
 
-def evaluate_cb(topN, test_data, rating_threshold=3):
+def evaluate_cb(topN, test_data, rating_threshold=2):
 
     tp = 0.
     fp = 0.
@@ -119,22 +113,35 @@
         elif rating <= rating_threshold:
             tn += 1
     print tp, fp, fn, tn
-    precision = tp / (tp + fp)
-    recall = tp / (tp + fn)
-    F1 = 2 * precision * recall / (precision + recall)
+    if tp != 0:
+        precision = tp / (tp + fp)
+        recall = tp / (tp + fn)
+        F1 = 2 * precision * recall / (precision + recall)
+    else:
+        precision = 0
+        recall = 0
+        F1 = 0
+
     accuracy = (tp + tn) / (tp + fp + tn + fn)
     return precision, recall, F1, accuracy
-
-sim_matrix = build_model_cb(song_library, 30)
+
 p = np.array([])
 f = np.array([])
 r = np.array([])
 a = np.array([])
 
+
 for i in range(len(users_train)):
+
+    start_time = time.time()
+    sim_matrix = build_model_cb(users_train[i])
+
     topN = {}
     for user, song_rating in users_train[i].iteritems():
         topN[user] = top_n(sim_matrix, user, song_rating)
+    elapsed_time = time.time() - start_time
+    print 'Training execution time: %.3f seconds' % elapsed_time
+
     pi, ri, fi, ai = evaluate_cb(topN, users_test[i])
 
     p = np.append(p, pi)
@@ -142,7 +149,6 @@
     f = np.append(f, fi)
     a = np.append(a, ai)
 
-
 print "Precision = %f3 ± %f3" % (p.mean(), p.std())
 print "Recall = %f3 ± %f3" % (r.mean(), r.std())
 print "F1 = %f3 ± %f3" % (f.mean(), f.std())