Mercurial > hg > hybrid-music-recommender-using-content-based-and-social-information
changeset 17:ee13c193c76e
Continue working on EDA
author | Paulo Chiliguano <p.e.chiilguano@se14.qmul.ac.uk> |
---|---|
date | Tue, 28 Jul 2015 21:11:22 +0100 |
parents | 68b8b088f50a |
children | f1504bb2c552 |
files | Code/eda.py |
diffstat | 1 files changed, 93 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
--- a/Code/eda.py Mon Jul 27 19:24:37 2015 +0100
+++ b/Code/eda.py Tue Jul 28 21:11:22 2015 +0100
@@ -6,7 +6,7 @@
 """
 
 
-import random
+from math import sqrt, log10
 import numpy as np
 from sklearn import mixture
 
@@ -60,10 +60,95 @@
 '''
 
 '''
-Generate M individuals uniformly
+Functions to compute similarity between items or between profiles
 '''
+# Pearson Correlation Coefficient
+# Source: http://www.guidetodatamining.com
+def pearson(rating1, rating2):
+    sum_xy = 0
+    sum_x = 0
+    sum_y = 0
+    sum_x2 = 0
+    sum_y2 = 0
+    n = 0
+    for key in rating1:
+        if key in rating2:
+            n += 1
+            x = rating1[key]
+            y = rating2[key]
+            sum_xy += x * y
+            sum_x += x
+            sum_y += y
+            sum_x2 += pow(x, 2)
+            sum_y2 += pow(y, 2)
+    if n == 0:
+        return 0
+    # now compute denominator
+    denominator = sqrt(sum_x2 - pow(sum_x, 2) / n) * \
+        sqrt(sum_y2 - pow(sum_y, 2) / n)
+    if denominator == 0:
+        return 0
+    else:
+        return (sum_xy - (sum_x * sum_y) / n) / denominator
+
+# Cosine Similarity for test purposes
+def cosine_similarity(rating1, rating2):
+    sum_xy = 0
+    sum_x2 = 0
+    sum_y2 = 0
+    n = 0
+    for key in rating1:
+        if key in rating2:
+            n += 1
+            x = rating1[key]
+            y = rating2[key]
+            sum_xy += x * y
+    if n == 0:
+        return 0
+
+    # now compute denominator
+    for key in rating1:
+        x = rating1[key]
+        sum_x2 += pow(x, 2)
+
+    for key in rating2:
+        y = rating2[key]
+        sum_y2 += pow(y, 2)
+
+    denominator = sqrt(sum_x2) * sqrt(sum_y2)
+    if denominator == 0:
+        return 0
+    else:
+        return sum_xy / denominator
+
+'''
+Fitness function of EDA
+'''
+def Fitness(profile, user_index):
+    sim = 0
+    sum_log = 0
+
+    features = profile.items()[user_index][1]
+    songs = users.items()[user_index][1]
+
+    for song, rating in songs.items():
+        sim = pearson(features, items[song])
+        print(sim)
+
+    for username, songs in users.items():
+        for song, rating in songs.items():
+            sim = pearson(profile, items[song])
+            #sum_log += log10(rating * sim)
+    return sim
+
+
+'''
+Generation of M individuals uniformly
+'''
+population_size = len(users)
+fraction_of_population = 0.5
 np.random.seed(len(users))
-M = np.random.uniform(1, 5, len(users) * len(items.values()[0]))
+M = np.random.uniform(1, 5, population_size * len(items.values()[0]))
 M.shape = (-1, len(items.values()[0]))
 profile = {}
 i = 0
@@ -71,6 +156,11 @@
     profile["Profile" + str(i)] = M.tolist()[i]
     i = i + 1
 
+'''
+Calculate fitness values
+'''
+Fitness(profile, 0)
+
 np.random.seed(1)
 g = mixture.GMM(n_components=7)
 # Generate random observations with two modes centered on 0