# HG changeset patch
# User Paulo Chiliguano
# Date 1438114282 -3600
# Node ID ee13c193c76e44c4262a7b4d25f85e650f368491
# Parent 68b8b088f50a4a05c95968d5ebc92d32cb7de563
Continue working on EDA
diff -r 68b8b088f50a -r ee13c193c76e Code/eda.py
--- a/Code/eda.py Mon Jul 27 19:24:37 2015 +0100
+++ b/Code/eda.py Tue Jul 28 21:11:22 2015 +0100
@@ -6,7 +6,7 @@
"""
-import random
+from math import sqrt, log10
import numpy as np
from sklearn import mixture
@@ -60,10 +60,95 @@
'''
'''
-Generate M individuals uniformly
+Functions to compute similarity between items or between profiles
'''
+# Pearson Correlation Coefficient
+# Source: http://www.guidetodatamining.com
+def pearson(rating1, rating2):
+    """Return the Pearson correlation between two rating mappings.
+
+    Only keys present in both ``rating1`` and ``rating2`` contribute to
+    the result.
+
+    Returns 0 when the mappings share no key, or when the shared values
+    of either mapping have zero variance (the coefficient is undefined
+    there).
+    """
+    sum_xy = 0
+    sum_x = 0
+    sum_y = 0
+    sum_x2 = 0
+    sum_y2 = 0
+    n = 0
+    for key in rating1:
+        if key in rating2:
+            n += 1
+            x = rating1[key]
+            y = rating2[key]
+            sum_xy += x * y
+            sum_x += x
+            sum_y += y
+            sum_x2 += pow(x, 2)
+            sum_y2 += pow(y, 2)
+    if n == 0:
+        return 0
+    # Force true division: with integer ratings, int / int truncates
+    # under Python 2 and silently corrupts the coefficient.
+    n = float(n)
+    # Rounding can push a zero variance term slightly below zero, which
+    # would make sqrt() raise ValueError; clamp it at zero instead.
+    spread_x = max(sum_x2 - pow(sum_x, 2) / n, 0.0)
+    spread_y = max(sum_y2 - pow(sum_y, 2) / n, 0.0)
+    denominator = sqrt(spread_x) * sqrt(spread_y)
+    if denominator == 0:
+        return 0
+    return (sum_xy - (sum_x * sum_y) / n) / denominator
+
+# Cosine Similarity for test purposes
+def cosine_similarity(rating1, rating2):
+    """Return the cosine similarity between two rating mappings.
+
+    The dot product runs over the keys found in both mappings, while
+    each norm is taken over all values of its own mapping.
+
+    Returns 0 when no key is shared or when either norm is zero.
+    """
+    shared = [key for key in rating1 if key in rating2]
+    if not shared:
+        return 0
+
+    dot = sum(rating1[key] * rating2[key] for key in shared)
+
+    # Norms deliberately cover every rating, not only the shared keys.
+    norm1 = sqrt(sum(rating1[key] ** 2 for key in rating1))
+    norm2 = sqrt(sum(rating2[key] ** 2 for key in rating2))
+
+    magnitude = norm1 * norm2
+    return dot / magnitude if magnitude != 0 else 0
+
+'''
+Fitness function of EDA
+'''
+# Work-in-progress fitness evaluation: computes Pearson similarities between
+# profile data and song data, and returns a single similarity value (`sim`).
+#   profile    -- indexed as profile.items()[user_index][1]
+#   user_index -- position used to index both profile.items() and
+#                 users.items()
+# Reads the module-level names `users` and `items`, and prints every
+# similarity computed in the first loop.
+def Fitness(profile, user_index):
+ sim = 0
+ # NOTE(review): sum_log is only referenced by the commented-out line below.
+ sum_log = 0
+
+ # Subscripting the result of .items() requires it to be a list (Python 2
+ # dict behaviour) -- presumably both objects are dicts; TODO confirm.
+ features = profile.items()[user_index][1]
+ songs = users.items()[user_index][1]
+
+ for song, rating in songs.items():
+ sim = pearson(features, items[song])
+ print(sim)
+
+ # NOTE(review): `songs` is rebound by this loop, and pearson() receives the
+ # whole `profile` rather than `features` -- confirm this is intended.
+ for username, songs in users.items():
+ for song, rating in songs.items():
+ sim = pearson(profile, items[song])
+ #sum_log += log10(rating * sim)
+ return sim
+
+
+'''
+Generation of M individuals uniformly
+'''
+# One individual is generated per entry in `users`.
+population_size = len(users)
+# NOTE(review): not referenced anywhere in the visible part of this patch.
+fraction_of_population = 0.5
np.random.seed(len(users))
-M = np.random.uniform(1, 5, len(users) * len(items.values()[0]))
+# Flat array of uniform draws between 1 and 5; the reshape that follows turns
+# it into rows whose length equals len() of the first value in `items`.
+M = np.random.uniform(1, 5, population_size * len(items.values()[0]))
M.shape = (-1, len(items.values()[0]))
profile = {}
i = 0
@@ -71,6 +156,11 @@
profile["Profile" + str(i)] = M.tolist()[i]
i = i + 1
+'''
+Calculate fitness values
+'''
+# NOTE(review): only index 0 is evaluated here and the value returned by
+# Fitness() is discarded.
+Fitness(profile, 0)
+
np.random.seed(1)
g = mixture.GMM(n_components=7)
# Generate random observations with two modes centered on 0