changeset 17:ee13c193c76e

Continue working on EDA
author Paulo Chiliguano <p.e.chiilguano@se14.qmul.ac.uk>
date Tue, 28 Jul 2015 21:11:22 +0100
parents 68b8b088f50a
children f1504bb2c552
files Code/eda.py
diffstat 1 files changed, 93 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/Code/eda.py	Mon Jul 27 19:24:37 2015 +0100
+++ b/Code/eda.py	Tue Jul 28 21:11:22 2015 +0100
@@ -6,7 +6,7 @@
 """
 
 
-import random
+from math import sqrt, log10
 import numpy as np
 from sklearn import mixture
 
@@ -60,10 +60,95 @@
 '''
 
 '''
-Generate M individuals uniformly
+Functions to compute similarity between items or between profiles
 '''
+# Pearson Correlation Coefficient
+# Source: http://www.guidetodatamining.com
+def pearson(rating1, rating2):
+    """Return the Pearson correlation between two rating mappings.
+
+    Only keys present in both ``rating1`` and ``rating2`` contribute to the
+    result. The coefficient is computed in a single pass from running sums.
+
+    Returns 0 when the two mappings share no key, or when the denominator
+    is zero (either side has no variance over the shared keys, so the
+    coefficient is undefined).
+    """
+    sum_xy = 0
+    sum_x = 0
+    sum_y = 0
+    sum_x2 = 0
+    sum_y2 = 0
+    n = 0
+    for key in rating1:
+        if key in rating2:
+            n += 1
+            x = rating1[key]
+            y = rating2[key]
+            sum_xy += x * y
+            sum_x += x
+            sum_y += y
+            sum_x2 += pow(x, 2)
+            sum_y2 += pow(y, 2)
+    if n == 0:
+        return 0
+    # Force true division: with integer ratings under Python 2 every "/ n"
+    # below would otherwise truncate and skew the coefficient.
+    n = float(n)
+    # Rounding can push a mathematically non-negative variance term just
+    # below zero; clamp it so sqrt() cannot raise "math domain error".
+    var_x = max(sum_x2 - pow(sum_x, 2) / n, 0.0)
+    var_y = max(sum_y2 - pow(sum_y, 2) / n, 0.0)
+    denominator = sqrt(var_x) * sqrt(var_y)
+    if denominator == 0:
+        return 0
+    return (sum_xy - (sum_x * sum_y) / n) / denominator
+
+# Cosine Similarity for test purposes
+def cosine_similarity(rating1, rating2):
+    """Return the cosine similarity between two rating mappings.
+
+    The dot product runs over the keys found in both mappings, while each
+    norm is taken over every rating of its own mapping.
+
+    Returns 0 when the mappings share no key or when either norm is zero.
+    """
+    shared = [key for key in rating1 if key in rating2]
+    if not shared:
+        return 0
+
+    dot = sum(rating1[key] * rating2[key] for key in shared)
+    norm_x = sqrt(sum(pow(rating1[key], 2) for key in rating1))
+    norm_y = sqrt(sum(pow(rating2[key], 2) for key in rating2))
+
+    magnitude = norm_x * norm_y
+    if magnitude == 0:
+        return 0
+    return dot / magnitude
+
+'''
+Fitness function of EDA
+'''
+def Fitness(profile, user_index):
+    """Compute (work-in-progress) similarity values for one candidate profile.
+
+    Picks the ``user_index``-th entry of ``profile`` and of the module-level
+    ``users`` mapping, prints the Pearson similarity between that profile's
+    values and ``items[song]`` for each of that user's songs, then walks
+    every song of every user and returns the last similarity computed
+    (0 if no song was visited).
+    """
+    similarity = 0
+
+    # NOTE(review): indexing .items() relies on Python 2 returning a list,
+    # and on the two dicts iterating in matching order -- confirm.
+    _, candidate = profile.items()[user_index]
+    _, listened = users.items()[user_index]
+
+    for track, _rating in listened.items():
+        similarity = pearson(candidate, items[track])
+        print(similarity)
+
+    # NOTE(review): this pass hands the whole `profile` container to
+    # pearson() rather than a single profile's values -- looks unintended.
+    # TODO: the accumulation of log10(rating * similarity) is still disabled,
+    # so only the last similarity survives.
+    for _user, playlist in users.items():
+        for track, _rating in playlist.items():
+            similarity = pearson(profile, items[track])
+    return similarity
+
+
+'''
+Generation of M individuals uniformly
+'''
+population_size = len(users)
+fraction_of_population = 0.5
 np.random.seed(len(users))
-M = np.random.uniform(1, 5, len(users) * len(items.values()[0]))
+M = np.random.uniform(1, 5, population_size * len(items.values()[0]))
 M.shape = (-1, len(items.values()[0]))
 profile = {}
 i = 0
@@ -71,6 +156,11 @@
     profile["Profile" + str(i)] = M.tolist()[i]
     i = i + 1
 
+'''
+Calculate fitness values
+'''
+Fitness(profile, 0)
+
 np.random.seed(1)
 g = mixture.GMM(n_components=7)
 # Generate random observations with two modes centered on 0