Mercurial > hg > hybrid-music-recommender-using-content-based-and-social-information
diff Code/eda.py @ 23:45e6f85d0ba4
List of clips downloaded
author | Paulo Chiliguano <p.e.chiilguano@se14.qmul.ac.uk> |
---|---|
date | Tue, 11 Aug 2015 14:23:42 +0100 |
parents | e68dbee1f6db |
children | 68a62ca32441 |
line wrap: on
line diff
--- a/Code/eda.py Tue Aug 11 10:56:51 2015 +0100
+++ b/Code/eda.py Tue Aug 11 14:23:42 2015 +0100
@@ -49,11 +49,44 @@
     "SOAJZEP12A8C14379B": [5, 5, 4, 2, 1, 1, 1, 5, 4, 1],
     "SOAHQFM12A8C134B65": [2.5, 4, 4, 1, 1, 1, 1, 5, 4, 1]}
 
-'''
-Functions to compute similarity between items or between profiles
-'''
+#Functions to compute similarity between items or between profiles
+# Source: http://www.guidetodatamining.com
+def manhattan(vector1, vector2):
+    """Computes the Manhattan distance."""
+    distance = 0
+    total = 0
+    n = len(vector1)
+    for i in range(n):
+        distance += abs(vector1[i] - vector2[i])
+    return distance
+
+def computeNearestNeighbor(itemName, itemVector, items):
+    """creates a sorted list of items based on their distance to item"""
+    distances = []
+    for otherItem in items:
+        if otherItem != itemName:
+            distance = manhattan(itemVector, items[otherItem])
+            distances.append((distance, otherItem))
+    # sort based on distance -- closest first
+    distances.sort()
+    return distances
+
+def classify(user, itemName, itemVector):
+    """Classify the itemName based on user ratings
+    Should really have items and users as parameters"""
+    # first find nearest neighbor
+    nearest = computeNearestNeighbor(itemName, itemVector, items)[0][1]
+    rating = users[user][nearest]
+    return rating
+
+# Fitness function of EDA
+def Fitness(profile, user):
+    nearest = computeNearestNeighbor(itemName, itemVector, items)[0][1]
+    rating = users[user][nearest]
+    return rating
+
+
 # Pearson Correlation Coefficient
-# Source: http://www.guidetodatamining.com
 def pearson(rating1, rating2):
     sum_xy = 0
     sum_x = 0
@@ -111,8 +144,7 @@
     else:
         return sum_xy / denominator
 
-'''
-Fitness function of EDA
+
 '''
 def Fitness(profile, user_index):
     sim = 0
@@ -130,10 +162,8 @@
         sim = pearson(profile, items[song])
         #sum_log += log10(rating * sim)
     return sim
-
 '''
-Generation of M individuals uniformly
-'''
+# Generation of M individuals uniformly
 population_size = len(users)
 fraction_of_population = 0.5
 np.random.seed(len(users))