Mercurial > hg > hybrid-music-recommender-using-content-based-and-social-information
comparison Code/eda.py @ 23:45e6f85d0ba4
List of clips downloaded
author | Paulo Chiliguano <p.e.chiilguano@se14.qmul.ac.uk> |
---|---|
date | Tue, 11 Aug 2015 14:23:42 +0100 |
parents | e68dbee1f6db |
children | 68a62ca32441 |
comparison
equal
deleted
inserted
replaced
22:ce1e76c21c12 | 23:45e6f85d0ba4 |
---|---|
47 "SOAKWCK12A8C139F81": [1, 4, 5, 3.5, 5, 1, 1, 5, 4, 1], | 47 "SOAKWCK12A8C139F81": [1, 4, 5, 3.5, 5, 1, 1, 5, 4, 1], |
48 "SOAKNZI12A58A79CAC": [1, 5, 3.5, 3, 4, 5, 1, 5, 4, 1], | 48 "SOAKNZI12A58A79CAC": [1, 5, 3.5, 3, 4, 5, 1, 5, 4, 1], |
49 "SOAJZEP12A8C14379B": [5, 5, 4, 2, 1, 1, 1, 5, 4, 1], | 49 "SOAJZEP12A8C14379B": [5, 5, 4, 2, 1, 1, 1, 5, 4, 1], |
50 "SOAHQFM12A8C134B65": [2.5, 4, 4, 1, 1, 1, 1, 5, 4, 1]} | 50 "SOAHQFM12A8C134B65": [2.5, 4, 4, 1, 1, 1, 1, 5, 4, 1]} |
51 | 51 |
52 ''' | 52 #Functions to compute similarity between items or between profiles |
53 Functions to compute similarity between items or between profiles | 53 # Source: http://www.guidetodatamining.com |
54 ''' | 54 def manhattan(vector1, vector2): |
55 """Computes the Manhattan distance.""" | |
56 distance = 0 | |
57 total = 0 | |
58 n = len(vector1) | |
59 for i in range(n): | |
60 distance += abs(vector1[i] - vector2[i]) | |
61 return distance | |
62 | |
63 def computeNearestNeighbor(itemName, itemVector, items): | |
64 """creates a sorted list of items based on their distance to item""" | |
65 distances = [] | |
66 for otherItem in items: | |
67 if otherItem != itemName: | |
68 distance = manhattan(itemVector, items[otherItem]) | |
69 distances.append((distance, otherItem)) | |
70 # sort based on distance -- closest first | |
71 distances.sort() | |
72 return distances | |
73 | |
74 def classify(user, itemName, itemVector): | |
75 """Classify the itemName based on user ratings | |
76 Should really have items and users as parameters""" | |
77 # first find nearest neighbor | |
78 nearest = computeNearestNeighbor(itemName, itemVector, items)[0][1] | |
79 rating = users[user][nearest] | |
80 return rating | |
81 | |
82 # Fitness function of EDA | |
83 def Fitness(profile, user): | |
84 nearest = computeNearestNeighbor(itemName, itemVector, items)[0][1] | |
85 rating = users[user][nearest] | |
86 return rating | |
87 | |
88 | |
55 # Pearson Correlation Coefficient | 89 # Pearson Correlation Coefficient |
56 # Source: http://www.guidetodatamining.com | |
57 def pearson(rating1, rating2): | 90 def pearson(rating1, rating2): |
58 sum_xy = 0 | 91 sum_xy = 0 |
59 sum_x = 0 | 92 sum_x = 0 |
60 sum_y = 0 | 93 sum_y = 0 |
61 sum_x2 = 0 | 94 sum_x2 = 0 |
109 if denominator == 0: | 142 if denominator == 0: |
110 return 0 | 143 return 0 |
111 else: | 144 else: |
112 return sum_xy / denominator | 145 return sum_xy / denominator |
113 | 146 |
114 ''' | 147 |
115 Fitness function of EDA | |
116 ''' | 148 ''' |
117 def Fitness(profile, user_index): | 149 def Fitness(profile, user_index): |
118 sim = 0 | 150 sim = 0 |
119 sum_log = 0 | 151 sum_log = 0 |
120 | 152 |
128 for username, songs in users.items(): | 160 for username, songs in users.items(): |
129 for song, rating in songs.items(): | 161 for song, rating in songs.items(): |
130 sim = pearson(profile, items[song]) | 162 sim = pearson(profile, items[song]) |
131 #sum_log += log10(rating * sim) | 163 #sum_log += log10(rating * sim) |
132 return sim | 164 return sim |
133 | 165 ''' |
134 ''' | 166 # Generation of M individuals uniformly |
135 Generation of M individuals uniformly | |
136 ''' | |
137 population_size = len(users) | 167 population_size = len(users) |
138 fraction_of_population = 0.5 | 168 fraction_of_population = 0.5 |
139 np.random.seed(len(users)) | 169 np.random.seed(len(users)) |
140 M = np.random.uniform(size=population_size * len(items.values()[0])) | 170 M = np.random.uniform(size=population_size * len(items.values()[0])) |
141 M.shape = (-1, len(items.values()[0])) | 171 M.shape = (-1, len(items.values()[0])) |