comparison Code/eda.py @ 23:45e6f85d0ba4

List of clips downloaded
author Paulo Chiliguano <p.e.chiilguano@se14.qmul.ac.uk>
date Tue, 11 Aug 2015 14:23:42 +0100
parents e68dbee1f6db
children 68a62ca32441
comparison
equal deleted inserted replaced
22:ce1e76c21c12 23:45e6f85d0ba4
47 "SOAKWCK12A8C139F81": [1, 4, 5, 3.5, 5, 1, 1, 5, 4, 1], 47 "SOAKWCK12A8C139F81": [1, 4, 5, 3.5, 5, 1, 1, 5, 4, 1],
48 "SOAKNZI12A58A79CAC": [1, 5, 3.5, 3, 4, 5, 1, 5, 4, 1], 48 "SOAKNZI12A58A79CAC": [1, 5, 3.5, 3, 4, 5, 1, 5, 4, 1],
49 "SOAJZEP12A8C14379B": [5, 5, 4, 2, 1, 1, 1, 5, 4, 1], 49 "SOAJZEP12A8C14379B": [5, 5, 4, 2, 1, 1, 1, 5, 4, 1],
50 "SOAHQFM12A8C134B65": [2.5, 4, 4, 1, 1, 1, 1, 5, 4, 1]} 50 "SOAHQFM12A8C134B65": [2.5, 4, 4, 1, 1, 1, 1, 5, 4, 1]}
51 51
52 ''' 52 #Functions to compute similarity between items or between profiles
53 Functions to compute similarity between items or between profiles 53 # Source: http://www.guidetodatamining.com
def manhattan(vector1, vector2):
    """Compute the Manhattan (L1) distance between two vectors.

    The distance is the sum of the absolute differences between the
    components of ``vector1`` and the components of ``vector2`` found at
    the same positions.

    Args:
        vector1: Iterable of numbers; its length drives the comparison.
        vector2: Indexable sequence of numbers, at least as long as
            ``vector1``. Components beyond ``len(vector1)`` are ignored.

    Returns:
        The accumulated absolute difference; ``0`` when ``vector1`` is
        empty.

    Raises:
        IndexError: If ``vector2`` is shorter than ``vector1``.
    """
    # Index into vector2 (instead of zip) so that a too-short vector2 still
    # raises IndexError rather than being silently truncated.
    return sum(
        abs(component - vector2[i]) for i, component in enumerate(vector1)
    )
62
def computeNearestNeighbor(itemName, itemVector, items):
    """Rank every other item by its Manhattan distance to the given item.

    Args:
        itemName: Key of the reference item; the entry with this key is
            left out of the result.
        itemVector: Feature vector of the reference item.
        items: Mapping of item key -> feature vector.

    Returns:
        A list of ``(distance, key)`` tuples in ascending order, so the
        closest item comes first (ties are ordered by key).
    """
    ranked = [
        (manhattan(itemVector, items[candidate]), candidate)
        for candidate in items
        if candidate != itemName
    ]
    # Tuples compare element-wise, so this orders by distance first.
    return sorted(ranked)
73
def classify(user, itemName, itemVector):
    """Return the rating ``user`` gave to the item nearest to ``itemName``.

    Reads the module-level ``items`` and ``users`` objects rather than
    taking them as arguments (the original author noted they should
    really be parameters).

    Args:
        user: Key into the module-level ``users`` mapping.
        itemName: Key of the item being classified; it is excluded from
            the neighbour search.
        itemVector: Feature vector of the item being classified.

    Returns:
        ``users[user][nearest]``, where ``nearest`` is the key of the
        closest item reported by ``computeNearestNeighbor``.
    """
    neighbours = computeNearestNeighbor(itemName, itemVector, items)
    # The first entry is the closest one; keep only its key.
    _, closest = neighbours[0]
    return users[user][closest]
81
82 # Fitness function of EDA
def Fitness(profile, user):
    """Score ``profile`` by the rating ``user`` gave to its nearest item.

    The previous body was copied from ``classify`` and referred to
    ``itemName`` and ``itemVector``, which are neither parameters nor
    locals of this function, while ``profile`` was never used. The
    profile is now used as the query vector.

    Reads the module-level ``items`` and ``users`` objects.

    Args:
        profile: Candidate feature vector, compared against every vector
            in ``items``.
        user: Key into the module-level ``users`` mapping.

    Returns:
        ``users[user][nearest]``, where ``nearest`` is the key of the item
        closest to ``profile``.

    NOTE(review): presumably ``users[user]`` only holds the items that
    user rated, in which case this lookup fails when the nearest item is
    unrated -- confirm the intended fallback.
    """
    # A profile is not one of the stored items, so there is no key to
    # exclude from the search; None is passed as the reference name.
    neighbours = computeNearestNeighbor(None, profile, items)
    nearest = neighbours[0][1]
    return users[user][nearest]
87
88
55 # Pearson Correlation Coefficient 89 # Pearson Correlation Coefficient
56 # Source: http://www.guidetodatamining.com
57 def pearson(rating1, rating2): 90 def pearson(rating1, rating2):
58 sum_xy = 0 91 sum_xy = 0
59 sum_x = 0 92 sum_x = 0
60 sum_y = 0 93 sum_y = 0
61 sum_x2 = 0 94 sum_x2 = 0
109 if denominator == 0: 142 if denominator == 0:
110 return 0 143 return 0
111 else: 144 else:
112 return sum_xy / denominator 145 return sum_xy / denominator
113 146
114 ''' 147
115 Fitness function of EDA
116 ''' 148 '''
117 def Fitness(profile, user_index): 149 def Fitness(profile, user_index):
118 sim = 0 150 sim = 0
119 sum_log = 0 151 sum_log = 0
120 152
128 for username, songs in users.items(): 160 for username, songs in users.items():
129 for song, rating in songs.items(): 161 for song, rating in songs.items():
130 sim = pearson(profile, items[song]) 162 sim = pearson(profile, items[song])
131 #sum_log += log10(rating * sim) 163 #sum_log += log10(rating * sim)
132 return sim 164 return sim
133 165 '''
134 ''' 166 # Generation of M individuals uniformly
135 Generation of M individuals uniformly
136 '''
137 population_size = len(users) 167 population_size = len(users)
138 fraction_of_population = 0.5 168 fraction_of_population = 0.5
139 np.random.seed(len(users)) 169 np.random.seed(len(users))
140 M = np.random.uniform(size=population_size * len(items.values()[0])) 170 M = np.random.uniform(size=population_size * len(items.values()[0]))
141 M.shape = (-1, len(items.values()[0])) 171 M.shape = (-1, len(items.values()[0]))