p@15
|
1 # -*- coding: utf-8 -*-
|
p@15
|
2 """
|
p@15
|
3 Created on Wed Jul 22 17:42:09 2015
|
p@15
|
4
|
p@15
|
5 @author: paulochiliguano
|
p@15
|
6 """
|
p@15
|
7
|
p@16
|
8
|
p@17
|
9 from math import sqrt, log10
|
p@15
|
10 import numpy as np
|
p@15
|
11 from sklearn import mixture
|
p@15
|
12
|
p@15
|
#User-item dictionary
# Maps each username to the songs that user has rated:
#   username -> {song_id: rating}
# Keys look like Echo Nest / Million Song Dataset song IDs; ratings appear to
# be on a 0.5-5 scale (note Chan's int 5 among floats) -- TODO confirm scale.
users = {"Angelica": {"SOAJJPC12AB017D63F": 3.5, "SOAKIXJ12AC3DF7152": 2.0,
                      "SOAKPFH12A8C13BA4A": 4.5, "SOAGTJW12A6701F1F5": 5.0,
                      "SOAKWCK12A8C139F81": 1.5, "SOAKNZI12A58A79CAC": 2.5,
                      "SOAJZEP12A8C14379B": 2.0},
         "Bill":{"SOAJJPC12AB017D63F": 2.0, "SOAKIXJ12AC3DF7152": 3.5,
                 "SOAHQFM12A8C134B65": 4.0, "SOAGTJW12A6701F1F5": 2.0,
                 "SOAKWCK12A8C139F81": 3.5, "SOAJZEP12A8C14379B": 3.0},
         "Chan": {"SOAJJPC12AB017D63F": 5.0, "SOAKIXJ12AC3DF7152": 1.0,
                  "SOAHQFM12A8C134B65": 1.0, "SOAKPFH12A8C13BA4A": 3.0,
                  "SOAGTJW12A6701F1F5": 5, "SOAKWCK12A8C139F81": 1.0},
         "Dan": {"SOAJJPC12AB017D63F": 3.0, "SOAKIXJ12AC3DF7152": 4.0,
                 "SOAHQFM12A8C134B65": 4.5, "SOAGTJW12A6701F1F5": 3.0,
                 "SOAKWCK12A8C139F81": 4.5, "SOAKNZI12A58A79CAC": 4.0,
                 "SOAJZEP12A8C14379B": 2.0},
         "Hailey": {"SOAKIXJ12AC3DF7152": 4.0, "SOAHQFM12A8C134B65": 1.0,
                    "SOAKPFH12A8C13BA4A": 4.0, "SOAKNZI12A58A79CAC": 4.0,
                    "SOAJZEP12A8C14379B": 1.0},
         "Jordyn": {"SOAKIXJ12AC3DF7152": 4.5, "SOAHQFM12A8C134B65": 4.0,
                    "SOAKPFH12A8C13BA4A": 5.0, "SOAGTJW12A6701F1F5": 5.0,
                    "SOAKWCK12A8C139F81": 4.5, "SOAKNZI12A58A79CAC": 4.0,
                    "SOAJZEP12A8C14379B": 4.0},
         "Sam": {"SOAJJPC12AB017D63F": 5.0, "SOAKIXJ12AC3DF7152": 2.0,
                 "SOAKPFH12A8C13BA4A": 3.0, "SOAGTJW12A6701F1F5": 5.0,
                 "SOAKWCK12A8C139F81": 4.0, "SOAKNZI12A58A79CAC": 5.0},
         "Veronica": {"SOAJJPC12AB017D63F": 3.0, "SOAKPFH12A8C13BA4A": 5.0,
                      "SOAGTJW12A6701F1F5": 4.0, "SOAKWCK12A8C139F81": 2.5,
                      "SOAKNZI12A58A79CAC": 3.0}
         }
|
p@15
|
42
|
p@16
|
# Item feature vectors: song_id -> list of 10 numeric attributes.
# Presumably hand-assigned audio/genre features on roughly a 1-5 scale
# (the last four entries are identical for every song) -- TODO confirm
# feature meanings against the data source.
items = {"SOAJJPC12AB017D63F": [2.5, 4, 3.5, 3, 5, 4, 1, 5, 4, 1],
         "SOAKIXJ12AC3DF7152": [2, 5, 5, 3, 2, 1, 1, 5, 4, 1],
         "SOAKPFH12A8C13BA4A": [1, 5, 4, 2, 4, 1, 1, 5, 4, 1],
         "SOAGTJW12A6701F1F5": [4, 5, 4, 4, 1, 5, 1, 5, 4, 1],
         "SOAKWCK12A8C139F81": [1, 4, 5, 3.5, 5, 1, 1, 5, 4, 1],
         "SOAKNZI12A58A79CAC": [1, 5, 3.5, 3, 4, 5, 1, 5, 4, 1],
         "SOAJZEP12A8C14379B": [5, 5, 4, 2, 1, 1, 1, 5, 4, 1],
         "SOAHQFM12A8C134B65": [2.5, 4, 4, 1, 1, 1, 1, 5, 4, 1]}
|
p@15
|
51
|
p@23
|
#Functions to compute similarity between items or between profiles
# Source: http://www.guidetodatamining.com
def manhattan(vector1, vector2):
    """Compute the Manhattan (L1) distance between two equal-length vectors.

    Parameters: two sequences of numbers of the same length.
    Returns: the sum of absolute component-wise differences (0 for empty
    input).

    The original kept an unused ``total`` accumulator and indexed with
    ``range(len(...))``; ``zip`` pairs the components directly.
    """
    return sum(abs(a - b) for a, b in zip(vector1, vector2))
|
p@23
|
62
|
p@23
|
def computeNearestNeighbor(itemName, itemVector, items):
    """creates a sorted list of items based on their distance to item"""
    # Pair every other item with its Manhattan distance to itemVector,
    # skipping the query item itself, then order with the closest first.
    pairs = [(manhattan(itemVector, vector), name)
             for name, vector in items.items()
             if name != itemName]
    return sorted(pairs)
|
p@23
|
73
|
p@23
|
def classify(user, itemName, itemVector, user_ratings=None, item_vectors=None):
    """Predict *user*'s rating for *itemName* from its nearest neighbour.

    The original docstring noted it "should really have items and users as
    parameters"; ``user_ratings`` and ``item_vectors`` add those as
    backward-compatible keyword arguments, defaulting to the module-level
    ``users`` and ``items`` dictionaries when omitted.

    Returns the rating *user* gave to the item whose feature vector is
    closest (Manhattan distance) to ``itemVector``.
    Raises KeyError if *user* never rated that nearest item.
    """
    if user_ratings is None:
        user_ratings = users          # module-level user -> ratings dict
    if item_vectors is None:
        item_vectors = items          # module-level song -> feature vector dict
    # first find nearest neighbor
    nearest = computeNearestNeighbor(itemName, itemVector, item_vectors)[0][1]
    return user_ratings[user][nearest]
|
p@23
|
81
|
p@23
|
# Fitness function of EDA
def Fitness(profile, user):
    """Fitness of one EDA individual against one user's rated songs.

    ``profile`` maps profile names to feature vectors; ``user`` is an
    integer index selecting both the individual and the user, matching the
    module-level call ``Fitness(profile, 0)``.

    NOTE(review): the original body referenced the undefined names
    ``itemName``/``itemVector`` and raised NameError on any call.  This
    implementation follows the author's commented-out draft further down in
    the file: compute the Pearson similarity between the profile's feature
    vector and each rated song's feature vector, returning the last
    similarity computed.  Confirm the intended aggregation (the draft's
    ``sum_log`` hints a log-sum was planned).
    """
    features = list(profile.values())[user]   # this individual's features
    songs = list(users.values())[user]        # song -> rating for this user
    sim = 0
    for song in songs:
        sim = pearson(features, items[song])
    return sim
|
p@23
|
87
|
p@23
|
88
|
p@17
|
# Pearson Correlation Coefficient
def pearson(rating1, rating2):
    """Approximate Pearson correlation between two rating dictionaries.

    Only keys present in both dictionaries contribute; returns 0 when there
    is no overlap or when either variance term is zero.
    """
    shared = [key for key in rating1 if key in rating2]
    n = len(shared)
    if n == 0:
        return 0
    xs = [rating1[key] for key in shared]
    ys = [rating2[key] for key in shared]
    sum_x = sum(xs)
    sum_y = sum(ys)
    sum_xy = sum(x * y for x, y in zip(xs, ys))
    sum_x2 = sum(pow(x, 2) for x in xs)
    sum_y2 = sum(pow(y, 2) for y in ys)
    # now compute denominator
    denominator = sqrt(sum_x2 - pow(sum_x, 2) / n) * \
                  sqrt(sum_y2 - pow(sum_y, 2) / n)
    if denominator == 0:
        return 0
    return (sum_xy - (sum_x * sum_y) / n) / denominator
|
p@17
|
116
|
p@17
|
# Cosine Similarity for test purposes
def cosine_similarity(rating1, rating2):
    """Cosine similarity between two rating dictionaries.

    The dot product is taken over the shared keys only, while each norm is
    taken over ALL of that dictionary's values (as in the original).
    Returns 0 when there are no shared keys or a norm is zero.
    """
    shared = [key for key in rating1 if key in rating2]
    if not shared:
        return 0
    dot = sum(rating1[key] * rating2[key] for key in shared)
    # now compute denominator
    norm1 = sqrt(sum(pow(x, 2) for x in rating1.values()))
    norm2 = sqrt(sum(pow(y, 2) for y in rating2.values()))
    denominator = norm1 * norm2
    if denominator == 0:
        return 0
    return dot / denominator
|
p@17
|
146
|
p@23
|
147
|
p@17
|
# NOTE(review): superseded draft of Fitness() kept alive as a module-level
# string literal (dead code, no effect at runtime).  It uses Python 2-only
# indexing of dict views (``profile.items()[user_index][1]``).  Consider
# deleting once the active Fitness() above is settled.
'''
def Fitness(profile, user_index):
    sim = 0
    sum_log = 0

    features = profile.items()[user_index][1]
    songs = users.items()[user_index][1]

    for song, rating in songs.items():
        sim = pearson(features, items[song])
        print(sim)

    for username, songs in users.items():
        for song, rating in songs.items():
            sim = pearson(profile, items[song])
            #sum_log += log10(rating * sim)
    return sim
'''
|
p@23
|
# Generation of M individuals uniformly
population_size = len(users)        # one candidate profile per user
fraction_of_population = 0.5        # NOTE(review): not used in this chunk
np.random.seed(len(users))          # deterministic population for repeatability
# Length of one item feature vector.  next(iter(...)) works on Python 2 and
# 3 alike; the original ``items.values()[0]`` raises TypeError on Python 3
# because dict views are not subscriptable.
num_features = len(next(iter(items.values())))
# M: population_size x num_features matrix of uniform [0, 1) feature samples.
M = np.random.uniform(size=population_size * num_features)
M.shape = (-1, num_features)
profile = {}
# The original loop ignored its ``row`` variable and rebuilt M.tolist() on
# every iteration; enumerate produces the same Profile0..ProfileN-1 mapping.
for i, row in enumerate(M.tolist()):
    profile["Profile" + str(i)] = row
|
p@15
|
177
|
p@17
|
'''
Calculate fitness values
'''
# Evaluate the fitness of the first generated profile (index 0).  The return
# value is discarded -- presumably a smoke test while developing the EDA
# loop in an interactive (Spyder) session; verify before relying on it.
Fitness(profile, 0)
|
p@17
|
182
|
p@21
|
183
|
p@21
|
184
|
p@21
|
185
|
p@21
|
186
|
p@21
|
187
|
p@15
|
np.random.seed(1)
# NOTE(review): mixture.GMM and the ``covars_`` attribute belong to the old
# scikit-learn API (removed in sklearn 0.20, replaced by GaussianMixture /
# ``covariances_``).  Confirm the pinned sklearn version before upgrading.
g = mixture.GMM(n_components=7)
# Generate random observations with two modes centered on 0
# and 10 to use for training.
obs = np.concatenate((np.random.randn(100, 1), 10 + np.random.randn(300, 1)))
g.fit(obs)
# The results of the following calls are discarded at module level --
# presumably inspected interactively (Spyder console) during development.
np.round(g.weights_, 2)   # fitted mixture weights
np.round(g.means_, 2)     # fitted component means
np.round(g.covars_, 2)    # fitted covariances (old-API attribute name)
g.predict([[0], [2], [9], [10]])             # hard component assignments
np.round(g.score([[0], [2], [9], [10]]), 2)  # per-sample scores
# Refit the model on new data (initial parameters remain the
# same), this time with an even split between the two modes.
g.fit(20 * [[0]] + 20 * [[10]])
np.round(g.weights_, 2)
|