p@15
|
1 # -*- coding: utf-8 -*-
|
p@15
|
2 """
|
p@15
|
3 Created on Wed Jul 22 17:42:09 2015
|
p@15
|
4
|
p@15
|
5 @author: paulochiliguano
|
p@15
|
6 """
|
p@15
|
7
|
p@16
|
8
|
p@17
|
9 from math import sqrt, log10
|
p@15
|
10 import numpy as np
|
p@15
|
11 from sklearn import mixture
|
p@15
|
12
|
p@15
|
13 #User-item dictionary
|
p@15
|
# User-item ratings: user name -> {song id -> rating given by that user}.
# Insertion order is kept as in the original literal because other code
# selects users by position.
users = {
    "Angelica": {
        "SOAJJPC12AB017D63F": 3.5,
        "SOAKIXJ12AC3DF7152": 2.0,
        "SOAKPFH12A8C13BA4A": 4.5,
        "SOAGTJW12A6701F1F5": 5.0,
        "SOAKWCK12A8C139F81": 1.5,
        "SOAKNZI12A58A79CAC": 2.5,
        "SOAJZEP12A8C14379B": 2.0,
    },
    "Bill": {
        "SOAJJPC12AB017D63F": 2.0,
        "SOAKIXJ12AC3DF7152": 3.5,
        "SOAHQFM12A8C134B65": 4.0,
        "SOAGTJW12A6701F1F5": 2.0,
        "SOAKWCK12A8C139F81": 3.5,
        "SOAJZEP12A8C14379B": 3.0,
    },
    "Chan": {
        "SOAJJPC12AB017D63F": 5.0,
        "SOAKIXJ12AC3DF7152": 1.0,
        "SOAHQFM12A8C134B65": 1.0,
        "SOAKPFH12A8C13BA4A": 3.0,
        "SOAGTJW12A6701F1F5": 5,
        "SOAKWCK12A8C139F81": 1.0,
    },
    "Dan": {
        "SOAJJPC12AB017D63F": 3.0,
        "SOAKIXJ12AC3DF7152": 4.0,
        "SOAHQFM12A8C134B65": 4.5,
        "SOAGTJW12A6701F1F5": 3.0,
        "SOAKWCK12A8C139F81": 4.5,
        "SOAKNZI12A58A79CAC": 4.0,
        "SOAJZEP12A8C14379B": 2.0,
    },
    "Hailey": {
        "SOAKIXJ12AC3DF7152": 4.0,
        "SOAHQFM12A8C134B65": 1.0,
        "SOAKPFH12A8C13BA4A": 4.0,
        "SOAKNZI12A58A79CAC": 4.0,
        "SOAJZEP12A8C14379B": 1.0,
    },
    "Jordyn": {
        "SOAKIXJ12AC3DF7152": 4.5,
        "SOAHQFM12A8C134B65": 4.0,
        "SOAKPFH12A8C13BA4A": 5.0,
        "SOAGTJW12A6701F1F5": 5.0,
        "SOAKWCK12A8C139F81": 4.5,
        "SOAKNZI12A58A79CAC": 4.0,
        "SOAJZEP12A8C14379B": 4.0,
    },
    "Sam": {
        "SOAJJPC12AB017D63F": 5.0,
        "SOAKIXJ12AC3DF7152": 2.0,
        "SOAKPFH12A8C13BA4A": 3.0,
        "SOAGTJW12A6701F1F5": 5.0,
        "SOAKWCK12A8C139F81": 4.0,
        "SOAKNZI12A58A79CAC": 5.0,
    },
    "Veronica": {
        "SOAJJPC12AB017D63F": 3.0,
        "SOAKPFH12A8C13BA4A": 5.0,
        "SOAGTJW12A6701F1F5": 4.0,
        "SOAKWCK12A8C139F81": 2.5,
        "SOAKNZI12A58A79CAC": 3.0,
    },
}
|
p@15
|
42
|
p@16
|
# Song feature table: song id -> list of ten numeric feature values.
items = {
    "SOAJJPC12AB017D63F": [2.5, 4, 3.5, 3, 5, 4, 1, 5, 4, 1],
    "SOAKIXJ12AC3DF7152": [2, 5, 5, 3, 2, 1, 1, 5, 4, 1],
    "SOAKPFH12A8C13BA4A": [1, 5, 4, 2, 4, 1, 1, 5, 4, 1],
    "SOAGTJW12A6701F1F5": [4, 5, 4, 4, 1, 5, 1, 5, 4, 1],
    "SOAKWCK12A8C139F81": [1, 4, 5, 3.5, 5, 1, 1, 5, 4, 1],
    "SOAKNZI12A58A79CAC": [1, 5, 3.5, 3, 4, 5, 1, 5, 4, 1],
    "SOAJZEP12A8C14379B": [5, 5, 4, 2, 1, 1, 1, 5, 4, 1],
    "SOAHQFM12A8C134B65": [2.5, 4, 4, 1, 1, 1, 1, 5, 4, 1],
}
|
p@16
|
51 '''
|
p@15
|
52 profile = {"Profile0": [2.5, 4, 3.5, 3, 5, 4, 1],
|
p@15
|
53 "Profile1": [2.5, 4, 3.5, 3, 5, 4, 1],
|
p@15
|
54 "Profile2": [2.5, 4, 3.5, 3, 5, 4, 1],
|
p@15
|
55 "Profile3": [2.5, 4, 3.5, 3, 5, 4, 1],
|
p@15
|
56 "Profile4": [2.5, 4, 3.5, 3, 5, 4, 1],
|
p@15
|
57 "Profile5": [2.5, 4, 3.5, 3, 5, 4, 1],
|
p@15
|
58 "Profile6": [2.5, 4, 3.5, 3, 5, 4, 1],
|
p@15
|
59 "Profile7": [2.5, 4, 3.5, 3, 5, 4, 1]}
|
p@16
|
60 '''
|
p@15
|
61
|
p@16
|
62 '''
|
p@17
|
63 Functions to compute similarity between items or between profiles
|
p@16
|
64 '''
|
p@17
|
65 # Pearson Correlation Coefficient
|
p@17
|
66 # Source: http://www.guidetodatamining.com
|
p@17
|
def pearson(rating1, rating2):
    """Return the Pearson correlation of two ratings over their shared keys.

    Parameters
    ----------
    rating1, rating2 : dict
        Mappings from a key (e.g. a song id) to a numeric rating. Only keys
        present in both mappings contribute to the result.

    Returns
    -------
    The correlation coefficient, or 0 when the mappings share no key or
    when either side has no spread over the shared keys (the coefficient
    is undefined in that case).
    """
    # Float accumulators keep the divisions below true divisions even when
    # every rating is an int and the interpreter floor-divides ints.
    sum_xy = sum_x = sum_y = sum_x2 = sum_y2 = 0.0
    n = 0
    for key in rating1:
        if key in rating2:
            n += 1
            x = rating1[key]
            y = rating2[key]
            sum_xy += x * y
            sum_x += x
            sum_y += y
            sum_x2 += x ** 2
            sum_y2 += y ** 2
    if n == 0:
        return 0

    # Rounding can push these mathematically non-negative terms slightly
    # below zero; clamp so sqrt() cannot raise a domain error.
    spread_x = max(sum_x2 - sum_x ** 2 / n, 0.0)
    spread_y = max(sum_y2 - sum_y ** 2 / n, 0.0)
    denominator = sqrt(spread_x) * sqrt(spread_y)
    if denominator == 0:
        return 0
    return (sum_xy - sum_x * sum_y / n) / denominator
|
p@17
|
93
|
p@17
|
94 # Cosine Similarity for test purposes
|
p@17
|
def cosine_similarity(rating1, rating2):
    """Return the cosine similarity between two rating mappings.

    The dot product runs over the keys the two mappings have in common,
    while each magnitude is taken over all entries of its own mapping.
    Returns 0 when there is no common key or when either magnitude is zero.
    """
    common = [item for item in rating1 if item in rating2]
    if not common:
        return 0

    dot = 0
    for item in common:
        dot += rating1[item] * rating2[item]

    # Squared magnitudes, accumulated in each mapping's own iteration order.
    squares1 = 0
    for item in rating1:
        squares1 += rating1[item] ** 2

    squares2 = 0
    for item in rating2:
        squares2 += rating2[item] ** 2

    magnitude = sqrt(squares1) * sqrt(squares2)
    return dot / magnitude if magnitude != 0 else 0
|
p@17
|
123
|
p@17
|
124 '''
|
p@17
|
125 Fitness function of EDA
|
p@17
|
126 '''
|
p@17
|
def Fitness(profile, user_index):
    """Compare one candidate profile with the songs rated by one user.

    Parameters
    ----------
    profile : dict
        Maps a profile name to a feature vector (a list of numbers).
    user_index : int
        Position, in dict iteration order, of the profile to evaluate and of
        the entry in the module-level ``users`` dict whose songs are used.

    Returns
    -------
    The Pearson correlation between the selected feature vector and the
    feature vector of the last song visited (0 if the user rated no song).
    Each per-song correlation is also printed.
    """
    # list() keeps positional indexing valid where dict.items() is a view.
    features = list(profile.items())[user_index][1]
    songs = list(users.items())[user_index][1]

    # pearson() pairs its arguments by key, so each feature vector is keyed
    # by position; passing the raw lists would make it treat the feature
    # values themselves as keys.
    indexed_features = dict(enumerate(features))

    sim = 0
    for song in songs:
        sim = pearson(indexed_features, dict(enumerate(items[song])))
        print(sim)
    # NOTE(review): the rating-weighted log sum (log10(rating * sim)) that
    # the original left commented out is still not implemented here.
    return sim
|
p@17
|
143
|
p@17
|
144
|
p@17
|
145 '''
|
p@17
|
146 Generation of M individuals uniformly
|
p@17
|
147 '''
|
p@17
|
# Initial population: one random feature vector per user, every feature
# drawn uniformly from [1, 5).
population_size = len(users)
fraction_of_population = 0.5
np.random.seed(len(users))  # fixed seed so the population is reproducible
# Length of one item feature vector; next(iter(...)) works on both lists
# and dict views, unlike indexing items.values() directly.
n_features = len(next(iter(items.values())))
M = np.random.uniform(1, 5, population_size * n_features)
M.shape = (-1, n_features)
profile = {}
for i, row in enumerate(M.tolist()):
    profile["Profile" + str(i)] = row
|
p@15
|
158
|
p@17
|
159 '''
|
p@17
|
160 Calculate fitness values
|
p@17
|
161 '''
|
p@17
|
# Evaluate the first candidate profile against the first user.
Fitness(profile, 0)

# Gaussian-mixture demo on synthetic one-dimensional data.
# NOTE(review): mixture.GMM is the legacy scikit-learn estimator; confirm
# that the installed scikit-learn release still provides it.
np.random.seed(1)
# Training sample: 100 standard-normal draws followed by 300 draws
# shifted to be centred on 10.
obs = np.vstack([np.random.randn(100, 1), 10 + np.random.randn(300, 1)])
g = mixture.GMM(n_components=7)
g.fit(obs)
# The fitted parameters are computed but discarded (no print/assignment).
np.round(g.weights_, 2)
np.round(g.means_, 2)
np.round(g.covars_, 2)
g.predict([[0], [2], [9], [10]])
np.round(g.score([[0], [2], [9], [10]]), 2)
# Fit again, this time on data split evenly between the two modes
# (20 points at 0 and 20 points at 10).
g.fit([[0]] * 20 + [[10]] * 20)
np.round(g.weights_, 2)
|