hybrid-music-recommender-using-content-based-and-social-information: Code/eda.py annotate

annotate Code/eda.py @ 23:45e6f85d0ba4

List of clips downloaded

author	Paulo Chiliguano <p.e.chiilguano@se14.qmul.ac.uk>
date	Tue, 11 Aug 2015 14:23:42 +0100
parents	e68dbee1f6db
children	68a62ca32441

rev	line source
p@15	1 # -- coding: utf-8 --
p@15	2 """
p@15	3 Created on Wed Jul 22 17:42:09 2015
p@15	4
p@15	5 @author: paulochiliguano
p@15	6 """
p@15	7
p@16	8
p@17	9 from math import sqrt, log10
p@15	10 import numpy as np
p@15	11 from sklearn import mixture
p@15	12
p@15	13 #User-item dictionary
# Ratings given by each user, keyed first by user name and then by song ID.
# Users have not necessarily rated every song that appears in ``items``.
users = {
    "Angelica": {
        "SOAJJPC12AB017D63F": 3.5,
        "SOAKIXJ12AC3DF7152": 2.0,
        "SOAKPFH12A8C13BA4A": 4.5,
        "SOAGTJW12A6701F1F5": 5.0,
        "SOAKWCK12A8C139F81": 1.5,
        "SOAKNZI12A58A79CAC": 2.5,
        "SOAJZEP12A8C14379B": 2.0,
    },
    "Bill": {
        "SOAJJPC12AB017D63F": 2.0,
        "SOAKIXJ12AC3DF7152": 3.5,
        "SOAHQFM12A8C134B65": 4.0,
        "SOAGTJW12A6701F1F5": 2.0,
        "SOAKWCK12A8C139F81": 3.5,
        "SOAJZEP12A8C14379B": 3.0,
    },
    "Chan": {
        "SOAJJPC12AB017D63F": 5.0,
        "SOAKIXJ12AC3DF7152": 1.0,
        "SOAHQFM12A8C134B65": 1.0,
        "SOAKPFH12A8C13BA4A": 3.0,
        "SOAGTJW12A6701F1F5": 5,
        "SOAKWCK12A8C139F81": 1.0,
    },
    "Dan": {
        "SOAJJPC12AB017D63F": 3.0,
        "SOAKIXJ12AC3DF7152": 4.0,
        "SOAHQFM12A8C134B65": 4.5,
        "SOAGTJW12A6701F1F5": 3.0,
        "SOAKWCK12A8C139F81": 4.5,
        "SOAKNZI12A58A79CAC": 4.0,
        "SOAJZEP12A8C14379B": 2.0,
    },
    "Hailey": {
        "SOAKIXJ12AC3DF7152": 4.0,
        "SOAHQFM12A8C134B65": 1.0,
        "SOAKPFH12A8C13BA4A": 4.0,
        "SOAKNZI12A58A79CAC": 4.0,
        "SOAJZEP12A8C14379B": 1.0,
    },
    "Jordyn": {
        "SOAKIXJ12AC3DF7152": 4.5,
        "SOAHQFM12A8C134B65": 4.0,
        "SOAKPFH12A8C13BA4A": 5.0,
        "SOAGTJW12A6701F1F5": 5.0,
        "SOAKWCK12A8C139F81": 4.5,
        "SOAKNZI12A58A79CAC": 4.0,
        "SOAJZEP12A8C14379B": 4.0,
    },
    "Sam": {
        "SOAJJPC12AB017D63F": 5.0,
        "SOAKIXJ12AC3DF7152": 2.0,
        "SOAKPFH12A8C13BA4A": 3.0,
        "SOAGTJW12A6701F1F5": 5.0,
        "SOAKWCK12A8C139F81": 4.0,
        "SOAKNZI12A58A79CAC": 5.0,
    },
    "Veronica": {
        "SOAJJPC12AB017D63F": 3.0,
        "SOAKPFH12A8C13BA4A": 5.0,
        "SOAGTJW12A6701F1F5": 4.0,
        "SOAKWCK12A8C139F81": 2.5,
        "SOAKNZI12A58A79CAC": 3.0,
    },
}
p@15	42
# Feature vector of every song, keyed by song ID.  All vectors have the same
# length (10 components).
items = {
    "SOAJJPC12AB017D63F": [2.5, 4, 3.5, 3, 5, 4, 1, 5, 4, 1],
    "SOAKIXJ12AC3DF7152": [2, 5, 5, 3, 2, 1, 1, 5, 4, 1],
    "SOAKPFH12A8C13BA4A": [1, 5, 4, 2, 4, 1, 1, 5, 4, 1],
    "SOAGTJW12A6701F1F5": [4, 5, 4, 4, 1, 5, 1, 5, 4, 1],
    "SOAKWCK12A8C139F81": [1, 4, 5, 3.5, 5, 1, 1, 5, 4, 1],
    "SOAKNZI12A58A79CAC": [1, 5, 3.5, 3, 4, 5, 1, 5, 4, 1],
    "SOAJZEP12A8C14379B": [5, 5, 4, 2, 1, 1, 1, 5, 4, 1],
    "SOAHQFM12A8C134B65": [2.5, 4, 4, 1, 1, 1, 1, 5, 4, 1],
}
p@15	51
p@23	52 #Functions to compute similarity between items or between profiles
p@23	53 # Source: http://www.guidetodatamining.com
def manhattan(vector1, vector2):
    """Compute the Manhattan (L1) distance between two numeric vectors.

    Only the first ``len(vector1)`` components are compared; any extra
    trailing components of ``vector2`` are ignored.

    Args:
        vector1: sequence of numbers.
        vector2: sequence of numbers, at least as long as ``vector1``.

    Returns:
        The sum of the absolute component-wise differences (0 for empty
        vectors).

    Raises:
        IndexError: if ``vector2`` has fewer components than ``vector1``.
    """
    # zip() would silently truncate to the shorter input, so keep the failure
    # for a too-short second vector explicit.
    if len(vector2) < len(vector1):
        raise IndexError("vector2 has fewer components than vector1")
    return sum(abs(a - b) for a, b in zip(vector1, vector2))
p@23	62
def computeNearestNeighbor(itemName, itemVector, items):
    """Rank all other items by their Manhattan distance to the given item.

    Args:
        itemName: key of the reference item; the entry with this key is
            left out of the ranking.
        itemVector: feature vector of the reference item.
        items: mapping of item key -> feature vector.

    Returns:
        A list of ``(distance, item_key)`` tuples sorted in ascending order,
        i.e. closest item first (ties are ordered by item key).
    """
    ranked = [
        (manhattan(itemVector, candidate_vector), candidate)
        for candidate, candidate_vector in items.items()
        if candidate != itemName
    ]
    # Tuples compare element-wise, so this orders by distance first.
    ranked.sort()
    return ranked
p@23	73
def classify(user, itemName, itemVector):
    """Predict ``user``'s rating for an item from its most similar rated item.

    The items in the module-level ``items`` dict are ranked by Manhattan
    distance to ``itemVector`` and the rating that ``user`` gave to the
    closest one is returned.  Items the user has not rated are skipped, so
    the result comes from the nearest neighbour the user actually rated.

    Should really have items and users as parameters.

    Args:
        user: key into the module-level ``users`` dict.
        itemName: key of the item being classified (excluded from the
            neighbour search).
        itemVector: feature vector of the item being classified.

    Returns:
        The user's rating of the nearest rated neighbour.

    Raises:
        IndexError: if there is no other item to compare against.
        KeyError: if ``user`` is unknown or has rated none of the
            neighbouring items.
    """
    neighbours = computeNearestNeighbor(itemName, itemVector, items)
    if not neighbours:
        raise IndexError("no other items to compare against")
    ratings = users[user]
    # Walk the ranking closest-first; the single nearest item may be one the
    # user never rated, which previously surfaced as a KeyError.
    for _distance, neighbour in neighbours:
        if neighbour in ratings:
            return ratings[neighbour]
    raise KeyError("user %r has not rated any neighbouring item" % (user,))
p@23	81
p@23	82 # Fitness function of EDA
def Fitness(profile, user):
    """Score a candidate profile by the rating of its most similar item.

    The module-level ``items`` are ranked by Manhattan distance to
    ``profile`` and the rating that ``user`` gave to the closest item they
    have rated is returned.

    NOTE(review): the previous body referenced the undefined names
    ``itemName`` and ``itemVector`` and raised NameError on every call.
    Using ``profile`` as the query vector is the minimal reading of that
    code; confirm this is the intended fitness measure.

    Args:
        profile: feature vector with the same length as the vectors in
            ``items``.
        user: key into the module-level ``users`` dict.

    Returns:
        The user's rating of the nearest rated item.

    Raises:
        IndexError: if ``items`` is empty.
        KeyError: if ``user`` is unknown or has rated none of the items.
    """
    # A profile is not itself an item, so no key is excluded from the search
    # (None never equals an item key).
    neighbours = computeNearestNeighbor(None, profile, items)
    if not neighbours:
        raise IndexError("no items to compare the profile against")
    ratings = users[user]
    for _distance, neighbour in neighbours:
        if neighbour in ratings:
            return ratings[neighbour]
    raise KeyError("user %r has not rated any item" % (user,))
p@23	87
p@23	88
p@17	89 # Pearson Correlation Coefficient
def pearson(rating1, rating2):
    """Compute the Pearson correlation over the keys shared by two mappings.

    Uses the single-pass computational formula (running sums of x, y, xy,
    x^2 and y^2), restricted to keys present in both inputs.

    Args:
        rating1: mapping of key -> numeric value.
        rating2: mapping of key -> numeric value.

    Returns:
        The correlation coefficient, limited to [-1.0, 1.0].  Returns 0 when
        the inputs share no key or when either side has zero variance over
        the shared keys.
    """
    n = 0
    # Float accumulators keep every division below a true division, even for
    # integer ratings under Python 2.
    sum_x = sum_y = sum_xy = sum_x2 = sum_y2 = 0.0
    for key in rating1:
        if key not in rating2:
            continue
        n += 1
        x = rating1[key]
        y = rating2[key]
        sum_xy += x * y
        sum_x += x
        sum_y += y
        sum_x2 += x ** 2
        sum_y2 += y ** 2
    if n == 0:
        return 0
    # Rounding can push a (near-)zero variance slightly below zero, which
    # would make sqrt() raise ValueError; clamp it instead.
    var_x = max(sum_x2 - sum_x ** 2 / n, 0.0)
    var_y = max(sum_y2 - sum_y ** 2 / n, 0.0)
    denominator = sqrt(var_x) * sqrt(var_y)
    if denominator == 0:
        return 0
    r = (sum_xy - sum_x * sum_y / n) / denominator
    # Guard against results such as 1.0000000000000002 caused by rounding.
    return max(-1.0, min(1.0, r))
p@17	116
p@17	117 # Cosine Similarity for test purposes
def cosine_similarity(rating1, rating2):
    """Compute the cosine similarity between two sparse rating mappings.

    The dot product runs over the keys present in both mappings, while each
    norm is taken over all values of its own mapping.

    Args:
        rating1: mapping of key -> numeric value.
        rating2: mapping of key -> numeric value.

    Returns:
        ``dot / (|rating1| * |rating2|)``, or 0 when the mappings share no
        key or when either norm is zero.
    """
    shared = 0
    dot = 0
    for key in rating1:
        if key not in rating2:
            continue
        shared += 1
        dot += rating1[key] * rating2[key]
    if not shared:
        return 0

    # Squared norms over every entry of each mapping, shared or not.
    norm1_sq = 0
    for key in rating1:
        norm1_sq += rating1[key] ** 2

    norm2_sq = 0
    for key in rating2:
        norm2_sq += rating2[key] ** 2

    denominator = sqrt(norm1_sq) * sqrt(norm2_sq)
    if denominator != 0:
        return dot / denominator
    return 0
p@17	146
p@23	147
p@17	148 '''
p@17	149 def Fitness(profile, user_index):
p@17	150 sim = 0
p@17	151 sum_log = 0
p@17	152
p@17	153 features = profile.items()[user_index][1]
p@17	154 songs = users.items()[user_index][1]
p@17	155
p@17	156 for song, rating in songs.items():
p@17	157 sim = pearson(features, items[song])
p@17	158 print(sim)
p@17	159
p@17	160 for username, songs in users.items():
p@17	161 for song, rating in songs.items():
p@17	162 sim = pearson(profile, items[song])
p@17	163 #sum_log += log10(rating * sim)
p@17	164 return sim
p@17	165 '''
p@23	166 # Generation of M individuals uniformly
population_size = len(users)
fraction_of_population = 0.5
np.random.seed(len(users))

# Number of features per individual, taken from an arbitrary item vector.
# next(iter(...)) works on both Python 2 and 3, where dict views cannot be
# indexed.
n_features = len(next(iter(items.values())))

# One uniformly distributed row in [0, 1) per individual.  Drawing with a 2-D
# size consumes the random stream in the same order as drawing a flat array
# and reshaping it.
M = np.random.uniform(size=(population_size, n_features))

# Name each individual "Profile<i>"; the list conversion is done once rather
# than on every iteration.
profile = {}
for i, row in enumerate(M.tolist()):
    profile["Profile" + str(i)] = row

# Calculate fitness values
# NOTE(review): this passes the whole population dict and an integer index;
# confirm that matches the arguments Fitness expects.
Fitness(profile, 0)
p@17	182
p@21	183
p@21	184
p@21	185
p@21	186
p@21	187
# Gaussian mixture demo on synthetic 1-D data.  The bare expressions below
# compute values and discard them (REPL-style statements).
np.random.seed(1)
# NOTE(review): the data has two modes but the model is built with seven
# components - confirm that this is intended.
g = mixture.GMM(n_components=7)

# Training data: 100 samples centred on 0 followed by 300 samples centred
# on 10, both with unit variance, stacked into a single (400, 1) column.
obs = np.concatenate(
    [np.random.randn(100, 1), np.random.randn(300, 1) + 10],
    axis=0,
)
g.fit(obs)

# Fitted parameters, rounded to two decimals.
np.round(g.weights_, decimals=2)
np.round(g.means_, decimals=2)
np.round(g.covars_, decimals=2)

# Component assignment and score for a few probe points.
g.predict([[0], [2], [9], [10]])
np.round(g.score([[0], [2], [9], [10]]), decimals=2)

# Fit again on new data with an even split between the two modes
# (20 points at 0 and 20 points at 10).
g.fit([[0]] * 20 + [[10]] * 20)
np.round(g.weights_, decimals=2)

Mercurial > hg > hybrid-music-recommender-using-content-based-and-social-information

annotate Code/eda.py @ 23:45e6f85d0ba4