changeset 26:e4bcfe00abf4

Final version of code
author Paulo Chiliguano <p.e.chiilguano@se14.qmul.ac.uk>
date Wed, 26 Aug 2015 02:00:48 +0100
parents fafc0b249a73
children ae650489d3a8
files Code/content_based.py Code/eda.py Code/eda_discrete.py Code/split_dataset.py Dataset/cross_validation.pkl Report/Logo.png Report/abstract/abstract.tex Report/acknowledgements/acknowledgements.tex Report/chapter1/introduction.tex Report/chapter2/background.tex Report/chapter3/ch3.tex Report/chapter5/results.tex Report/chiliguano_msc_finalproject.blg Report/chiliguano_msc_finalproject.pdf Report/chiliguano_msc_finalproject.synctex.gz Report/chiliguano_msc_finalproject.tex Report/chiliguano_msc_finalproject.toc Report/references.bib
diffstat 18 files changed, 378 insertions(+), 468 deletions(-) [+]
line wrap: on
line diff
--- a/Code/content_based.py	Sun Aug 23 16:47:54 2015 +0100
+++ b/Code/content_based.py	Wed Aug 26 02:00:48 2015 +0100
@@ -9,6 +9,7 @@
 from math import sqrt
 import numpy as np
 import pandas as pd
+import time
 
 # Item-vector dictionary
 f = file('/Users/paulochiliguano/Documents/msc-project/dataset/\
@@ -16,6 +17,18 @@
 song_library = pickle.load(f)
 f.close()
 
+# Normalisation
+#test = []
+#for k, v in song_library.iteritems():
+#    test.append(v)
+#test = np.array(test)
+#test_median = np.median(test, axis=0)
+#test_abs = abs(test - test_median)
+#test_asd = test_abs.sum(axis=0) / test.shape[0]
+#for k, v in song_library.iteritems():
+#    modified_standard_score = (np.array(v) - test_median) / test_asd
+#    song_library[k] = modified_standard_score.tolist()
+
 # Load training and test data
 f = file('/Users/paulochiliguano/Documents/msc-project/dataset/\
 cross_validation.pkl', 'rb')
@@ -35,45 +48,26 @@
     dem2 = sum(map(lambda w_j: (w_j - avrg_w_j) ** 2, vector_j))
     return num / (sqrt(dem1) * sqrt(dem2))
 
-def computeNearestNeighbor(itemName, itemVector, items):
-    """creates a sorted list of items based on their distance to item"""
-    similarities = []
-    for otherItem in items:
-        if otherItem != itemName:
-            sim = adj_cos_sim(itemVector, items[otherItem])
-            similarities.append((sim, otherItem))
-    # sort based on distance -- closest first
-    similarities.sort(reverse=True)
-    #if len(similarities) > N:
-        #similarities = similarities[0:N]
-    return similarities
-
-def nearest_neighbours(song, train_songs, N):
-    similarities = []
-    for k in train_songs:
-        sim = adj_cos_sim(song_library[song], song_library[k])
-        similarities.append((sim, k))
-    similarities.sort(reverse=True)
-    #if len(similarities) > N:
-        #similarities = similarities[0:N] 
-    return similarities
-    #return {t[1]: t[0] for t in similarities}
-
-def build_model_cb(song_library, k=30):
-    other_songs = song_library.keys()
+def build_model_cb(train_data, k=30):
+    a = []
+    for user, info in train_data.iteritems():
+        a.extend([i for i in info])
+    songIDs = list(set(a))       
+    #other_songs = song_library.keys()
+    
     similarity_matrix = {}
-    for song in song_library:
+    for song in songIDs:
         similarities = []
-        for other in other_songs:
+        for other in songIDs:
             if other != song:
                 sim = adj_cos_sim(song_library[song], song_library[other])
                 similarities.append((sim, other))
         similarities.sort(reverse=True)
         similarity_matrix[song] = similarities[0:k]
+    
     return similarity_matrix
         #similarity_rows[song] = {t[1]: t[0] for t in similarities}
 
-
 def top_n(sim_matrix, user, song_rating, rating_threshold=2, N=10): 
     candidate = pd.DataFrame()
     entries = song_rating.keys()
@@ -99,7 +93,7 @@
     
     return list(topN.head(N).keys())
 
-def evaluate_cb(topN, test_data, rating_threshold=3):    
+def evaluate_cb(topN, test_data, rating_threshold=2):    
     
     tp = 0.
     fp = 0.
@@ -119,22 +113,35 @@
                 elif rating <= rating_threshold:
                     tn += 1
     print tp, fp, fn, tn
-    precision = tp / (tp + fp)
-    recall = tp / (tp + fn)
-    F1 = 2 * precision * recall / (precision + recall)
+    if tp != 0:
+        precision = tp / (tp + fp)
+        recall = tp / (tp + fn)
+        F1 = 2 * precision * recall / (precision + recall)
+    else:
+        precision = 0
+        recall = 0
+        F1 = 0
+    
     accuracy = (tp + tn) / (tp + fp + tn + fn)
     
     return precision, recall, F1, accuracy
-  
-sim_matrix = build_model_cb(song_library, 30)
+
 p = np.array([])
 f = np.array([])
 r = np.array([])
 a = np.array([])
+
 for i in range(len(users_train)):
+ 
+    start_time = time.time()
+    sim_matrix = build_model_cb(users_train[i])
+    
     topN = {}
     for user, song_rating in users_train[i].iteritems():
         topN[user] = top_n(sim_matrix, user, song_rating)
+    elapsed_time = time.time() - start_time
+    print 'Training execution time: %.3f seconds' % elapsed_time
+        
     pi, ri, fi, ai = evaluate_cb(topN, users_test[i])
     
     p = np.append(p, pi)
@@ -142,7 +149,6 @@
     f = np.append(f, fi)
     a = np.append(a, ai)
     
-    
 print "Precision = %f3 ± %f3" % (p.mean(), p.std())
 print "Recall = %f3 ± %f3" % (r.mean(), r.std())
 print "F1 = %f3 ± %f3" % (f.mean(), f.std())
--- a/Code/eda.py	Sun Aug 23 16:47:54 2015 +0100
+++ b/Code/eda.py	Wed Aug 26 02:00:48 2015 +0100
@@ -10,7 +10,7 @@
 import numpy as np
 import pandas as pd
 import cPickle as pickle
-#import random
+import time
 
 # Item-vector dictionary
 f = file('/Users/paulochiliguano/Documents/msc-project/dataset/\
@@ -59,7 +59,7 @@
         #fitness_value += score * cosine_similarity(profile, song_library[songID])
     return fitness_value
 
-def users_likes_subset(users, rating_threshold=3):
+def users_likes_subset(users, rating_threshold=2):
     # Subset of most-liked items
     users_subset = {}
     for userID, songs in users.iteritems():
@@ -77,7 +77,7 @@
         
     return users_subset
 
-def eda_train(users_subset, max_gen=200):
+def eda_train(users_subset, max_gen=250):
     # TRAINING
     num_features = len(song_library.values()[0])
     # Given parameters for EDA
@@ -98,6 +98,7 @@
         profile_u[userID] = M.tolist()[i]
         i += 1
 
+    fitnesses = []
     generation = 0
     while generation < max_gen:
         # Compute fitness values
@@ -111,7 +112,8 @@
             users_fitness.items(),
             columns=["userID", "fitness"]
         )
-    
+        fitnesses.append(users_fitness_df.fitness.values.tolist())
+        
         # Selection of best individuals based on fitness values
         best_individuals = {}
         users_fitness_df = users_fitness_df.sort(columns='fitness')
@@ -148,19 +150,26 @@
             i += 1
         generation += 1
     
-    return profile_u
+    return profile_u, D, np.array(fitnesses)
 
 # Similarity matrix
 def cb_similarity(profileID, profile_data, test_data, N):
+    
+    a = []
+    for user, info in test_data.iteritems():
+        a.extend([i for i in info])
+    songIDs = list(set(a))
+    
     ''' Content-based: Similarity matrix '''
     similarity = []
-    for songID in test_data[profileID]:
+    for songID in songIDs:
         sim = adj_cos_sim(profile_data, song_library[songID])
         similarity.append((sim, songID))
-    # Top-N recommendation
-    #similarity.sort(reverse=True)
-    #if len(similarity) > N:
-        #similarity = similarity[0:N]
+    
+    ''' Top-N recommendation '''
+    similarity.sort(reverse=True)
+    if len(similarity) > N:
+        similarity = similarity[0:N]
         
     #sim_matrix[userID] = {t[1]: t[0] for t in similarity}
     return {t[1]: t[0] for t in similarity}
@@ -169,8 +178,8 @@
     profiles,
     test_data,
     N=10,
-    rating_threshold=3,
-    EDA_treshold=0.3):    
+    rating_threshold=2,
+    EDA_treshold=0.5):    
     
     ''' Evaluation '''
     
@@ -183,21 +192,43 @@
     fp = 0.
     fn = 0.
     tn = 0.
-    for userID, songID_sim in sim_matrix.iteritems():
-        for songID, sim_value in songID_sim.iteritems():
-            score = test_data[userID][songID]
-            if score > rating_threshold and sim_value >= EDA_treshold:
-                tp += 1
-            elif score <= rating_threshold and sim_value >= EDA_treshold:
-                fp += 1
-            elif score > rating_threshold and sim_value < EDA_treshold:
-                fn += 1
-            elif score <= rating_threshold and sim_value < EDA_treshold:
-                tn += 1
     
-    precision = tp / (tp + fp)
-    recall = tp / (tp + fn)
-    F1 = 2 * precision * recall / (precision + recall)
+    for user, song_rating in test_data.iteritems():
+        entries = sim_matrix[user]
+        for song, rating in song_rating.iteritems():
+            if song in entries:
+                if rating > rating_threshold:
+                    tp += 1
+                elif rating <= rating_threshold:
+                    fp += 1   
+            else:
+                if rating > rating_threshold:
+                    fn += 1
+                elif rating <= rating_threshold:
+                    tn += 1
+    
+    
+#    for userID, songID_sim in sim_matrix.iteritems():
+#        for songID, sim_value in songID_sim.iteritems():
+#            score = test_data[userID][songID]
+#            if score > rating_threshold and sim_value >= EDA_treshold:
+#                tp += 1
+#            elif score <= rating_threshold and sim_value >= EDA_treshold:
+#                fp += 1
+#            elif score > rating_threshold and sim_value < EDA_treshold:
+#                fn += 1
+#            elif score <= rating_threshold and sim_value < EDA_treshold:
+#                tn += 1
+    print tp, fp, fn, tn
+    if tp != 0:
+        precision = tp / (tp + fp)
+        recall = tp / (tp + fn)
+        F1 = 2 * precision * recall / (precision + recall)
+    else:
+        precision = 0
+        recall = 0
+        F1 = 0
+    
     accuracy = (tp + tn) / (tp + fp + tn + fn)
     
     return precision, recall, F1, accuracy
@@ -227,8 +258,11 @@
 a = np.array([])
 
 for i in range(len(users_train)):
+    start_time = time.time()
+    profile_u, prob, fffitness = eda_train(users_likes_subset(users_train[i]))
+    elapsed_time = time.time() - start_time
+    print 'Training execution time: %.3f seconds' % elapsed_time
     
-    profile_u = eda_train(users_likes_subset(users_train[i]))
     pi, ri, fi, ai = evaluate_eda(profile_u, users_test[i])
     p = np.append(p, pi)
     r = np.append(r, ri)
@@ -244,193 +278,3 @@
 print "Recall = %f3 ± %f3" % (r.mean(), r.std())
 print "F1 = %f3 ± %f3" % (f.mean(), f.std())
 print "Accuracy = %f3 ± %f3" % (a.mean(), a.std())
-   
-'''# Collaborative-filtering: Similarity matrix
-sim_matrix_cf = {}
-count = 0
-for userID_1 in profile:
-    similarities = {}
-    for userID_2 in profile:
-        if userID_1 != userID_2:
-            similarities[userID_2] = adj_cos_sim(
-                profile[userID_1],
-                profile[userID_2]
-            )
-            #print similarities
-    sim_matrix_cf[userID_1] = similarities'''
-
-# Predicted rating
-#for userID in users:
-#    print np.array(users[userID].values()).mean()
-
-'''scores_above_threshold = {
-        songID: score for songID, score in songs.iteritems() if score > rating_threshold
-    }'''
-
-'''for key, value in sorted(similarity.iteritems(), key=lambda (k,v): (v,k), reverse=True):
-                print "%s: %s" % (key, value)
-                break'''
-
-
-# Recommend new item
-
-'''
-def computeNearestNeighbor(itemName, itemVector, items):
-    """creates a sorted list of items based on their distance to item"""
-    distances = []
-    for otherItem in items:
-        if otherItem != itemName:
-            distance = adj_cos_sim(itemVector, items[otherItem])
-            distances.append((distance, otherItem))
-    # sort based on distance -- closest first
-    distances.sort(reverse=True)
-    return distances
-
-def classify(user, itemName, itemVector):
-    """Classify the itemName based on user ratings
-    Should really have items and users as parameters"""
-    # first find nearest neighbor
-    nearest = computeNearestNeighbor(itemName, itemVector, song_library)[0][1]
-    rating = users[user][nearest]
-    return rating
-'''
-# Source: guidetodatamining.com
-'''def computeSimilarity(band1, band2, userRatings):
-    averages = {}
-    for (key, ratings) in userRatings.items():
-        averages[key] = (float(sum(ratings.values())) / len(ratings.values()))
-    num = 0 # numerator
-    dem1 = 0 # first half of denominator
-    dem2 = 0
-    for (user, ratings) in userRatings.items():
-        if band1 in ratings and band2 in ratings:
-            avg = averages[user]
-            num += (ratings[band1] - avg) * (ratings[band2] - avg)
-            dem1 += (ratings[band1] - avg)**2
-            dem2 += (ratings[band2] - avg)**2
-    return num / (sqrt(dem1) * sqrt(dem2))'''
-
-'''
-    sum_xy = 0
-    sum_x2 = 0
-    sum_y2 = 0
-    n = 0
-    for key in rating1:
-        if key in rating2:
-            n += 1
-            x = rating1[key]
-            y = rating2[key]
-            sum_xy += x * y
-    if n == 0:
-        return 0
-    
-    # now compute denominator
-    for key in rating1:
-        x = rating1[key]
-        sum_x2 += pow(x, 2)
-    
-    for key in rating2:
-        y = rating2[key]
-        sum_y2 += pow(y, 2)
-    
-    denominator = sqrt(sum_x2) * sqrt(sum_y2)
-    if denominator == 0:
-        return 0
-    else:
-        return sum_xy / denominator'''
-
-'''
-# Median
-# http://stackoverflow.com/questions/24101524/finding-median-of-list-in-python
-def get_median(lst):
-    return np.median(np.array(lst))
-
-# Absolute Standard Deviation
-def get_asd(lst, median):
-    sum = 0
-    for item in lst:
-        sum += abs(item - median)
-    return sum / len(lst)
-
-# Normalisation rating with Modified Standard Score
-def normalize_rating(ratings, median, asd):
-    for i in range(len(ratings)):
-        ratings[i] = (ratings[i] - median) / asd
-    return ratings
-'''
-
-'''
-# Pearson Correlation Coefficient
-def pearson(rating1, rating2):
-    sum_xy = 0
-    sum_x = 0
-    sum_y = 0
-    sum_x2 = 0
-    sum_y2 = 0
-    n = 0
-    for key in rating1:
-        if key in rating2:
-            n += 1
-            x = rating1[key]
-            y = rating2[key]
-            sum_xy += x * y
-            sum_x += x
-            sum_y += y
-            sum_x2 += pow(x, 2)
-            sum_y2 += pow(y, 2)
-    if n == 0:
-        return 0
-    # now compute denominator
-    denominator = sqrt(sum_x2 - pow(sum_x, 2) / n) * \
-                  sqrt(sum_y2 - pow(sum_y, 2) / n)
-    if denominator == 0:
-        return 0
-    else:
-        return (sum_xy - (sum_x * sum_y) / n) / denominator
-'''
-'''
-def buckets(filename, bucketName, separator, classColumn):
-    """the original data is in the file named filename
-    bucketName is the prefix for all the bucket names
-    separator is the character that divides the columns
-    (for ex., a tab or comma and classColumn is the column
-    that indicates the class"""
-    # put the data in 10 buckets
-    numberOfBuckets = 10
-    data = {}
-    # first read in the data and divide by category
-    with open(filename) as f:
-        lines = f.readlines()
-    for line in lines:
-        if separator != '\t':
-            line = line.replace(separator, '\t')
-            # first get the category
-            category = line.split()[classColumn]
-            data.setdefault(category, [])
-            data[category].append(line)
-    # initialize the buckets
-    buckets = []
-    for i in range(numberOfBuckets):
-        buckets.append([])
-    # now for each category put the data into the buckets
-    for k in song_library.keys():
-        #randomize order of instances for each class
-        print random.shuffle(song_library[k])
-        bNum = 0
-        # divide into buckets
-        for item in song_library[k]:
-            buckets[bNum].append(item)
-            bNum = (bNum + 1) % numberOfBuckets
-    # write to file
-    for bNum in range(numberOfBuckets):
-        f = open("%s-%02i" % (bucketName, bNum + 1), 'w')
-        for item in buckets[bNum]:
-            f.write(item)
-        f.close()
-'''
-
-'''# Functions to compute similarity between items or between profiles
-# Source: http://www.guidetodatamining.com
-def manhattan(vector1, vector2):
-    """Computes the Manhattan distance."""
-    return sum(map(lambda v1, v2: abs(v1 - v2), vector1, vector2))'''
--- a/Code/eda_discrete.py	Sun Aug 23 16:47:54 2015 +0100
+++ b/Code/eda_discrete.py	Wed Aug 26 02:00:48 2015 +0100
@@ -10,7 +10,7 @@
 import numpy as np
 import pandas as pd
 import cPickle as pickle
-#import random
+import time
 
 # Item-vector dictionary
 f = file('/Users/paulochiliguano/Documents/msc-project/dataset/\
@@ -60,7 +60,7 @@
         #fitness_value += score * cosine_similarity(profile, song_library[songID])
     return fitness_value
 
-def users_likes_subset(users, rating_threshold=3):
+def users_likes_subset(users, rating_threshold=2):
     # Subset of most-liked items
     users_subset = {}
     for userID, songs in users.iteritems():
@@ -78,7 +78,7 @@
         
     return users_subset
 
-def eda_train(users_subset, max_gen=1000):
+def eda_train(users_subset, max_gen=250):
     # TRAINING
     num_features = len(song_library.values()[0])
     # Given parameters for EDA
@@ -123,6 +123,8 @@
         profile_u[userID] = feature_v
         profile_aux[userID] = [(k, v) for k, v in b.iteritems()]
 
+    fitnesses = []
+    
     generation = 0
     while generation < max_gen:
         # Compute fitness values
@@ -136,6 +138,7 @@
             users_fitness.items(),
             columns=["userID", "fitness"]
         )
+        fitnesses.append(users_fitness_df.fitness.values.tolist())
     
         # Selection of best individuals based on fitness values
         #best_individuals = {}
@@ -170,23 +173,29 @@
                 feature_v[k] = v
             profile_u[userID] = feature_v
             profile_aux[userID] = [(k, v) for k, v in b.iteritems()]
-            
+
         generation += 1
     
-    return profile_u, p
+    return profile_u, p, np.array(fitnesses)
 
 # Similarity matrix
 def cb_similarity(profileID, profile_data, test_data, N):
+    
+    a = []
+    for user, info in test_data.iteritems():
+        a.extend([i for i in info])
+    songIDs = list(set(a))    
+    
     ''' Content-based: Similarity matrix '''
     similarity = []
     #keys_a = train_data[profileID].keys()
-    for songID in test_data[profileID]:
+    for songID in songIDs:
         sim = adj_cos_sim(profile_data, song_library[songID])
         similarity.append((sim, songID))
-    # Top-N recommendation
-    #similarity.sort(reverse=True)
-    #if len(similarity) > N:
-        #similarity = similarity[0:N]
+    ''' Top-N recommendation '''
+    similarity.sort(reverse=True)
+    if len(similarity) > N:
+        similarity = similarity[0:N]
         
     #sim_matrix[userID] = {t[1]: t[0] for t in similarity}
     return {t[1]: t[0] for t in similarity}
@@ -195,7 +204,7 @@
     profiles,
     test_data,
     N=10,
-    rating_threshold=3,
+    rating_threshold=2,
     EDA_treshold=0.5):    
     
     ''' Evaluation '''
@@ -209,21 +218,42 @@
     fp = 0.
     fn = 0.
     tn = 0.
-    for userID, songID_sim in sim_matrix.iteritems():
-        for songID, sim_value in songID_sim.iteritems():
-            score = test_data[userID][songID]
-            if score > rating_threshold and sim_value >= EDA_treshold:
-                tp += 1
-            elif score <= rating_threshold and sim_value >= EDA_treshold:
-                fp += 1
-            elif score > rating_threshold and sim_value < EDA_treshold:
-                fn += 1
-            elif score <= rating_threshold and sim_value < EDA_treshold:
-                tn += 1
     
-    precision = tp / (tp + fp)
-    recall = tp / (tp + fn)
-    F1 = 2 * precision * recall / (precision + recall)
+    for user, song_rating in test_data.iteritems():
+        entries = sim_matrix[user]
+        for song, rating in song_rating.iteritems():
+            if song in entries:
+                if rating > rating_threshold:
+                    tp += 1
+                elif rating <= rating_threshold:
+                    fp += 1   
+            else:
+                if rating > rating_threshold:
+                    fn += 1
+                elif rating <= rating_threshold:
+                    tn += 1    
+    
+#    for userID, songID_sim in sim_matrix.iteritems():
+#        for songID, sim_value in songID_sim.iteritems():
+#            score = test_data[userID][songID]
+#            if score > rating_threshold and sim_value >= EDA_treshold:
+#                tp += 1
+#            elif score <= rating_threshold and sim_value >= EDA_treshold:
+#                fp += 1
+#            elif score > rating_threshold and sim_value < EDA_treshold:
+#                fn += 1
+#            elif score <= rating_threshold and sim_value < EDA_treshold:
+#                tn += 1
+    print tp, fp, fn, tn
+    if tp != 0:
+        precision = tp / (tp + fp)
+        recall = tp / (tp + fn)
+        F1 = 2 * precision * recall / (precision + recall)
+    else:
+        precision = 0
+        recall = 0
+        F1 = 0
+    
     accuracy = (tp + tn) / (tp + fp + tn + fn)
     
     return precision, recall, F1, accuracy
@@ -234,8 +264,11 @@
 a = np.array([])
 
 for i in range(len(users_train)):
+    start_time = time.time()
+    profile_u, prob, fffitness = eda_train(users_likes_subset(users_train[i]))
+    elapsed_time = time.time() - start_time
+    print 'Training execution time: %.3f seconds' % elapsed_time
     
-    profile_u, prob = eda_train(users_likes_subset(users_train[i]))
     pi, ri, fi, ai = evaluate_eda(profile_u, users_test[i])
     p = np.append(p, pi)
     r = np.append(r, ri)
--- a/Code/split_dataset.py	Sun Aug 23 16:47:54 2015 +0100
+++ b/Code/split_dataset.py	Wed Aug 26 02:00:48 2015 +0100
@@ -69,32 +69,51 @@
 users_df = users_df[users_df.song.isin(available_clips)]
 
 # Users with more than 50 ratings
-users_df = users_df.groupby('user').filter(lambda x: len(x) >= 50)
+#users_df = users_df.groupby('user').filter(lambda x: len(x) >= 50)
 
 # Normalise users' rating
 users_norm_df = pd.DataFrame()
 for k, v in users_df.groupby("user"):
-    users_norm_df = users_norm_df.append(normalise_user_play_count(v))
+    norm = normalise_user_play_count(v)
+    users_norm_df = users_norm_df.append(norm)
+#    counts = norm['plays'].value_counts()
+#    if counts[counts.index == 5].values > 0:
+#        users_norm_df = users_norm_df.append(norm)
+    
+#for k, v in users_norm_df.groupby('user'):
+#    counts = v['plays'].value_counts()
+#    df = v.loc[v['plays'].isin(counts[counts >= 5].index), :]
+#    print df
 
 trial = 10
 users_train = []
 users_test = []
-#songs_train = []
-#songs_test = []
+#highest_rating = [4, 5]
+#lowest_rating = [1, 2, 3]
 for i in range(trial):
     test_df = pd.DataFrame()
     train_df = pd.DataFrame()
     for k, v in users_norm_df.groupby("user"):
+#        likes = v.loc[v['plays'].isin(highest_rating)]
+#        dislikes = v.loc[v['plays'].isin(lowest_rating)]
+#        test_like_index = np.random.choice(
+#            likes.index,
+#            1,
+#            replace=False
+#        )
+#        test_dislike_index = np.random.choice(
+#            dislikes.index,
+#            1,
+#            replace=False
+#        )
+#        test_index = np.append(test_like_index, test_dislike_index)
+#        test_index = test_like_index
         test_index = np.random.choice(
             v.index,
             int(len(v.index) / 5),
             replace=False
         )
-#        test_index = np.random.choice(
-#            v.index,
-#            1,
-#            replace=False
-#        )
+
         test_df = test_df.append(v.loc[test_index])
         train_df = train_df.append(v.loc[~v.index.isin(test_index)])
     
@@ -111,86 +130,9 @@
         users_test[i][k] = {
             x: y["plays"].values[0] for x, y in v.groupby("song")
         }
-        
-    #songs_test.append([])
-    #songs_test[i] = list(users_test.drop_duplicates(subset='song').song)
-    
-    #songs_test.append([])
-    #songs_test[i] = list(users_test.drop_duplicates(subset='song').song)'''
-
-# Filtered song library
-
-
 
 # Save training and test sets
 f = file('/Users/paulochiliguano/Documents/msc-project/dataset/\
 cross_validation.pkl', 'wb')
 pickle.dump((users_train, users_test), f, protocol=pickle.HIGHEST_PROTOCOL)
 f.close()
-
-
-# Ground truth dictionary
-#users_ground_truth = {}
-#for k, v in users_norm_df.groupby("user"):
-    #users_ground_truth[k] = {
-        #x: y["plays"].values[0] for x, y in v.groupby("song")
-    #}
-
-# Ground truth dictionary
-#users_ground_truth = {}
-#for k, v in users_ground_truth_df.groupby("user"):
-    #users_ground_truth[k] = {
-        #x: y["plays"].values[0] for x, y in v.groupby("song")
-    #}
-
-'''
-
-# Dataset for training/test
-songIDs = song_library.keys()
-dataset_keys = set(songIDs)
-random.shuffle(songIDs)
-
-folds = 10
-fold_size = int(round(len(songIDs) / folds))
-
-song_library_test = []
-song_library_train = []
-users_train = []
-users_test = []
-
-for i in range(folds):
-    song_library_test.append([])
-    song_library_test[i] = {
-        k: song_library[k] for k in songIDs[
-            (i * fold_size):((i + 1) * fold_size)
-        ]
-    }
-    
-    test_keys = set(song_library_test[i].keys())
-    train_keys = dataset_keys - test_keys
-    song_library_train.append([])
-    song_library_train[i] = {k: song_library[k] for k in train_keys}
-    
-    users_train_df = users_ground_truth_df[
-        users_ground_truth_df.song.isin(train_keys)
-    ]
-    users_train.append([])
-    users_train[i] = {}
-    for k, v in users_train_df.groupby("user"):
-        users_train[i][k] = {
-            x: y["plays"].values[0] for x, y in v.groupby("song")
-        }
-    
-    users_test_df = users_ground_truth_df[
-        users_ground_truth_df.song.isin(test_keys)
-    ]
-    users_test.append([])
-    users_test[i] = {}
-    for k, v in users_test_df.groupby("user"):
-        users_test[i][k] = {
-            x: y["plays"].values[0] for x, y in v.groupby("song")
-        }
-        
-for k, v in users_norm_df.groupby("user"):
-    test_index = np.random.choice(v.index, int(len(v.index)/10), replace=False)
-    print v.plays.count(), test_index.shape'''
\ No newline at end of file
Binary file Dataset/cross_validation.pkl has changed
Binary file Report/Logo.png has changed
--- a/Report/abstract/abstract.tex	Sun Aug 23 16:47:54 2015 +0100
+++ b/Report/abstract/abstract.tex	Wed Aug 26 02:00:48 2015 +0100
@@ -1,5 +1,5 @@
 \begin{abstract}
 
-.
+This is an abstract.
 
 \end{abstract}
\ No newline at end of file
--- a/Report/acknowledgements/acknowledgements.tex	Sun Aug 23 16:47:54 2015 +0100
+++ b/Report/acknowledgements/acknowledgements.tex	Wed Aug 26 02:00:48 2015 +0100
@@ -1,5 +1,15 @@
-\begin{abstract}
-
-This is an abstract.
-
-\end{abstract}
\ No newline at end of file
+%\renewcommand{\abstractname}{Acknowledgements}
+%\begin{abstract}
+%	Thanks Mum!
+%\end{abstract}
+\section*{Acknowledgements}
+I wish to express my sincere gratitude to Dr. Georgy Fazekas, Lecturer in Digital Media at Queen Mary University of London, for providing me with guidance and valuable suggestions during the planning and development of this project. I also wish to acknowledge the help provided by Dr. Mathieu Barthet and Mr. Tim Kay with access to the research servers.
+\\
+\\
+I would also like to extend my thanks to Dr. Tony Stockman, my academic advisor, for the assistance provided during my time as a student at Queen Mary University of London.
+\\
+\\
+I am particularly grateful to the National Government of the Republic of Ecuador for awarding me a scholarship to pursue a postgraduate degree at a high-quality research university in the United Kingdom of Great Britain and Northern Ireland.
+\\
+\\
+Finally, I wish to thank my parents and brothers for their support and encouragement throughout my study.
\ No newline at end of file
--- a/Report/chapter1/introduction.tex	Sun Aug 23 16:47:54 2015 +0100
+++ b/Report/chapter1/introduction.tex	Wed Aug 26 02:00:48 2015 +0100
@@ -1,28 +1,36 @@
 \setcounter{page}{1}
 \pagenumbering{arabic}
 \chapter{Introduction}
-Music has accompanied social activities on our daily lives and has influenced the shape of the technology landscape that we have today. Portable media players, mobile device management applications or music streaming software enable us to access to a large volume of digital recorded music, that includes songs that are relevant or not to the listener, being necessary to develop services in order to filter appropriate musical pieces to an user.
+Music has accompanied social activities in our daily lives and has influenced the shape of the technology landscape that we have today. Portable media players, mobile device applications and music streaming services give us access to a large volume of digitally recorded music. This vast range of music tracks might include songs that are relevant or not to a listener, so it is necessary to develop facilities that bring out appropriate musical pieces for a user.
 
-Recommender systems can be described as facilities that guide users to interesting objects in a huge space of information. In order to enhance  performance, there is the motivations of hybridization of two or more recommendation techniques.
+Recommender systems can be described as engines that guide users to suitable objects from a large number of options in a particular domain such as books, films or music. The available information of users and items' attributes is analysed and exploited by the recommender systems to produce a list of previously unseen items that each user might find enjoyable. Depending on the analysed data, the design of a recommender can be focused on historical ratings given by users or similarities between the attributes of items that a user already rated.
 
-This project is going to examine a different approach to develop a hybrid music recommender system in order to suggest new items that would be appealing and enjoyable to the users. This system will combine two recommendation techniques. The first technique is collaborative filtering to predict music preferences on the basis of users' information from an online social network (OSN) such as Last.fm, and the second technique is content-based filtering in which acoustical features from audio tracks are correlated to compute their similarities.
+\section{Motivation}
+Because the available information about the relationship between users and items tends to be sparse, e.g., most users do not give enough ratings, the accuracy of predictions decreases. Another disadvantage of traditional recommender systems, referred to as the \textit{cold-start problem}, arises when a new item cannot be recommended until it gets enough ratings, or, equivalently, when a new user does not have any ratings \citep{melville2010recommender}. In order to alleviate the rating sparsity and cold-start problems, there is the motivation to combine two or more recommendation designs into hybrid approaches.
 
-Users' information will be obtained from the complementary Taste Profile subset, which is a part of the Million Song Dataset. The music library will be consolidated by crawling songs' information via 7digital API.
+Deep learning is an approach to artificial intelligence for describing raw data as a nested hierarchy of concepts, with each abstract concept defined in terms of simpler representations. For example, deep learning can describe high-level features of an image of a car such as position, colour or brightness of the object, in terms of contours, which are in turn represented in terms of edges \citep{Bengio-et-al-2015-Book}.
 
-A convolutional neural network (CNN), which is a deep learning model, will be employed for describing the audio files of the music library. Estimation of distribution algorithms (EDA), which are optimization methods in statistics and machine learning, will be investigated to model user profiles that will be comparable with the features of the audio files to predict ratings and produce new item recommendations.
+Inspired by the natural evolution of species, Estimation of Distribution Algorithms (EDAs) \citep{larranaga2002estimation} are robust techniques developed during the last decade for optimisation in the fields of Statistics and Machine Learning. EDAs can capture the explicit structure of a population with a probability distribution estimated from the best individuals of that population.
 
-The evaluation of the hybrid recommender system will be assessed by prediction accuracy and performance comparison with a typical content-based system.
+\section{Aim}
+We aim to design and implement a hybrid music recommender to suggest new music tracks that a user would find appealing and enjoyable. The architecture of our hybrid recommender combines two recommendation techniques.
 
+%The first technique is collaborative filtering to predict music preferences on the basis of users' information from an online social network (OSN) such as Last.fm,
+
+The second technique is \textit{content-based filtering}, where recommendations are produced by computing similarities between representations of the content of items that a user has already rated.
+
+
+in which  are correlated to compute similarities between them.
+
+Users' information is obtained from the Taste Profile dataset, which is a complementary subset of the Million Song Dataset\footnote{http://labrosa.ee.columbia.edu/millionsong/}. The music library that contains sample audio clips of the rated songs in the Taste Profile dataset is consolidated by fetching audio files using the 7digital API.
+
+A convolutional neural network (CNN), which is a deep learning model, is employed to describe each audio file of the music library with an n-dimensional vector, whose dimensions represent music genres.
+
+An Estimation of Distribution Algorithm (EDA) technique is implemented to model user profiles in terms of music genres in order to compare each profile with the vector representation of the audio clips to compute similarities between them. Recommendation is achieved by choosing the clips with the highest similarity values.
+
+Our hybrid music recommender will be evaluated by comparing its prediction accuracy with that of a traditional content-based recommender.
+
+%that as automatically as possible analyses the multi-track input audio signals, finds and measures the masking phenomenon and uses Equaliser to reduce or solve the problem.
 \section{Outline of the thesis}
 
-The rest of the report is organised as follows:
-
-\textbf{Chapter 2} reviews related work with deep learning techniques and Estimation of Distribution Algorithms on recommendation systems.
-
-\textbf{Chapter 3} explains the proposed approach of the hybrid system for recommending new music items.
-
-\textbf{Chapter 4} addresses the experiments and the evaluation scenarios of the performance for the hybrid recommender system.
-
-\textbf{Chapter 5} discusses and analyses the results from the conducted experiments to evaluate the performance of the proposed hybrid music recommender system approach.
-
-\textbf{Chapter 6} presents the conclusions and some thoughts for further research.
\ No newline at end of file
+The rest of the report is organised as follows: Chapter 2 provides an overview of recommender systems. The recommendation process, associated challenges, and related work based on state-of-the-art techniques are discussed. In Chapter 3, we present our proposed hybrid recommendation approach and describe the stages and algorithms in detail. The experiments and evaluation protocols used to assess the performance of the hybrid recommender are presented in Chapter 4. In Chapter 5, we discuss and analyse the results from the conducted experiments to evaluate the proposed hybrid music recommender. In Chapter 6, we present the conclusions and some thoughts for further research.
\ No newline at end of file
--- a/Report/chapter2/background.tex	Sun Aug 23 16:47:54 2015 +0100
+++ b/Report/chapter2/background.tex	Wed Aug 26 02:00:48 2015 +0100
@@ -64,6 +64,8 @@
 Estimation of distribution algorithms (EDAs) \citep{pelikan2015estimation} are optimisation techniques by constructing a probabilistic model from a sample of solutions, generating a new population and leading to an optimal solution \citep{Santana:Bielza:Larrañaga:Lozano:Echegoyen:Mendiburu:Armañanzas:Shakya:2009:JSSOBK:v35i07}.
 
 \citet{Liang2014781} exploited an EDA to model user profiles by using weighted featured vectors of keywords from a set of items that the user had rated above a threshold.
+
+These algorithms have been applied to complex problems such as load balancing for mobile networks \citep{Hejazi15} or software reliability prediction.
 \\
 \\
 In this chapter, previous work on recommender systems has been reviewed and novelty techniques for representing acoustical features and for modelling user profiles has been presented. The next step is to implement the algorithms to collect the dataset by crawling online social information, to extract the acoustical features of a collection of songs for representing them as vectors, to model the user profiles by an EDA, and therefore, to return predicted recommendations.
\ No newline at end of file
--- a/Report/chapter3/ch3.tex	Sun Aug 23 16:47:54 2015 +0100
+++ b/Report/chapter3/ch3.tex	Wed Aug 26 02:00:48 2015 +0100
@@ -28,7 +28,11 @@
 The Python script for feature extraction implemented by \citet{Sigtia20146959} is modified to return the log-mel spectrograms by using the LibROSA\footnote{https://bmcfee.github.io/librosa/index.html} package.
 
 ``Representations of music directly from the temporal or spectral domain can be very sensitive to small time and frequency deformations''. \citep{zhang2014deep}
-
+\section{Data preprocessing}
+\begin{itemize}
+	\item Rating complementary cumulative distribution
+	\item Flattening spectrogram Sidsig
+\end{itemize}
 \section{Algorithms}
 \subsection{CNN architecture}
 The input of the CNN consist of the 128-component spectrograms obtained in feature extraction. The batch size considered is 20 frames.
--- a/Report/chapter5/results.tex	Sun Aug 23 16:47:54 2015 +0100
+++ b/Report/chapter5/results.tex	Wed Aug 26 02:00:48 2015 +0100
@@ -1,1 +1,36 @@
 \chapter{Results}
+%\begin{minipage}{\textwidth}
+	%\centering
+	%\includegraphics[scale=.6]{"Project Images/Figure_GA02".png}
+	%\caption{\label{fig:frog1}This is a figure caption.}
+%\end{minipage}
+
+
+fadslkfjdsalfjdsalf
+\begin{table}[tbp]
+	\caption{Genre classification Results} % title of Table
+	\centering % used for centering table
+		\begin{tabular}{c c c c c} % centered columns (5 columns)
+		\hline\hline %inserts double horizontal lines
+		Trial & Validation error (\%) & Test error (\%) & Iterations & Time elapsed (min.) \\ [0.5ex] % inserts table
+		%heading
+		\hline % inserts single horizontal line
+		1 & 58.0 & 65.2 & 650 & 7.00 \\ % inserting body of the table
+		2 & 37.6 & 46.0 & 2150 & 13.07 \\
+		3 & 39.6 & 46.0 & 700 & 7.54 \\
+		4 & 35.6 & 36.8 & 550 & 6.01 \\
+		5 & 36.4 & 40.0 & 250 & 5.47 \\
+		6 & 40.4 & 44.8 & 150 & 5.41 \\
+		7 & 32.4 & 40.4 & 800 & 8.64 \\
+		8 & 36.0 & 38.8 & 250 & 5.42 \\
+		9 & 34.0 & 38.8 & 850 & 9.14 \\ [1ex] % [1ex] adds vertical space
+		\hline %inserts single line
+	\end{tabular}
+	\label{table:nonlin} % is used to refer this table in the text
+\end{table}
+el mindsafa se va a cabarakl;dflakdjfl;akds
+dflk;djflkajflkajf
+jlkd;fjlk;ajdlf;kajsld
+
+
+jkdl;fkaj
\ No newline at end of file
--- a/Report/chiliguano_msc_finalproject.blg	Sun Aug 23 16:47:54 2015 +0100
+++ b/Report/chiliguano_msc_finalproject.blg	Wed Aug 26 02:00:48 2015 +0100
@@ -1,6 +1,8 @@
 This is BibTeX, Version 0.99d (TeX Live 2015)
 Capacity: max_strings=35307, hash_size=35307, hash_prime=30011
 The top-level auxiliary file: chiliguano_msc_finalproject.aux
+A level-1 auxiliary file: acknowledgements/acknowledgements.aux
+A level-1 auxiliary file: abstract/abstract.aux
 A level-1 auxiliary file: chapter1/introduction.aux
 A level-1 auxiliary file: chapter2/background.aux
 A level-1 auxiliary file: chapter3/ch3.aux
@@ -12,45 +14,45 @@
 Warning--empty institution in export:115396
 Warning--empty booktitle in Sigtia20146959
 Warning--empty booktitle in Sturm20127
-You've used 15 entries,
+You've used 18 entries,
             2909 wiz_defined-function locations,
-            765 strings with 9134 characters,
-and the built_in function-call counts, 73392 in all, are:
-= -- 11955
-> -- 1038
-< -- 1
-+ -- 5753
-- -- 317
-* -- 6401
-:= -- 14530
-add.period$ -- 21
-call.type$ -- 15
-change.case$ -- 209
-chr.to.int$ -- 15
-cite$ -- 18
-duplicate$ -- 280
-empty$ -- 553
-format.name$ -- 371
-if$ -- 13265
+            784 strings with 9691 characters,
+and the built_in function-call counts, 85694 in all, are:
+= -- 13970
+> -- 1179
+< -- 2
++ -- 6696
+- -- 359
+* -- 7452
+:= -- 16983
+add.period$ -- 24
+call.type$ -- 18
+change.case$ -- 250
+chr.to.int$ -- 18
+cite$ -- 21
+duplicate$ -- 338
+empty$ -- 664
+format.name$ -- 431
+if$ -- 15496
 int.to.chr$ -- 5
 int.to.str$ -- 0
-missing$ -- 12
-newline$ -- 66
-num.names$ -- 162
-pop$ -- 70
+missing$ -- 16
+newline$ -- 76
+num.names$ -- 192
+pop$ -- 89
 preamble$ -- 1
-purify$ -- 225
+purify$ -- 270
 quote$ -- 0
-skip$ -- 241
+skip$ -- 286
 stack$ -- 0
-substring$ -- 16955
-swap$ -- 69
-text.length$ -- 211
+substring$ -- 19771
+swap$ -- 83
+text.length$ -- 254
 text.prefix$ -- 0
 top$ -- 0
-type$ -- 60
+type$ -- 70
 warning$ -- 3
-while$ -- 339
+while$ -- 406
 width$ -- 0
-write$ -- 231
+write$ -- 271
 (There were 3 warnings)
Binary file Report/chiliguano_msc_finalproject.pdf has changed
Binary file Report/chiliguano_msc_finalproject.synctex.gz has changed
--- a/Report/chiliguano_msc_finalproject.tex	Sun Aug 23 16:47:54 2015 +0100
+++ b/Report/chiliguano_msc_finalproject.tex	Wed Aug 26 02:00:48 2015 +0100
@@ -64,14 +64,7 @@
 	%\end{doublespace}
 	\flushright
 	{\normalsize \today}%\hspace{0.5cm}
-	%Supervisor\hspace{0.5cm}
-	
-	
-	
-	%Author\hspace{0.5cm}
-	
-	%{\large Nicolas Julien Roussin}\hspace{0.5cm}
-	%\maketitle
+
 \end{minipage}
 
 %\title{Hybrid music recommender using content-based and social information}
@@ -91,18 +84,17 @@
 % \title{Hybrid music recommender using content-based and social information}
 % \qualification{Master of Science} 
 
-
-% \include{acknowledgements/acknowledgements}
+\include{acknowledgements/acknowledgements}
 
 % \begin{summary}
 % \end{summary}
-%\include{abstract/abstract}
+\include{abstract/abstract}
 
 
 
 \tableofcontents
-%\listoffigures
-%\listoftables
+\listoffigures
+\listoftables
 
 % could also have a \listoftables, but this example doesn't include any
 
--- a/Report/chiliguano_msc_finalproject.toc	Sun Aug 23 16:47:54 2015 +0100
+++ b/Report/chiliguano_msc_finalproject.toc	Wed Aug 26 02:00:48 2015 +0100
@@ -1,36 +1,39 @@
 \contentsline {chapter}{\numberline {1}Introduction}{1}{chapter.1}
-\contentsline {section}{\numberline {1.1}Outline of the thesis}{2}{section.1.1}
-\contentsline {chapter}{\numberline {2}Background}{4}{chapter.2}
-\contentsline {section}{\numberline {2.1}Online Social Networks}{5}{section.2.1}
-\contentsline {subsection}{\numberline {2.1.1}Last.fm}{5}{subsection.2.1.1}
-\contentsline {section}{\numberline {2.2}Music services platforms}{6}{section.2.2}
-\contentsline {subsection}{\numberline {2.2.1}Echonest}{6}{subsection.2.2.1}
-\contentsline {subsection}{\numberline {2.2.2}7Digital}{6}{subsection.2.2.2}
-\contentsline {section}{\numberline {2.3}Recommender Systems}{6}{section.2.3}
-\contentsline {subsection}{\numberline {2.3.1}Collaborative filtering}{6}{subsection.2.3.1}
-\contentsline {subsection}{\numberline {2.3.2}Content-based methods}{7}{subsection.2.3.2}
-\contentsline {section}{\numberline {2.4}Hybrid recommender methods}{7}{section.2.4}
-\contentsline {section}{\numberline {2.5}Music Information Retrieval}{7}{section.2.5}
-\contentsline {subsection}{\numberline {2.5.1}Musical genre classification}{7}{subsection.2.5.1}
-\contentsline {subsection}{\numberline {2.5.2}Deep Learning}{8}{subsection.2.5.2}
-\contentsline {subsection}{\numberline {2.5.3}Convolutional Neural Networks}{8}{subsection.2.5.3}
-\contentsline {section}{\numberline {2.6}Estimation of Distribution Algorithms}{9}{section.2.6}
-\contentsline {chapter}{\numberline {3}Methodology}{10}{chapter.3}
-\contentsline {section}{\numberline {3.1}Data collection}{10}{section.3.1}
-\contentsline {subsection}{\numberline {3.1.1}Taste Profile subset cleaning}{11}{subsection.3.1.1}
-\contentsline {subsection}{\numberline {3.1.2}Audio clips retrieval}{11}{subsection.3.1.2}
-\contentsline {subsection}{\numberline {3.1.3}Intermediate time-frequency representation for audio signals}{12}{subsection.3.1.3}
-\contentsline {section}{\numberline {3.2}Algorithms}{12}{section.3.2}
-\contentsline {subsection}{\numberline {3.2.1}CNN architecture}{12}{subsection.3.2.1}
-\contentsline {subsubsection}{Genre classification}{13}{section*.2}
-\contentsline {subsection}{\numberline {3.2.2}Continuous Bayesian EDA}{13}{subsection.3.2.2}
-\contentsline {subsection}{\numberline {3.2.3}EDA-based hybrid recommender}{13}{subsection.3.2.3}
-\contentsline {chapter}{\numberline {4}Experiments}{14}{chapter.4}
-\contentsline {section}{\numberline {4.1}Evaluation for recommender systems}{14}{section.4.1}
-\contentsline {subsection}{\numberline {4.1.1}Types of experiments}{14}{subsection.4.1.1}
-\contentsline {section}{\numberline {4.2}Evaluation method}{16}{section.4.2}
-\contentsline {subsection}{\numberline {4.2.1}Dataset description}{16}{subsection.4.2.1}
-\contentsline {subsection}{\numberline {4.2.2}Evaluation measures}{16}{subsection.4.2.2}
-\contentsline {chapter}{\numberline {5}Results}{17}{chapter.5}
-\contentsline {chapter}{\numberline {6}Conclusion}{18}{chapter.6}
-\contentsline {section}{\numberline {6.1}Future work}{18}{section.6.1}
+\contentsline {section}{\numberline {1.1}Motivation}{2}{section.1.1}
+\contentsline {section}{\numberline {1.2}Aim}{3}{section.1.2}
+\contentsline {section}{\numberline {1.3}Outline of the thesis}{4}{section.1.3}
+\contentsline {chapter}{\numberline {2}Background}{5}{chapter.2}
+\contentsline {section}{\numberline {2.1}Online Social Networks}{6}{section.2.1}
+\contentsline {subsection}{\numberline {2.1.1}Last.fm}{6}{subsection.2.1.1}
+\contentsline {section}{\numberline {2.2}Music services platforms}{7}{section.2.2}
+\contentsline {subsection}{\numberline {2.2.1}Echonest}{7}{subsection.2.2.1}
+\contentsline {subsection}{\numberline {2.2.2}7Digital}{7}{subsection.2.2.2}
+\contentsline {section}{\numberline {2.3}Recommender Systems}{7}{section.2.3}
+\contentsline {subsection}{\numberline {2.3.1}Collaborative filtering}{7}{subsection.2.3.1}
+\contentsline {subsection}{\numberline {2.3.2}Content-based methods}{8}{subsection.2.3.2}
+\contentsline {section}{\numberline {2.4}Hybrid recommender methods}{8}{section.2.4}
+\contentsline {section}{\numberline {2.5}Music Information Retrieval}{8}{section.2.5}
+\contentsline {subsection}{\numberline {2.5.1}Musical genre classification}{8}{subsection.2.5.1}
+\contentsline {subsection}{\numberline {2.5.2}Deep Learning}{9}{subsection.2.5.2}
+\contentsline {subsection}{\numberline {2.5.3}Convolutional Neural Networks}{9}{subsection.2.5.3}
+\contentsline {section}{\numberline {2.6}Estimation of Distribution Algorithms}{10}{section.2.6}
+\contentsline {chapter}{\numberline {3}Methodology}{11}{chapter.3}
+\contentsline {section}{\numberline {3.1}Data collection}{11}{section.3.1}
+\contentsline {subsection}{\numberline {3.1.1}Taste Profile subset cleaning}{12}{subsection.3.1.1}
+\contentsline {subsection}{\numberline {3.1.2}Audio clips retrieval}{12}{subsection.3.1.2}
+\contentsline {subsection}{\numberline {3.1.3}Intermediate time-frequency representation for audio signals}{13}{subsection.3.1.3}
+\contentsline {section}{\numberline {3.2}Data preprocessing}{13}{section.3.2}
+\contentsline {section}{\numberline {3.3}Algorithms}{14}{section.3.3}
+\contentsline {subsection}{\numberline {3.3.1}CNN architecture}{14}{subsection.3.3.1}
+\contentsline {subsubsection}{Genre classification}{14}{section*.5}
+\contentsline {subsection}{\numberline {3.3.2}Continuous Bayesian EDA}{14}{subsection.3.3.2}
+\contentsline {subsection}{\numberline {3.3.3}EDA-based hybrid recommender}{14}{subsection.3.3.3}
+\contentsline {chapter}{\numberline {4}Experiments}{15}{chapter.4}
+\contentsline {section}{\numberline {4.1}Evaluation for recommender systems}{15}{section.4.1}
+\contentsline {subsection}{\numberline {4.1.1}Types of experiments}{15}{subsection.4.1.1}
+\contentsline {section}{\numberline {4.2}Evaluation method}{17}{section.4.2}
+\contentsline {subsection}{\numberline {4.2.1}Dataset description}{17}{subsection.4.2.1}
+\contentsline {subsection}{\numberline {4.2.2}Evaluation measures}{17}{subsection.4.2.2}
+\contentsline {chapter}{\numberline {5}Results}{18}{chapter.5}
+\contentsline {chapter}{\numberline {6}Conclusion}{20}{chapter.6}
+\contentsline {section}{\numberline {6.1}Future work}{20}{section.6.1}
--- a/Report/references.bib	Sun Aug 23 16:47:54 2015 +0100
+++ b/Report/references.bib	Wed Aug 26 02:00:48 2015 +0100
@@ -1,3 +1,32 @@
+@article{Hejazi15,
+	author={Hejazi,S. A. and Stapleton,S. P.},
+	year={2015},
+	title={A self-organized network for load balancing using intelligent distributed antenna system},
+	journal={Canadian Journal of Electrical and Computer Engineering},
+	volume={38},
+	number={2},
+	pages={89-99},
+	url={www.scopus.com},
+}
+
+
+@book{larranaga2002estimation,
+	title={Estimation of distribution algorithms: A new tool for evolutionary computation},
+	author={Larranaga, Pedro and Lozano, Jose A},
+	volume={2},
+	year={2002},
+	publisher={Springer Science \& Business Media}
+}
+
+@incollection{melville2010recommender,
+	title={Recommender systems},
+	author={Melville, Prem and Sindhwani, Vikas},
+	booktitle={Encyclopedia of machine learning},
+	pages={829--838},
+	year={2010},
+	publisher={Springer}
+}
+
 @incollection{Lops2011,
 	year={2011},
 	isbn={978-0-387-85819-7},