Mercurial > hg > hybrid-music-recommender-using-content-based-and-social-information
view Code/latent_vectors.py @ 18:c0a08cbdfacd
First script
author | Paulo Chiliguano <p.e.chiliguano@se14.qmul.ac.uk> |
---|---|
date | Tue, 28 Jul 2015 20:58:57 +0100 |
parents | c63dac455296 |
children | 2e3c57fba632 |
line wrap: on
line source
# -*- coding: utf-8 -*-
"""
Build a user-by-song play-count matrix and run weighted matrix factorization.

The script has three stages:

1. Read the song IDs listed in ``audio_files.txt``.
2. Stream the play-count triplets CSV in chunks, keep only the rows whose
   song is in that list, pivot them into a user x song frame and pickle its
   sparse form.
3. Load ``test_matrix.pkl`` and factorize it with the ``wmf`` module.

Created on Mon Jul 20 13:37:43 2015

@author: Paulo
"""
import csv
import itertools

import numpy as np
import pandas as pd

import wmf

# Read songID of downloaded audio clips.
# NOTE(review): binary mode is what the Python 2 csv module expects; under
# Python 3 csv.reader needs a text-mode file (open(path, newline='')).
# Confirm the target interpreter before changing it.
with open('/homes/pchilguano/dataset/audio_files.txt', 'rb') as input1:
    available = list(csv.reader(input1))
# Flatten the parsed CSV rows into one flat list of IDs.
chain1 = list(itertools.chain.from_iterable(available))

# Sparse user-item matrix.
# The filtered chunks are collected first and pivoted once at the end.
# Pivoting every chunk separately and appending with ignore_index=True threw
# away the 'user' index (rows could no longer be traced back to a user) and
# produced several partial rows for any user whose triplets crossed a chunk
# boundary. A single concat also avoids re-copying the growing frame on every
# iteration.
filtered_chunks = []
for chunk in pd.read_csv(
        '/homes/pchilguano/dataset/train_triplets_wo_mismatches.csv',
        low_memory=False, delim_whitespace=False, chunksize=10000,
        names=['user', 'song', 'plays'], header=None):
    filtered_chunks.append(chunk[chunk.song.isin(chain1)])

if filtered_chunks:
    triplets = pd.concat(filtered_chunks, ignore_index=True)
    # One row per user, one column per song; missing pairs become NaN.
    # pivot raises ValueError if a (user, song) pair occurs more than once.
    result = triplets.pivot(index='user', columns='song', values='plays')
else:
    # Empty input file: pd.concat would raise on an empty list, so keep the
    # empty frame the script previously ended up with in this case.
    result = pd.DataFrame()
print (result.shape)

sresult = result.to_sparse()
sresult.to_pickle('/homes/pchilguano/dataset/taste_profile_sparse.pkl')

# Weight Matrix Factorization
# NOTE(review): the input here is "test_matrix.pkl" (relative path), not the
# taste_profile_sparse.pkl written above -- confirm this is intended.
# NOTE(review): np.load on a pickle file unpickles its contents; only use it
# on files you produced yourself (newer NumPy also requires
# allow_pickle=True).
B = np.load("test_matrix.pkl")
S = wmf.log_surplus_confidence_matrix(B, alpha=2.0, epsilon=1e-6)
U, V = wmf.factorize(
    S,
    num_factors=40,
    lambda_reg=1e-5,
    num_iterations=2,
    init_std=0.01,
    verbose=True,
    dtype='float32',
    recompute_factors=wmf.recompute_factors_bias,
)