Mercurial > hg > hybrid-music-recommender-using-content-based-and-social-information
view Code/prepare_dataset.py @ 15:2e3c57fba632
Convolutional Neural Network code
Scratch for Estimation of Distribution Algorithm
author | Paulo Chiliguano <p.e.chiilguano@se14.qmul.ac.uk> |
---|---|
date | Sat, 25 Jul 2015 21:51:16 +0100 |
parents | |
children | 68b8b088f50a |
line wrap: on
line source
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 23 21:55:58 2015

@author: paulochiliguano

Build a pickled train/validation/test dataset.

Reads the array stored at node ``/x`` of an HDF5 feature file, flattens each
example into a single row, maps the tag found in the second tab-separated
column of the ground-truth list to an integer class index (tags sorted
alphabetically), and pickles
``((train_input, train_target), (valid_input, valid_target),
(test_input, test_target))`` using an 80% / 10% / 10% split in file order.

Raises:
    ValueError: if a non-blank ground-truth line has fewer than two
        tab-separated fields, or if the number of labels differs from the
        number of feature rows.
"""

import tables
import numpy as np
import cPickle

# PyTables 3.x renamed ``openFile`` to ``open_file``; use whichever this
# installation provides so the script runs against both API generations.
_open_h5 = getattr(tables, 'open_file', None) or tables.openFile

filename = '/homes/pchilguano/deep_learning/features/feats.h5'
with _open_h5(filename, 'r') as f:
    features = f.root.x.read()

# One row per example: keep the first axis, collapse every other axis.
flat_data = features.reshape(features.shape[0], -1)

filename = '/homes/pchilguano/deep_learning/lists/ground_truth.txt'
tags = []
with open(filename, 'r') as f:
    # Single pass over the list; the tag is the second tab-separated field.
    for line_number, line in enumerate(f, 1):
        fields = line.strip().split('\t')
        if fields == ['']:
            # Blank line (e.g. a trailing newline at end of file).
            continue
        if len(fields) < 2:
            raise ValueError(
                '%s:%d: expected at least two tab-separated fields, got %r'
                % (filename, line_number, line)
            )
        tags.append(fields[1])

# Class index = position of the tag in the alphabetically sorted tag set.
tag_set = set(tags)
tag_dict = {tag: index for index, tag in enumerate(sorted(tag_set))}

# Built in one conversion instead of np.append per line (which reallocates
# the whole array each time). float64 is kept because appending to an empty
# array produced float64 labels, and consumers of the pickle may rely on it.
target = np.asarray([tag_dict[tag] for tag in tags], dtype=np.float64)

# Inputs and targets are split independently below, so they only stay paired
# if both have the same number of rows.
if target.shape[0] != flat_data.shape[0]:
    raise ValueError(
        'feature file has %d examples but ground truth has %d labels'
        % (flat_data.shape[0], target.shape[0])
    )

# First 80% -> train, next 10% -> validation, remaining 10% -> test.
# ``//`` makes the integer division explicit.
n_examples = flat_data.shape[0]
split_points = [n_examples * 4 // 5, n_examples * 9 // 10]
train_input, valid_input, test_input = np.array_split(flat_data, split_points)
train_target, valid_target, test_target = np.array_split(target, split_points)

# ``with`` guarantees the file is closed even if pickling fails.
with open('/homes/pchilguano/deep_learning/gtzan.pkl', 'wb') as f:
    cPickle.dump(
        (
            (train_input, train_target),
            (valid_input, valid_target),
            (test_input, test_target),
        ),
        f,
        protocol=cPickle.HIGHEST_PROTOCOL,
    )