comparison gmm_baseline_experiments/run_experiments.py @ 5:b523456082ca tip

Update path to dataset and reflect modified chunk naming convention.
author peterf
date Mon, 01 Feb 2016 21:35:27 +0000
parents cb535b80218a
children
comparison
equal deleted inserted replaced
4:39258b875228 5:b523456082ca
19 from compute_performance_statistics import compute_performance_statistics 19 from compute_performance_statistics import compute_performance_statistics
20 import pdb 20 import pdb
21 21
22 Settings = {'paths':{}, 'algorithms':{}} 22 Settings = {'paths':{}, 'algorithms':{}}
23 Settings['paths'] = {'chime_home': {}, 'resultsdir':'/import/c4dm-scratch/peterf/audex/results/', 'featuresdir':'/import/c4dm-scratch/peterf/audex/features/'} 23 Settings['paths'] = {'chime_home': {}, 'resultsdir':'/import/c4dm-scratch/peterf/audex/results/', 'featuresdir':'/import/c4dm-scratch/peterf/audex/features/'}
24 Settings['paths']['chime_home'] = {'basepath':'/import/c4dm-02/people/peterf/audex/datasets/chime_home/'} 24 Settings['paths']['chime_home'] = {'basepath':'/import/c4dm-02/people/peterf/audex/datasets/chime_home/release/'}
25 25
26 #Read data sets and class assignments 26 #Read data sets and class assignments
27 Datasets = {'chime_home':{}} 27 Datasets = {'chime_home':{}}
28 28
29 #Read in annotations 29 #Read in annotations
30 Chunks = list(Series.from_csv(Settings['paths']['chime_home']['basepath'] + 'release_chunks_refined.csv',header=None)) 30 Chunks = list(Series.from_csv(Settings['paths']['chime_home']['basepath'] + 'chunks_refined.csv',header=None))
31 Annotations = [] 31 Annotations = []
32 for chunk in Chunks: 32 for chunk in Chunks:
33 Annotations.append(Series.from_csv(Settings['paths']['chime_home']['basepath'] + 'chunks/' + chunk + '.csv')) 33 Annotations.append(Series.from_csv(Settings['paths']['chime_home']['basepath'] + 'chunks/' + chunk + '.csv'))
34 Datasets['chime_home']['dataset'] = DataFrame(Annotations) 34 Datasets['chime_home']['dataset'] = DataFrame(Annotations)
35 35
36 #Compute label statistics 36 #Compute label statistics
37 Datasets['chime_home']['labelstats'] = defaultdict(lambda: 0) 37 Datasets['chime_home']['labelstats'] = defaultdict(lambda: 0)
38 for item in Datasets['chime_home']['dataset']['majorityvote']: 38 for item in Datasets['chime_home']['dataset']['majorityvote']:
39 for label in item: 39 for label in item:
40 Datasets['chime_home']['labelstats'][label] += 1 40 Datasets['chime_home']['labelstats'][label] += 1
41 #Labels to consider for multilabel classification -- based on label set used in Stowell and Plumbley (2013) 41 #Labels to consider for multilabel classification
42 Datasets['chime_home']['consideredlabels'] = ['c', 'b', 'f', 'm', 'o', 'p', 'v'] 42 Datasets['chime_home']['consideredlabels'] = ['c', 'b', 'f', 'm', 'o', 'p', 'v']
43 #Populate binary label assignments 43 #Populate binary label assignments
44 for label in Datasets['chime_home']['consideredlabels']: 44 for label in Datasets['chime_home']['consideredlabels']:
45 Datasets['chime_home']['dataset'][label] = [label in item for item in Datasets['chime_home']['dataset']['majorityvote']] 45 Datasets['chime_home']['dataset'][label] = [label in item for item in Datasets['chime_home']['dataset']['majorityvote']]
46 #Obtain statistics for considered labels 46 #Obtain statistics for considered labels
47 sum(Datasets['chime_home']['dataset'][Datasets['chime_home']['consideredlabels']]) / len(Datasets['chime_home']['dataset']) 47 sum(Datasets['chime_home']['dataset'][Datasets['chime_home']['consideredlabels']]) / len(Datasets['chime_home']['dataset'])
48 #Create partition for 10-fold cross-validation. Shuffling ensures each fold has approximately equal proportion of label ocurrences 48 #Create partition for 10-fold cross-validation. Shuffling ensures each fold has approximately equal proportion of label occurrences
49 np.random.seed(475686) 49 np.random.seed(475686)
50 Datasets['chime_home']['crossval_10fold'] = cross_validation.KFold(len(Datasets['chime_home']['dataset']), 10, shuffle=True) 50 Datasets['chime_home']['crossval_10fold'] = cross_validation.KFold(len(Datasets['chime_home']['dataset']), 10, shuffle=True)
51 51
52 Datasets['chime_home']['dataset']['wavfile'] = Datasets['chime_home']['dataset']['chunkname'].apply(lambda s: Settings['paths']['chime_home']['basepath'] + 'chunks/' + s + '.wav') 52 Datasets['chime_home']['dataset']['wavfile'] = Datasets['chime_home']['dataset']['chunkname'].apply(lambda s: Settings['paths']['chime_home']['basepath'] + 'chunks/' + s + '.48kHz.wav')
53 53
54 #Extract features and assign them to Datasets structure 54 #Extract features and assign them to Datasets structure
55 for dataset in Datasets.keys(): 55 for dataset in Datasets.keys():
56 picklepath = os.path.join(Settings['paths']['featuresdir'],'features_' + dataset) 56 picklepath = os.path.join(Settings['paths']['featuresdir'],'features_' + dataset)
57 if not(os.path.isfile(picklepath)): 57 if not(os.path.isfile(picklepath)):