#!/usr/bin/python

#
# run_experiments.py:
# Main script for CHiME-Home dataset baseline GMM evaluation
#
# Author: Peter Foster
# (c) 2015 Peter Foster
#

from pylab import *
from sklearn import cross_validation
import os
from pandas import Series, DataFrame
from collections import defaultdict
from extract_features import FeatureExtractor
import exper002
import custompickler
from compute_performance_statistics import compute_performance_statistics
import pdb

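#Settings: filesystem locations for the dataset, cached features and experiment results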
Settings = {'paths': {}, 'algorithms': {}}
Settings['paths'] = {'chime_home': {},
                     'resultsdir': '/import/c4dm-scratch/peterf/audex/results/',
                     'featuresdir': '/import/c4dm-scratch/peterf/audex/features/'}
Settings['paths']['chime_home'] = {'basepath': '/import/c4dm-02/people/peterf/audex/datasets/chime_home/release/'}

#Read data sets and class assignments
Datasets = {'chime_home': {}}

#Read in annotations
Chunks = list(Series.from_csv(Settings['paths']['chime_home']['basepath'] + 'chunks_refined.csv', header=None))
Annotations = []
for chunk in Chunks:
    Annotations.append(Series.from_csv(Settings['paths']['chime_home']['basepath'] + 'chunks/' + chunk + '.csv'))
Datasets['chime_home']['dataset'] = DataFrame(Annotations)

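#Each chunk's 'majorityvote' annotation is a string of single-character labels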
#Compute label statistics
Datasets['chime_home']['labelstats'] = defaultdict(lambda: 0)
for item in Datasets['chime_home']['dataset']['majorityvote']:
    for label in item:
        Datasets['chime_home']['labelstats'][label] += 1
#Labels to consider for multilabel classification
Datasets['chime_home']['consideredlabels'] = ['c', 'b', 'f', 'm', 'o', 'p', 'v']
#Populate binary label assignments
for label in Datasets['chime_home']['consideredlabels']:
    Datasets['chime_home']['dataset'][label] = [label in item for item in Datasets['chime_home']['dataset']['majorityvote']]
#Obtain statistics for considered labels: proportion of chunks assigned each label
sum(Datasets['chime_home']['dataset'][Datasets['chime_home']['consideredlabels']]) / len(Datasets['chime_home']['dataset'])
#Create partition for 10-fold cross-validation; shuffling gives each fold approximately equal proportions of label occurrences (seed fixed for reproducibility)
np.random.seed(475686)
Datasets['chime_home']['crossval_10fold'] = cross_validation.KFold(len(Datasets['chime_home']['dataset']), 10, shuffle=True)

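#Absolute paths to the 48 kHz audio file for each chunk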
Datasets['chime_home']['dataset']['wavfile'] = Datasets['chime_home']['dataset']['chunkname'].apply(lambda s: Settings['paths']['chime_home']['basepath'] + 'chunks/' + s + '.48kHz.wav')

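#Features are cached to disk; each FeatureList entry is a dict mapping feature type (e.g. 'librosa_mfccs') to the feature array for one chunk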
#Extract features and assign them to Datasets structure
for dataset in Datasets.keys():
    picklepath = os.path.join(Settings['paths']['featuresdir'], 'features_' + dataset)
    if not os.path.isfile(picklepath):
        if dataset == 'chime_home':
            featureExtractor = FeatureExtractor(samplingRate=48000, frameLength=1024, hopLength=512)
        else:
            raise NotImplementedError()
        FeatureList = featureExtractor.files_to_features(Datasets[dataset]['dataset']['wavfile'])
        custompickler.pickle_save(FeatureList, picklepath)
    else:
        FeatureList = custompickler.pickle_load(picklepath)
    #Integrity check
    for features in FeatureList:
        for feature in features.values():
            assert all(isfinite(feature.ravel()))
    Datasets[dataset]['dataset']['features'] = FeatureList

#GMM experiments using the CHiME-Home dataset
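#EXPER005: GMM baseline over component counts 1, 2, 4 and 8, using MFCC features and the 10-fold cross-validation partition defined above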
EXPER005 = {}
EXPER005['name'] = 'GMM_Baseline_EXPER005'
EXPER005['path'] = os.path.join(Settings['paths']['resultsdir'], 'exploratory', 'saved_objects', 'EXPER005')
EXPER005['settings'] = {'numcomponents': (1, 2, 4, 8), 'features': ('librosa_mfccs',)}
EXPER005['datasets'] = {}
EXPER005['datasets']['chime_home'] = exper002.exper002_multilabelclassification(
    Datasets['chime_home']['dataset'],
    Datasets['chime_home']['consideredlabels'],
    Datasets['chime_home']['crossval_10fold'],
    Settings,
    numComponentValues=EXPER005['settings']['numcomponents'],
    featureTypeValues=EXPER005['settings']['features'])
EXPER005 = compute_performance_statistics(EXPER005, Datasets, Settings, iterableParameters=['numcomponents', 'features'])
custompickler.pickle_save(EXPER005, EXPER005['path'])

#Collate results
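#Summarise the per-label AUC of the precision-recall curve for each component count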
def accumulate_results(EXPER):
    EXPER['summaryresults'] = {}
    ds = list(EXPER['datasets'].keys())[0]
    for numComponents in EXPER['settings']['numcomponents']:
        EXPER['summaryresults'][numComponents] = {}
        for label in Datasets[ds]['consideredlabels']:
            EXPER['summaryresults'][numComponents][label] = EXPER['datasets'][ds][(numComponents, 'librosa_mfccs')]['performance']['classwise'][label]['auc_precisionrecall']
    EXPER['summaryresults'] = DataFrame(EXPER['summaryresults'])
accumulate_results(EXPER005)

#Generate plot
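#Grouped bar chart of per-label AUC for each component count k, sized to fit a LaTeX column width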
def plot_performance(EXPER):
    fig_width_pt = 246.0  # Get this from LaTeX using \showthe\columnwidth
    inches_per_pt = 1.0/72.27  # Convert pt to inch
    golden_mean = (sqrt(5)-1.0)/2.0  # Aesthetic ratio
    fig_width = fig_width_pt*inches_per_pt  # width in inches
    fig_height = fig_width*golden_mean  # height in inches
    fig_size = [fig_width, fig_height]
    params = {'backend': 'ps',
              'axes.labelsize': 8,
              'text.fontsize': 8,
              'legend.fontsize': 7.0,
              'xtick.labelsize': 8,
              'ytick.labelsize': 8,
              'text.usetex': False,
              'figure.figsize': fig_size}
    rcParams.update(params)
    ind = np.arange(len(EXPER['summaryresults'][1]))  # the x locations for the groups
    width = 0.22  # the width of the bars
    fig, ax = plt.subplots()
    rects = []
    colours = ('r', 'y', 'g', 'b', 'c')
    for numComponents, i in zip(EXPER['summaryresults'], range(len(EXPER['summaryresults']))):
        rects.append(ax.bar(ind+width*i, EXPER['summaryresults'][numComponents][['c','m','f','v','p','b','o']], width, color=colours[i], align='center'))
    # add text for labels, title and axes ticks
    ax.set_ylabel('AUC')
    ax.set_xlabel('Label')
    ax.set_xticks(ind+width)
    ax.set_xticklabels(('c', 'm', 'f', 'v', 'p', 'b', 'o'))
    ax.legend([rect[0] for rect in rects], ('k=1', 'k=2', 'k=4', 'k=8'), loc='lower right')
    #Tweak axis limits
    ax.set_xlim(left=-0.5)
    ax.set_ylim(top=1.19)
    plt.gcf().subplots_adjust(left=0.15)  #Prevent y-axis label from being chopped off
    def autolabel(r):
        for rects in r:
            for rect in rects:
                height = rect.get_height()
                ax.text(rect.get_x()+0.14, 0.04+height, '%1.2f' % float(height), ha='center', va='bottom', rotation='vertical', size=6.0)
    autolabel(rects)
    plt.draw()
    plt.savefig('figures/predictionperformance' + EXPER['name'] + '.pdf')
plot_performance(EXPER005)