Mercurial > hg > chime-home-dataset-annotation-and-baseline-evaluation-code
comparison gmm_baseline_experiments/run_experiments.py @ 5:b523456082ca tip
Update path to dataset and reflect modified chunk naming convention.
author | peterf |
---|---|
date | Mon, 01 Feb 2016 21:35:27 +0000 |
parents | cb535b80218a |
children |
comparison
equal
deleted
inserted
replaced
4:39258b875228 | 5:b523456082ca |
---|---|
19 from compute_performance_statistics import compute_performance_statistics | 19 from compute_performance_statistics import compute_performance_statistics |
20 import pdb | 20 import pdb |
21 | 21 |
22 Settings = {'paths':{}, 'algorithms':{}} | 22 Settings = {'paths':{}, 'algorithms':{}} |
23 Settings['paths'] = {'chime_home': {}, 'resultsdir':'/import/c4dm-scratch/peterf/audex/results/', 'featuresdir':'/import/c4dm-scratch/peterf/audex/features/'} | 23 Settings['paths'] = {'chime_home': {}, 'resultsdir':'/import/c4dm-scratch/peterf/audex/results/', 'featuresdir':'/import/c4dm-scratch/peterf/audex/features/'} |
24 Settings['paths']['chime_home'] = {'basepath':'/import/c4dm-02/people/peterf/audex/datasets/chime_home/'} | 24 Settings['paths']['chime_home'] = {'basepath':'/import/c4dm-02/people/peterf/audex/datasets/chime_home/release/'} |
25 | 25 |
26 #Read data sets and class assignments | 26 #Read data sets and class assignments |
27 Datasets = {'chime_home':{}} | 27 Datasets = {'chime_home':{}} |
28 | 28 |
29 #Read in annotations | 29 #Read in annotations |
30 Chunks = list(Series.from_csv(Settings['paths']['chime_home']['basepath'] + 'release_chunks_refined.csv',header=None)) | 30 Chunks = list(Series.from_csv(Settings['paths']['chime_home']['basepath'] + 'chunks_refined.csv',header=None)) |
31 Annotations = [] | 31 Annotations = [] |
32 for chunk in Chunks: | 32 for chunk in Chunks: |
33 Annotations.append(Series.from_csv(Settings['paths']['chime_home']['basepath'] + 'chunks/' + chunk + '.csv')) | 33 Annotations.append(Series.from_csv(Settings['paths']['chime_home']['basepath'] + 'chunks/' + chunk + '.csv')) |
34 Datasets['chime_home']['dataset'] = DataFrame(Annotations) | 34 Datasets['chime_home']['dataset'] = DataFrame(Annotations) |
35 | 35 |
36 #Compute label statistics | 36 #Compute label statistics |
37 Datasets['chime_home']['labelstats'] = defaultdict(lambda: 0) | 37 Datasets['chime_home']['labelstats'] = defaultdict(lambda: 0) |
38 for item in Datasets['chime_home']['dataset']['majorityvote']: | 38 for item in Datasets['chime_home']['dataset']['majorityvote']: |
39 for label in item: | 39 for label in item: |
40 Datasets['chime_home']['labelstats'][label] += 1 | 40 Datasets['chime_home']['labelstats'][label] += 1 |
41 #Labels to consider for multilabel classification -- based on label set used in Stowell and Plumbley (2013) | 41 #Labels to consider for multilabel classification |
42 Datasets['chime_home']['consideredlabels'] = ['c', 'b', 'f', 'm', 'o', 'p', 'v'] | 42 Datasets['chime_home']['consideredlabels'] = ['c', 'b', 'f', 'm', 'o', 'p', 'v'] |
43 #Populate binary label assignments | 43 #Populate binary label assignments |
44 for label in Datasets['chime_home']['consideredlabels']: | 44 for label in Datasets['chime_home']['consideredlabels']: |
45 Datasets['chime_home']['dataset'][label] = [label in item for item in Datasets['chime_home']['dataset']['majorityvote']] | 45 Datasets['chime_home']['dataset'][label] = [label in item for item in Datasets['chime_home']['dataset']['majorityvote']] |
46 #Obtain statistics for considered labels | 46 #Obtain statistics for considered labels |
47 sum(Datasets['chime_home']['dataset'][Datasets['chime_home']['consideredlabels']]) / len(Datasets['chime_home']['dataset']) | 47 sum(Datasets['chime_home']['dataset'][Datasets['chime_home']['consideredlabels']]) / len(Datasets['chime_home']['dataset']) |
48 #Create partition for 10-fold cross-validation. Shuffling ensures each fold has approximately equal proportion of label ocurrences | 48 #Create partition for 10-fold cross-validation. Shuffling ensures each fold has approximately equal proportion of label occurrences |
49 np.random.seed(475686) | 49 np.random.seed(475686) |
50 Datasets['chime_home']['crossval_10fold'] = cross_validation.KFold(len(Datasets['chime_home']['dataset']), 10, shuffle=True) | 50 Datasets['chime_home']['crossval_10fold'] = cross_validation.KFold(len(Datasets['chime_home']['dataset']), 10, shuffle=True) |
51 | 51 |
52 Datasets['chime_home']['dataset']['wavfile'] = Datasets['chime_home']['dataset']['chunkname'].apply(lambda s: Settings['paths']['chime_home']['basepath'] + 'chunks/' + s + '.wav') | 52 Datasets['chime_home']['dataset']['wavfile'] = Datasets['chime_home']['dataset']['chunkname'].apply(lambda s: Settings['paths']['chime_home']['basepath'] + 'chunks/' + s + '.48kHz.wav') |
53 | 53 |
54 #Extract features and assign them to Datasets structure | 54 #Extract features and assign them to Datasets structure |
55 for dataset in Datasets.keys(): | 55 for dataset in Datasets.keys(): |
56 picklepath = os.path.join(Settings['paths']['featuresdir'],'features_' + dataset) | 56 picklepath = os.path.join(Settings['paths']['featuresdir'],'features_' + dataset) |
57 if not(os.path.isfile(picklepath)): | 57 if not(os.path.isfile(picklepath)): |