Mercurial > hg > chime-home-dataset-annotation-and-baseline-evaluation-code
comparison gmm_baseline_experiments/run_experiments.py @ 5:b523456082ca tip
Update path to dataset and reflect modified chunk naming convention.
| author | peterf |
|---|---|
| date | Mon, 01 Feb 2016 21:35:27 +0000 |
| parents | cb535b80218a |
| children |
Comparison legend: equal | deleted | inserted | replaced
| 4:39258b875228 | 5:b523456082ca |
|---|---|
| 19 from compute_performance_statistics import compute_performance_statistics | 19 from compute_performance_statistics import compute_performance_statistics |
| 20 import pdb | 20 import pdb |
| 21 | 21 |
| 22 Settings = {'paths':{}, 'algorithms':{}} | 22 Settings = {'paths':{}, 'algorithms':{}} |
| 23 Settings['paths'] = {'chime_home': {}, 'resultsdir':'/import/c4dm-scratch/peterf/audex/results/', 'featuresdir':'/import/c4dm-scratch/peterf/audex/features/'} | 23 Settings['paths'] = {'chime_home': {}, 'resultsdir':'/import/c4dm-scratch/peterf/audex/results/', 'featuresdir':'/import/c4dm-scratch/peterf/audex/features/'} |
| 24 Settings['paths']['chime_home'] = {'basepath':'/import/c4dm-02/people/peterf/audex/datasets/chime_home/'} | 24 Settings['paths']['chime_home'] = {'basepath':'/import/c4dm-02/people/peterf/audex/datasets/chime_home/release/'} |
| 25 | 25 |
| 26 #Read data sets and class assignments | 26 #Read data sets and class assignments |
| 27 Datasets = {'chime_home':{}} | 27 Datasets = {'chime_home':{}} |
| 28 | 28 |
| 29 #Read in annotations | 29 #Read in annotations |
| 30 Chunks = list(Series.from_csv(Settings['paths']['chime_home']['basepath'] + 'release_chunks_refined.csv',header=None)) | 30 Chunks = list(Series.from_csv(Settings['paths']['chime_home']['basepath'] + 'chunks_refined.csv',header=None)) |
| 31 Annotations = [] | 31 Annotations = [] |
| 32 for chunk in Chunks: | 32 for chunk in Chunks: |
| 33 Annotations.append(Series.from_csv(Settings['paths']['chime_home']['basepath'] + 'chunks/' + chunk + '.csv')) | 33 Annotations.append(Series.from_csv(Settings['paths']['chime_home']['basepath'] + 'chunks/' + chunk + '.csv')) |
| 34 Datasets['chime_home']['dataset'] = DataFrame(Annotations) | 34 Datasets['chime_home']['dataset'] = DataFrame(Annotations) |
| 35 | 35 |
| 36 #Compute label statistics | 36 #Compute label statistics |
| 37 Datasets['chime_home']['labelstats'] = defaultdict(lambda: 0) | 37 Datasets['chime_home']['labelstats'] = defaultdict(lambda: 0) |
| 38 for item in Datasets['chime_home']['dataset']['majorityvote']: | 38 for item in Datasets['chime_home']['dataset']['majorityvote']: |
| 39 for label in item: | 39 for label in item: |
| 40 Datasets['chime_home']['labelstats'][label] += 1 | 40 Datasets['chime_home']['labelstats'][label] += 1 |
| 41 #Labels to consider for multilabel classification -- based on label set used in Stowell and Plumbley (2013) | 41 #Labels to consider for multilabel classification |
| 42 Datasets['chime_home']['consideredlabels'] = ['c', 'b', 'f', 'm', 'o', 'p', 'v'] | 42 Datasets['chime_home']['consideredlabels'] = ['c', 'b', 'f', 'm', 'o', 'p', 'v'] |
| 43 #Populate binary label assignments | 43 #Populate binary label assignments |
| 44 for label in Datasets['chime_home']['consideredlabels']: | 44 for label in Datasets['chime_home']['consideredlabels']: |
| 45 Datasets['chime_home']['dataset'][label] = [label in item for item in Datasets['chime_home']['dataset']['majorityvote']] | 45 Datasets['chime_home']['dataset'][label] = [label in item for item in Datasets['chime_home']['dataset']['majorityvote']] |
| 46 #Obtain statistics for considered labels | 46 #Obtain statistics for considered labels |
| 47 sum(Datasets['chime_home']['dataset'][Datasets['chime_home']['consideredlabels']]) / len(Datasets['chime_home']['dataset']) | 47 sum(Datasets['chime_home']['dataset'][Datasets['chime_home']['consideredlabels']]) / len(Datasets['chime_home']['dataset']) |
| 48 #Create partition for 10-fold cross-validation. Shuffling ensures each fold has approximately equal proportion of label ocurrences | 48 #Create partition for 10-fold cross-validation. Shuffling ensures each fold has approximately equal proportion of label occurrences |
| 49 np.random.seed(475686) | 49 np.random.seed(475686) |
| 50 Datasets['chime_home']['crossval_10fold'] = cross_validation.KFold(len(Datasets['chime_home']['dataset']), 10, shuffle=True) | 50 Datasets['chime_home']['crossval_10fold'] = cross_validation.KFold(len(Datasets['chime_home']['dataset']), 10, shuffle=True) |
| 51 | 51 |
| 52 Datasets['chime_home']['dataset']['wavfile'] = Datasets['chime_home']['dataset']['chunkname'].apply(lambda s: Settings['paths']['chime_home']['basepath'] + 'chunks/' + s + '.wav') | 52 Datasets['chime_home']['dataset']['wavfile'] = Datasets['chime_home']['dataset']['chunkname'].apply(lambda s: Settings['paths']['chime_home']['basepath'] + 'chunks/' + s + '.48kHz.wav') |
| 53 | 53 |
| 54 #Extract features and assign them to Datasets structure | 54 #Extract features and assign them to Datasets structure |
| 55 for dataset in Datasets.keys(): | 55 for dataset in Datasets.keys(): |
| 56 picklepath = os.path.join(Settings['paths']['featuresdir'],'features_' + dataset) | 56 picklepath = os.path.join(Settings['paths']['featuresdir'],'features_' + dataset) |
| 57 if not(os.path.isfile(picklepath)): | 57 if not(os.path.isfile(picklepath)): |
