chime-home-dataset-annotation-and-baseline-evaluation-code: gmm_baseline_experiments/run_experiments.py

comparison gmm_baseline_experiments/run_experiments.py @ 5:b523456082ca tip

Update path to dataset and reflect modified chunk naming convention.

author	peterf
date	Mon, 01 Feb 2016 21:35:27 +0000
parents	cb535b80218a
children

comparison

equal deleted inserted replaced

-:39258b875228
+:b523456082ca
 from compute_performance_statistics import compute_performance_statistics
 import pdb
 Settings = {'paths':{}, 'algorithms':{}}
 Settings['paths'] = {'chime_home': {}, 'resultsdir':'/import/c4dm-scratch/peterf/audex/results/', 'featuresdir':'/import/c4dm-scratch/peterf/audex/features/'}
-Settings['paths']['chime_home'] = {'basepath':'/import/c4dm-02/people/peterf/audex/datasets/chime_home/'}
+Settings['paths']['chime_home'] = {'basepath':'/import/c4dm-02/people/peterf/audex/datasets/chime_home/release/'}
 #Read data sets and class assignments
 Datasets = {'chime_home':{}}
 #Read in annotations
-Chunks = list(Series.from_csv(Settings['paths']['chime_home']['basepath'] + 'release_chunks_refined.csv',header=None))
+Chunks = list(Series.from_csv(Settings['paths']['chime_home']['basepath'] + 'chunks_refined.csv',header=None))
 Annotations = []
 for chunk in Chunks:
 Annotations.append(Series.from_csv(Settings['paths']['chime_home']['basepath'] + 'chunks/' + chunk + '.csv'))
 Datasets['chime_home']['dataset'] = DataFrame(Annotations)
 #Compute label statistics
 Datasets['chime_home']['labelstats'] = defaultdict(lambda: 0)
 for item in Datasets['chime_home']['dataset']['majorityvote']:
 for label in item:
 Datasets['chime_home']['labelstats'][label] += 1
-#Labels to consider for multilabel classification -- based on label set used in Stowell and Plumbley (2013)
+#Labels to consider for multilabel classification
 Datasets['chime_home']['consideredlabels'] = ['c', 'b', 'f', 'm', 'o', 'p', 'v']
 #Populate binary label assignments
 for label in Datasets['chime_home']['consideredlabels']:
 Datasets['chime_home']['dataset'][label] = [label in item for item in Datasets['chime_home']['dataset']['majorityvote']]
 #Obtain statistics for considered labels
 sum(Datasets['chime_home']['dataset'][Datasets['chime_home']['consideredlabels']]) / len(Datasets['chime_home']['dataset'])
-#Create partition for 10-fold cross-validation. Shuffling ensures each fold has approximately equal proportion of label ocurrences
+#Create partition for 10-fold cross-validation. Shuffling ensures each fold has approximately equal proportion of label occurrences
 np.random.seed(475686)
 Datasets['chime_home']['crossval_10fold'] = cross_validation.KFold(len(Datasets['chime_home']['dataset']), 10, shuffle=True)
-Datasets['chime_home']['dataset']['wavfile'] = Datasets['chime_home']['dataset']['chunkname'].apply(lambda s: Settings['paths']['chime_home']['basepath'] + 'chunks/' + s + '.wav')
+Datasets['chime_home']['dataset']['wavfile'] = Datasets['chime_home']['dataset']['chunkname'].apply(lambda s: Settings['paths']['chime_home']['basepath'] + 'chunks/' + s + '.48kHz.wav')
 #Extract features and assign them to Datasets structure
 for dataset in Datasets.keys():
 picklepath = os.path.join(Settings['paths']['featuresdir'],'features_' + dataset)
 if not(os.path.isfile(picklepath)):

Mercurial > hg > chime-home-dataset-annotation-and-baseline-evaluation-code

comparison gmm_baseline_experiments/run_experiments.py @ 5:b523456082ca tip