Mercurial > hg > plosone_underreview
changeset 47:081ff4ea7da7 branch-tests
sensitivity experiment split
author | Maria Panteli <m.x.panteli@gmail.com> |
---|---|
date | Fri, 15 Sep 2017 17:33:14 +0100 |
parents | 3ed4c6af5a93 |
children | 08b9327f1935 590a1a54855a |
files | notebooks/sensitivity_experiment.ipynb scripts/classification.py |
diffstat | 2 files changed, 103 insertions(+), 109 deletions(-) [+] |
line wrap: on
line diff
--- a/notebooks/sensitivity_experiment.ipynb Fri Sep 15 16:34:30 2017 +0100 +++ b/notebooks/sensitivity_experiment.ipynb Fri Sep 15 17:33:14 2017 +0100 @@ -3,7 +3,9 @@ { "cell_type": "code", "execution_count": 15, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -47,7 +49,9 @@ { "cell_type": "code", "execution_count": 5, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "data": { @@ -68,7 +72,9 @@ { "cell_type": "code", "execution_count": 48, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -278,7 +284,9 @@ { "cell_type": "code", "execution_count": 52, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -447,7 +455,9 @@ { "cell_type": "code", "execution_count": 56, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "data": { @@ -470,7 +480,9 @@ { "cell_type": "code", "execution_count": 8, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "data": { @@ -729,7 +741,9 @@ { "cell_type": "code", "execution_count": 47, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -828,7 +842,9 @@ { "cell_type": "code", "execution_count": 59, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -1105,13 +1121,7 @@ "loading speech/music segments...\n", "extracting onset patterns and mfccs...\n", "extracting chroma...\n", - "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs/PB-melodia/\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs/PB-melodia/\n", "(45, 240)\n", "60 44 44 44 45\n", "file 39 of 5037\n", @@ -1382,13 +1392,7 @@ "60 44 44 44 45\n", "file 77 of 5037\n", "loading speech/music segments...\n", - "extracting onset patterns and mfccs...\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "extracting onset patterns and mfccs...\n", "extracting chroma...\n", "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs_BL_old/PB-melodia/\n", "(60, 240)\n", @@ -1656,13 +1660,7 @@ "loading speech/music segments...\n", "extracting onset patterns and mfccs...\n", "extracting chroma...\n", - "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs/PB-melodia/\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs/PB-melodia/\n", "(45, 240)\n", "60 44 44 44 45\n", "file 116 of 5037\n", @@ -1932,13 +1930,7 @@ "(45, 240)\n", "60 44 44 44 45\n", "file 154 of 5037\n", - "loading speech/music segments...\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "loading speech/music segments...\n", "extracting onset patterns and mfccs...\n", "extracting chroma...\n", "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs/PB-melodia/\n", @@ -2207,13 +2199,7 @@ "loading speech/music segments...\n", "extracting onset patterns and mfccs...\n", "extracting chroma...\n", - "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs_BL_old/PB-melodia/\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs_BL_old/PB-melodia/\n", "(60, 240)\n", "60 44 44 44 60\n", "file 193 of 5037\n", @@ -2486,13 +2472,7 @@ "60 44 44 44 45\n", "file 232 of 5037\n", "loading speech/music segments...\n", - "extracting onset patterns and mfccs...\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "extracting onset patterns and mfccs...\n", "extracting chroma...\n", "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs/PB-melodia/\n", "(45, 240)\n", @@ -2760,13 +2740,7 @@ "loading speech/music segments...\n", "extracting onset patterns and mfccs...\n", "extracting chroma...\n", - "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs/PB-melodia/\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs/PB-melodia/\n", "(45, 240)\n", "60 44 44 44 45\n", "file 271 of 5037\n", @@ -3037,13 +3011,7 @@ "60 44 44 44 60\n", "file 309 of 5037\n", "loading speech/music segments...\n", - "extracting onset patterns and mfccs...\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "extracting onset patterns and mfccs...\n", "extracting chroma...\n", "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs/PB-melodia/\n", "(45, 240)\n", @@ -3311,13 +3279,7 @@ "loading speech/music segments...\n", "extracting onset patterns and mfccs...\n", "extracting chroma...\n", - "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs/PB-melodia/\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs/PB-melodia/\n", "(45, 240)\n", "60 44 44 44 45\n", "file 348 of 5037\n", @@ -3588,13 +3550,7 @@ "60 44 44 44 45\n", "file 386 of 5037\n", "loading speech/music segments...\n", - "extracting onset patterns and mfccs...\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "extracting onset patterns and mfccs...\n", "extracting chroma...\n", "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs/PB-melodia/\n", "(45, 240)\n", @@ -3867,13 +3823,7 @@ "60 44 44 44 45\n", "file 425 of 5037\n", "loading speech/music segments...\n", - "extracting onset patterns and mfccs...\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "extracting onset patterns and mfccs...\n", "extracting chroma...\n", "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs/PB-melodia/\n", "(45, 240)\n", @@ -4141,13 +4091,7 @@ "60 44 44 44 45\n", "file 464 of 5037\n", "loading speech/music segments...\n", - "extracting onset patterns and mfccs...\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "extracting onset patterns and mfccs...\n", "extracting chroma...\n", "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs/PB-melodia/\n", "(45, 240)\n", @@ -4415,13 +4359,7 @@ "loading speech/music segments...\n", "extracting onset patterns and mfccs...\n", "extracting chroma...\n", - "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs_BL_old/PB-melodia/\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs_BL_old/PB-melodia/\n", "(60, 240)\n", "60 44 44 44 60\n", "file 503 of 5037\n", @@ -4615,8 +4553,21 @@ }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "MAPPER_OUTPUT_FILES = mapper.OUTPUT_FILES" + ] + }, + { + "cell_type": "code", "execution_count": 3, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -4699,12 +4650,31 @@ " print \"mapping...\"\n", " mapper.INPUT_FILES = [output_file.split('.pickle')[0]+'_'+str(n)+'.pickle' for \n", " output_file in OUTPUT_FILES]\n", + " mapper.OUTPUT_FILES = [output_file.split('.pickle')[0]+'_'+str(n)+'.pickle' for \n", + " output_file in MAPPER_OUTPUT_FILES]\n", " _, _, ldadata_list, _, _, Y, Yaudio = mapper.lda_map_and_average_frames(min_variance=0.99)\n", + " mapper.write_output([], [], ldadata_list, [], [], Y, Yaudio)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "CLASS_INPUT_FILES = [output_file.split('.pickle')[0]+'_'+str(n)+'.pickle' for \n", + " output_file in mapper.OUTPUT_FILES]\n", + "mapper.OUTPUT_FILES = CLASS_INPUT_FILES\n", + "mapper.INPUT_FILES = OUTPUT_FILES\n", + "for n in range(n_iters):\n", + " print \"iteration %d\" % n\n", + " ldadata_list, Y, Yaudio = classification.load_data_from_pickle(CLASS_INPUT_FILES[2])\n", " X = np.concatenate(ldadata_list, axis=1)\n", - " \n", " # classification and confusion\n", " print \"classifying...\"\n", - " traininds, testinds = classification.get_train_test_indices()\n", + " traininds, testinds = classification.get_train_test_indices(Yaudio)\n", " X_train, Y_train, X_test, Y_test = classification.get_train_test_sets(X, Y, traininds, testinds)\n", " accuracy, _ = classification.confusion_matrix(X_train, Y_train, X_test, Y_test, saveCF=False, plots=False)\n", " print accuracy\n", @@ -4734,7 +4704,9 @@ { "cell_type": "code", "execution_count": 5, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "data": { @@ -4754,7 +4726,9 @@ { "cell_type": "code", "execution_count": 10, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -4784,7 +4758,9 @@ { "cell_type": "code", "execution_count": 13, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -4891,7 +4867,9 @@ { "cell_type": "code", "execution_count": 33, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "name": "stdout", @@ -4911,7 +4889,9 @@ { "cell_type": "code", "execution_count": 34, - "metadata": {}, + "metadata": { + "collapsed": false + }, "outputs": [ { "data": {
--- a/scripts/classification.py Fri Sep 15 16:34:30 2017 +0100 +++ b/scripts/classification.py Fri Sep 15 17:33:14 2017 +0100 @@ -45,11 +45,25 @@ feat_learner = util_feature_learning.Transformer() for filename in file_list: X, Y, Yaudio = load_data_from_pickle(filename) - traininds, testinds = get_train_test_indices() + traininds, testinds = get_train_test_indices(Yaudio) X_train, Y_train, X_test, Y_test = get_train_test_sets(X, Y, traininds, testinds) df_result = feat_learner.classify(X_train, Y_train, X_test, Y_test) df_results = pd.concat([df_results, df_result], axis=0, ignore_index=True) - return df_results + return df_results + + +def classify_each_feature(X_train, Y_train, X_test, Y_test): + n_dim = X_train.shape[1] + feat_labels, feat_inds = map_and_average.get_feat_inds(n_dim=n_dim) + #df_results = pd.DataFrame() + # first the classification with all features together + df_results = feat_learner.classify(X_train, Y_train, X_test, Y_test) + # then append for each feature separately + for i in range(len(feat_inds)): + df_result = feat_learner.classify(X_train[:, feat_inds[i]], Y_train, + X_test[:, feat_inds[i]], Y_test) + df_results = pd.concat([df_results, df_result], axis=1, ignore_index=True) + return df_results def plot_CF(CF, labels=None, figurename=None):