comparison notebooks/sensitivity_experiment.ipynb @ 48:08b9327f1935 branch-tests

mapper now writes output
author mpanteli <m.x.panteli@gmail.com>
date Fri, 15 Sep 2017 17:46:45 +0100
parents 081ff4ea7da7
children d3de9ac0d545
comparison
equal deleted inserted replaced
47:081ff4ea7da7 48:08b9327f1935
1 { 1 {
2 "cells": [ 2 "cells": [
3 { 3 {
4 "cell_type": "code", 4 "cell_type": "code",
5 "execution_count": 15, 5 "execution_count": 1,
6 "metadata": { 6 "metadata": {},
7 "collapsed": false
8 },
9 "outputs": [ 7 "outputs": [
10 { 8 {
11 "name": "stdout", 9 "name": "stderr",
12 "output_type": "stream", 10 "output_type": "stream",
13 "text": [ 11 "text": [
14 "The autoreload extension is already loaded. To reload it, use:\n", 12 "/homes/mp305/anaconda/lib/python2.7/site-packages/librosa/core/audio.py:33: UserWarning: Could not import scikits.samplerate. Falling back to scipy.signal\n",
15 " %reload_ext autoreload\n" 13 " warnings.warn('Could not import scikits.samplerate. '\n"
16 ] 14 ]
17 } 15 }
18 ], 16 ],
19 "source": [ 17 "source": [
20 "import numpy as np\n", 18 "import numpy as np\n",
34 "import scripts.outliers as outliers" 32 "import scripts.outliers as outliers"
35 ] 33 ]
36 }, 34 },
37 { 35 {
38 "cell_type": "code", 36 "cell_type": "code",
39 "execution_count": 2, 37 "execution_count": 3,
40 "metadata": { 38 "metadata": {
41 "collapsed": true 39 "collapsed": true
42 }, 40 },
43 "outputs": [], 41 "outputs": [],
44 "source": [ 42 "source": [
47 ] 45 ]
48 }, 46 },
49 { 47 {
50 "cell_type": "code", 48 "cell_type": "code",
51 "execution_count": 5, 49 "execution_count": 5,
52 "metadata": { 50 "metadata": {},
53 "collapsed": false
54 },
55 "outputs": [ 51 "outputs": [
56 { 52 {
57 "data": { 53 "data": {
58 "text/plain": [ 54 "text/plain": [
59 "(8396, 108)" 55 "(8396, 108)"
70 ] 66 ]
71 }, 67 },
72 { 68 {
73 "cell_type": "code", 69 "cell_type": "code",
74 "execution_count": 48, 70 "execution_count": 48,
75 "metadata": { 71 "metadata": {},
76 "collapsed": false
77 },
78 "outputs": [ 72 "outputs": [
79 { 73 {
80 "name": "stdout", 74 "name": "stdout",
81 "output_type": "stream", 75 "output_type": "stream",
82 "text": [ 76 "text": [
282 ] 276 ]
283 }, 277 },
284 { 278 {
285 "cell_type": "code", 279 "cell_type": "code",
286 "execution_count": 52, 280 "execution_count": 52,
287 "metadata": { 281 "metadata": {},
288 "collapsed": false
289 },
290 "outputs": [ 282 "outputs": [
291 { 283 {
292 "name": "stdout", 284 "name": "stdout",
293 "output_type": "stream", 285 "output_type": "stream",
294 "text": [ 286 "text": [
453 ] 445 ]
454 }, 446 },
455 { 447 {
456 "cell_type": "code", 448 "cell_type": "code",
457 "execution_count": 56, 449 "execution_count": 56,
458 "metadata": { 450 "metadata": {},
459 "collapsed": false
460 },
461 "outputs": [ 451 "outputs": [
462 { 452 {
463 "data": { 453 "data": {
464 "text/plain": [ 454 "text/plain": [
465 "array([ 59, 1, 1, 1, 1, 733, 733, 733, 733, 733, 733, 733, 733,\n", 455 "array([ 59, 1, 1, 1, 1, 733, 733, 733, 733, 733, 733, 733, 733,\n",
478 ] 468 ]
479 }, 469 },
480 { 470 {
481 "cell_type": "code", 471 "cell_type": "code",
482 "execution_count": 8, 472 "execution_count": 8,
483 "metadata": { 473 "metadata": {},
484 "collapsed": false
485 },
486 "outputs": [ 474 "outputs": [
487 { 475 {
488 "data": { 476 "data": {
489 "text/html": [ 477 "text/html": [
490 "<div>\n", 478 "<div>\n",
739 ] 727 ]
740 }, 728 },
741 { 729 {
742 "cell_type": "code", 730 "cell_type": "code",
743 "execution_count": 47, 731 "execution_count": 47,
744 "metadata": { 732 "metadata": {},
745 "collapsed": false
746 },
747 "outputs": [ 733 "outputs": [
748 { 734 {
749 "name": "stdout", 735 "name": "stdout",
750 "output_type": "stream", 736 "output_type": "stream",
751 "text": [ 737 "text": [
840 ] 826 ]
841 }, 827 },
842 { 828 {
843 "cell_type": "code", 829 "cell_type": "code",
844 "execution_count": 59, 830 "execution_count": 59,
845 "metadata": { 831 "metadata": {},
846 "collapsed": false
847 },
848 "outputs": [ 832 "outputs": [
849 { 833 {
850 "name": "stdout", 834 "name": "stdout",
851 "output_type": "stream", 835 "output_type": "stream",
852 "text": [ 836 "text": [
4550 " output_file in OUTPUT_FILES]\n", 4534 " output_file in OUTPUT_FILES]\n",
4551 " load_dataset.features_for_train_test_sets(df, write_output=True)" 4535 " load_dataset.features_for_train_test_sets(df, write_output=True)"
4552 ] 4536 ]
4553 }, 4537 },
4554 { 4538 {
4555 "cell_type": "code", 4539 "cell_type": "markdown",
4556 "execution_count": null,
4557 "metadata": { 4540 "metadata": {
4558 "collapsed": true 4541 "collapsed": true
4559 }, 4542 },
4560 "outputs": [],
4561 "source": [ 4543 "source": [
4562 "MAPPER_OUTPUT_FILES = mapper.OUTPUT_FILES" 4544 "## Map frames and write output for the lda transformed frames"
4563 ] 4545 ]
4564 }, 4546 },
4565 { 4547 {
4566 "cell_type": "code", 4548 "cell_type": "code",
4567 "execution_count": 3, 4549 "execution_count": 7,
4568 "metadata": { 4550 "metadata": {},
4569 "collapsed": false
4570 },
4571 "outputs": [ 4551 "outputs": [
4572 { 4552 {
4573 "name": "stdout", 4553 "name": "stdout",
4574 "output_type": "stream", 4554 "output_type": "stream",
4575 "text": [ 4555 "text": [
4576 "iteration 0\n", 4556 "iteration 0\n",
4577 "mapping...\n", 4557 "mapping...\n",
4578 "/import/c4dm-04/mariap/train_data_melodia_8_0.pickle\n", 4558 "/import/c4dm-04/mariap/train_data_melodia_8_0.pickle\n"
4579 "(203219, 840) (68100, 840) (67143, 840)\n",
4580 "mapping rhy\n",
4581 "training with PCA transform...\n",
4582 "variance explained 1.0\n",
4583 "140 400\n",
4584 "training with PCA transform...\n",
4585 "variance explained 0.990203912455\n",
4586 "training with LDA transform...\n"
4587 ] 4559 ]
4588 }, 4560 },
4589 { 4561 {
4590 "name": "stderr", 4562 "ename": "KeyboardInterrupt",
4591 "output_type": "stream", 4563 "evalue": "",
4592 "text": [
4593 "/homes/mp305/anaconda/lib/python2.7/site-packages/sklearn/utils/validation.py:526: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
4594 " y = column_or_1d(y, warn=True)\n",
4595 "/homes/mp305/anaconda/lib/python2.7/site-packages/sklearn/discriminant_analysis.py:455: UserWarning: The priors do not sum to 1. Renormalizing\n",
4596 " UserWarning)\n"
4597 ]
4598 },
4599 {
4600 "name": "stdout",
4601 "output_type": "stream",
4602 "text": [
4603 "variance explained 1.0\n",
4604 "transform test data...\n",
4605 "mapping mel\n",
4606 "training with PCA transform...\n",
4607 "variance explained 1.0\n",
4608 "214 240\n",
4609 "training with PCA transform...\n",
4610 "variance explained 0.990094273777\n",
4611 "training with LDA transform...\n",
4612 "variance explained 1.0\n",
4613 "transform test data...\n",
4614 "mapping mfc\n",
4615 "training with PCA transform...\n",
4616 "variance explained 1.0\n",
4617 "39 80\n",
4618 "training with PCA transform...\n",
4619 "variance explained 0.9914399357\n",
4620 "training with LDA transform...\n",
4621 "variance explained 0.941390777379\n",
4622 "transform test data...\n",
4623 "mapping chr\n",
4624 "training with PCA transform...\n",
4625 "variance explained 1.0\n",
4626 "70 120\n",
4627 "training with PCA transform...\n",
4628 "variance explained 0.990511935176\n",
4629 "training with LDA transform...\n",
4630 "variance explained 0.953613938607\n",
4631 "transform test data...\n"
4632 ]
4633 },
4634 {
4635 "ename": "ValueError",
4636 "evalue": "all the input array dimensions except for the concatenation axis must match exactly",
4637 "output_type": "error", 4564 "output_type": "error",
4638 "traceback": [ 4565 "traceback": [
4639 "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 4566 "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
4640 "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 4567 "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
4641 "\u001b[0;32m<ipython-input-3-971892d5bd8d>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 6\u001b[0m output_file in OUTPUT_FILES]\n\u001b[1;32m 7\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mldadata_list\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mYaudio\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmapper\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlda_map_and_average_frames\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmin_variance\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.99\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcatenate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mldadata_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;31m# classification and confusion\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 4568 "\u001b[0;32m<ipython-input-7-f093c6f2c550>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 7\u001b[0m mapper.OUTPUT_FILES = [output_file.split('.pickle')[0]+'_'+str(n)+'.pickle' for \n\u001b[1;32m 8\u001b[0m output_file in MAPPER_OUTPUT_FILES]\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mldadata_list\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mYaudio\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmapper\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlda_map_and_average_frames\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmin_variance\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.99\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0mmapper\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwrite_output\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mldadata_list\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mYaudio\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
4642 "\u001b[0;31mValueError\u001b[0m: all the input array dimensions except for the concatenation axis must match exactly" 4569 "\u001b[0;32m/homes/mp305/code/pythoncode/plosone_underreview/scripts/map_and_average.pyc\u001b[0m in \u001b[0;36mlda_map_and_average_frames\u001b[0;34m(dataset, n_components, min_variance)\u001b[0m\n\u001b[1;32m 149\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mlda_map_and_average_frames\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_components\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmin_variance\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 150\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdataset\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 151\u001b[0;31m \u001b[0mtrainset\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalset\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtestset\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mload_train_val_test_sets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 152\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 153\u001b[0m \u001b[0mtrainset\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalset\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtestset\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdataset\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
4570 "\u001b[0;32m/homes/mp305/code/pythoncode/plosone_underreview/scripts/map_and_average.pyc\u001b[0m in \u001b[0;36mload_train_val_test_sets\u001b[0;34m()\u001b[0m\n\u001b[1;32m 69\u001b[0m '''\n\u001b[1;32m 70\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0mINPUT_FILES\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 71\u001b[0;31m \u001b[0mtrainset\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mload_data_from_pickle\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mINPUT_FILES\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 72\u001b[0m \u001b[0mvalset\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mload_data_from_pickle\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mINPUT_FILES\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 73\u001b[0m \u001b[0mtestset\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mload_data_from_pickle\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mINPUT_FILES\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
4571 "\u001b[0;32m/homes/mp305/code/pythoncode/plosone_underreview/scripts/map_and_average.pyc\u001b[0m in \u001b[0;36mload_data_from_pickle\u001b[0;34m(pickle_file)\u001b[0m\n\u001b[1;32m 57\u001b[0m '''\n\u001b[1;32m 58\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpickle_file\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'rb'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 59\u001b[0;31m \u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maudiolabels\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 60\u001b[0m \u001b[0;31m# remove 'unknown' and 'unidentified' country\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maudiolabels\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mremove_inds\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maudiolabels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
4572 "\u001b[0;32m/homes/mp305/anaconda/lib/python2.7/pickle.pyc\u001b[0m in \u001b[0;36mload\u001b[0;34m(file)\u001b[0m\n\u001b[1;32m 1382\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1383\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1384\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mUnpickler\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1385\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1386\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mloads\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
4573 "\u001b[0;32m/homes/mp305/anaconda/lib/python2.7/pickle.pyc\u001b[0m in \u001b[0;36mload\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 862\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 863\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 864\u001b[0;31m \u001b[0mdispatch\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 865\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0m_Stop\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstopinst\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 866\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mstopinst\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
4574 "\u001b[0;32m/homes/mp305/anaconda/lib/python2.7/pickle.pyc\u001b[0m in \u001b[0;36mload_string\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 966\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 967\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mload_string\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 968\u001b[0;31m \u001b[0mrep\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreadline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 969\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mq\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m\"\\\"'\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# double or single quote\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 970\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mrep\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstartswith\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mq\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
4575 "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
4643 ] 4576 ]
4644 } 4577 }
4645 ], 4578 ],
4646 "source": [ 4579 "source": [
4580 "MAPPER_OUTPUT_FILES = mapper.OUTPUT_FILES\n",
4647 "for n in range(n_iters):\n", 4581 "for n in range(n_iters):\n",
4648 " print \"iteration %d\" % n\n", 4582 " print \"iteration %d\" % n\n",
4649 " \n", 4583 " \n",
4650 " print \"mapping...\"\n", 4584 " print \"mapping...\"\n",
4651 " mapper.INPUT_FILES = [output_file.split('.pickle')[0]+'_'+str(n)+'.pickle' for \n", 4585 " mapper.INPUT_FILES = [output_file.split('.pickle')[0]+'_'+str(n)+'.pickle' for \n",
4655 " _, _, ldadata_list, _, _, Y, Yaudio = mapper.lda_map_and_average_frames(min_variance=0.99)\n", 4589 " _, _, ldadata_list, _, _, Y, Yaudio = mapper.lda_map_and_average_frames(min_variance=0.99)\n",
4656 " mapper.write_output([], [], ldadata_list, [], [], Y, Yaudio)" 4590 " mapper.write_output([], [], ldadata_list, [], [], Y, Yaudio)"
4657 ] 4591 ]
4658 }, 4592 },
4659 { 4593 {
4594 "cell_type": "markdown",
4595 "metadata": {},
4596 "source": [
4597 "## Classification only - assuming mapper files are exported "
4598 ]
4599 },
4600 {
4660 "cell_type": "code", 4601 "cell_type": "code",
4661 "execution_count": null, 4602 "execution_count": 5,
4662 "metadata": { 4603 "metadata": {},
4663 "collapsed": true 4604 "outputs": [
4664 }, 4605 {
4665 "outputs": [], 4606 "name": "stdout",
4607 "output_type": "stream",
4608 "text": [
4609 "iteration 0\n"
4610 ]
4611 },
4612 {
4613 "ename": "IOError",
4614 "evalue": "[Errno 2] No such file or directory: '/import/c4dm-04/mariap/nmf_data_melodia_8_0.pickle'",
4615 "output_type": "error",
4616 "traceback": [
4617 "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
4618 "\u001b[0;31mIOError\u001b[0m Traceback (most recent call last)",
4619 "\u001b[0;32m<ipython-input-5-eb8ccb858c3f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mmapper\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mOUTPUT_FILES\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mCLASS_INPUT_FILES\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mmapper\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mINPUT_FILES\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mOUTPUT_FILES\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0mldadata_list\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mYaudio\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mclassification\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_data_from_pickle\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mCLASS_INPUT_FILES\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 8\u001b[0m \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcatenate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mldadata_list\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0;31m# classification and confusion\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
4620 "\u001b[0;32m/homes/mp305/code/pythoncode/plosone_underreview/scripts/classification.pyc\u001b[0m in \u001b[0;36mload_data_from_pickle\u001b[0;34m(filename)\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mload_data_from_pickle\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilename\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 20\u001b[0;31m \u001b[0mX_list\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mYaudio\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilename\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m'rb'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 21\u001b[0m \u001b[0mX\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcatenate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_list\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mYaudio\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
4621 "\u001b[0;31mIOError\u001b[0m: [Errno 2] No such file or directory: '/import/c4dm-04/mariap/nmf_data_melodia_8_0.pickle'"
4622 ]
4623 }
4624 ],
4666 "source": [ 4625 "source": [
4667 "CLASS_INPUT_FILES = [output_file.split('.pickle')[0]+'_'+str(n)+'.pickle' for \n",
4668 " output_file in mapper.OUTPUT_FILES]\n",
4669 "mapper.OUTPUT_FILES = CLASS_INPUT_FILES\n",
4670 "mapper.INPUT_FILES = OUTPUT_FILES\n",
4671 "for n in range(n_iters):\n", 4626 "for n in range(n_iters):\n",
4672 " print \"iteration %d\" % n\n", 4627 " print \"iteration %d\" % n\n",
4628 " CLASS_INPUT_FILES = [output_file.split('.pickle')[0]+'_'+str(n)+'.pickle' for \n",
4629 " output_file in mapper.OUTPUT_FILES]\n",
4630 " mapper.INPUT_FILES = OUTPUT_FILES\n",
4673 " ldadata_list, Y, Yaudio = classification.load_data_from_pickle(CLASS_INPUT_FILES[2])\n", 4631 " ldadata_list, Y, Yaudio = classification.load_data_from_pickle(CLASS_INPUT_FILES[2])\n",
4674 " X = np.concatenate(ldadata_list, axis=1)\n", 4632 " X = np.concatenate(ldadata_list, axis=1)\n",
4675 " # classification and confusion\n", 4633 " # classification and confusion\n",
4676 " print \"classifying...\"\n", 4634 " print \"classifying...\"\n",
4677 " traininds, testinds = classification.get_train_test_indices(Yaudio)\n", 4635 " traininds, testinds = classification.get_train_test_indices(Yaudio)\n",
4702 ] 4660 ]
4703 }, 4661 },
4704 { 4662 {
4705 "cell_type": "code", 4663 "cell_type": "code",
4706 "execution_count": 5, 4664 "execution_count": 5,
4707 "metadata": { 4665 "metadata": {},
4708 "collapsed": false
4709 },
4710 "outputs": [ 4666 "outputs": [
4711 { 4667 {
4712 "data": { 4668 "data": {
4713 "text/plain": [ 4669 "text/plain": [
4714 "(8089, 381)" 4670 "(8089, 381)"
4724 ] 4680 ]
4725 }, 4681 },
4726 { 4682 {
4727 "cell_type": "code", 4683 "cell_type": "code",
4728 "execution_count": 10, 4684 "execution_count": 10,
4729 "metadata": { 4685 "metadata": {},
4730 "collapsed": false
4731 },
4732 "outputs": [ 4686 "outputs": [
4733 { 4687 {
4734 "name": "stdout", 4688 "name": "stdout",
4735 "output_type": "stream", 4689 "output_type": "stream",
4736 "text": [ 4690 "text": [
4756 ] 4710 ]
4757 }, 4711 },
4758 { 4712 {
4759 "cell_type": "code", 4713 "cell_type": "code",
4760 "execution_count": 13, 4714 "execution_count": 13,
4761 "metadata": { 4715 "metadata": {},
4762 "collapsed": false
4763 },
4764 "outputs": [ 4716 "outputs": [
4765 { 4717 {
4766 "name": "stdout", 4718 "name": "stdout",
4767 "output_type": "stream", 4719 "output_type": "stream",
4768 "text": [ 4720 "text": [
4865 ] 4817 ]
4866 }, 4818 },
4867 { 4819 {
4868 "cell_type": "code", 4820 "cell_type": "code",
4869 "execution_count": 33, 4821 "execution_count": 33,
4870 "metadata": { 4822 "metadata": {},
4871 "collapsed": false
4872 },
4873 "outputs": [ 4823 "outputs": [
4874 { 4824 {
4875 "name": "stdout", 4825 "name": "stdout",
4876 "output_type": "stream", 4826 "output_type": "stream",
4877 "text": [ 4827 "text": [
4887 ] 4837 ]
4888 }, 4838 },
4889 { 4839 {
4890 "cell_type": "code", 4840 "cell_type": "code",
4891 "execution_count": 34, 4841 "execution_count": 34,
4892 "metadata": { 4842 "metadata": {},
4893 "collapsed": false
4894 },
4895 "outputs": [ 4843 "outputs": [
4896 { 4844 {
4897 "data": { 4845 "data": {
4898 "text/plain": [ 4846 "text/plain": [
4899 "SpearmanrResult(correlation=1.0, pvalue=0.0)" 4847 "SpearmanrResult(correlation=1.0, pvalue=0.0)"