changeset 47:081ff4ea7da7 branch-tests

sensitivity experiment split
author Maria Panteli <m.x.panteli@gmail.com>
date Fri, 15 Sep 2017 17:33:14 +0100
parents 3ed4c6af5a93
children 08b9327f1935 590a1a54855a
files notebooks/sensitivity_experiment.ipynb scripts/classification.py
diffstat 2 files changed, 103 insertions(+), 109 deletions(-) [+]
line wrap: on
line diff
--- a/notebooks/sensitivity_experiment.ipynb	Fri Sep 15 16:34:30 2017 +0100
+++ b/notebooks/sensitivity_experiment.ipynb	Fri Sep 15 17:33:14 2017 +0100
@@ -3,7 +3,9 @@
   {
    "cell_type": "code",
    "execution_count": 15,
-   "metadata": {},
+   "metadata": {
+    "collapsed": false
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -47,7 +49,9 @@
   {
    "cell_type": "code",
    "execution_count": 5,
-   "metadata": {},
+   "metadata": {
+    "collapsed": false
+   },
    "outputs": [
     {
      "data": {
@@ -68,7 +72,9 @@
   {
    "cell_type": "code",
    "execution_count": 48,
-   "metadata": {},
+   "metadata": {
+    "collapsed": false
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -278,7 +284,9 @@
   {
    "cell_type": "code",
    "execution_count": 52,
-   "metadata": {},
+   "metadata": {
+    "collapsed": false
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -447,7 +455,9 @@
   {
    "cell_type": "code",
    "execution_count": 56,
-   "metadata": {},
+   "metadata": {
+    "collapsed": false
+   },
    "outputs": [
     {
      "data": {
@@ -470,7 +480,9 @@
   {
    "cell_type": "code",
    "execution_count": 8,
-   "metadata": {},
+   "metadata": {
+    "collapsed": false
+   },
    "outputs": [
     {
      "data": {
@@ -729,7 +741,9 @@
   {
    "cell_type": "code",
    "execution_count": 47,
-   "metadata": {},
+   "metadata": {
+    "collapsed": false
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -828,7 +842,9 @@
   {
    "cell_type": "code",
    "execution_count": 59,
-   "metadata": {},
+   "metadata": {
+    "collapsed": false
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -1105,13 +1121,7 @@
       "loading speech/music segments...\n",
       "extracting onset patterns and mfccs...\n",
       "extracting chroma...\n",
-      "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs/PB-melodia/\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
+      "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs/PB-melodia/\n",
       "(45, 240)\n",
       "60 44 44 44 45\n",
       "file 39 of 5037\n",
@@ -1382,13 +1392,7 @@
       "60 44 44 44 45\n",
       "file 77 of 5037\n",
       "loading speech/music segments...\n",
-      "extracting onset patterns and mfccs...\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
+      "extracting onset patterns and mfccs...\n",
       "extracting chroma...\n",
       "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs_BL_old/PB-melodia/\n",
       "(60, 240)\n",
@@ -1656,13 +1660,7 @@
       "loading speech/music segments...\n",
       "extracting onset patterns and mfccs...\n",
       "extracting chroma...\n",
-      "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs/PB-melodia/\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
+      "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs/PB-melodia/\n",
       "(45, 240)\n",
       "60 44 44 44 45\n",
       "file 116 of 5037\n",
@@ -1932,13 +1930,7 @@
       "(45, 240)\n",
       "60 44 44 44 45\n",
       "file 154 of 5037\n",
-      "loading speech/music segments...\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
+      "loading speech/music segments...\n",
       "extracting onset patterns and mfccs...\n",
       "extracting chroma...\n",
       "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs/PB-melodia/\n",
@@ -2207,13 +2199,7 @@
       "loading speech/music segments...\n",
       "extracting onset patterns and mfccs...\n",
       "extracting chroma...\n",
-      "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs_BL_old/PB-melodia/\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
+      "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs_BL_old/PB-melodia/\n",
       "(60, 240)\n",
       "60 44 44 44 60\n",
       "file 193 of 5037\n",
@@ -2486,13 +2472,7 @@
       "60 44 44 44 45\n",
       "file 232 of 5037\n",
       "loading speech/music segments...\n",
-      "extracting onset patterns and mfccs...\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
+      "extracting onset patterns and mfccs...\n",
       "extracting chroma...\n",
       "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs/PB-melodia/\n",
       "(45, 240)\n",
@@ -2760,13 +2740,7 @@
       "loading speech/music segments...\n",
       "extracting onset patterns and mfccs...\n",
       "extracting chroma...\n",
-      "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs/PB-melodia/\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
+      "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs/PB-melodia/\n",
       "(45, 240)\n",
       "60 44 44 44 45\n",
       "file 271 of 5037\n",
@@ -3037,13 +3011,7 @@
       "60 44 44 44 60\n",
       "file 309 of 5037\n",
       "loading speech/music segments...\n",
-      "extracting onset patterns and mfccs...\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
+      "extracting onset patterns and mfccs...\n",
       "extracting chroma...\n",
       "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs/PB-melodia/\n",
       "(45, 240)\n",
@@ -3311,13 +3279,7 @@
       "loading speech/music segments...\n",
       "extracting onset patterns and mfccs...\n",
       "extracting chroma...\n",
-      "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs/PB-melodia/\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
+      "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs/PB-melodia/\n",
       "(45, 240)\n",
       "60 44 44 44 45\n",
       "file 348 of 5037\n",
@@ -3588,13 +3550,7 @@
       "60 44 44 44 45\n",
       "file 386 of 5037\n",
       "loading speech/music segments...\n",
-      "extracting onset patterns and mfccs...\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
+      "extracting onset patterns and mfccs...\n",
       "extracting chroma...\n",
       "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs/PB-melodia/\n",
       "(45, 240)\n",
@@ -3867,13 +3823,7 @@
       "60 44 44 44 45\n",
       "file 425 of 5037\n",
       "loading speech/music segments...\n",
-      "extracting onset patterns and mfccs...\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
+      "extracting onset patterns and mfccs...\n",
       "extracting chroma...\n",
       "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs/PB-melodia/\n",
       "(45, 240)\n",
@@ -4141,13 +4091,7 @@
       "60 44 44 44 45\n",
       "file 464 of 5037\n",
       "loading speech/music segments...\n",
-      "extracting onset patterns and mfccs...\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
+      "extracting onset patterns and mfccs...\n",
       "extracting chroma...\n",
       "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs/PB-melodia/\n",
       "(45, 240)\n",
@@ -4415,13 +4359,7 @@
       "loading speech/music segments...\n",
       "extracting onset patterns and mfccs...\n",
       "extracting chroma...\n",
-      "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs_BL_old/PB-melodia/\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
+      "load precomputed pitch bihist /import/c4dm-04/mariap/FeatureCsvs_BL_old/PB-melodia/\n",
       "(60, 240)\n",
       "60 44 44 44 60\n",
       "file 503 of 5037\n",
@@ -4615,8 +4553,21 @@
   },
   {
    "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "MAPPER_OUTPUT_FILES = mapper.OUTPUT_FILES"
+   ]
+  },
+  {
+   "cell_type": "code",
    "execution_count": 3,
-   "metadata": {},
+   "metadata": {
+    "collapsed": false
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -4699,12 +4650,31 @@
     "    print \"mapping...\"\n",
     "    mapper.INPUT_FILES = [output_file.split('.pickle')[0]+'_'+str(n)+'.pickle' for \n",
     "                                 output_file in OUTPUT_FILES]\n",
+    "    mapper.OUTPUT_FILES = [output_file.split('.pickle')[0]+'_'+str(n)+'.pickle' for \n",
+    "                                 output_file in MAPPER_OUTPUT_FILES]\n",
     "    _, _, ldadata_list, _, _, Y, Yaudio = mapper.lda_map_and_average_frames(min_variance=0.99)\n",
+    "    mapper.write_output([], [], ldadata_list, [], [], Y, Yaudio)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "CLASS_INPUT_FILES = [output_file.split('.pickle')[0]+'_'+str(n)+'.pickle' for \n",
+    "                                 output_file in mapper.OUTPUT_FILES]\n",
+    "mapper.OUTPUT_FILES = CLASS_INPUT_FILES\n",
+    "mapper.INPUT_FILES = OUTPUT_FILES\n",
+    "for n in range(n_iters):\n",
+    "    print \"iteration %d\" % n\n",
+    "    ldadata_list, Y, Yaudio = classification.load_data_from_pickle(CLASS_INPUT_FILES[2])\n",
     "    X = np.concatenate(ldadata_list, axis=1)\n",
-    "    \n",
     "    # classification and confusion\n",
     "    print \"classifying...\"\n",
-    "    traininds, testinds = classification.get_train_test_indices()\n",
+    "    traininds, testinds = classification.get_train_test_indices(Yaudio)\n",
     "    X_train, Y_train, X_test, Y_test = classification.get_train_test_sets(X, Y, traininds, testinds)\n",
     "    accuracy, _ = classification.confusion_matrix(X_train, Y_train, X_test, Y_test, saveCF=False, plots=False)\n",
     "    print accuracy\n",
@@ -4734,7 +4704,9 @@
   {
    "cell_type": "code",
    "execution_count": 5,
-   "metadata": {},
+   "metadata": {
+    "collapsed": false
+   },
    "outputs": [
     {
      "data": {
@@ -4754,7 +4726,9 @@
   {
    "cell_type": "code",
    "execution_count": 10,
-   "metadata": {},
+   "metadata": {
+    "collapsed": false
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -4784,7 +4758,9 @@
   {
    "cell_type": "code",
    "execution_count": 13,
-   "metadata": {},
+   "metadata": {
+    "collapsed": false
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -4891,7 +4867,9 @@
   {
    "cell_type": "code",
    "execution_count": 33,
-   "metadata": {},
+   "metadata": {
+    "collapsed": false
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -4911,7 +4889,9 @@
   {
    "cell_type": "code",
    "execution_count": 34,
-   "metadata": {},
+   "metadata": {
+    "collapsed": false
+   },
    "outputs": [
     {
      "data": {
--- a/scripts/classification.py	Fri Sep 15 16:34:30 2017 +0100
+++ b/scripts/classification.py	Fri Sep 15 17:33:14 2017 +0100
@@ -45,11 +45,25 @@
     feat_learner = util_feature_learning.Transformer()
     for filename in file_list:
         X, Y, Yaudio = load_data_from_pickle(filename)
-        traininds, testinds = get_train_test_indices()
+        traininds, testinds = get_train_test_indices(Yaudio)
         X_train, Y_train, X_test, Y_test = get_train_test_sets(X, Y, traininds, testinds)
         df_result = feat_learner.classify(X_train, Y_train, X_test, Y_test)
         df_results = pd.concat([df_results, df_result], axis=0, ignore_index=True)
-    return df_results    
+    return df_results
+
+
+def classify_each_feature(X_train, Y_train, X_test, Y_test):
+    """Classify with all features, then per feature group; concat results column-wise."""
+    feat_learner = util_feature_learning.Transformer()  # not bound in this scope before
+    _, feat_inds = map_and_average.get_feat_inds(n_dim=X_train.shape[1])
+    # first the classification with all features together
+    df_results = feat_learner.classify(X_train, Y_train, X_test, Y_test)
+    # then append the result for each feature's column subset separately
+    for inds in feat_inds:
+        df_result = feat_learner.classify(X_train[:, inds], Y_train,
+                                          X_test[:, inds], Y_test)
+        df_results = pd.concat([df_results, df_result], axis=1, ignore_index=True)
+    return df_results
 
 
 def plot_CF(CF, labels=None, figurename=None):