diff code/Hierarchical Clustering.ipynb @ 1:995546d09284

add gensim notebook and matlab scripts
author DaveM
date Tue, 24 Jan 2017 17:44:45 +0000
parents 7d69c0d6f4c9
children
line wrap: on
line diff
--- a/code/Hierarchical Clustering.ipynb	Mon Jan 16 17:34:29 2017 +0000
+++ b/code/Hierarchical Clustering.ipynb	Tue Jan 24 17:44:45 2017 +0000
@@ -11,6 +11,7 @@
     "from matplotlib import pyplot as plt\n",
     "from scipy.cluster.hierarchy import dendrogram, linkage, cophenet\n",
     "from scipy.spatial.distance import pdist\n",
+    "import sklearn.cluster\n",
     "import numpy as np\n",
     "import csv\n",
     "\n",
@@ -41,9 +42,9 @@
    },
    "outputs": [],
    "source": [
-    "print X.shape\n",
-    "print filenames.shape\n",
-    "print features.shape"
+    "agglo = sklearn.cluster.FeatureAgglomeration()  # library defaults; groups similar columns of X\n",
+    "agglo.fit(X)  # learn the feature clusters from the data matrix\n",
+    "X_reduced = agglo.transform(X)  # one pooled column per feature cluster"
    ]
   },
   {
@@ -59,6 +60,121 @@
   },
   {
    "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[  8.51810000e-01   4.00000000e-06   2.46000000e-04 ...,   2.10260000e-02\n",
+      "    1.98220000e-02   1.04000000e-04]\n",
+      " [  9.52275000e-01   7.00000000e-06   1.82600000e-03 ...,   1.79490000e-02\n",
+      "    1.09020000e-02   7.20000000e-05]\n",
+      " [  1.92200000e-03   1.00000000e-06   1.39000000e-04 ...,   2.35900000e-02\n",
+      "    6.93800000e-03   2.61000000e-04]\n",
+      " ..., \n",
+      " [  9.96346000e-01   3.37000000e-04   1.23600000e-03 ...,   5.24103000e-01\n",
+      "    3.36967000e-01   5.39000000e-04]\n",
+      " [  9.99990000e-01   1.00000000e-06   0.00000000e+00 ...,   0.00000000e+00\n",
+      "    0.00000000e+00   0.00000000e+00]\n",
+      " [  9.96624000e-01   6.97000000e-04   2.59300000e-03 ...,   5.24615000e-01\n",
+      "    3.34985000e-01   5.45000000e-04]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(X)  # preview only; large arrays are abbreviated with '...' in the output"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(8977, 1536)\n"
+     ]
+    }
+   ],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'nu_0': 0, 'kappa_0': 0, 'lambda_0': 0, 'mu_0': 0}\n"
+     ]
+    }
+   ],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "import pyBHC as bhc  # NOTE(review): consider moving both pyBHC imports to the top import cell\n",
+    "from pyBHC import dists\n",
+    "\n",
+    "mu_init = []  # TODO(review): empty placeholder passed as mu_0 -- real value still to be chosen\n",
+    "sigma_init = []  # TODO(review): empty placeholder passed as sigma_0\n",
+    "S_init = []  # TODO(review): empty placeholder passed as S\n",
+    "cd = dists.NormalFixedCovar(mu_0=mu_init,sigma_0=sigma_init, S=S_init)\n",
+    "\n",
+    "# temp = cd.log_marginal_likelihood(X)\n",
+    "d = bhc.rbhc(X, cd)  # presumably clusters X using cd as the data model -- confirm against pyBHC"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
    "execution_count": null,
    "metadata": {
     "collapsed": true