DaveM@1
|
1 {
|
DaveM@1
|
2 "cells": [
|
DaveM@1
|
3 {
|
DaveM@1
|
4 "cell_type": "code",
|
DaveM@1
|
5 "execution_count": 1,
|
DaveM@1
|
6 "metadata": {
|
DaveM@1
|
7 "collapsed": false
|
DaveM@1
|
8 },
|
DaveM@1
|
9 "outputs": [
|
DaveM@1
|
10 {
|
DaveM@1
|
11 "name": "stdout",
|
DaveM@1
|
12 "output_type": "stream",
|
DaveM@1
|
13 "text": [
|
DaveM@1
|
14 "1.10.0\n"
|
DaveM@1
|
15 ]
|
DaveM@1
|
16 }
|
DaveM@1
|
17 ],
|
DaveM@1
|
18 "source": [
|
DaveM@1
|
19 "import six\n",
|
DaveM@1
|
20 "print(six.__version__)\n"
|
DaveM@1
|
21 ]
|
DaveM@1
|
22 },
|
DaveM@1
|
23 {
|
DaveM@1
|
24 "cell_type": "code",
|
DaveM@1
|
25 "execution_count": 2,
|
DaveM@1
|
26 "metadata": {
|
DaveM@1
|
27 "collapsed": false
|
DaveM@1
|
28 },
|
DaveM@1
|
29 "outputs": [],
|
DaveM@1
|
30 "source": [
|
DaveM@1
|
31 "import gensim\n"
|
DaveM@1
|
32 ]
|
DaveM@1
|
33 },
|
DaveM@1
|
34 {
|
DaveM@1
|
35 "cell_type": "code",
|
DaveM@1
|
36 "execution_count": 3,
|
DaveM@1
|
37 "metadata": {
|
DaveM@1
|
38 "collapsed": false
|
DaveM@1
|
39 },
|
DaveM@1
|
40 "outputs": [],
|
DaveM@1
|
41 "source": [
|
DaveM@1
|
42 "import logging\n",
|
DaveM@1
|
43 "import gensim\n",
|
DaveM@1
|
44 "import bz2\n",
|
DaveM@1
|
45 "logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)"
|
DaveM@1
|
46 ]
|
DaveM@1
|
47 },
|
DaveM@1
|
48 {
|
DaveM@1
|
49 "cell_type": "code",
|
DaveM@1
|
50 "execution_count": 4,
|
DaveM@1
|
51 "metadata": {
|
DaveM@1
|
52 "collapsed": false
|
DaveM@1
|
53 },
|
DaveM@1
|
54 "outputs": [
|
DaveM@1
|
55 {
|
DaveM@1
|
56 "ename": "IOError",
|
DaveM@1
|
57 "evalue": "[Errno 2] No such file or directory: 'wiki_en_wordids.txt'",
|
DaveM@1
|
58 "output_type": "error",
|
DaveM@1
|
59 "traceback": [
|
DaveM@1
|
60 "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
DaveM@1
|
61 "\u001b[0;31mIOError\u001b[0m Traceback (most recent call last)",
|
DaveM@1
|
62 "\u001b[0;32m<ipython-input-4-54645b7b2c38>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# load id->word mapping (the dictionary), one of the results of step 2 above\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mid2word\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgensim\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcorpora\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDictionary\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_from_text\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'wiki_en_wordids.txt'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;31m# load corpus iterator\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mmm\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgensim\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcorpora\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mMmCorpus\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'wiki_en_tfidf.mm'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;31m# mm = gensim.corpora.MmCorpus(bz2.BZ2File('wiki_en_tfidf.mm.bz2')) # use this if you compressed the TFIDF output\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
DaveM@1
|
63 "\u001b[0;32m/Library/Python/2.7/site-packages/gensim/corpora/dictionary.pyc\u001b[0m in \u001b[0;36mload_from_text\u001b[0;34m(fname)\u001b[0m\n\u001b[1;32m 342\u001b[0m \"\"\"\n\u001b[1;32m 343\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mDictionary\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 344\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msmart_open\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfname\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 345\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mlineno\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mline\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 346\u001b[0m \u001b[0mline\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_unicode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
DaveM@1
|
64 "\u001b[0;32m/Library/Python/2.7/site-packages/smart_open/smart_open_lib.pyc\u001b[0m in \u001b[0;36msmart_open\u001b[0;34m(uri, mode, **kw)\u001b[0m\n\u001b[1;32m 125\u001b[0m \u001b[0;31m# local files -- both read & write supported\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 126\u001b[0m \u001b[0;31m# compression, if any, is determined by the filename extension (.gz, .bz2)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 127\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfile_smart_open\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparsed_uri\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0muri_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 128\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mparsed_uri\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscheme\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m\"s3\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"s3n\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 129\u001b[0m \u001b[0;31m# Get an S3 host. It is required for sigv4 operations.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
DaveM@1
|
65 "\u001b[0;32m/Library/Python/2.7/site-packages/smart_open/smart_open_lib.pyc\u001b[0m in \u001b[0;36mfile_smart_open\u001b[0;34m(fname, mode)\u001b[0m\n\u001b[1;32m 556\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mmake_closing\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGzipFile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 557\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 558\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 559\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 560\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
|
DaveM@1
|
66 "\u001b[0;31mIOError\u001b[0m: [Errno 2] No such file or directory: 'wiki_en_wordids.txt'"
|
DaveM@1
|
67 ]
|
DaveM@1
|
68 }
|
DaveM@1
|
69 ],
|
DaveM@1
|
70 "source": [
|
DaveM@1
|
71 "# load id->word mapping (the dictionary) from wiki_en_wordids.txt, which must already exist in the working directory (e.g. generated by gensim's make_wiki script)\n",
|
DaveM@1
|
72 "id2word = gensim.corpora.Dictionary.load_from_text('wiki_en_wordids.txt')\n",
|
DaveM@1
|
73 "# load corpus iterator\n",
|
DaveM@1
|
74 "mm = gensim.corpora.MmCorpus('wiki_en_tfidf.mm')\n",
|
DaveM@1
|
75 "# mm = gensim.corpora.MmCorpus(bz2.BZ2File('wiki_en_tfidf.mm.bz2')) # use this if you compressed the TFIDF output\n",
|
DaveM@1
|
76 "\n",
|
DaveM@1
|
77 "print(mm)"
|
DaveM@1
|
78 ]
|
DaveM@1
|
79 },
|
DaveM@1
|
80 {
|
DaveM@1
|
81 "cell_type": "code",
|
DaveM@1
|
82 "execution_count": null,
|
DaveM@1
|
83 "metadata": {
|
DaveM@1
|
84 "collapsed": true
|
DaveM@1
|
85 },
|
DaveM@1
|
86 "outputs": [],
|
DaveM@1
|
87 "source": []
|
DaveM@1
|
88 }
|
DaveM@1
|
89 ],
|
DaveM@1
|
90 "metadata": {
|
DaveM@1
|
91 "kernelspec": {
|
DaveM@1
|
92 "display_name": "Python 2",
|
DaveM@1
|
93 "language": "python",
|
DaveM@1
|
94 "name": "python2"
|
DaveM@1
|
95 },
|
DaveM@1
|
96 "language_info": {
|
DaveM@1
|
97 "codemirror_mode": {
|
DaveM@1
|
98 "name": "ipython",
|
DaveM@1
|
99 "version": 2
|
DaveM@1
|
100 },
|
DaveM@1
|
101 "file_extension": ".py",
|
DaveM@1
|
102 "mimetype": "text/x-python",
|
DaveM@1
|
103 "name": "python",
|
DaveM@1
|
104 "nbconvert_exporter": "python",
|
DaveM@1
|
105 "pygments_lexer": "ipython2",
|
DaveM@1
|
106 "version": "2.7.10"
|
DaveM@1
|
107 }
|
DaveM@1
|
108 },
|
DaveM@1
|
109 "nbformat": 4,
|
DaveM@1
|
110 "nbformat_minor": 0
|
DaveM@1
|
111 }
|