DaveM@1: { DaveM@1: "cells": [ DaveM@1: { DaveM@1: "cell_type": "code", DaveM@1: "execution_count": 1, DaveM@1: "metadata": { DaveM@1: "collapsed": false DaveM@1: }, DaveM@1: "outputs": [ DaveM@1: { DaveM@1: "name": "stdout", DaveM@1: "output_type": "stream", DaveM@1: "text": [ DaveM@1: "1.10.0\n" DaveM@1: ] DaveM@1: } DaveM@1: ], DaveM@1: "source": [ DaveM@1: "import six\n", DaveM@1: "print six.__version__\n" DaveM@1: ] DaveM@1: }, DaveM@1: { DaveM@1: "cell_type": "code", DaveM@1: "execution_count": 2, DaveM@1: "metadata": { DaveM@1: "collapsed": false DaveM@1: }, DaveM@1: "outputs": [], DaveM@1: "source": [ DaveM@1: "import gensim\n" DaveM@1: ] DaveM@1: }, DaveM@1: { DaveM@1: "cell_type": "code", DaveM@1: "execution_count": 3, DaveM@1: "metadata": { DaveM@1: "collapsed": false DaveM@1: }, DaveM@1: "outputs": [], DaveM@1: "source": [ DaveM@1: "import logging\n", DaveM@1: "import gensim\n", DaveM@1: "import bz2\n", DaveM@1: "logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)" DaveM@1: ] DaveM@1: }, DaveM@1: { DaveM@1: "cell_type": "code", DaveM@1: "execution_count": 4, DaveM@1: "metadata": { DaveM@1: "collapsed": false DaveM@1: }, DaveM@1: "outputs": [ DaveM@1: { DaveM@1: "ename": "IOError", DaveM@1: "evalue": "[Errno 2] No such file or directory: 'wiki_en_wordids.txt'", DaveM@1: "output_type": "error", DaveM@1: "traceback": [ DaveM@1: "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", DaveM@1: "\u001b[0;31mIOError\u001b[0m Traceback (most recent call last)", DaveM@1: "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# load id->word mapping (the dictionary), one of the results of step 2 above\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mid2word\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgensim\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcorpora\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDictionary\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_from_text\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'wiki_en_wordids.txt'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;31m# load corpus iterator\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mmm\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgensim\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcorpora\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mMmCorpus\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'wiki_en_tfidf.mm'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;31m# mm = gensim.corpora.MmCorpus(bz2.BZ2File('wiki_en_tfidf.mm.bz2')) # use this if you compressed the TFIDF output\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", DaveM@1: "\u001b[0;32m/Library/Python/2.7/site-packages/gensim/corpora/dictionary.pyc\u001b[0m in \u001b[0;36mload_from_text\u001b[0;34m(fname)\u001b[0m\n\u001b[1;32m 342\u001b[0m \"\"\"\n\u001b[1;32m 343\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mDictionary\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 344\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msmart_open\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfname\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 345\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mlineno\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mline\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 346\u001b[0m \u001b[0mline\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_unicode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", DaveM@1: "\u001b[0;32m/Library/Python/2.7/site-packages/smart_open/smart_open_lib.pyc\u001b[0m in \u001b[0;36msmart_open\u001b[0;34m(uri, mode, **kw)\u001b[0m\n\u001b[1;32m 125\u001b[0m \u001b[0;31m# local files -- both read & write supported\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 126\u001b[0m \u001b[0;31m# compression, if any, is determined by the filename extension (.gz, .bz2)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 127\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfile_smart_open\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparsed_uri\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0muri_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 128\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mparsed_uri\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscheme\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m\"s3\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"s3n\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 129\u001b[0m \u001b[0;31m# Get an S3 host. It is required for sigv4 operations.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", DaveM@1: "\u001b[0;32m/Library/Python/2.7/site-packages/smart_open/smart_open_lib.pyc\u001b[0m in \u001b[0;36mfile_smart_open\u001b[0;34m(fname, mode)\u001b[0m\n\u001b[1;32m 556\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mmake_closing\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mGzipFile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 557\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 558\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 559\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 560\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", DaveM@1: "\u001b[0;31mIOError\u001b[0m: [Errno 2] No such file or directory: 'wiki_en_wordids.txt'" DaveM@1: ] DaveM@1: } DaveM@1: ], DaveM@1: "source": [ DaveM@1: "# load id->word mapping (the dictionary), one of the results of step 2 above\n", DaveM@1: "id2word = gensim.corpora.Dictionary.load_from_text('wiki_en_wordids.txt')\n", DaveM@1: "# load corpus iterator\n", DaveM@1: "mm = gensim.corpora.MmCorpus('wiki_en_tfidf.mm')\n", DaveM@1: "# mm = gensim.corpora.MmCorpus(bz2.BZ2File('wiki_en_tfidf.mm.bz2')) # use this if you compressed the TFIDF output\n", DaveM@1: "\n", DaveM@1: "print(mm)" DaveM@1: ] DaveM@1: }, DaveM@1: { DaveM@1: "cell_type": "code", DaveM@1: "execution_count": null, DaveM@1: "metadata": { DaveM@1: "collapsed": true DaveM@1: }, DaveM@1: "outputs": [], DaveM@1: "source": [] DaveM@1: } DaveM@1: ], DaveM@1: "metadata": { DaveM@1: "kernelspec": { DaveM@1: "display_name": "Python 2", DaveM@1: "language": "python", DaveM@1: "name": "python2" DaveM@1: }, DaveM@1: "language_info": { DaveM@1: "codemirror_mode": { DaveM@1: "name": "ipython", DaveM@1: "version": 2 DaveM@1: }, DaveM@1: "file_extension": ".py", DaveM@1: "mimetype": "text/x-python", DaveM@1: "name": "python", DaveM@1: "nbconvert_exporter": "python", DaveM@1: "pygments_lexer": "ipython2", DaveM@1: "version": "2.7.10" DaveM@1: } DaveM@1: }, DaveM@1: "nbformat": 4, DaveM@1: "nbformat_minor": 0 DaveM@1: }