Mercurial > hg > plosone_underreview
diff notebooks/results_30_seconds_and_figures.ipynb @ 95:4aa0763bf8d8 branch-tests
trying to clear up pickle and metadata
author | Maria Panteli <m.x.panteli@gmail.com> |
---|---|
date | Mon, 02 Oct 2017 19:00:35 +0100 |
parents | c829d4f20066 |
children | 5eba53437755 |
line wrap: on
line diff
--- a/notebooks/results_30_seconds_and_figures.ipynb Mon Oct 02 15:34:20 2017 +0100 +++ b/notebooks/results_30_seconds_and_figures.ipynb Mon Oct 02 19:00:35 2017 +0100 @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 32, + "execution_count": 44, "metadata": { "collapsed": false }, @@ -42,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 86, "metadata": { "collapsed": false }, @@ -87,23 +87,11 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 87, "metadata": { "collapsed": false }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/mariapanteli/anaconda/lib/python2.7/site-packages/pandas/core/indexing.py:121: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame\n", - "\n", - "See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", - " self._setitem_with_indexer(indexer, value)\n" - ] - } - ], + "outputs": [], "source": [ "# correct BL urls:\n", "bl_inds = np.where(np.isnan(ddf['BuyLinkTrackDownload']))[0]\n", @@ -115,7 +103,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 88, "metadata": { "collapsed": false }, @@ -126,7 +114,7 @@ "(8200, 109)" ] }, - "execution_count": 30, + "execution_count": 88, "metadata": {}, "output_type": "execute_result" } @@ -139,6 +127,381 @@ }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index([u'Audio', u'AlbumTitle', u'Artist', u'Artist_Album', u'BuyLinkTrackDownload', u'CatalogNumber', u'CatalogNumber_Album', u'Collection', u'ContentType', u'ContentType_Album', u'Country', u'Country_Album', u'Culture', u'Culture_Album', u'Decade', u'Decade_Album', u'File', u'Folder', u'Format_Album', u'GEO3major', u'Genre_Album', u'ISO3', u'ImageURL', 
# Build the public metadata table (../data/metadata.csv) and the matching
# feature pickle (../data/lda_data_8.pickle).
#
# Relies on names created in earlier notebook cells: `ddf` (the full metadata
# DataFrame), `X_list`, `Y`, `Yaudio`, and the `np`, `os`, `pickle` imports.
#
# Every derived name is computed exactly once from the raw `ddf['Audio']`
# paths, so re-running this cell gives the same result. The previous version
# rewrote `ddf_new['Audio']` and `Yaudio` from their own current values in two
# consecutive cells, so a second pass turned '14923_SampleAudio.wav' into
# '_14923_SampleAudio.wav' (and `Yaudio` into '__SampleAudio.wav').


def flatten_audio_path(path):
    """Return '<parent folder>_<file name>' for an audio file path.

    Parameters
    ----------
    path : str
        Path of an audio file.

    Returns
    -------
    str
        The name of the immediate parent folder and the file name joined by
        an underscore. A path with no directory part is returned unchanged,
        which makes the function safe to apply to an already flattened name.
    """
    parent, filename = os.path.split(path)
    if not parent:
        # Nothing to flatten; prepending '_' here is what corrupted the names.
        return filename
    return os.path.basename(parent) + '_' + filename


print(ddf.columns)

# Columns of `ddf` to publish, and the names they get in the exported table.
# The two lists are positionally paired.
cols_to_keep = ['Country', 'continent', 'REGION', 'LocDetails',
                'Language', 'Language_iso3', 'Culture', 'Genre_Album', 'Year', 'Decade',
                'songurls_Album', 'Audio', 'Speech', 'Melspec', 'Chroma', 'Melodia']
cols_rename = ['Country', 'Continent', 'Region', 'Location_details',
               'Language', 'Language_iso3', 'Culture', 'Genre', 'Year', 'Decade',
               'Url', 'Audio', 'Speech', 'Melspec', 'Chroma', 'Melodia']

# Fail with a clear message instead of printing the name and hitting a
# length-mismatch error when the columns are renamed.
missing_cols = [col for col in cols_to_keep if col not in ddf.columns]
if missing_cols:
    raise KeyError('columns missing from ddf: %s' % ', '.join(missing_cols))

# Explicit copy: `ddf_new` is modified below, and writing into a slice of
# `ddf` triggers SettingWithCopyWarning and may not take effect.
ddf_new = ddf[cols_to_keep].copy()
ddf_new.columns = cols_rename

# Flattened '<folder>_<file>' name per track, and the csv name used for the
# extracted features. The stem is cut at the FIRST dot, as before, so the
# generated csv names are unchanged.
audio_names = [flatten_audio_path(path) for path in ddf['Audio']]
csv_names = [name.split('.')[0] + '.csv' for name in audio_names]

# Replace the absolute paths with paths relative to the data folder. Whole
# columns are assigned at once rather than cell by cell through chained
# indexing.
ddf_new['Audio'] = [os.path.join('Audio', name) for name in audio_names]
for feature in ['Chroma', 'Melspec', 'Melodia', 'Speech']:
    ddf_new[feature] = [os.path.join(feature, name) for name in csv_names]

# NOTE(review): assumes `Yaudio` is row-aligned with `ddf`; the original loop
# overwrote Yaudio[i] with the name derived from row i -- confirm.
if len(Yaudio) != len(audio_names):
    raise ValueError('Yaudio has %d entries but ddf has %d rows'
                     % (len(Yaudio), len(audio_names)))
Yaudio = np.array(audio_names)

ddf_new.to_csv('../data/metadata.csv', index=False)
# Context manager so the pickle file is flushed and closed deterministically.
with open('../data/lda_data_8.pickle', 'wb') as pickle_file:
    pickle.dump([X_list, Y, Yaudio], pickle_file)

# Short previews only; printing all 8200 rows bloats the saved notebook.
print(Yaudio[:5])
print(ddf_new['Audio'].head())
America</td>\n", + " <td> United States</td>\n", + " <td> English</td>\n", + " <td> eng</td>\n", + " <td> NaN</td>\n", + " <td> American Folk</td>\n", + " <td> 1981</td>\n", + " <td> 1980s</td>\n", + " <td> http://www.folkways.si.edu/bobbie-mcgee/the-bo...</td>\n", + " <td> 14923_SampleAudio.wav</td>\n", + " <td> /import/c4dm-02/people/mariap/SpeechMusic/1492...</td>\n", + " <td> /import/c4dm-02/people/mariap/MelSpec/14923_Sa...</td>\n", + " <td> /import/c4dm-04/mariap/FeatureCsvs/Smoothie-py...</td>\n", + " <td> /import/c4dm-02/people/mariap/Melodia/14923_Sa...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td> United States of America</td>\n", + " <td> North America</td>\n", + " <td> North America</td>\n", + " <td> United States</td>\n", + " <td> English</td>\n", + " <td> eng</td>\n", + " <td> NaN</td>\n", + " <td> American Folk|Struggle &amp; Protest</td>\n", + " <td> 1987</td>\n", + " <td> 1980s</td>\n", + " <td> http://www.folkways.si.edu/joe-glazer/senior-c...</td>\n", + " <td> 14968_SampleAudio.wav</td>\n", + " <td> /import/c4dm-02/people/mariap/SpeechMusic/1496...</td>\n", + " <td> /import/c4dm-02/people/mariap/MelSpec/14968_Sa...</td>\n", + " <td> /import/c4dm-04/mariap/FeatureCsvs/Smoothie-py...</td>\n", + " <td> /import/c4dm-02/people/mariap/Melodia/14968_Sa...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td> Saint Lucia</td>\n", + " <td> South America</td>\n", + " <td> South America</td>\n", + " <td> Saint Lucia</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " <td> Caribbean|World</td>\n", + " <td> 1953</td>\n", + " <td> 1950s</td>\n", + " <td> http://www.folkways.si.edu/st-cecilias-day-ser...</td>\n", + " <td> 15137_SampleAudio.wav</td>\n", + " <td> /import/c4dm-02/people/mariap/SpeechMusic/1513...</td>\n", + " <td> /import/c4dm-02/people/mariap/MelSpec/15137_Sa...</td>\n", + " <td> /import/c4dm-04/mariap/FeatureCsvs/Smoothie-py...</td>\n", + " <td> 
/import/c4dm-02/people/mariap/Melodia/15137_Sa...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td> Saint Lucia</td>\n", + " <td> South America</td>\n", + " <td> South America</td>\n", + " <td> Saint Lucia</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " <td> Caribbean|World</td>\n", + " <td> 1953</td>\n", + " <td> 1950s</td>\n", + " <td> http://www.folkways.si.edu/la-rose-la-margueri...</td>\n", + " <td> 15138_SampleAudio.wav</td>\n", + " <td> /import/c4dm-02/people/mariap/SpeechMusic/1513...</td>\n", + " <td> /import/c4dm-02/people/mariap/MelSpec/15138_Sa...</td>\n", + " <td> /import/c4dm-04/mariap/FeatureCsvs/Smoothie-py...</td>\n", + " <td> /import/c4dm-02/people/mariap/Melodia/15138_Sa...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td> Saint Lucia</td>\n", + " <td> South America</td>\n", + " <td> South America</td>\n", + " <td> Saint Lucia</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " <td> NaN</td>\n", + " <td> Caribbean|World</td>\n", + " <td> 1953</td>\n", + " <td> 1950s</td>\n", + " <td> http://www.folkways.si.edu/la-rose-la-margueri...</td>\n", + " <td> 15139_SampleAudio.wav</td>\n", + " <td> /import/c4dm-02/people/mariap/SpeechMusic/1513...</td>\n", + " <td> /import/c4dm-02/people/mariap/MelSpec/15139_Sa...</td>\n", + " <td> /import/c4dm-04/mariap/FeatureCsvs/Smoothie-py...</td>\n", + " <td> /import/c4dm-02/people/mariap/Melodia/15139_Sa...</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Country Continent Region Location_details \\\n", + "0 United States of America North America North America United States \n", + "1 United States of America North America North America United States \n", + "2 Saint Lucia South America South America Saint Lucia \n", + "3 Saint Lucia South America South America Saint Lucia \n", + "4 Saint Lucia South America South America Saint Lucia \n", + "\n", + " Language Language_iso3 Culture Genre Year \\\n", + "0 
English eng NaN American Folk 1981 \n", + "1 English eng NaN American Folk|Struggle & Protest 1987 \n", + "2 NaN NaN NaN Caribbean|World 1953 \n", + "3 NaN NaN NaN Caribbean|World 1953 \n", + "4 NaN NaN NaN Caribbean|World 1953 \n", + "\n", + " Decade Url \\\n", + "0 1980s http://www.folkways.si.edu/bobbie-mcgee/the-bo... \n", + "1 1980s http://www.folkways.si.edu/joe-glazer/senior-c... \n", + "2 1950s http://www.folkways.si.edu/st-cecilias-day-ser... \n", + "3 1950s http://www.folkways.si.edu/la-rose-la-margueri... \n", + "4 1950s http://www.folkways.si.edu/la-rose-la-margueri... \n", + "\n", + " Audio Speech \\\n", + "0 14923_SampleAudio.wav /import/c4dm-02/people/mariap/SpeechMusic/1492... \n", + "1 14968_SampleAudio.wav /import/c4dm-02/people/mariap/SpeechMusic/1496... \n", + "2 15137_SampleAudio.wav /import/c4dm-02/people/mariap/SpeechMusic/1513... \n", + "3 15138_SampleAudio.wav /import/c4dm-02/people/mariap/SpeechMusic/1513... \n", + "4 15139_SampleAudio.wav /import/c4dm-02/people/mariap/SpeechMusic/1513... \n", + "\n", + " Melspec \\\n", + "0 /import/c4dm-02/people/mariap/MelSpec/14923_Sa... \n", + "1 /import/c4dm-02/people/mariap/MelSpec/14968_Sa... \n", + "2 /import/c4dm-02/people/mariap/MelSpec/15137_Sa... \n", + "3 /import/c4dm-02/people/mariap/MelSpec/15138_Sa... \n", + "4 /import/c4dm-02/people/mariap/MelSpec/15139_Sa... \n", + "\n", + " Chroma \\\n", + "0 /import/c4dm-04/mariap/FeatureCsvs/Smoothie-py... \n", + "1 /import/c4dm-04/mariap/FeatureCsvs/Smoothie-py... \n", + "2 /import/c4dm-04/mariap/FeatureCsvs/Smoothie-py... \n", + "3 /import/c4dm-04/mariap/FeatureCsvs/Smoothie-py... \n", + "4 /import/c4dm-04/mariap/FeatureCsvs/Smoothie-py... \n", + "\n", + " Melodia \n", + "0 /import/c4dm-02/people/mariap/Melodia/14923_Sa... \n", + "1 /import/c4dm-02/people/mariap/Melodia/14968_Sa... \n", + "2 /import/c4dm-02/people/mariap/Melodia/15137_Sa... \n", + "3 /import/c4dm-02/people/mariap/Melodia/15138_Sa... 
\n", + "4 /import/c4dm-02/people/mariap/Melodia/15139_Sa... " + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ddf_new.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['__SampleAudio.wav', '__SampleAudio.wav', '__SampleAudio.wav', ...,\n", + " '_D:_025A-C0004X4953XX-0100A0.mp3',\n", + " '_D:_025A-C0004X4984XX-0100A0.mp3',\n", + " '_D:_025A-C0004X4996XX-0400A0.mp3'], \n", + " dtype='|S80')" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Yaudio" + ] + }, + { + "cell_type": "code", "execution_count": 43, "metadata": { "collapsed": false