Mercurial > hg > dml-open-cliopatria
diff dml-cla/python/places_hist.py @ 0:718306e29690 tip
committing public release
author | Daniel Wolff |
---|---|
date | Tue, 09 Feb 2016 21:05:06 +0100 |
parents | |
children |
line wrap: on
line diff
# -*- coding: utf-8 -*-
# Part of DML (Digital Music Laboratory)
# Copyright 2014-2015 Daniel Wolff, City University

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

"""Histogram of the places attached to a collection of input items."""

__author__ = 'wolffd'


from rdflib import RDF, RDFS
from csvutils import *
from aggregate import *
from n3Parser import get_rdf_graph_from_n3
import numpy
from scipy.spatial import distance
from collections import defaultdict


def per_file(inputs, opts=None):
    """Count how often each place occurs across ``inputs``.

    Parameters:
        inputs: collection handed unchanged to ``for_each`` (provided by one
            of the star imports above); every item passed to the callback
            must support ``item['place']``.
        opts:   currently unused; kept so existing callers that pass options
            keep working.

    Returns:
        A dict ``{'result': {'hist': <histogram>}, 'stats': <for_each result>}``
        where ``<histogram>`` is the first element returned by ``histogram``.

    The per-place song lists ('lists') were removed from the result on
    request (AK), so an item's 'list' entry is no longer read or collected.
    """
    places = []

    def accum(item):
        # Only the place is needed now that 'lists' is not part of the result.
        places.append(item['place'])

    # accumulation; whatever for_each returns is reported back as 'stats'
    st = for_each(inputs, accum)

    # The reverse index is only needed for the (removed) 'lists' output.
    histo, _index = histogram(places)

    return {'result': {'hist': histo},
            'stats': st}


def histogram(strin=None):
    """Build a histogram and a reverse index of the values in ``strin``.

    Parameters:
        strin: iterable of hashable values; ``None`` (the default) is
            treated as an empty input.

    Returns:
        A tuple ``(hist, index)`` where

        * ``hist`` is ``{'counts': [...], 'places': [...]}``: two lists of
          equal length in which ``counts[k]`` is the number of occurrences
          of ``places[k]``;
        * ``index`` is a ``defaultdict(list)`` mapping each distinct value
          to the positions at which it occurs in ``strin``.
    """
    if strin is None:
        strin = ()

    histo = dict()
    index = defaultdict(list)
    for num, row in enumerate(strin):
        histo[row] = histo.get(row, 0) + 1
        index[row].append(num)

    # list(...) keeps the result a pair of real lists on Python 3, where
    # dict.values()/dict.keys() are views; both follow the same dict order,
    # so counts and places stay aligned.
    return ({'counts': list(histo.values()), 'places': list(histo.keys())},
            index)