view dml-cla/python/places_hist.py @ 0:718306e29690 tip

commiting public release
author Daniel Wolff
date Tue, 09 Feb 2016 21:05:06 +0100
parents
children
line wrap: on
line source
# Part of DML (Digital Music Laboratory)
# Copyright 2014-2015 Daniel Wolff, City University
 
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

# -*- coding: utf-8 -*-
__author__='wolffd'


from rdflib import RDF, RDFS
from csvutils import *
from aggregate import *
from n3Parser import get_rdf_graph_from_n3 
import numpy
from scipy.spatial import distance
from collections import defaultdict



def per_file(inputs, opts=None):
    """Build a histogram of the 'place' values found in the input items.

    Parameters:
        inputs: passed unchanged to for_each() together with an accumulator
            callback; every item handed to that callback is indexed with
            the keys 'place' and 'list'.
        opts: accepted for interface compatibility with callers; it is not
            read by this function.

    Returns:
        A dict with two entries:
        'result' -- {'hist': h}, where h is the first element of the tuple
                    returned by histogram() for the collected places.
        'stats'  -- whatever for_each() returned.
    """
    places = []

    def accum(item):
        places.append(item['place'])
        # The per-place song lists are no longer part of the result (see the
        # note at the return statement), but 'list' is still looked up so an
        # item lacking it fails inside accum exactly as it did before.
        # NOTE(review): drop this lookup if 'list' is no longer required.
        _ = item['list']

    # accumulation
    st = for_each(inputs, accum)

    # Only the histogram is needed; the reverse index was previously used to
    # build a flattened list of song lists that was never returned (and that
    # shadowed the builtin `list`), so that dead work has been removed.
    histo, _ = histogram(places)

    # AK requested that the 'lists' entry be removed from 'result'.
    return {'result': {'hist': histo},
            'stats': st}
             
             

# Histogram that also returns a reverse index (value -> positions in the input)
def histogram(strin=()):
    """Count occurrences of each value and record where each one occurs.

    Parameters:
        strin: iterable of hashable values (e.g. place names). Defaults to
            an empty sequence.

    Returns:
        A tuple (hist, index):
        hist  -- dict with the keys 'counts' and 'places', two lists of
                 equal length where counts[k] is the number of times
                 places[k] occurs in strin.
        index -- defaultdict(list) mapping each distinct value to the list
                 of positions at which it appears in strin.
    """
    histo = {}
    index = defaultdict(list)
    for num, row in enumerate(strin):
        histo[row] = histo.get(row, 0) + 1
        index[row].append(num)

    # Materialise real lists: on Python 3 dict.values()/keys() are views,
    # which cannot be indexed or JSON-serialised. Both lists come from the
    # same unmodified dict, so their orders correspond.
    return ({'counts': list(histo.values()), 'places': list(histo.keys())}, index)