diff dml-cla/python/places_hist.py @ 0:718306e29690 tip

commiting public release
author Daniel Wolff
date Tue, 09 Feb 2016 21:05:06 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dml-cla/python/places_hist.py	Tue Feb 09 21:05:06 2016 +0100
@@ -0,0 +1,71 @@
+# Part of DML (Digital Music Laboratory)
+# Copyright 2014-2015 Daniel Wolff, City University
+ 
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+# -*- coding: utf-8 -*-
+__author__='wolffd'
+
+
+from rdflib import RDF, RDFS
+from csvutils import *
+from aggregate import *
+from n3Parser import get_rdf_graph_from_n3 
+import numpy
+from scipy.spatial import distance
+from collections import defaultdict
+
+
+
+def per_file(inputs,opts=None):
+    """Build a histogram of the places named in the input records.
+
+    Each record seen by the accumulator must provide the keys 'place'
+    and 'list'; only 'place' contributes to the result.
+
+    inputs -- passed unchanged to for_each(), which presumably calls the
+              accumulator once per record (defined outside this module)
+    opts   -- optional settings; currently unused
+
+    Returns a dict with
+      'result': {'hist': <histogram dict as produced by histogram()>}
+      'stats' : whatever for_each() returned
+    """
+    places = []
+
+    def accum(item):
+        # Read both fields before storing anything, so a record lacking
+        # either key raises without leaving a half-counted place behind.
+        place = item['place']
+        item['list']  # lookup only: still required, no longer reported
+        places.append(place)
+
+    # accumulation
+    st = for_each(inputs, accum)
+
+    # Get the histogram. The reverse index was only needed to gather the
+    # per-place song lists, which AK requested be removed from the result.
+    histo, _ = histogram(places)
+
+    return { 'result': { 'hist': histo },
+             'stats' : st }
+             
+             
+
+# histogram that returns the reverse index as well
+def histogram(strin=()):
+    """Count how often each entry of strin occurs and index its positions.
+
+    strin -- iterable of hashable values (e.g. place names)
+
+    Returns a tuple (hist, index):
+      hist  -- {'counts': [...], 'places': [...]}, two parallel lists:
+               counts[i] is the number of occurrences of places[i]
+      index -- defaultdict(list) mapping each distinct entry to the list
+               of positions in strin at which it occurs
+    """
+    histo = dict()
+    index = defaultdict(list)
+    for num, row in enumerate(strin):
+        histo[row] = histo.get(row, 0) + 1
+        index[row].append(num)
+
+    # Materialise keys and values as real lists: on Python 3 they are dict
+    # views, which cannot be indexed or serialised like the lists Python 2
+    # returned. Both are taken from the same unmodified dict, so their
+    # orders correspond.
+    return ({'counts': list(histo.values()), 'places': list(histo.keys())}, index)