# -*- coding: utf-8 -*-
# Part of DML (Digital Music Laboratory)
# Copyright 2014-2015 Daniel Wolff, City University; Steven Hargreaves; Samer Abdallah, University of London

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA

__author__ = 'wolffd, hargreavess, abdallahs'

# This module derives standard statistics for tuning frequency,
# in particular:
#   - average
#   - standard deviation
#   - histogram

from rdflib import RDF, RDFS
from csvutils import *
from aggregate import *
from n3Parser import get_rdf_graph_from_n3
import numpy


def transcription_from_csv(filename):
    """Parse a transcription CSV file.

    We assume CSV columns: time, duration, pitch, velocity, note_name.
    Returns a list of (time, duration, pitch, note_name) tuples.
    """
    return csv_map_rows(filename, 5,
                        lambda row: (float(row[0]), float(row[1]),
                                     float(row[2]), row[4]))


def transcription_from_n3(filename):
    """Parse a transcription from an n3/RDF file.

    Returns a list of (time, duration, pitch, note_name) tuples, one per
    af:Note event found in the graph.
    """
    graph = get_rdf_graph_from_n3(filename)
    # BUG FIX: the original iterated `subject((RDF.type, af_ns.Note))`,
    # which is not a valid call; query the graph for all af:Note subjects.
    notes = [(event_time_prop(graph, ev, tl_ns.beginsAt),
              event_time_prop(graph, ev, tl_ns.duration),
              graph.value(ev, af_ns.feature).split(' ')[0],
              graph.value(ev, RDFS.label))
             for ev in graph.subjects(RDF.type, af_ns.Note)]
    # BUG FIX: the original built `notes` but fell off the end of the
    # function, implicitly returning None.
    return notes


# dispatch table: input format tag -> parser function
parser_table = {'n3': transcription_from_n3,
                'csv': transcription_from_csv}

# semitone offset of each supported tuning-note letter, relative to A
offset = {'D': 7, 'E': 5, 'A': 0}


def fold_pitch(freq, name):
    """Fold frequency `freq` of the note named `name` (e.g. 'A3', 'E4')
    into the A4 reference position, so all tuning notes can be compared
    against the reference A (~440 Hz)."""
    # shift by (4 - octave) octaves plus the letter's semitone offset
    return freq * 2 ** (4 + offset[name[0]] / 12.0 - int(name[1]))


def tuning_note(n):
    """True if note name `n` (e.g. 'A4') is one of the tuning pitches
    A/E/D in octaves 3-5."""
    return n[1] in ['3', '4', '5'] and n[0] in ['A', 'E', 'D']


def per_file(inputs):
    """Per-clip tuning statistics.

    The duration-weighted mean tuning frequency and histogram are computed
    per input clip; the returned mean/std/histogram are then (unweighted)
    statistics over the per-clip values.

    Returns {'result': {'mean', 'std-dev', 'hist'}, 'stats': for_each stats}.
    """
    means = []
    hists = []
    hist_edges = []

    def accum(item):
        # duration and normalised frequency for all tuning pitches
        # (A/E/D in octaves 3-5)
        a_notes = [(note[1], fold_pitch(note[2], note[3]))
                   for note in decode_tagged(parser_table, item)
                   if tuning_note(note[3])]

        if not a_notes:
            print_status("No notes for " + str(item))
        else:
            # frequency and duration columns
            freq = numpy_column(a_notes, 1)
            dur = numpy_column(a_notes, 0)
            # mean values per clip now, statistics over clips later
            avg, std = weighted_stats(freq, weights=dur)
            counts, edges = histogram(freq, 100, 390, 490, weights=dur)

            means.append(avg)
            hists.append(counts)
            # all histograms share the same bins; remember the edges once
            if not hist_edges:
                hist_edges.extend(edges)

    st = for_each(inputs, accum)

    avg, std = stats(numpy.array(means, dtype=float))

    # !!! does this make any sense? (bin-wise mean of per-clip histograms)
    hist_mean, hist_std = stats(numpy.array(hists, dtype=float))

    # NOTE(review): this key is 'std-dev' but aggregate() uses 'std_dev';
    # kept as-is for compatibility with existing consumers.
    return {'result': {'mean': avg, 'std-dev': std,
                       'hist': continuous_hist(hist_edges, hist_mean)},
            'stats': st}


def aggregate(inputs):
    """Pooled tuning statistics.

    All tuning notes from all inputs are pooled, then a single
    duration-weighted mean, standard deviation and histogram are computed.

    Returns {'result': {'mean', 'std_dev', 'hist'}, 'stats': for_each stats}.
    """
    notes = []  # will contain all tuning notes from all inputs

    def accum(item):
        # duration and normalised frequency for all tuning pitches
        # (A/E/D in octaves 3-5), collected into `notes`
        notes.extend([(note[1], fold_pitch(note[2], note[3]))
                      for note in decode_tagged(parser_table, item)
                      if tuning_note(note[3])])

    # execute accumulation for each input
    # (local renamed from `stats`, which shadowed the stats() function)
    st = for_each(inputs, accum)

    # frequency and duration columns
    dur = numpy_column(notes, 0)
    freq = numpy_column(notes, 1)

    # basic duration-weighted statistics
    avg, std = weighted_stats(freq, weights=dur)

    # histogram weighted by duration
    counts, edges = histogram(freq, 100, 390, 490, weights=dur)

    return {'result': {'mean': avg, 'std_dev': std,
                       'hist': continuous_hist(edges, counts)},
            'stats': st}


def numpy_column(data, datapos):
    """Extract column `datapos` from a sequence of rows as a float array."""
    return numpy.array([row[datapos] for row in data], dtype=float)


def histogram(colu, nbins, lb, ub, weights=None):
    """Normalised (optionally weighted) histogram of `colu`.

    nbins: number of bins; lb/ub: lower/upper bound of the range;
    weights: optional per-value weights.  BUG FIX: the default was a
    mutable list [], which numpy.histogram rejects for non-empty input;
    None means unweighted.

    Returns (counts, edges) as plain lists; counts sum to 1 (unless the
    histogram is empty, in which case they are left as zeros).
    """
    counts, edges = numpy.histogram(colu, bins=nbins, range=[lb, ub],
                                    weights=weights)
    total = numpy.sum(counts)
    # guard against division by zero when nothing falls inside [lb, ub];
    # float() also avoids integer division on unweighted int counts
    if total > 0:
        counts = counts / float(total)
    return (counts.tolist(), edges.tolist())


def stats(counts):
    """Unweighted column-wise mean and standard deviation.

    Returns (avg, std) as plain Python values.  BUG FIX: the original
    returned avg as a list but std as an ndarray; both are now converted
    for consistent, serialisable results.
    """
    avg = numpy.average(counts, axis=0).tolist()
    std = numpy.std(counts, axis=0).tolist()
    return (avg, std)


def weighted_stats(colu, weights=None):
    """Weighted mean and standard deviation of numerical input.

    weights: optional per-value weights.  BUG FIX: the default was a
    mutable list [], which numpy.average rejects for non-empty input;
    None means equal weighting.
    """
    avg = numpy.average(colu, axis=0, weights=weights)
    # weighted standard deviation (numpy.std has no weights parameter)
    std = numpy.sqrt(numpy.average((colu - avg) ** 2, axis=0, weights=weights))
    # could use https://pypi.python.org/pypi/wquantiles for weighted median
    return (avg, std)