# -*- coding: utf-8 -*-
# Part of DML (Digital Music Laboratory)
#
# Copyright 2014-2015 Daniel Wolff, City University; Steven Hargreaves; Samer Abdallah, University of London

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

__author__ = 'wolffd, hargreavess, abdallahs'

# this script derives standard statistics for tuning frequency,
# in particular:
# - average
# - standard deviation
# - histogram

from rdflib import RDF, RDFS
from csvutils import *
from aggregate import *
from n3Parser import get_rdf_graph_from_n3
import numpy

def transcription_from_csv(filename):
    # we assume CSV columns: time, duration, pitch, velocity, note_name
    # and return tuples of (time, duration, pitch, note_name)
    return csv_map_rows(filename, 5,
                        lambda row: (float(row[0]), float(row[1]), float(row[2]), row[4]))

def transcription_from_n3(filename):
    graph = get_rdf_graph_from_n3(filename)
    notes = [ ( event_time_prop(graph, ev, tl_ns.beginsAt),
                event_time_prop(graph, ev, tl_ns.duration),
                # the feature literal carries the frequency first; convert
                # to float so it matches the CSV parser's numeric pitch
                float(graph.value(ev, af_ns.feature).split(' ')[0]),
                graph.value(ev, RDFS.label) )
              for ev in graph.subjects(RDF.type, af_ns.Note) ]
    return notes

parser_table = { 'n3':  transcription_from_n3,
                 'csv': transcription_from_csv }

# semitones from each tuning pitch class up to the A above it
# (A needs no shift, E is 5 semitones below A, D is 7 below)
offset = { 'D':7, 'E':5, 'A':0 }

def fold_pitch(freq, name):
    # fold the frequency of a named note (e.g. 'A3') onto A4, so that
    # all tuning notes can be compared on one scale
    # semis =(4+offset[name[0]]-int(name[1]))
    # print_status("folding by %d" % semis)
    return freq * 2**(4 + offset[name[0]]/12.0 - int(name[1]))

def tuning_note(n):
    # a tuning note is an A, E or D in octaves 3-5
    return n[1] in ['3','4','5'] and n[0] in ['A','E','D']
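
# worked examples (hand-checked against the formula above):
#   fold_pitch(220.0, 'A3')  -> 440.0    (A3 folded up one octave to A4)
#   fold_pitch(293.66, 'D4') -> ~439.99  (D4 raised 7 semitones to A4)
#   tuning_note('A4') -> True;  tuning_note('C4') -> False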


def per_file(inputs):
    means = []
    hists = []
    hist_edges = []

    def accum(item):
        # get duration and normalised frequency for all tuning notes
        # (A, E and D in octaves 3-5)
        a_notes = [ (note[1], fold_pitch(note[2], note[3]))
                    for note in decode_tagged(parser_table, item)
                    if tuning_note(note[3]) ]

        if len(a_notes) == 0:
            print_status("No notes for " + str(item))
        else:
            # get frequency and duration columns
            freq = numpy_column(a_notes, 1)
            dur = numpy_column(a_notes, 0)
            # get mean values per clip now,
            # then statistics over clips later
            avg, std = weighted_stats(freq, weights=dur)
            (counts, edges) = histogram(freq, 100, 390, 490, weights=dur)

            means.append(avg)
            hists.append(counts)
            # every clip shares the same bin edges; store them once
            if len(hist_edges) == 0:
                hist_edges.extend(edges)

    st = for_each(inputs, accum)

    avg, std = stats(numpy.array(means, dtype=float))

    # !!! check: does averaging the per-clip histograms bin by bin
    # make sense here?
    hist_mean, hist_std = stats(numpy.array(hists, dtype=float))

    return { 'result': { 'mean': avg, 'std-dev': std,
                         'hist': continuous_hist(hist_edges, hist_mean) },
             'stats' : st }
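
# a minimal usage sketch; the exact shape of `inputs` is defined by
# decode_tagged/for_each in the aggregate module, and is assumed here
# (for illustration only) to be (tag, filename) pairs:
#
#   result = per_file([('csv', 'clip1.csv'), ('csv', 'clip2.csv')])
#   print_status(str(result['result']['mean']))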


def aggregate(inputs):
    notes = [] # will contain all notes from all inputs
    def accum(item):
        # get duration and normalised frequency for all tuning notes
        # (A, E and D in octaves 3-5) and collect them all in notes
        notes.extend( [ (note[1], fold_pitch(note[2], note[3]))
                        for note in decode_tagged(parser_table, item)
                        if tuning_note(note[3]) ] )

    # run the accumulation for each input; the result is called st so
    # that the stats() helper defined below is not shadowed
    st = for_each(inputs, accum)

    # get frequency and duration columns
    dur = numpy_column(notes, 0)
    freq = numpy_column(notes, 1)

    # get basic statistics
    avg, std = weighted_stats(freq, weights=dur)

    # get histogram weighted by duration
    counts, edges = histogram(freq, 100, 390, 490, weights=dur)

    return { 'result': { 'mean': avg, 'std_dev': std,
                         'hist': continuous_hist(edges, counts) },
             'stats' : st }


# convert one column, specified by datapos, to a numpy array
def numpy_column(data, datapos):
    return numpy.array([ row[datapos] for row in data ], dtype=float)
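
# e.g. numpy_column([(0.5, 440.0), (1.0, 442.0)], 1) -> array([ 440., 442.])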

# calculates the histogram, normalised so the counts sum to 1
# nbins: number of bins
# lb: lower bound
# ub: upper bound
def histogram(colu, nbins, lb, ub, weights=None):
    # weights defaults to None (uniform) rather than []: numpy.histogram
    # rejects an empty weight list for non-empty input
    counts, edges = numpy.histogram(colu, bins=nbins, range=[lb, ub], weights=weights)
    # normalise as float so integer counts do not floor-divide
    counts = counts.astype(float) / numpy.sum(counts)

    return (counts.tolist(), edges.tolist())
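
# e.g. histogram([440.0, 442.0], 100, 390, 490, weights=[1.0, 3.0]) uses
# 1 Hz bins over [390, 490]; the bin at 440 gets 0.25 of the weight and
# the bin at 442 gets 0.75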

# calculates unweighted statistics (bin-wise mean and standard deviation)
# across the rows of a stack of histograms
def stats(counts):
    avg = numpy.average(counts, axis=0).tolist()
    # .tolist() so both return values are plain lists, like avg
    std = numpy.std(counts, axis=0).tolist()
    return (avg, std)
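
# e.g. stats(numpy.array([[0., 1.], [1., 0.]])) -> ([0.5, 0.5], [0.5, 0.5]):
# the bin-wise mean and standard deviation over two histograms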

# calculates weighted statistics for numerical input;
# colu is expected to be a numpy array (as produced by numpy_column)
def weighted_stats(colu, weights=None):
    # weights defaults to None (uniform) rather than []: numpy.average
    # rejects an empty weight list for non-empty input
    avg = numpy.average(colu, axis=0, weights=weights)
    # weighted standard deviation
    std = numpy.sqrt(numpy.average((colu-avg)**2, axis=0, weights=weights))
    #std = numpy.std(colu, weights = weights).tolist()
    #med = numpy.median(colu, weights = weights).tolist()
    # could use https://pypi.python.org/pypi/wquantiles for weighted median
    return (avg, std)
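
# a small self-check of the numerical helpers (a sketch for illustration;
# the DML pipeline calls per_file/aggregate instead):
if __name__ == '__main__':
    freq = numpy.array([440.0, 444.0])
    dur = numpy.array([3.0, 1.0])
    # weighted mean (440*3 + 444*1)/4 = 441.0; weighted std = sqrt(3) ~ 1.732
    avg, std = weighted_stats(freq, weights=dur)
    print_status("weighted_stats: %s %s" % (avg, std))
    # normalised histogram counts always sum to 1
    counts, edges = histogram(freq, 100, 390, 490, weights=dur)
    print_status("histogram sum: %f" % sum(counts))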