dml-open-cliopatria: dml-cla/python/tuning_stats.py @ 0:718306e29690 (tip)

author:  Daniel Wolff
date:    Tue, 09 Feb 2016 21:05:06 +0100
summary: commiting public release
# -*- coding: utf-8 -*-

# Part of DML (Digital Music Laboratory)
# Copyright 2014-2015 Daniel Wolff, City University; Steven Hargreaves; Samer Abdallah, University of London

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

__author__ = 'wolffd, hargreavess, abdallahs'

# this script derives standard statistics for tuning frequency,
# in particular:
#   duration-weighted average
#   duration-weighted standard deviation
#   histogram
from rdflib import RDF, RDFS
from csvutils import *
from aggregate import *
from n3Parser import get_rdf_graph_from_n3
import numpy

def transcription_from_csv(filename):
    # we assume CSV rows of: time, duration, pitch, velocity, note_name
    # and return tuples of (time, duration, pitch, note_name)
    return csv_map_rows(filename, 5, lambda row: (float(row[0]), float(row[1]), float(row[2]), row[4]))

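# e.g. a hypothetical input row "0.50,0.25,440.0,80,A4" would be mapped
# to the tuple (0.5, 0.25, 440.0, 'A4')
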
def transcription_from_n3(filename):
    graph = get_rdf_graph_from_n3(filename)
    # collect (time, duration, frequency, note_name) for every af:Note event;
    # the first (space-separated) token of the af:feature literal is taken
    # to be the note frequency
    notes = [ ( event_time_prop(graph, ev, tl_ns.beginsAt),
                event_time_prop(graph, ev, tl_ns.duration),
                float(graph.value(ev, af_ns.feature).split(' ')[0]),
                graph.value(ev, RDFS.label) )
              for ev in graph.subjects(RDF.type, af_ns.Note) ]
    return notes

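# the graph shape this assumes (sketched; af_ns and tl_ns are expected to be
# the usual Audio Features and Timeline namespaces from the star imports):
#   <event> rdf:type af:Note ;
#           af:feature "440.0 ..." ;     # frequency as first token
#           rdfs:label "A4" .
# with onset and duration readable via event_time_prop()
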
parser_table = { 'n3' : transcription_from_n3,
                 'csv': transcription_from_csv }

# semitone distance of the pitch classes D and E below the reference A
offset = { 'D':7, 'E':5, 'A':0 }

def fold_pitch(freq, name):
    # fold the frequency of the note called 'name' (e.g. 'A3') onto the
    # reference octave: a perfectly tuned note yields 440.0 Hz (A4)
    return freq * 2**(4 + offset[name[0]]/12.0 - int(name[1]))

def tuning_note(n):
    # accept A, E and D in octaves 3 to 5 as tuning notes
    return n[1] in ['3','4','5'] and n[0] in ['A','E','D']

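# quick sanity checks (hypothetical values, not part of the pipeline):
#   fold_pitch(220.0, 'A3')   -> 440.0   (A3 is one octave below A4)
#   fold_pitch(329.63, 'E4')  -> ~440.0  (E4 is 5 semitones below A4)
#   tuning_note('A4') -> True ; tuning_note('C4') -> False
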
def per_file(inputs):
    means = []
    hists = []
    hist_edges = []

    def accum(item):
        # get (duration, normalised frequency) for all tuning pitches
        # (A, D and E in octaves 3-5, folded onto A4)
        a_notes = [ (note[1], fold_pitch(note[2], note[3]))
                    for note in decode_tagged(parser_table, item)
                    if tuning_note(note[3]) ]

        if len(a_notes) == 0:
            print_status("No notes for " + str(item))
        else:
            # get frequency and duration columns
            freq = numpy_column(a_notes, 1)
            dur = numpy_column(a_notes, 0)
            # get mean values per clip now,
            # then statistics over clips later
            avg, std = weighted_stats(freq, weights=dur)
            (counts, edges) = histogram(freq, 100, 390, 490, weights=dur)

            means.append(avg)
            hists.append(counts)
            if len(hist_edges) == 0:
                hist_edges.extend(edges)

    st = for_each(inputs, accum)

    avg, std = stats(numpy.array(means, dtype=float))

    # !!! does this make any sense?
    hist_mean, hist_std = stats(numpy.array(hists, dtype=float))

    return { 'result': { 'mean': avg, 'std-dev': std,
                         'hist': continuous_hist(hist_edges, hist_mean) },
             'stats' : st }

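# e.g. (hypothetical): three clips with duration-weighted mean tunings of
# 439.5, 440.0 and 441.0 Hz give an overall mean of
# (439.5 + 440.0 + 441.0) / 3 = 440.17 Hz, with the std-dev taken over
# the three per-clip means
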
def aggregate(inputs):
    notes = []  # will contain all notes from all inputs
    def accum(item):
        # get (duration, normalised frequency) for all tuning pitches
        # (A, D and E in octaves 3-5) and collect them all in notes
        notes.extend( [ (note[1], fold_pitch(note[2], note[3]))
                        for note in decode_tagged(parser_table, item)
                        if tuning_note(note[3]) ] )

    # run the accumulation over every input item
    st = for_each(inputs, accum)

    # get frequency and duration columns
    dur = numpy_column(notes, 0)
    freq = numpy_column(notes, 1)

    # get basic statistics
    avg, std = weighted_stats(freq, weights=dur)

    # get histogram weighted by duration
    counts, edges = histogram(freq, 100, 390, 490, weights=dur)

    return { 'result': { 'mean': avg, 'std_dev': std,
                         'hist': continuous_hist(edges, counts) },
             'stats' : st }

# convert one column, specified by datapos, to a numpy array
def numpy_column(data, datapos):
    return numpy.array([ row[datapos] for row in data ], dtype=float)

# calculates the histogram, normalised to relative frequencies
# nbins: number of bins
# lb: lower bound
# ub: upper bound
def histogram(colu, nbins, lb, ub, weights=None):
    counts, edges = numpy.histogram(colu, bins=nbins, range=[lb, ub], weights=weights)
    # cast to float so the normalisation also works for unweighted
    # (integer) counts under Python 2 division
    counts = counts.astype(float) / numpy.sum(counts)

    return (counts.tolist(), edges.tolist())

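# e.g. (hypothetical values): histogram([438.5, 441.5], 100, 390, 490,
# weights=[1.0, 3.0]) bins the range 390-490 Hz into 1 Hz wide bins and
# returns relative counts of 0.25 and 0.75, summing to 1.0
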
# calculates unweighted statistics element-wise over rows
# (per-clip means or per-clip histograms)
def stats(counts):
    avg = numpy.average(counts, axis=0).tolist()
    std = numpy.std(counts, axis=0).tolist()
    return (avg, std)

# calculates weighted statistics for numerical input
def weighted_stats(colu, weights=None):
    avg = numpy.average(colu, axis=0, weights=weights)
    # weighted standard deviation: the square root of the weighted
    # average of the squared deviations from the weighted mean
    std = numpy.sqrt(numpy.average((colu - avg)**2, axis=0, weights=weights))
    # numpy.std and numpy.median take no weights argument;
    # could use https://pypi.python.org/pypi/wquantiles for a weighted median
    return (avg, std)
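
# a minimal usage sketch (an assumption, not part of the original module):
# each input item is taken to be a (format, path) pair that decode_tagged()
# dispatches through parser_table; the file names below are hypothetical
if __name__ == '__main__':
    out = aggregate([('csv', 'clip1.csv'), ('n3', 'clip2.n3')])
    print_status("mean tuning frequency: %s Hz" % out['result']['mean'])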