dml-open-cliopatria: dml-cla/python/tuning_stats_byyear.py @ 0:718306e29690 (tip)

changeset:  0:718306e29690
summary:    commiting public release
author:     Daniel Wolff
date:       Tue, 09 Feb 2016 21:05:06 +0100
parents:    (none)
children:   (none)
# Part of DML (Digital Music Laboratory)
# Copyright 2014-2015 Daniel Wolff, City University

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

# -*- coding: utf-8 -*-
__author__ = 'wolffd'
__date__ = "$11-Mar-2015 12:47:23$"

# this script derives standard statistics for tuning frequency;
# results are combined by year (range):
# - average
# - standard deviation

# test JSON:
# { "module": "tuning_stats_byyear",
#   "function": "per_file",
#   "arguments": [[
#     {"transcription": { "tag": "csv", "value": "D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
#     {"transcription": { "tag": "csv", "value": "D:\\mirg\\Pitch_Analysis\\026A-1CL0002164XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
#     {"transcription": { "tag": "csv", "value": "D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
#     {"transcription": { "tag": "csv", "value": "D:\\mirg\\Pitch_Analysis\\026A-1CL0002164XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
#     {"transcription": { "tag": "csv", "value": "D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "12.5.1993"},
#     {"transcription": { "tag": "csv", "value": "D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
#     {"transcription": { "tag": "csv", "value": "D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "1907"}]]
# }
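
# for an ad-hoc check outside the DML service, the request above can be fed to
# per_file() directly from Python; this is only a sketch: the file name
# 'test_request.json' is an example and the CSV paths above are not part of
# this repository:
#
#   import json
#   from tuning_stats_byyear import per_file
#   with open('test_request.json') as f:        # the JSON document above
#       request = json.load(f)
#   output = per_file(request["arguments"][0])  # list of transcription/date items
#   print(output["result"])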

from aggregate import *
import numpy
import re

# get code for single-collection tuning statistics
from tuning_stats import fold_pitch, numpy_column, transcription_from_csv, \
    transcription_from_n3, tuning_note, stats, weighted_stats

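# note (an assumption inferred from how they are used below): stats() and
# weighted_stats() are expected to return a (mean, standard deviation) pair,
# with weighted_stats() weighting each frequency by the duration passed
# via the 'weights' argument
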
# width of bins in years
bin_width = 1

# parsers for n3 / csv
parser_table = { 'n3': transcription_from_n3,
                 'csv': transcription_from_csv }
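# (each 'transcription' input carries a "tag" field, "csv" or "n3" in the test
#  JSON; decode_tagged() from the aggregate module presumably uses this table
#  to pick the matching parser for that tag)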


datematch = ''  # (currently unused in this module)

# gets statistics on a per-file basis and returns per-year statistics
# (mean, standard deviation) over the file averages
# inputs[i]['transcription']: tagged (csv / n3) note transcription extracted from a recording
# inputs[i]['date']: string containing a 4-digit year for each recording
# inputs[i]['filename'] (optional): source filename, only used in status messages
def per_file(inputs):
    means = []
    years = []

    def accum(item):
        # see if this has a valid date
        y = parse_years(item['date'])
        if y > 1000:

            # get duration and normalised frequency for all tuning pitches (A3, A4, A5)
            a_notes = [ (note[1], fold_pitch(note[2], note[3]))
                        for note in decode_tagged(parser_table, item['transcription'])
                        if tuning_note(note[3]) ]

            if len(a_notes) == 0:
                # fall back to the transcription reference if no 'filename' is given
                print_status("No notes for " + str(item.get('filename', item['transcription'])))
            else:
                # get frequency and duration columns
                freq = numpy_column(a_notes, 1)
                dur = numpy_column(a_notes, 0)

                # get mean values per clip now,
                # then statistics over clips later
                avg, std = weighted_stats(freq, weights=dur)

                # only append years if there is data
                years.append(y)
                means.append(avg)
        else:
            print_status("No year found for " + str(item.get('filename', item['transcription'])))

    # get statistics per file
    st = for_each(inputs, accum)

    # get year bins; extend the end by one bin width so that recordings from
    # the latest year are not dropped (range() excludes its end point)
    years = numpy.array(years)
    yearbins = list(range(numpy.min(years), numpy.max(years) + bin_width, bin_width))
    #yearbinends = numpy.array(yearbins) + bin_width

    avg = []
    std = []

    # foreach over the year-bundled outputs
    for year in yearbins:
        valid_idx = [i for (i, val) in enumerate(years) if val >= year and val < (year + bin_width)]
        valid_means = [means[i] for i in valid_idx]

        # get statistics
        y_avg, y_std = stats(numpy.array(valid_means, dtype=float))
        avg.append(y_avg)
        std.append(y_std)

    return { 'result': { 'mean': avg, 'std-dev': std, 'years': yearbins},
             'stats': st }

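# illustrative shape of the return value (numbers are made up, not real output):
#   { 'result': { 'mean':    [439.8, 440.1, ...],
#                 'std-dev': [  1.2,   0.9, ...],
#                 'years':   [ 1907,  1908, ...] },
#     'stats': <whatever for_each() reports about the processed files> }
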
# parses a 4-digit year (1000-2999) from a free-form date string,
# e.g. "2015-12-14", "12.5.1993" or "1907"; returns -1 if no year is found
def parse_years(date):
    yearstr = re.search(r'[12]\d{3}', date)
    if yearstr:
        return int(yearstr.group(0))
    else:
        return -1
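
if __name__ == '__main__':
    # minimal self-check of parse_years on the date formats that appear in the
    # test JSON above (an added example; it only runs when this file is executed
    # directly and requires aggregate / tuning_stats to be importable);
    # expected output: 2015, 1993, 1907, and -1 for a string without a year
    for d in ["2015-12-14", "12.5.1993", "1907", "undated"]:
        print("%s -> %d" % (d, parse_years(d)))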