Daniel@0
|
1 # Part of DML (Digital Music Laboratory)
|
Daniel@0
|
2 # Copyright 2014-2015 Daniel Wolff, City University
|
Daniel@0
|
3
|
Daniel@0
|
4 # This program is free software; you can redistribute it and/or
|
Daniel@0
|
5 # modify it under the terms of the GNU General Public License
|
Daniel@0
|
6 # as published by the Free Software Foundation; either version 2
|
Daniel@0
|
7 # of the License, or (at your option) any later version.
|
Daniel@0
|
8 #
|
Daniel@0
|
9 # This program is distributed in the hope that it will be useful,
|
Daniel@0
|
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
Daniel@0
|
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
Daniel@0
|
12 # GNU General Public License for more details.
|
Daniel@0
|
13 #
|
Daniel@0
|
14 # You should have received a copy of the GNU General Public
|
Daniel@0
|
15 # License along with this library; if not, write to the Free Software
|
Daniel@0
|
16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
Daniel@0
|
17
|
Daniel@0
|
18 # -*- coding: utf-8 -*-
|
Daniel@0
|
19 __author__='wolffd'
|
Daniel@0
|
20 __date__ ="$11-Mar-2015 12:47:23$"
|
Daniel@0
|
21
|
Daniel@0
|
22 # this script derives standard statistics for tuning frequency,
|
Daniel@0
|
23 # results are combined by year(range)
|
Daniel@0
|
24 # average
|
Daniel@0
|
25 # standard deviation
|
Daniel@0
|
26
|
Daniel@0
|
27 # test JSON:
|
Daniel@0
|
28 #{ "module":"tuning_stats_byyear",
|
Daniel@0
|
29 # "function":"per_file",
|
Daniel@0
|
30 # "arguments": [[
|
Daniel@0
|
31 # {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
|
Daniel@0
|
32 # {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0002164XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
|
Daniel@0
|
33 # {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
|
Daniel@0
|
34 # {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0002164XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
|
Daniel@0
|
35 # {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "12.5.1993"},
|
Daniel@0
|
36 # {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
|
Daniel@0
|
37 # {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "1907"}]]
|
Daniel@0
|
38 #}
|
Daniel@0
|
39
|
Daniel@0
|
40 from aggregate import *
|
Daniel@0
|
41 import numpy
|
Daniel@0
|
42 import re
|
Daniel@0
|
43
|
Daniel@0
|
44 # get code for single-collection tuning statistics
|
Daniel@0
|
45 from tuning_stats import fold_pitch, numpy_column, transcription_from_csv, \
|
Daniel@0
|
46 transcription_from_n3,tuning_note,stats, weighted_stats
|
Daniel@0
|
47
|
Daniel@0
|
# Width of each year bin used by per_file, in years.
bin_width = 1

# Maps a transcription tag ('n3' or 'csv') to the function that parses it;
# handed to decode_tagged together with the tagged transcription value.
parser_table = dict(
    n3=transcription_from_n3,
    csv=transcription_from_csv,
)

# Not referenced by any function visible in this file.
datematch = ""
|
Daniel@0
|
57 # gets statistics on a per-file basis and returns histograms
|
Daniel@0
|
58 # over the file averages
|
Daniel@0
|
59 # inputs['filename']: filenames of fine-tune pitch extracted from recordings
|
Daniel@0
|
60 # inputs['date']: string containing year-... for each recording
|
Daniel@0
|
def per_file(inputs):
    """Derive per-year tuning statistics from per-file average frequencies.

    For each input item with a usable year, the weighted mean of its folded
    tuning-note frequencies is computed (weights are taken from column 1 of
    the decoded notes, which the original comments describe as duration).
    The per-file means are then grouped into bins of ``bin_width`` years and
    summarised with ``stats``.

    Args:
        inputs: items processed through ``for_each``. Each item is indexed
            with 'date' (a string containing a 4-digit year) and
            'transcription' (a tagged value decoded via ``parser_table``).

    Returns:
        dict with
          'result': {'mean': [...], 'std-dev': [...], 'years': [...]},
              three parallel lists with one entry per year bin, where
              'years' holds the first year of each bin;
          'stats': whatever ``for_each`` returned.
        All three result lists are empty when no item yielded data.
    """
    means = []
    years = []

    def describe(item):
        # Items in the documented test JSON carry no 'filename' key, so fall
        # back to the transcription reference instead of raising KeyError
        # while merely trying to log a status message.
        for key in ('filename', 'transcription'):
            try:
                return str(item[key])
            except KeyError:
                continue
        return str(item)

    def accum(item):
        # see if this has a valid date
        year = parse_years(item['date'])
        if year <= 1000:
            print_status("No year found for " + describe(item))
            return

        # (weight, folded frequency) for every note accepted by tuning_note
        a_notes = [(note[1], fold_pitch(note[2], note[3]))
                   for note in decode_tagged(parser_table,
                                             item['transcription'])
                   if tuning_note(note[3])]

        if not a_notes:
            print_status("No notes for " + describe(item))
            return

        # get frequency and weight columns
        freq = numpy_column(a_notes, 1)
        dur = numpy_column(a_notes, 0)

        # mean per clip now, statistics over clips later;
        # the per-clip deviation is not used
        avg, _ = weighted_stats(freq, weights=dur)

        # only record a year when there is data for it
        years.append(year)
        means.append(avg)

    # get statistics per file
    st = for_each(inputs, accum)

    # nothing usable: numpy.min/max would raise on an empty array
    if not years:
        return {'result': {'mean': [], 'std-dev': [], 'years': []},
                'stats': st}

    year_arr = numpy.array(years)
    mean_arr = numpy.array(means, dtype=float)

    # Bin start years. The upper bound is max + 1 because range() excludes
    # its stop value; without it the newest year would never get a bin.
    # A real list of plain ints keeps the result serialisable.
    yearbins = list(range(int(year_arr.min()),
                          int(year_arr.max()) + 1,
                          bin_width))

    avg = []
    std = []

    # statistics over the per-file means falling into each bin
    for start in yearbins:
        in_bin = (year_arr >= start) & (year_arr < start + bin_width)
        # NOTE(review): bins without any recording pass an empty array to
        # stats(), exactly as before - confirm stats() copes with that.
        y_avg, y_std = stats(mean_arr[in_bin])
        avg.append(y_avg)
        std.append(y_std)

    return {'result': {'mean': avg, 'std-dev': std, 'years': yearbins},
            'stats': st}
|
Daniel@0
|
115
|
Daniel@0
|
116 # parses year from string
|
Daniel@0
|
117 # input: string containing 4-digit year
|
Daniel@0
|
def parse_years(date):
    """Extract the first 4-digit year (1000-2999) found in ``date``.

    Args:
        date: value containing a year, normally a string such as
            "2015-12-14", "12.5.1993" or "1907". Non-string values
            (e.g. a numeric year) are converted to text first.

    Returns:
        The year as an int, or -1 when ``date`` is None or contains no
        4-digit sequence starting with 1 or 2.
    """
    if date is None:
        return -1

    pattern = r'[12]\d{3}'
    try:
        found = re.search(pattern, date)
    except TypeError:
        # not a string (e.g. an int year from JSON): search its text form
        found = re.search(pattern, str(date))

    if found:
        return int(found.group(0))
    return -1
|
Daniel@0
|
124
|
Daniel@0
|
125
|