# Part of DML (Digital Music Laboratory)
# Copyright 2014-2015 Daniel Wolff, City University

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

# -*- coding: utf-8 -*-
__author__ = 'wolffd'
__date__ = "$11-Mar-2015 12:47:23$"

# This script derives standard statistics for tuning frequency;
# results are combined by year (range):
#   - average
#   - standard deviation

# test JSON:
#{ "module":"tuning_stats_byyear",
#  "function":"per_file",
#  "arguments": [[
#    {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
#    {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0002164XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
#    {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
#    {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0002164XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
#    {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "12.5.1993"},
#    {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
#    {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "1907"}]]
#}

from aggregate import *
import numpy
import re

# code shared with the single-collection tuning statistics
from tuning_stats import (fold_pitch, numpy_column, transcription_from_csv,
                          transcription_from_n3, tuning_note, stats,
                          weighted_stats)

# width of the year bins, in years
bin_width = 1

# transcription parsers, keyed by the tag of the tagged input value (n3 / csv)
parser_table = {
    'n3': transcription_from_n3,
    'csv': transcription_from_csv,
}


datematch = ''
# gets statistics on a per-file basis and returns histograms
# over the file averages
# inputs['filename']: filenames of fine-tune pitch extracted from recordings
# inputs['date']: string containing year-...
# for each recording
def _item_label(item):
    """Return a printable label for an input item, for status messages."""
    # Items are not guaranteed to carry a 'filename' entry (the test JSON in
    # the file header only provides 'transcription' and 'date'), so fall back
    # to the transcription reference instead of raising while reporting.
    for key in ('filename', 'transcription'):
        try:
            return str(item[key])
        except (KeyError, TypeError):
            continue
    return str(item)


def per_file(inputs):
    """Compute tuning-frequency statistics per year bin over many recordings.

    For every input item the duration-weighted mean of the folded
    frequencies of its tuning notes is computed. These per-file means are
    then grouped into bins of ``bin_width`` years and summarised with
    ``stats``.

    inputs: collection handed to ``for_each``; every item is indexed with
        'transcription' (a tagged value decoded through ``parser_table``)
        and 'date' (text containing a 4-digit year).

    returns: dict with
        'result': {'mean': [...], 'std-dev': [...], 'years': [...]} where
                  the three lists are parallel and 'years' holds the first
                  year of each bin (all empty if no file yielded data),
        'stats':  whatever ``for_each`` returned.
    """
    means = []
    years = []

    def accum(item):
        # skip items without a usable year
        y = parse_years(item['date'])
        if y <= 1000:
            print_status("No year found for " + _item_label(item))
            return

        # (duration, normalised frequency) for all tuning pitches (A3,A4,A5)
        a_notes = [(note[1], fold_pitch(note[2], note[3]))
                   for note in decode_tagged(parser_table, item['transcription'])
                   if tuning_note(note[3])]
        if not a_notes:
            print_status("No notes for " + _item_label(item))
            return

        freq = numpy_column(a_notes, 1)
        dur = numpy_column(a_notes, 0)

        # mean per clip now, statistics over clips later
        avg, _ = weighted_stats(freq, weights=dur)

        # year and mean are appended together so both lists stay aligned
        years.append(y)
        means.append(avg)

    # get statistics per file
    st = for_each(inputs, accum)

    # nothing usable: numpy.min/max would raise ValueError on empty input
    if not years:
        return {'result': {'mean': [], 'std-dev': [], 'years': []},
                'stats': st}

    year_arr = numpy.array(years)
    mean_arr = numpy.array(means, dtype=float)

    # range() excludes its stop value, so stop one past the latest year;
    # otherwise the newest recordings (and any single-year collection)
    # would never be assigned to a bin
    first_year = int(year_arr.min())
    last_year = int(year_arr.max())
    yearbins = list(range(first_year, last_year + 1, bin_width))

    avg = []
    std = []
    for year in yearbins:
        in_bin = (year_arr >= year) & (year_arr < year + bin_width)

        # NOTE(review): bins without any recording still call stats() with
        # an empty array, as before -- confirm stats() copes with that
        y_avg, y_std = stats(mean_arr[in_bin])
        avg.append(y_avg)
        std.append(y_std)

    return {'result': {'mean': avg, 'std-dev': std, 'years': yearbins},
            'stats': st}


# parses year from string
# input: string containing 4-digit year
def parse_years(date):
    """Return the first 4-digit year found in ``date``, or -1 if none.

    A year is any run of four digits starting with '1' or '2'
    (i.e. 1000-2999); the first such run in the text wins.

    date: text such as "2015-12-14", "12.5.1993" or "1907". ``None``
        yields -1; other non-text values (e.g. the integer 1907) are
        searched via their ``str()`` form instead of raising TypeError.

    returns: the year as int, or -1 when no year could be found.
    """
    if date is None:
        return -1

    try:
        found = re.search(r'[12]\d{3}', date)
    except TypeError:
        # not a text value: fall back to its printable form
        found = re.search(r'[12]\d{3}', str(date))

    if found is None:
        return -1
    return int(found.group(0))