Mercurial > hg > dml-open-cliopatria
view dml-cla/python/tuning_stats_byyear.py @ 0:718306e29690 tip
committing public release
author | Daniel Wolff |
---|---|
date | Tue, 09 Feb 2016 21:05:06 +0100 |
parents | |
children |
line wrap: on
line source
# Part of DML (Digital Music Laboratory)
# Copyright 2014-2015 Daniel Wolff, City University

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

# -*- coding: utf-8 -*-
__author__='wolffd'
__date__ ="$11-Mar-2015 12:47:23$"

# this script derives standard statistics for tuning frequency,
# results are combined by year(range)
# average
# standard deviation

# test JSON:
#{ "module":"tuning_stats_byyear",
#      "function":"per_file",
#      "arguments": [[
#      {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
#      {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0002164XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
#      {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
#      {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0002164XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
#      {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "12.5.1993"},
#      {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
#      {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "1907"}]]
#}

from aggregate import *
import numpy
import re

# get code for single-collection tuning statistics
from tuning_stats import fold_pitch, numpy_column, transcription_from_csv, \
    transcription_from_n3,tuning_note,stats, weighted_stats

# width of bins in years
bin_width = 1

# parsers for n3 / csv
parser_table = { 'n3':transcription_from_n3,
                 'csv':transcription_from_csv }

datematch = ''


def _describe_item(item):
    """Return a printable label for an input item, for status messages.

    Prefers item['filename'] when present; otherwise falls back to the
    'value' of the tagged transcription (the path in the test JSON above),
    so that reporting a skipped item can never raise KeyError itself.
    """
    if 'filename' in item:
        return str(item['filename'])
    transcription = item.get('transcription')
    if isinstance(transcription, dict) and 'value' in transcription:
        return str(transcription['value'])
    return str(transcription)


def per_file(inputs):
    """Get tuning statistics per file and combine the file averages by year.

    inputs: sequence of items, each providing
        item['transcription']: tagged transcription ('n3' or 'csv', see
                               parser_table) of the notes of one recording
        item['date']:          string containing a 4-digit year
        item['filename']:      optional, only used in status messages

    Returns a dict with
        'result': {'mean': [...], 'std-dev': [...], 'years': [...]}, one
                  entry per year bin of width bin_width, from the earliest
                  to the latest year found (both included)
        'stats':  whatever for_each() returned for the pass over inputs

    Items without a parseable year or without any tuning notes are reported
    through print_status() and skipped. If no item yields data, the three
    result lists are empty.
    """
    means = []
    years = []

    def accum(item):
        # see if this has a valid date
        y = parse_years(item['date'])
        if y <= 1000:
            print_status("No year found for " + _describe_item(item))
            return

        # get duration and normalised frequency for all tuning pitches (A3,A4,A5)
        a_notes = [(note[1], fold_pitch(note[2], note[3]))
                   for note in decode_tagged(parser_table, item['transcription'])
                   if tuning_note(note[3])]
        if not a_notes:
            print_status("No notes for " + _describe_item(item))
            return

        # get frequency and duration columns
        freq = numpy_column(a_notes, 1)
        dur = numpy_column(a_notes, 0)

        # get mean values per clip now,
        # then statistics over clips later
        avg, _ = weighted_stats(freq, weights=dur)

        # only append years if there is data
        years.append(y)
        means.append(avg)

    # get statistics per file
    st = for_each(inputs, accum)

    # nothing usable: numpy.min / numpy.max would raise on an empty array
    if not years:
        return {'result': {'mean': [], 'std-dev': [], 'years': []},
                'stats': st}

    # get year bins; the stop value is max + 1 because range() excludes its
    # stop, otherwise the latest year (or a single common year) is dropped
    first_year = int(min(years))
    last_year = int(max(years))
    yearbins = list(range(first_year, last_year + 1, bin_width))

    avg = []
    std = []

    # foreach over the year-bundled outputs
    for year in yearbins:
        valid_means = [m for (m, y) in zip(means, years)
                       if year <= y < year + bin_width]

        # get statistics
        # NOTE(review): bins without recordings pass an empty array to
        # stats(), as before; confirm that stats() handles this as intended
        y_avg, y_std = stats(numpy.array(valid_means, dtype=float))
        avg.append(y_avg)
        std.append(y_std)

    return {'result': {'mean': avg, 'std-dev': std, 'years': yearbins},
            'stats': st}


def parse_years(date):
    """Parse the year from a date.

    date: string containing a 4-digit year starting with 1 or 2,
          e.g. "2015-12-14", "12.5.1993" or "1907"; non-string values
          (numbers, None) are searched via their repr() instead of raising

    Returns the first such year as an int, or -1 if none is found.
    """
    year_pattern = r'[12]\d{3}'
    try:
        yearstr = re.search(year_pattern, date)
    except TypeError:
        # not a string: a numeric year still parses, None yields no match
        yearstr = re.search(year_pattern, repr(date))

    if yearstr:
        return int(yearstr.group(0))
    return -1