Mercurial > hg > dml-open-cliopatria
diff dml-cla/python/tuning_stats_byyear.py @ 0:718306e29690 tip
committing public release
author | Daniel Wolff |
---|---|
date | Tue, 09 Feb 2016 21:05:06 +0100 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dml-cla/python/tuning_stats_byyear.py Tue Feb 09 21:05:06 2016 +0100 @@ -0,0 +1,125 @@ +# Part of DML (Digital Music Laboratory) +# Copyright 2014-2015 Daniel Wolff, City University + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +# -*- coding: utf-8 -*- +__author__='wolffd' +__date__ ="$11-Mar-2015 12:47:23$" + +# this script derives standard statistics for tuning frequency, +# results are combined by year(range) +# average +# standard deviation + +# test JSON: +#{ "module":"tuning_stats_byyear", +# "function":"per_file", +# "arguments": [[ +# {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"}, +# {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0002164XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"}, +# {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"}, +# {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0002164XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"}, +# {"transcription": { "tag": "csv", 
#      "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "12.5.1993"},
#    {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
#    {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "1907"}]]
#}

from aggregate import *
import numpy
import re

# helpers shared with the single-collection tuning statistics module
from tuning_stats import (
    fold_pitch,
    numpy_column,
    transcription_from_csv,
    transcription_from_n3,
    tuning_note,
    stats,
    weighted_stats,
)

# bin width, in years, used when grouping recordings
bin_width = 1

# transcription parsers, keyed by format tag (n3 / csv)
parser_table = dict(n3=transcription_from_n3, csv=transcription_from_csv)


datematch = ''
# gets statistics on a per-file basis and returns histograms
# over the file averages
# inputs['filename']: filenames of fine-tune pitch extracted from recordings
# inputs['date']: string containing year-...
#   for each recording

# first run of four digits that starts with 1 or 2, i.e. a year 1000-2999
_YEAR_PATTERN = re.compile(r'[12]\d{3}')


def _item_label(item):
    """Return a printable name for *item*, for use in status messages.

    The documented test input carries no 'filename' key, so fall back to
    the 'transcription' entry and finally to the item itself instead of
    failing while trying to report a problem.
    """
    for key in ('filename', 'transcription'):
        try:
            return str(item[key])
        except (KeyError, IndexError, TypeError):
            continue
    return str(item)


def per_file(inputs):
    """Combine per-recording tuning frequencies into statistics per year bin.

    For every item a duration-weighted mean of the folded frequencies of its
    tuning notes is computed. These per-recording means are then grouped
    into bins of ``bin_width`` years and summarised with ``stats``.

    inputs: collection of items passed to ``for_each``. Each item is indexed
        with 'date' (a string containing a 4-digit year) and
        'transcription' (a tagged value decoded through ``parser_table``).

    Returns a dict with
        'result': {'mean': [...], 'std-dev': [...], 'years': [...]} --
            parallel lists with one entry per year bin, 'years' holding the
            first year of each bin. All three are empty when no recording
            has both a usable year and tuning notes.
        'stats': whatever ``for_each`` returned.
    """
    means = []
    years = []

    def accum(item):
        # skip recordings without a usable year
        year = parse_years(item['date'])
        if year <= 1000:
            print_status("No year found for " + _item_label(item))
            return

        # (duration, normalised frequency) for all tuning pitches (A3,A4,A5)
        a_notes = [(note[1], fold_pitch(note[2], note[3]))
                   for note in decode_tagged(parser_table, item['transcription'])
                   if tuning_note(note[3])]
        if not a_notes:
            print_status("No notes for " + _item_label(item))
            return

        # mean per clip now, statistics over clips later
        freq = numpy_column(a_notes, 1)
        dur = numpy_column(a_notes, 0)
        avg, _ = weighted_stats(freq, weights=dur)

        # only record the year if there is data for it
        years.append(year)
        means.append(avg)

    # get statistics per file
    st = for_each(inputs, accum)

    # nothing usable: return empty results instead of failing on min()/max()
    if not years:
        return {'result': {'mean': [], 'std-dev': [], 'years': []},
                'stats': st}

    # range() excludes its stop value, so add 1 to keep the bin that
    # contains the latest year (and to get a bin at all when every
    # recording is from the same year)
    yearbins = list(range(min(years), max(years) + 1, bin_width))

    avg = []
    std = []

    # statistics over the per-recording means of each year bin
    # NOTE(review): bins without recordings pass an empty array to stats();
    # confirm what it returns in that case
    for start in yearbins:
        in_bin = [m for (y, m) in zip(years, means)
                  if start <= y < start + bin_width]
        y_avg, y_std = stats(numpy.array(in_bin, dtype=float))
        avg.append(y_avg)
        std.append(y_std)

    return {'result': {'mean': avg, 'std-dev': std, 'years': yearbins},
            'stats': st}


def parse_years(date):
    """Extract the first 4-digit year (1000-2999) found in *date*.

    date: string containing a 4-digit year, e.g. "2015-12-14", "12.5.1993"
        or "1907". None yields -1; other non-string values are searched
        via their ``str()`` form.

    Returns the year as int, or -1 if no year is found.
    """
    try:
        found = _YEAR_PATTERN.search(date)
    except TypeError:
        # not a string: a missing date has no year, anything else
        # (e.g. an int year) is searched via its text form
        if date is None:
            return -1
        found = _YEAR_PATTERN.search(str(date))
    return int(found.group(0)) if found else -1