dml-open-cliopatria: dml-cla/python/tuning_stats_byyear.py @ 0:718306e29690 (tip)

changeset:  0:718306e29690
summary:    commiting public release
author:     Daniel Wolff
date:       Tue, 09 Feb 2016 21:05:06 +0100
parents:    (none)
children:   (none)
# Part of DML (Digital Music Laboratory)
# Copyright 2014-2015 Daniel Wolff, City University

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

# -*- coding: utf-8 -*-
__author__ = 'wolffd'
__date__ = "$11-Mar-2015 12:47:23$"

# this script derives standard statistics for tuning frequency;
# results are combined by year (range):
# - average
# - standard deviation

# test JSON:
# { "module": "tuning_stats_byyear",
#   "function": "per_file",
#   "arguments": [[
#     {"transcription": { "tag": "csv", "value": "D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
#     {"transcription": { "tag": "csv", "value": "D:\\mirg\\Pitch_Analysis\\026A-1CL0002164XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
#     {"transcription": { "tag": "csv", "value": "D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
#     {"transcription": { "tag": "csv", "value": "D:\\mirg\\Pitch_Analysis\\026A-1CL0002164XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
#     {"transcription": { "tag": "csv", "value": "D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "12.5.1993"},
#     {"transcription": { "tag": "csv", "value": "D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
#     {"transcription": { "tag": "csv", "value": "D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "1907"}]]
# }
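
# for an ad-hoc check outside the DML service, the request above can be fed to
# per_file() directly from Python; this is only a sketch: the file name
# 'test_request.json' is an example and the CSV paths above are not part of
# this repository:
#
#   import json
#   from tuning_stats_byyear import per_file
#   with open('test_request.json') as f:        # the JSON document above
#       request = json.load(f)
#   output = per_file(request["arguments"][0])  # list of transcription/date items
#   print(output["result"])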

from aggregate import *
import numpy
import re

# get code for single-collection tuning statistics
from tuning_stats import fold_pitch, numpy_column, transcription_from_csv, \
    transcription_from_n3, tuning_note, stats, weighted_stats

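# note (an assumption inferred from how they are used below): stats() and
# weighted_stats() are expected to return a (mean, standard deviation) pair,
# with weighted_stats() weighting each frequency by the duration passed
# via the 'weights' argument
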
# width of bins in years
bin_width = 1

# parsers for n3 / csv
parser_table = { 'n3': transcription_from_n3,
                 'csv': transcription_from_csv }
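# (each 'transcription' input carries a "tag" field, "csv" or "n3" in the test
#  JSON; decode_tagged() from the aggregate module presumably uses this table
#  to pick the matching parser for that tag)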


datematch = ''  # (currently unused in this module)

# gets statistics on a per-file basis and returns per-year statistics
# (mean, standard deviation) over the file averages
# inputs[i]['transcription']: tagged (csv / n3) note transcription extracted from a recording
# inputs[i]['date']: string containing a 4-digit year for each recording
# inputs[i]['filename'] (optional): source filename, only used in status messages
def per_file(inputs):
    means = []
    years = []

    def accum(item):
        # see if this has a valid date
        y = parse_years(item['date'])
        if y > 1000:

            # get duration and normalised frequency for all tuning pitches (A3, A4, A5)
            a_notes = [ (note[1], fold_pitch(note[2], note[3]))
                        for note in decode_tagged(parser_table, item['transcription'])
                        if tuning_note(note[3]) ]

            if len(a_notes) == 0:
                # fall back to the transcription reference if no 'filename' is given
                print_status("No notes for " + str(item.get('filename', item['transcription'])))
            else:
                # get frequency and duration columns
                freq = numpy_column(a_notes, 1)
                dur = numpy_column(a_notes, 0)

                # get mean values per clip now,
                # then statistics over clips later
                avg, std = weighted_stats(freq, weights=dur)

                # only append years if there is data
                years.append(y)
                means.append(avg)
        else:
            print_status("No year found for " + str(item.get('filename', item['transcription'])))

    # get statistics per file
    st = for_each(inputs, accum)

    # get year bins; extend the end by one bin width so that recordings from
    # the latest year are not dropped (range() excludes its end point)
    years = numpy.array(years)
    yearbins = list(range(numpy.min(years), numpy.max(years) + bin_width, bin_width))
    #yearbinends = numpy.array(yearbins) + bin_width

    avg = []
    std = []

    # foreach over the year-bundled outputs
    for year in yearbins:
        valid_idx = [i for (i, val) in enumerate(years) if val >= year and val < (year + bin_width)]
        valid_means = [means[i] for i in valid_idx]

        # get statistics
        y_avg, y_std = stats(numpy.array(valid_means, dtype=float))
        avg.append(y_avg)
        std.append(y_std)

    return { 'result': { 'mean': avg, 'std-dev': std, 'years': yearbins},
             'stats': st }

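# illustrative shape of the return value (numbers are made up, not real output):
#   { 'result': { 'mean':    [439.8, 440.1, ...],
#                 'std-dev': [  1.2,   0.9, ...],
#                 'years':   [ 1907,  1908, ...] },
#     'stats': <whatever for_each() reports about the processed files> }
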
# parses a 4-digit year (1000-2999) from a free-form date string,
# e.g. "2015-12-14", "12.5.1993" or "1907"; returns -1 if no year is found
def parse_years(date):
    yearstr = re.search(r'[12]\d{3}', date)
    if yearstr:
        return int(yearstr.group(0))
    else:
        return -1
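
if __name__ == '__main__':
    # minimal self-check of parse_years on the date formats that appear in the
    # test JSON above (an added example; it only runs when this file is executed
    # directly and requires aggregate / tuning_stats to be importable);
    # expected output: 2015, 1993, 1907, and -1 for a string without a year
    for d in ["2015-12-14", "12.5.1993", "1907", "undated"]:
        print("%s -> %d" % (d, parse_years(d)))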