Daniel@0: # Part of DML (Digital Music Laboratory)
Daniel@0: # Copyright 2014-2015 Daniel Wolff, City University
Daniel@0:  
Daniel@0: # This program is free software; you can redistribute it and/or
Daniel@0: # modify it under the terms of the GNU General Public License
Daniel@0: # as published by the Free Software Foundation; either version 2
Daniel@0: # of the License, or (at your option) any later version.
Daniel@0: # 
Daniel@0: # This program is distributed in the hope that it will be useful,
Daniel@0: # but WITHOUT ANY WARRANTY; without even the implied warranty of
Daniel@0: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Daniel@0: # GNU General Public License for more details.
Daniel@0: # 
Daniel@0: # You should have received a copy of the GNU General Public
Daniel@0: # License along with this library; if not, write to the Free Software
Daniel@0: # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
Daniel@0: 
Daniel@0: # -*- coding: utf-8 -*-
Daniel@0: __author__='wolffd'
Daniel@0: __date__ ="$11-Mar-2015 12:47:23$"
Daniel@0: 
# this script derives standard statistics for the tuning frequency;
# the per-recording results are combined by year (range), and for each
# year bin the following values are reported:
#  - average
#  - standard deviation
Daniel@0: 
Daniel@0: # test JSON:
Daniel@0: #{ "module":"tuning_stats_byyear",
Daniel@0: #      "function":"per_file",
Daniel@0: #      "arguments": [[
Daniel@0: #      {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
Daniel@0: #      {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0002164XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
Daniel@0: #      {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
Daniel@0: #      {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0002164XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
Daniel@0: #      {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "12.5.1993"},
Daniel@0: #      {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "2015-12-14"},
Daniel@0: #      {"transcription": { "tag": "csv", "value":"D:\\mirg\\Pitch_Analysis\\026A-1CL0001773XX-0100A0_vamp_silvet_silvet_notes.csv"}, "date": "1907"}]]
Daniel@0: #}
Daniel@0: 
Daniel@0: from aggregate import *
Daniel@0: import numpy
Daniel@0: import re
Daniel@0: 
Daniel@0: # get code for single-collection tuning statistics
Daniel@0: from tuning_stats import fold_pitch, numpy_column, transcription_from_csv, \
Daniel@0:         transcription_from_n3,tuning_note,stats, weighted_stats
Daniel@0: 
# width of bins in years; per_file groups the per-recording means into
# consecutive year bins of this width
bin_width = 1

# parsers for n3 / csv, keyed by the "tag" of a transcription entry;
# per_file hands this table to decode_tagged (presumably to pick the
# reader matching the tag -- confirm in aggregate)
parser_table = { 'n3':transcription_from_n3, 
                 'csv':transcription_from_csv }


# NOTE(review): not referenced anywhere in the visible part of this
# module -- confirm whether it is still needed
datematch = ''
def per_file(inputs):
    """Compute per-recording tuning means and summarise them per year bin.

    For every item a duration-weighted mean of the folded tuning-note
    frequencies is computed; these per-recording means are then grouped
    into year bins of ``bin_width`` years and summarised with ``stats``.

    inputs: collection of items processed via ``for_each``; each item has
        item['transcription']: tagged transcription ('csv' / 'n3'),
                               decoded via ``parser_table``
        item['date']:          date string containing a 4-digit year
        item['filename']:      optional, only used in status messages

    Returns a dict with
        'result': {'mean': [...], 'std-dev': [...], 'years': [...]}
                  (three parallel lists, one entry per year bin)
        'stats':  whatever ``for_each`` returned
    If no item yields both a year and tuning notes, the three result
    lists are empty.
    """
    means = []
    years = []

    def describe(item):
        # 'filename' is optional (the documented test JSON does not carry
        # it); never let a status message raise KeyError.
        try:
            return str(item['filename'])
        except KeyError:
            return str(item)

    def accum(item):
        # see if this has a valid date
        year = parse_years(item['date'])
        if year <= 1000:
            print_status("No year found for " + describe(item))
            return

        # duration and folded frequency for all tuning pitches
        a_notes = [(note[1], fold_pitch(note[2], note[3]))
                   for note in decode_tagged(parser_table, item['transcription'])
                   if tuning_note(note[3])]

        if not a_notes:
            print_status("No notes for " + describe(item))
            return

        # frequency and duration columns
        freq = numpy_column(a_notes, 1)
        dur = numpy_column(a_notes, 0)

        # mean value per clip now, statistics over clips later;
        # the per-clip deviation is not used
        clip_mean, _ = weighted_stats(freq, weights=dur)

        # only record the year if there is data for it
        years.append(year)
        means.append(clip_mean)

    # get statistics per file
    st = for_each(inputs, accum)

    # nothing usable: min()/max() below would fail on an empty list
    if not years:
        return {'result': {'mean': [], 'std-dev': [], 'years': []},
                'stats': st}

    # year bins; range() excludes its stop value, so go one past the newest
    # year, otherwise the most recent recordings would fall into no bin
    yearbins = list(range(min(years), max(years) + 1, bin_width))

    avg = []
    std = []

    # for each bin, summarise the means of the recordings falling into it
    for bin_start in yearbins:
        bin_means = [m for (y, m) in zip(years, means)
                     if bin_start <= y < bin_start + bin_width]

        # NOTE: bins without any recording hand an empty array to stats
        y_avg, y_std = stats(numpy.array(bin_means, dtype=float))
        avg.append(y_avg)
        std.append(y_std)

    return {'result': {'mean': avg, 'std-dev': std, 'years': yearbins},
            'stats': st}
Daniel@0: 
def parse_years(date):
    """Extract a 4-digit year from a date value.

    date: normally a string containing a 4-digit year (e.g. "2015-12-14",
          "12.5.1993", "1907"); non-string values such as a numeric year
          or None are tolerated.

    Returns the first run of four digits starting with 1 or 2 as an int,
    or -1 if no such year is found.
    """
    pattern = r'[12]\d{3}'
    try:
        found = re.search(pattern, date)
    except TypeError:
        # not a string (e.g. JSON number or null): search its text form
        # instead; strings are never converted, so unicode input is safe
        found = re.search(pattern, str(date))

    if found:
        return int(found.group(0))
    return -1
Daniel@0: 
Daniel@0: