# -*- coding: utf-8 -*-
# Part of DML (Digital Music Laboratory)
# Copyright 2014-2015 Daniel Wolff, City University; Steven Hargreaves; Samer Abdallah, University of London

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA

__author__ = 'wolffd, hargreavess, abdallahs'

# This module derives standard statistics for tuning frequency,
# in particular:
#   - average
#   - standard deviation
#   - histogram

from rdflib import RDF, RDFS
from csvutils import *
from aggregate import *
from n3Parser import get_rdf_graph_from_n3
import numpy


def transcription_from_csv(filename):
    """Parse a transcription CSV file.

    We assume CSV columns: time, duration, pitch, velocity, note_name.
    Returns a list of (time, duration, pitch, note_name) tuples.
    """
    return csv_map_rows(filename, 5,
                        lambda row: (float(row[0]), float(row[1]),
                                     float(row[2]), row[4]))


def transcription_from_n3(filename):
    """Parse a transcription from an n3/RDF file.

    Returns a list of (time, duration, pitch, note_name) tuples, one per
    af:Note event found in the graph.
    """
    graph = get_rdf_graph_from_n3(filename)
    # BUG FIX: the original iterated `subject((RDF.type, af_ns.Note))`,
    # which is not a valid call; query the graph for all af:Note subjects.
    notes = [(event_time_prop(graph, ev, tl_ns.beginsAt),
              event_time_prop(graph, ev, tl_ns.duration),
              graph.value(ev, af_ns.feature).split(' ')[0],
              graph.value(ev, RDFS.label))
             for ev in graph.subjects(RDF.type, af_ns.Note)]
    # BUG FIX: the original built `notes` but fell off the end of the
    # function, implicitly returning None.
    return notes


# dispatch table: input format tag -> parser function
parser_table = {'n3': transcription_from_n3,
                'csv': transcription_from_csv}

# semitone offset of each supported tuning-note letter, relative to A
offset = {'D': 7, 'E': 5, 'A': 0}


def fold_pitch(freq, name):
    """Fold frequency `freq` of the note named `name` (e.g. 'A3', 'E4')
    into the A4 reference position, so all tuning notes can be compared
    against the reference A (~440 Hz)."""
    # shift by (4 - octave) octaves plus the letter's semitone offset
    return freq * 2 ** (4 + offset[name[0]] / 12.0 - int(name[1]))


def tuning_note(n):
    """True if note name `n` (e.g. 'A4') is one of the tuning pitches
    A/E/D in octaves 3-5."""
    return n[1] in ['3', '4', '5'] and n[0] in ['A', 'E', 'D']


def per_file(inputs):
    """Per-clip tuning statistics.

    The duration-weighted mean tuning frequency and histogram are computed
    per input clip; the returned mean/std/histogram are then (unweighted)
    statistics over the per-clip values.

    Returns {'result': {'mean', 'std-dev', 'hist'}, 'stats': for_each stats}.
    """
    means = []
    hists = []
    hist_edges = []

    def accum(item):
        # duration and normalised frequency for all tuning pitches
        # (A/E/D in octaves 3-5)
        a_notes = [(note[1], fold_pitch(note[2], note[3]))
                   for note in decode_tagged(parser_table, item)
                   if tuning_note(note[3])]

        if not a_notes:
            print_status("No notes for " + str(item))
        else:
            # frequency and duration columns
            freq = numpy_column(a_notes, 1)
            dur = numpy_column(a_notes, 0)
            # mean values per clip now, statistics over clips later
            avg, std = weighted_stats(freq, weights=dur)
            counts, edges = histogram(freq, 100, 390, 490, weights=dur)

            means.append(avg)
            hists.append(counts)
            # all histograms share the same bins; remember the edges once
            if not hist_edges:
                hist_edges.extend(edges)

    st = for_each(inputs, accum)

    avg, std = stats(numpy.array(means, dtype=float))

    # !!! does this make any sense? (bin-wise mean of per-clip histograms)
    hist_mean, hist_std = stats(numpy.array(hists, dtype=float))

    # NOTE(review): this key is 'std-dev' but aggregate() uses 'std_dev';
    # kept as-is for compatibility with existing consumers.
    return {'result': {'mean': avg, 'std-dev': std,
                       'hist': continuous_hist(hist_edges, hist_mean)},
            'stats': st}


def aggregate(inputs):
    """Pooled tuning statistics.

    All tuning notes from all inputs are pooled, then a single
    duration-weighted mean, standard deviation and histogram are computed.

    Returns {'result': {'mean', 'std_dev', 'hist'}, 'stats': for_each stats}.
    """
    notes = []  # will contain all tuning notes from all inputs

    def accum(item):
        # duration and normalised frequency for all tuning pitches
        # (A/E/D in octaves 3-5), collected into `notes`
        notes.extend([(note[1], fold_pitch(note[2], note[3]))
                      for note in decode_tagged(parser_table, item)
                      if tuning_note(note[3])])

    # execute accumulation for each input
    # (local renamed from `stats`, which shadowed the stats() function)
    st = for_each(inputs, accum)

    # frequency and duration columns
    dur = numpy_column(notes, 0)
    freq = numpy_column(notes, 1)

    # basic duration-weighted statistics
    avg, std = weighted_stats(freq, weights=dur)

    # histogram weighted by duration
    counts, edges = histogram(freq, 100, 390, 490, weights=dur)

    return {'result': {'mean': avg, 'std_dev': std,
                       'hist': continuous_hist(edges, counts)},
            'stats': st}


def numpy_column(data, datapos):
    """Extract column `datapos` from a sequence of rows as a float array."""
    return numpy.array([row[datapos] for row in data], dtype=float)


def histogram(colu, nbins, lb, ub, weights=None):
    """Normalised (optionally weighted) histogram of `colu`.

    nbins: number of bins; lb/ub: lower/upper bound of the range;
    weights: optional per-value weights.  BUG FIX: the default was a
    mutable list [], which numpy.histogram rejects for non-empty input;
    None means unweighted.

    Returns (counts, edges) as plain lists; counts sum to 1 (unless the
    histogram is empty, in which case they are left as zeros).
    """
    counts, edges = numpy.histogram(colu, bins=nbins, range=[lb, ub],
                                    weights=weights)
    total = numpy.sum(counts)
    # guard against division by zero when nothing falls inside [lb, ub];
    # float() also avoids integer division on unweighted int counts
    if total > 0:
        counts = counts / float(total)
    return (counts.tolist(), edges.tolist())


def stats(counts):
    """Unweighted column-wise mean and standard deviation.

    Returns (avg, std) as plain Python values.  BUG FIX: the original
    returned avg as a list but std as an ndarray; both are now converted
    for consistent, serialisable results.
    """
    avg = numpy.average(counts, axis=0).tolist()
    std = numpy.std(counts, axis=0).tolist()
    return (avg, std)


def weighted_stats(colu, weights=None):
    """Weighted mean and standard deviation of numerical input.

    weights: optional per-value weights.  BUG FIX: the default was a
    mutable list [], which numpy.average rejects for non-empty input;
    None means equal weighting.
    """
    avg = numpy.average(colu, axis=0, weights=weights)
    # weighted standard deviation (numpy.std has no weights parameter)
    std = numpy.sqrt(numpy.average((colu - avg) ** 2, axis=0, weights=weights))
    # could use https://pypi.python.org/pypi/wquantiles for weighted median
    return (avg, std)