dml-open-backendtools: collection_analysis/tools/vampstats_pitch

annotate collection_analysis/tools/vampstats_pitch_weighted.py @ 0:e34cf1b6fe09 tip

commit

author	Daniel Wolff
date	Sat, 20 Feb 2016 18:14:24 +0100
parents
children

rev	line source
Daniel@0	1 # Part of DML (Digital Music Laboratory)
Daniel@0	2 # Copyright 2014-2015 Daniel Wolff, City University
Daniel@0	3
Daniel@0	4 # This program is free software; you can redistribute it and/or
Daniel@0	5 # modify it under the terms of the GNU General Public License
Daniel@0	6 # as published by the Free Software Foundation; either version 2
Daniel@0	7 # of the License, or (at your option) any later version.
Daniel@0	8 #
Daniel@0	9 # This program is distributed in the hope that it will be useful,
Daniel@0	10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
Daniel@0	11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Daniel@0	12 # GNU General Public License for more details.
Daniel@0	13 #
Daniel@0	14 # You should have received a copy of the GNU General Public
Daniel@0	15 # License along with this library; if not, write to the Free Software
Daniel@0	16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Daniel@0	17
Daniel@0	18 #!/usr/bin/python
Daniel@0	19 # -- coding: utf-8 --
Daniel@0	20
Daniel@0	21 # creates a histogram from given input files or folder
Daniel@0	22
Daniel@0	23 __author__="Daniel Wolff, Dan"
Daniel@0	24 __date__ ="$11-Feb-2015 18:18:47$"
Daniel@0	25
Daniel@0	26 import sys
Daniel@0	27 import os
Daniel@0	28 import csv
Daniel@0	29 import numpy
Daniel@0	30 import csv2json as c2j
Daniel@0	31 import re
Daniel@0	32
Daniel@0	33
# Global feature-file extensions: only files whose name ends with one of
# these are picked up when a directory is scanned.  ".n3" and ".mid" were
# previously listed as alternatives; only CSV is enabled.
ext = (".csv",)

# Matches a leading unsigned decimal number such as "12" or "12.5"
# (group 1 is the whole number).  Written as a raw string with an escaped
# dot so that "." only matches a literal decimal point.  Currently unused by
# the functions in this file.
floater = re.compile(r"((\d+)(\.\d+)*)")
def read_vamp_csv(filein='', datapos=0):
    """Read a feature CSV file into a list of rows.

    Every returned row has the form ``[time, data1, data2, ...]``: the first
    CSV column converted to ``float`` followed by the remaining columns as
    the strings produced by the ``csv`` module.  Rows with fewer than
    ``datapos + 2`` columns are dropped; the number of dropped rows is
    printed once the file has been read.

    Args:
        filein: path of the CSV file (comma separated, '"' as quote char).
        datapos: index of the data column after the time column that the
            caller intends to use; only used here to decide whether a row
            is long enough to keep.

    Returns:
        list of lists, one per kept row.

    Raises:
        ValueError: if the first column of a kept row is not a valid float.
        IOError / OSError: if the file cannot be opened.
    """
    # The csv module expects a binary file on Python 2 but a text file
    # opened with newline='' on Python 3.
    if sys.version_info[0] < 3:
        csvfile = open(filein, 'rb')
    else:
        csvfile = open(filein, 'r', newline='')

    output = []
    badcount = 0
    with csvfile:
        contents = csv.reader(csvfile, delimiter=',', quotechar='"')
        for row in contents:
            if len(row) >= datapos + 2:
                output.append([float(row[0])] + row[1:])
            else:
                badcount += 1
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print("Ignored " + str(badcount) + " short rows")
    return output
Daniel@0	53
def histogram(data, datapos=1, nbins=-1):
    """Build a histogram over one data column of ``data``.

    Args:
        data: list of rows ``[time, data1, data2, ...]`` as returned by
            ``read_vamp_csv``.
        datapos: index of the column to analyse, counted after the time
            column (the value is read from ``row[datapos + 1]``).
        nbins: ``-1`` treats the column as categorical and counts each
            distinct value; any other value requests a numerical,
            weighted histogram.

    Returns:
        dict with two lists, ``"count"`` and ``"index"``.  For categorical
        data ``index`` holds the distinct values and ``count`` their
        occurrences (same order).  For numerical data ``count`` holds the
        weighted bin contents and ``index`` the bin edges returned by
        ``numpy.histogram``.
    """
    if nbins != -1:
        # numerical input: convert the requested column to floats
        values = string2numpy(data, datapos)

        # Time weights and loudness weights are read from the fixed data
        # columns 2 and 3 (row[3] and row[4]); each sample is weighted by
        # their product.
        tw_data = string2numpy(data, 2)
        lw_data = string2numpy(data, 3)
        weights = numpy.multiply(tw_data, lw_data)

        # numpy.histogram is asked for nbins - 1 bins, so the returned
        # edge list ("index") has nbins entries.
        count, index = numpy.histogram(values, nbins - 1, weights=weights)
        return {"count": count.tolist(), "index": index.tolist()}

    # categorical input: count occurrences of each distinct value
    histo = dict()
    for row in data:
        key = row[datapos + 1]
        histo[key] = histo.get(key, 0) + 1

    # list(...) so that plain lists are returned on Python 3 as well, where
    # keys()/values() are views; both calls iterate in the same order.
    return {"count": list(histo.values()), "index": list(histo.keys())}
Daniel@0	84
def numstats(data, datapos):
    """Return basic statistics of one numerical data column.

    The column ``datapos`` (counted after the time column) is converted to
    a float array with ``string2numpy``; its average, median and standard
    deviation are returned as plain Python values under the keys
    ``"average"``, ``"median"`` and ``"std"``.
    """
    values = string2numpy(data, datapos)

    reducers = (
        ("average", numpy.average),
        ("median", numpy.median),
        ("std", numpy.std),
    )
    summary = {}
    for label, reducer in reducers:
        # .tolist() turns the numpy scalar into a native Python number
        summary[label] = reducer(values).tolist()
    return summary
Daniel@0	97
def featurefilesinpath(path):
    """List all feature files below ``path``.

    Walks the directory tree rooted at ``path`` and returns, in walk order,
    the joined path of every file whose name ends with one of the
    extensions in the module-level ``ext`` tuple.  Backslashes in the
    returned paths are replaced by forward slashes.
    """
    return [
        os.path.join(folder, name).replace('\\', '/')
        for (folder, _subfolders, names) in os.walk(path)
        for name in names
        if name.endswith(ext)
    ]
Daniel@0	111
def string2numpy(data, datapos):
    """Extract one data column of ``data`` as a numpy float array.

    Args:
        data: list of rows ``[time, data1, data2, ...]``.
        datapos: index of the wanted column counted after the time column
            (the value is read from ``row[datapos + 1]``).

    Returns:
        1-D ``numpy`` array of dtype float with one entry per row.

    Raises:
        ValueError: if a cell is neither a float nor a number followed by
            text (e.g. a unit such as "440 Hz").
        IndexError: if a row has no column ``datapos + 1``.
    """
    column = datapos + 1

    # Fast path: works when the whole table is rectangular and numeric.
    try:
        return numpy.array(data, dtype=float)[:, column]
    except (ValueError, TypeError, IndexError):
        # Some other column is not numeric, rows differ in length, or data
        # is empty: fall back to converting only the requested column.
        pass

    values = []
    for row in data:
        cell = row[column]
        try:
            # Try the plain conversion first so that valid floats which
            # contain letters ("1e-5", "nan", "inf") are not truncated.
            values.append(float(cell))
        except ValueError:
            m = re.search("[a-zA-Z]", cell)
            if m is None:
                raise
            # Keep everything before the first letter; float() ignores
            # surrounding whitespace, so "440 Hz" and "440Hz" both give 440.
            values.append(float(cell[:m.start()]))
    return numpy.array(values, dtype=float)
Daniel@0	129
# main entry point
if __name__ == "__main__":
    # The usage text is always printed, also on a successful run.
    print("Usage: vampstats datapos nbins file1/dir1 file2/dir2 ....")
    print("datapos: column of data after timecode to process")
    print("nbins: -1 for categorical data, otherwise number of bins for histogram")

    # Both numeric arguments are mandatory; stop after the usage text
    # instead of failing with an IndexError below.
    if len(sys.argv) < 3:
        sys.exit(1)

    datapos = int(sys.argv[1])
    nbins = int(sys.argv[2])

    # check and collate files
    files = []
    for path in sys.argv[3:]:
        if os.path.isdir(path):
            files.extend(featurefilesinpath(path))
        elif os.path.isfile(path):
            # append, not extend: extending a list with a string would add
            # every single character of the path as a separate "file".
            files.append(path)
        else:
            print("Skipping path that is neither file nor directory: " + path)
    print("Number of files now loading: " + str(len(files)))

    # we collate all data first and then count.
    # @todo: read all files and create dictionary first for large tasks
    data = []
    for filename in files:
        print(filename)
        data.extend(read_vamp_csv(filename, datapos))

    # sys.getsizeof is shallow: this is the size of the outer list only.
    print("Total data size in memory: " + str(sys.getsizeof(data)))

    # now get the histogram for all data
    histo = histogram(data, datapos, nbins)
    print(histo)
    print("Please input a description for the histogram analysis features")
    # NOTE(review): data2json comes from the project module csv2json;
    # presumably it asks for the description and writes the JSON - confirm.
    c2j.data2json(histo)

    # further numerical analysis if this is not categorical data
    if nbins != -1:
        ns = numstats(data, datapos)
        print(ns)
        print("Please input a description for the general statistics features")
        c2j.data2json(ns)
Daniel@0	170
Daniel@0	171
Daniel@0	172

Mercurial > hg > dml-open-backendtools

annotate collection_analysis/tools/vampstats_pitch_weighted.py @ 0:e34cf1b6fe09 tip