Mercurial > hg > dml-open-backendtools
comparison collection_analysis/tools/vampstats_pitch_weighted.py @ 0:e34cf1b6fe09 tip
commit
author | Daniel Wolff |
---|---|
date | Sat, 20 Feb 2016 18:14:24 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e34cf1b6fe09 |
---|---|
1 # Part of DML (Digital Music Laboratory) | |
2 # Copyright 2014-2015 Daniel Wolff, City University | |
3 | |
4 # This program is free software; you can redistribute it and/or | |
5 # modify it under the terms of the GNU General Public License | |
6 # as published by the Free Software Foundation; either version 2 | |
7 # of the License, or (at your option) any later version. | |
8 # | |
9 # This program is distributed in the hope that it will be useful, | |
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 # GNU General Public License for more details. | |
13 # | |
14 # You should have received a copy of the GNU General Public | |
15 # License along with this library; if not, write to the Free Software | |
16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
17 | |
18 #!/usr/bin/python | |
19 # -*- coding: utf-8 -*- | |
20 | |
21 # creates a histogram from given input files or folder | |
22 | |
23 __author__="Daniel Wolff, Dan" | |
24 __date__ ="$11-Feb-2015 18:18:47$" | |
25 | |
26 import sys | |
27 import os | |
28 import csv | |
29 import numpy | |
30 import csv2json as c2j | |
31 import re | |
32 | |
33 | |
# global feature extensions recognised as features
#ext = tuple([".n3",".csv",".mid"])
ext = tuple([".csv"])

# Matches a decimal number such as "12" or "12.5" in group(1).
# Fixed: the original pattern "((\d+)(.\d+)*)" used an unescaped dot
# (which matches ANY character, so "3x14" matched as one number) and
# relied on "\d" escapes inside a non-raw string literal, which is
# deprecated (SyntaxWarning on modern Python).
floater = re.compile(r"((\d+)(\.\d+)*)")
# reads in any csv and returns a list of structure
# time(float), data1, data2 ....dataN
def read_vamp_csv(filein='', datapos=0):
    """Read a VAMP-style CSV feature file.

    :param filein: path of the csv file to read
    :param datapos: index of the data column of interest (after the
        time code); rows with fewer than datapos + 2 fields are skipped
    :return: list of rows ``[time(float), data1, data2, ...]`` with the
        first field converted to float and the rest kept as strings
    """
    output = []
    badcount = 0
    # text mode works on both Python 2 and 3; the original 'rb' makes
    # csv.reader fail under Python 3 (it would iterate bytes, not str)
    with open(filein, 'r') as csvfile:
        contents = csv.reader(csvfile, delimiter=',', quotechar='"')
        for row in contents:
            if len(row) >= datapos + 2:
                output.append([float(row[0])] + row[1:])
            else:
                badcount += 1
    # only report when rows were actually dropped (the original always
    # printed "Ignored 0 short rows")
    if badcount > 0:
        print("Ignored " + str(badcount) + " short rows")
    return output
53 | |
#calculates the histogram
def histogram(data, datapos=1, nbins=-1):
    """Build a histogram over one data column.

    :param data: list of rows as returned by :func:`read_vamp_csv`
    :param datapos: data column (after the time code) to histogram
    :param nbins: -1 for categorical (string) data; otherwise the data
        is treated as numerical and weighted by the product of the
        time-weight column (2) and loudness-weight column (3)
    :return: dict ``{"count": [...], "index": [...]}`` where index holds
        the bin edges (numerical) or the category labels (categorical)
    """
    # symbols or numerical input?
    if nbins != -1:
        # convert the value column to numpy
        ddata = string2numpy(data, datapos)

        # get time weights (column 2)
        tw_data = string2numpy(data, 2)

        # get loudness weights (column 3)
        lw_data = string2numpy(data, 3)

        count, index = numpy.histogram(ddata, nbins - 1,
                                       weights=numpy.multiply(tw_data, lw_data))
        count = count.tolist()
        index = index.tolist()

    # here for strings
    else:
        # build histogram on strings
        histo = dict()
        for row in data:
            histo[row[datapos + 1]] = histo.get(row[datapos + 1], 0) + 1
        # materialise as real lists: on Python 3 dict.keys()/.values()
        # return views, which are not JSON-serialisable and would be
        # inconsistent with the numerical branch's tolist() output
        index = list(histo.keys())
        count = list(histo.values())

    # return histogram
    return {"count": count, "index": index}
84 | |
#calculates statistics for numerical input
def numstats(data, datapos):
    """Return average, median and standard deviation of one data column.

    :param data: list of rows as returned by :func:`read_vamp_csv`
    :param datapos: data column (after the time code) to analyse
    :return: dict with keys "average", "median" and "std"
    """
    # convert the selected column to a numpy float array
    values = string2numpy(data, datapos)

    # .tolist() turns numpy scalars into plain Python floats
    return {
        "average": numpy.average(values).tolist(),
        "median": numpy.median(values).tolist(),
        "std": numpy.std(values).tolist(),
    }
97 | |
def featurefilesinpath(path, extensions=None):
    """Recursively collect feature files below *path*.

    :param path: root directory to walk
    :param extensions: optional sequence of filename suffixes to match;
        defaults to the module-level ``ext`` (csv features)
    :return: list of matching file paths, with backslashes normalised
        to forward slashes
    """
    if extensions is None:
        extensions = ext
    else:
        # str.endswith accepts a tuple of suffixes, not a list
        extensions = tuple(extensions)
    # ---
    # we traverse the file structure
    # and list files to copy
    # ---
    files = []
    for (dirpath, dirnames, filenames) in os.walk(path):
        # 'fname' instead of the original 'file', which shadows a builtin
        for fname in filenames:
            # we copy all requested files and the transform files as well!
            if fname.endswith(extensions):
                source = os.path.join(dirpath, fname).replace('\\', '/')
                files.append(source)
    return files
111 | |
# convert to numpy
def string2numpy(data, datapos):
    """Extract column ``datapos + 1`` of *data* as a float numpy array.

    Fast path: the whole table converts to float directly. Fallback for
    string cells: strip everything from the first alphabetic character
    onwards (e.g. a unit suffix like "440Hz" -> "440") before converting.

    :param data: list of rows as returned by :func:`read_vamp_csv`
    :param datapos: data column (after the time code) to extract
    :return: 1-D numpy array of floats
    """
    try:
        ddata = numpy.array(data, dtype=float)[:, datapos + 1]
    # narrowed from a bare ``except:`` which also hid unrelated errors
    # (KeyboardInterrupt, IndexError, ...)
    except ValueError:
        edata = []
        for row in data:
            # take only the specified column datapos+1
            cell = row[datapos + 1]
            m = re.search("[a-zA-Z]", cell)
            if m is not None:
                # cut at the first letter; the original [:m.start()-1]
                # was off by one and dropped the last digit as well
                edata.append(cell[:m.start()])
            else:
                edata.append(cell)
        ddata = numpy.array(edata, dtype=float)
    return ddata
129 | |
# main entry point: build a (weighted) histogram and, for numerical
# data, summary statistics over all given feature files/directories
if __name__ == "__main__":
    print("Usage: vampstats datapos nbins file1/dir1 file2/dir2 ....")
    print("datapos: column of data after timecode to process")
    print("nbins: -1 for categorical data, otherwise number of bins for histogram")

    datapos = int(sys.argv[1])
    nbins = int(sys.argv[2])

    # check and collate files
    files = []
    for path in sys.argv[3:]:
        if os.path.isdir(path):
            files.extend(featurefilesinpath(path))
        elif os.path.isfile(path):
            # BUGFIX: the original files.extend(path) iterated the path
            # STRING, adding it one character at a time; append adds
            # the whole path as a single entry
            files.append(path)
    print("Number of files now loading: " + str(len(files)))

    # we collate all data first and then count.
    # @todo: read all files and create dictionary first for large tasks
    data = []
    for fname in files:
        print(fname)
        data.extend(read_vamp_csv(fname, datapos))

    # NOTE: sys.getsizeof is shallow, so this understates the real footprint
    print("Total data size in memory: " + str(sys.getsizeof(data)))

    # now get the histogram for all data
    histo = histogram(data, datapos, nbins)
    print(histo)
    print("Please input a description for the histogram analysis features")
    c2j.data2json(histo)

    # further numerical analysis if this is not categorical data
    if nbins != -1:
        ns = numstats(data, datapos)
        print(ns)
        print("Please input a description for the general statistics features")
        c2j.data2json(ns)
170 | |
171 | |
172 |