mitian@13
|
1 #!/usr/bin/env python
|
mitian@13
|
2 # encoding: utf-8
|
mitian@13
|
3 """
|
mitian@13
|
4 ProbSegmenter.py
|
mitian@13
|
5 """
|
mitian@13
|
6
|
mitian@13
|
7 import matplotlib
|
mitian@13
|
8 matplotlib.use('Agg')
|
mitian@13
|
9 import matplotlib.pyplot as plt
|
mitian@13
|
10
|
mitian@13
|
11 import sys, os, optparse, cPickle
|
mitian@13
|
12 import numpy as np
|
mitian@13
|
13 from numpy import abs,log,exp,floor,sum,sqrt,cos,hstack, power
|
mitian@13
|
14 from math import ceil, floor
|
mitian@13
|
15 from scipy.signal import correlate2d, convolve2d
|
mitian@13
|
16 from itertools import combinations
|
mitian@13
|
17 from os.path import join, isdir, isfile, abspath, dirname, basename, split, splitext
|
mitian@13
|
18
|
mitian@13
|
19 from sklearn.decomposition import PCA
|
mitian@13
|
20 from sklearn.mixture import GMM
|
mitian@13
|
21 from sklearn.metrics.pairwise import pairwise_distances
|
mitian@13
|
22 from sklearn.cluster import KMeans, DBSCAN
|
mitian@13
|
23 from scipy.spatial.distance import squareform, pdist
|
mitian@13
|
24
|
mitian@13
|
25 from utils.GmmMetrics import GmmDistance
|
mitian@13
|
26 from utils.RankClustering import rClustering
|
mitian@13
|
27 from utils.kmeans import Kmeans
|
mitian@13
|
28 from utils.ComputationCache import Meta, with_pickle_dump
|
mitian@13
|
29 from utils.SegUtil import normaliseFeature, upSample, getSSM, getMean, getStd, getDelta
|
mitian@13
|
30
|
mitian@13
|
def parse_args(argv=None):
    '''Build the command-line parser and parse the arguments.

    argv -- optional list of argument strings; when None (the default)
            optparse falls back to sys.argv[1:], which is the original
            behaviour.

    Returns the (options, args) pair produced by OptionParser.parse_args().
    '''
    # define parser
    op = optparse.OptionParser()
    # IO options
    op.add_option('-i', '--input', action="store", dest="INPUT",
                  default='/Volumes/c4dm-scratch/mi/seg/qupujicheng/features1',
                  type="str", help="Loading features from..")
    op.add_option('-a', '--audioset', action="store", dest="AUDIO",
                  default=None, type="str",
                  help="Select audio datasets ['qupujicheng', 'salami', 'beatles'] ")
    op.add_option('-g', '--groundtruth', action="store", dest="GT",
                  default='/Volumes/c4dm-scratch/mi/seg/qupujicheng/annotations/lowercase',
                  type="str", help="Loading annotation files from.. ")
    # The help strings of the next two options used to be copies of the
    # ground-truth one; they now describe what the options are used for.
    op.add_option('-o', '--output', action="store", dest="OUTPUT",
                  default='/Volumes/c4dm-scratch/mi/seg/qupujicheng/clustering',
                  type="str", help="Writing clustering results to.. ")
    op.add_option('-c', '--cache', action="store", dest="CACHE",
                  default='/Volumes/c4dm-scratch/mi/seg/qupujicheng/clustering/cache',
                  type="str", help="Writing cached intermediate results to.. ")
    op.add_option('-t', '--test', action="store_true", dest="TEST",
                  help="Select TEST mode.")
    op.add_option('-v', '--verbose', action="store_true", dest="VERBOSE",
                  help="Select VERBOSE mode to save features/SSMs.")

    return op.parse_args(argv)
|
mitian@13
|
44
|
mitian@13
|
# Parse the command line once, at import time. `options` is then read as a
# module-level global by Segmenter.process().
options, args = parse_args()
|
mitian@13
|
46
|
mitian@13
|
class FeatureObj(object):
    '''Plain record with a fixed set of attributes.

    Inherits from object explicitly: under Python 2 a bare `class X():` is an
    old-style class, for which __slots__ is silently ignored.
    '''
    __slots__ = ['key', 'audio', 'timestamps', 'features']
|
mitian@13
|
49
|
mitian@13
|
class AudioObj(object):
    '''Per-recording record filled in by Segmenter.process().

    Inherits from object so that __slots__ is honoured under Python 2 as
    well. `gt` is declared because process() stores the ground-truth
    annotation on the instance; without the slot that assignment fails on
    any new-style class.
    '''
    __slots__ = ['feature_list', 'feature_matrix', 'distance_matrix', 'name', 'gt']
|
mitian@13
|
52
|
mitian@13
|
class Segmenter(object):
    '''The main segmentation object.

    process() walks over the annotated recordings, loads their features,
    models sliding windows with GmmDistance objects and clusters those
    models; the remaining methods are the building blocks it uses.
    '''

    # Shared cache configuration handed to the with_pickle_dump decorator.
    meta = Meta()

    @classmethod
    def set_cache_filename(cls, filename, cache=True, cache_location=""):
        '''Store the cache settings on the class-level `meta` object.

        filename       -- assigned to meta.cache_file_base
        cache          -- assigned to meta.cache
        cache_location -- assigned to meta.cache_location
        '''
        cls.meta.cache = cache
        cls.meta.cache_file_base = filename
        cls.meta.cache_location = cache_location

    @with_pickle_dump(meta)
    def getGMMs(self, feature, filename, gmmWindow=10, stepsize=1, save=True):
        '''Build one GmmDistance object per sliding window over `feature`.

        feature   -- 2-D array; windows are taken along the first axis
        gmmWindow -- number of rows per window
        stepsize  -- hop between consecutive window starts, in rows
        filename, save -- not used in this body
                          (presumably consumed by with_pickle_dump -- TODO confirm)

        Returns the list of GmmDistance objects, one per window; the list is
        empty when `feature` has fewer rows than gmmWindow.
        '''
        steps = int((feature.shape[0] - gmmWindow + stepsize) // stepsize)
        gmm_list = []
        for i in range(steps):
            start = i * stepsize
            # Each window is passed transposed, as in the original code.
            window = feature[start:(start + gmmWindow), :].T
            gmm_list.append(GmmDistance(window, components=2))
        return gmm_list

    def pairwiseSKL(self, gmm_list):
        '''Compute pairwise symmetrised KL divergence of a list of GMMs.

        gmm_list -- objects exposing skl_distance_full(other)

        Returns a symmetric (n, n) array with a zero diagonal. NaN entries
        are replaced by 0 and infinite entries by the largest finite value
        in the matrix.
        '''
        n_GMMs = len(gmm_list)
        distance_matrix = np.zeros((n_GMMs, n_GMMs))
        for i in range(n_GMMs):
            # The diagonal is forced to zero below, so self-distances are
            # not evaluated at all.
            for j in range(i + 1, n_GMMs):
                distance = gmm_list[i].skl_distance_full(gmm_list[j])
                distance_matrix[i][j] = distance
                distance_matrix[j][i] = distance

        np.fill_diagonal(distance_matrix, 0.0)
        distance_matrix[np.isnan(distance_matrix)] = 0

        inf_mask = np.isinf(distance_matrix)
        if inf_mask.any():
            # The zero diagonal guarantees at least one finite entry. Taking
            # the maximum over the finite entries also copes with a matrix
            # that only contains -inf, which the former sort-and-search for
            # +inf could not handle.
            fMax = distance_matrix[~inf_mask].max()
            n_below_inf = int(np.count_nonzero(distance_matrix != np.inf))
            print('%d %d %s' % (distance_matrix.size, n_below_inf, fMax))
            distance_matrix[inf_mask] = fMax
        return distance_matrix

    def smoothLabels(self, label_list, size=5):
        '''Smooth label list within given length.

        Every run of identical labels shorter than `size` is relabelled with
        the label that precedes it. Only positions in
        [size, len(label_list) - size) are rewritten, matching the range the
        original loop covered; run lengths are measured on the whole list so
        that runs touching the untouched borders are not under-counted.

        The previous implementation read `prev_label` before assigning it
        (only `prev_labels` was initialised) and therefore raised as soon as
        the loop ran once.

        label_list -- mutable sequence of labels, modified in place
        size       -- minimum run length that is kept

        Returns label_list.
        '''
        n_labels = len(label_list)
        first_editable = size
        last_editable = n_labels - size

        run_start = 0
        while run_start < n_labels:
            run_end = run_start + 1
            while run_end < n_labels and label_list[run_end] == label_list[run_start]:
                run_end += 1

            # The very first run has no predecessor to inherit a label from.
            if run_end - run_start < size and run_start > 0:
                replacement = label_list[run_start - 1]
                for k in range(max(run_start, first_editable), min(run_end, last_editable)):
                    label_list[k] = replacement
            run_start = run_end
        return label_list

    def getInitialCentroids(self, neighborhood_size, k=10):
        '''Pick up to `k` strict local maxima of `neighborhood_size`.

        An interior index is a candidate when its value is strictly greater
        than both neighbours; candidates are ranked by the larger of the two
        differences, in descending order.

        Returns the list of selected indices, best first.
        '''
        size = len(neighborhood_size)
        candidates = []
        for i in range(1, size - 1):
            d1 = neighborhood_size[i] - neighborhood_size[i - 1]
            d2 = neighborhood_size[i] - neighborhood_size[i + 1]
            if d1 > 0 and d2 > 0:
                candidates.append((i, max(d1, d2)))
        print('candidates %d %s %d' % (len(candidates), candidates, size))
        ranked_nodes = sorted(candidates, key=lambda x: x[1], reverse=True)[:k]
        return [index for index, _ in ranked_nodes]

    def _loadFeatures(self, name, feature_list):
        '''Load the feature files of recording `name`.

        For every feature directory under options.INPUT the first file whose
        name up to '_vamp' equals `name` is read as CSV; its first column is
        kept as timestamps and the remaining columns as the feature array.

        Returns (arrays, timestamps): the list of loaded feature arrays and
        the timestamp column of the last file that matched (None when
        nothing matched).
        '''
        arrays = []
        timestamps = None
        for feature in feature_list:
            feature_dir = join(options.INPUT, feature)
            for f in os.listdir(feature_dir):
                # partition() reports whether '_vamp' occurs at all; the
                # former f[:f.find('_vamp')] silently dropped the last
                # character of names without that marker.
                stem, marker, _ = f.partition('_vamp')
                if marker and stem == name:
                    table = np.genfromtxt(join(feature_dir, f), delimiter=',', filling_values=0.0)
                    arrays.append(table[:, 1:])
                    # Taken from the file that actually matched, instead of
                    # relying on whatever the loop variables last pointed at.
                    timestamps = table[:, 0]
                    break
        return arrays, timestamps

    def _loadGroundTruth(self, name):
        '''Read the annotation of recording `name` for the selected dataset.

        Returns the array read from the annotation file, or None when
        options.AUDIO is not one of the known dataset names.
        '''
        if options.AUDIO == 'salami':
            return np.genfromtxt(join(options.GT, name + '.txt'), usecols=0)  # iso, salami
        if options.AUDIO == 'qupujicheng':
            return np.genfromtxt(join(options.GT, name + '.csv'), usecols=0, delimiter=',')
        if options.AUDIO == 'beatles':
            gt = np.genfromtxt(join(options.GT, name + '.lab'), usecols=(0, 1))
            return np.unique(np.ndarray.flatten(gt))
        return None

    def _saveSeries(self, path, timestamps, values):
        '''Write (timestamp, value) rows to `path` as CSV.'''
        # list(...) keeps this working where zip returns an iterator.
        rows = np.array(list(zip(timestamps[:len(values)], values)))
        np.savetxt(path, rows, delimiter=',')

    def process(self, winlength=50, stepsize=25):
        '''Cluster every recording listed in the ground-truth directory.

        winlength -- rows per GMM window
        stepsize  -- hop between GMM windows, also used to subsample the
                     timestamps

        For each recording the method loads and stacks the features, applies
        normaliseFeature and a 6-component PCA, builds the windowed GMMs,
        saves the matrix returned by getSSM to options.CACHE, and writes the
        k-means labels plus the values returned by rClustering.getNodeRank()
        to options.OUTPUT. Nothing is returned.
        '''
        audio_files = sorted(x for x in os.listdir(options.GT) if not x.startswith("."))
        if options.TEST:
            audio_files = audio_files[:2]

        # The feature set is fixed here; the directory listing that used to
        # precede this assignment was discarded immediately.
        feature_list = sorted(['pcamean', 'dct', 'contrast6'])
        feature_name = ''.join('-' + feature for feature in feature_list)
        fr = basename(options.INPUT)

        if options.AUDIO is None:
            print('Must specify audio dataset for evaluvation!')

        for audio in audio_files:
            ao = AudioObj()
            ao.name = splitext(audio)[0]

            ao_featureset, timestamps = self._loadFeatures(ao.name, feature_list)
            n_features = len(ao_featureset)
            if n_features == 0:
                continue

            # find the feature with the fewer number of frames (the last a
            # few frames should be generally empty)
            n_frame = np.min([x.shape[0] for x in ao_featureset])
            feature_matrix = np.hstack([x[:n_frame, :] for x in ao_featureset])

            print('Processing data for audio file: %s %d' % (audio, n_features))
            gt = self._loadGroundTruth(ao.name)
            if gt is not None:
                ao.gt = gt

            # map timestamps to the reduced representations
            timestamps = timestamps[0::stepsize]

            feature_matrix = normaliseFeature(feature_matrix)
            print('fr %s %s' % (fr, options.INPUT))

            # PCA
            pca = PCA(n_components=6)
            pca.fit(feature_matrix)
            feature_matrix = pca.transform(feature_matrix)

            cache_base = ao.name + feature_name + "-%s-wl%i-ss%i" % (fr, winlength, stepsize)
            cach_filename = cache_base + ".txt"
            self.set_cache_filename(filename=cach_filename, cache_location=options.CACHE)
            gmm_list = self.getGMMs(feature_matrix, filename=cach_filename,
                                    gmmWindow=winlength, stepsize=stepsize)
            print('number of GMMs: %d' % len(gmm_list))

            skl_matrix = self.pairwiseSKL(gmm_list)
            ssm = getSSM(skl_matrix)
            np.savetxt(join(options.CACHE, cache_base + "-ssm.txt"), ssm, delimiter=",")

            # RC clustering of raw GMMs
            rc = rClustering(eps=1.15, k=5, rank='max_neighbors')
            rc.set_cache_filename(cach_filename, cache_location=options.CACHE)
            rc.fit(gmm_list)
            rc.test()
            classification = rc.classification
            print('classification %s' % (classification,))
            neighborhood_size, average_div, node_rank = rc.getNodeRank()

            # k-means clustering of GMMs
            KmeansClustering = Kmeans(gmm_list, K=5, initial_centroids=set(classification))
            labels = KmeansClustering.fit()

            # NOTE(review): the prefix and the recording name are glued
            # together without a path separator (e.g. ".../kmeans<name>...").
            # The names are kept exactly as before because other tools may
            # depend on them -- confirm whether sub-directories were meant.
            self._saveSeries(join(options.OUTPUT, 'kmeans') + ao.name + feature_name + ao.name + '.csv',
                             timestamps, labels)
            self._saveSeries(join(options.OUTPUT, 'neighborhood_size') + ao.name + '.csv',
                             timestamps, neighborhood_size)
            self._saveSeries(join(options.OUTPUT, 'node_rank') + ao.name + '.csv',
                             timestamps, node_rank)
            self._saveSeries(join(options.OUTPUT, 'average_div') + ao.name + '.csv',
                             timestamps, average_div)
|
mitian@13
|
254
|
mitian@13
|
def main():
    '''Create a Segmenter and run its process() method.'''
    Segmenter().process()
|
mitian@13
|
258
|
mitian@13
|
259
|
mitian@13
|
# Run main() only when the file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
mitian@13
|
262
|