#!/usr/bin/env python
# encoding: utf-8
"""
ProbSegmenter.py

Probabilistic structural segmentation.  Short sliding windows of audio
features are modelled as GMMs, compared with a symmetrised KL divergence,
and the resulting distance matrix is clustered to find section labels.
"""

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

import sys, os, optparse, cPickle
import numpy as np
from numpy import abs, log, exp, sum, sqrt, cos, hstack, power
# NOTE: math.floor deliberately wins over numpy.floor here (the original
# numpy floor import was shadowed and has been dropped).
from math import ceil, floor
from scipy.signal import correlate2d, convolve2d
from itertools import combinations
from os.path import join, isdir, isfile, abspath, dirname, basename, split, splitext

from sklearn.decomposition import PCA
from sklearn.mixture import GMM
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.cluster import KMeans, DBSCAN
from scipy.spatial.distance import squareform, pdist

from utils.GmmMetrics import GmmDistance
from utils.RankClustering import rClustering
from utils.kmeans import Kmeans
from utils.ComputationCache import Meta, with_pickle_dump
from utils.SegUtil import normaliseFeature, upSample, getSSM, getMean, getStd, getDelta


def parse_args():
    """Define and parse command-line options; returns (options, args)."""
    op = optparse.OptionParser()
    # IO options
    op.add_option('-i', '--input', action="store", dest="INPUT", default='/Volumes/c4dm-scratch/mi/seg/qupujicheng/features1', type="str", help="Loading features from..")
    op.add_option('-a', '--audioset', action="store", dest="AUDIO", default=None, type="str", help="Select audio datasets ['qupujicheng', 'salami', 'beatles'] ")
    op.add_option('-g', '--groundtruth', action="store", dest="GT", default='/Volumes/c4dm-scratch/mi/seg/qupujicheng/annotations/lowercase', type="str", help="Loading annotation files from.. ")
    op.add_option('-o', '--output', action="store", dest="OUTPUT", default='/Volumes/c4dm-scratch/mi/seg/qupujicheng/clustering', type="str", help="Loading annotation files from.. ")
    op.add_option('-c', '--cache', action="store", dest="CACHE", default='/Volumes/c4dm-scratch/mi/seg/qupujicheng/clustering/cache', type="str", help="Loading annotation files from.. ")
    op.add_option('-t', '--test', action="store_true", dest="TEST", help="Select TEST mode.")
    op.add_option('-v', '--verbose', action="store_true", dest="VERBOSE", help="Select VERBOSE mode to save features/SSMs.")

    return op.parse_args()

options, args = parse_args()


class FeatureObj(object):
    # Lightweight per-feature container.  Must derive from object:
    # __slots__ has no effect on Python 2 old-style classes.
    __slots__ = ['key', 'audio', 'timestamps', 'features']


class AudioObj(object):
    # Per-track container.  'gt' (ground-truth boundary times) is assigned
    # in Segmenter.process() and therefore has to be declared in __slots__.
    __slots__ = ['feature_list', 'feature_matrix', 'distance_matrix', 'name', 'gt']


class Segmenter(object):
    '''The main segmentation object'''

    meta = Meta()

    @classmethod
    def set_cache_filename(cls, filename, cache=True, cache_location=""):
        '''Configure the pickle cache consulted by getGMMs.'''
        cls.meta.cache = cache
        cls.meta.cache_file_base = filename
        cls.meta.cache_location = cache_location

    @with_pickle_dump(meta)
    def getGMMs(self, feature, filename, gmmWindow=10, stepsize=1, save=True):
        '''Fit one 2-component GMM per sliding window over the features.

        feature is an (n_frames, n_dims) matrix; a window of gmmWindow
        frames is taken every stepsize frames.  Results are memoised on
        disk through the with_pickle_dump decorator, keyed by filename.
        '''
        gmm_list = []
        steps = int((feature.shape[0] - gmmWindow + stepsize) / stepsize)
        for i in xrange(steps):
            gmm_list.append(GmmDistance(feature[i * stepsize:(i * stepsize + gmmWindow), :].T, components=2))
        return gmm_list

    def pairwiseSKL(self, gmm_list):
        '''Compute pairwise symmetrised KL divergence of a list of GMMs.'''
        n_GMMs = len(gmm_list)
        distance_matrix = np.zeros((n_GMMs, n_GMMs))
        for i in xrange(n_GMMs):
            for j in xrange(i, n_GMMs):
                # SKL is symmetric, so fill both triangles from one call.
                distance_matrix[i][j] = gmm_list[i].skl_distance_full(gmm_list[j])
                distance_matrix[j][i] = distance_matrix[i][j]

        np.fill_diagonal(distance_matrix, 0.0)
        distance_matrix[np.isnan(distance_matrix)] = 0
        # Replace infinities with the largest finite divergence so the
        # matrix stays usable for downstream clustering.
        inf_mask = np.isinf(distance_matrix)
        if inf_mask.any():
            finite = distance_matrix[~inf_mask]
            if finite.size:
                distance_matrix[inf_mask] = finite.max()
            else:
                distance_matrix[inf_mask] = 0.0
        return distance_matrix

    def smoothLabels(self, label_list, size=5):
        '''Smooth label_list in place within the given run length.

        Label runs shorter than size frames are absorbed into the
        preceding label.  The (modified) list is returned.
        '''
        # BUG FIX: the original initialised prev_labels but read
        # prev_label, raising NameError on the first comparison.
        prev_label = -1
        chain = 0
        for i in xrange(size, len(label_list) - size):
            label = label_list[i]
            if label == prev_label:
                chain += 1
            else:
                if chain < size:
                    label_list[i] = prev_label
                chain = 0
            prev_label = label_list[i]
        return label_list

    def getInitialCentroids(self, neighborhood_size, k=10):
        '''Pick up to k local maxima of the neighbourhood-size curve,
        ranked by peak prominence, as initial cluster centroids.'''
        candidates = []
        size = len(neighborhood_size)
        for i in xrange(1, size - 1):
            d1 = neighborhood_size[i] - neighborhood_size[i - 1]
            d2 = neighborhood_size[i] - neighborhood_size[i + 1]
            if d1 > 0 and d2 > 0:
                # Strict local maximum; score by the larger drop.
                candidates.append((i, max(d1, d2)))
        ranked_nodes = sorted(candidates, key=lambda x: x[1], reverse=True)[:k]
        return [node for node, _score in ranked_nodes]
mitian@13: mitian@13: mitian@13: def process(self): mitian@13: mitian@13: audio_files = [x for x in os.listdir(options.GT) if not x.startswith(".") ] mitian@13: audio_files.sort() mitian@13: if options.TEST: mitian@13: # audio_files = ["""17 We Are The Champions.wav"""] mitian@13: audio_files = audio_files[:2] mitian@13: audio_list = [] mitian@13: mitian@13: fobj_list = [] mitian@13: feature_list = [i for i in os.listdir(options.INPUT) if not i.startswith('.')] mitian@13: feature_list = ['pcamean', 'dct', 'contrast6'] mitian@13: mitian@13: feature_list.sort() mitian@13: mitian@13: winlength = 50 mitian@14: stepsize = 25 mitian@13: mitian@13: if options.AUDIO == None: mitian@13: print 'Must specify audio dataset for evaluvation!' mitian@13: mitian@13: for i, audio in enumerate(audio_files) : mitian@13: ao = AudioObj() mitian@13: ao.name = splitext(audio)[0] mitian@13: mitian@13: ao_featureset = [] mitian@13: for feature in feature_list : mitian@13: for f in os.listdir(join(options.INPUT, feature)): mitian@13: if f[:f.find('_vamp')]==ao.name: mitian@13: data = np.genfromtxt(join(options.INPUT, feature, f), delimiter=',', filling_values=0.0)[:, 1:] mitian@13: ao_featureset.append(data) mitian@13: break mitian@13: mitian@13: n_features = len(ao_featureset) mitian@13: if n_features == 0: continue mitian@13: mitian@13: if n_features > 1: mitian@13: # find the feature with the fewer number of frames (the last a few frames should be generally empty) mitian@13: n_frame = np.min([x.shape[0] for x in ao_featureset]) mitian@13: ao_featureset = [x[:n_frame,:] for x in ao_featureset] mitian@13: feature_matrix = np.hstack((ao_featureset)) mitian@13: else: mitian@13: feature_matrix = ao_featureset[0] mitian@13: mitian@13: print "Processing data for audio file:", audio, n_features mitian@13: if options.AUDIO == 'salami': mitian@13: annotation_file = join(options.GT, ao.name+'.txt') # iso, salami mitian@13: ao.gt = np.genfromtxt(annotation_file, usecols=0) mitian@13: elif 
options.AUDIO == 'qupujicheng': mitian@13: annotation_file = join(options.GT, ao.name+'.csv') # qupujicheng mitian@13: ao.gt = np.genfromtxt(annotation_file, usecols=0, delimiter=',') mitian@13: elif options.AUDIO == 'beatles': mitian@13: annotation_file = join(options.GT, ao.name+'.lab') # beatles mitian@13: ao.gt = np.genfromtxt(annotation_file, usecols=(0,1)) mitian@13: ao.gt = np.unique(np.ndarray.flatten(ao.gt)) mitian@13: mitian@13: n_frames = feature_matrix.shape[0] mitian@13: mitian@13: timestamps = np.genfromtxt(join(options.INPUT, feature, f), delimiter=',', filling_values=0.0, usecols=0) mitian@13: # map timestamps to the reduced representations mitian@13: timestamps = timestamps[0::stepsize] mitian@13: mitian@13: # # normalise the feature matrix, get rid of negative features, ensure numerical stability by adding a small constant mitian@13: feature_matrix = normaliseFeature(feature_matrix) mitian@13: mitian@13: # np.savetxt('test/feature_maxtrix-'+ao.name+"-wl%i-ss%i.txt" %(winlength,stepsize), feature_matrix, delimiter=',') mitian@13: fr = basename(options.INPUT) mitian@13: print 'fr', fr, options.INPUT mitian@13: feature_name = '' mitian@13: for feature in feature_list: mitian@13: feature_name += ('-' + feature) mitian@13: # np.savetxt(join(options.OUTPUT, test, ao.name+feature_name+"-%s-wl%i-ss%i-ssm.txt" %(fr,winlength,stepsize)), feature_matrix, delimiter=',') mitian@13: mitian@13: # PCA mitian@13: pca = PCA(n_components=6) mitian@13: pca.fit(feature_matrix) mitian@13: feature_matrix = pca.transform(feature_matrix) mitian@13: mitian@13: cach_filename = ao.name+feature_name+"-%s-wl%i-ss%i.txt" %(fr,winlength,stepsize) mitian@13: self.set_cache_filename(filename=cach_filename, cache_location=options.CACHE) mitian@13: gmm_list = self.getGMMs(feature_matrix, filename=cach_filename, gmmWindow=winlength, stepsize=stepsize) mitian@13: print 'number of GMMs:', len(gmm_list) mitian@13: mitian@13: skl_matrix = self.pairwiseSKL(gmm_list) mitian@13: ssm = 
getSSM(skl_matrix) mitian@13: np.savetxt(join(options.CACHE, ao.name+feature_name+"-%s-wl%i-ss%i-ssm.txt" %(fr,winlength,stepsize)), ssm, delimiter=",") mitian@13: mitian@13: # # 1. DBSCAN clustering of raw feature mitian@13: # db1 = DBSCAN(eps=10, min_samples=10).fit(feature_array) mitian@13: # core_samples_mask1 = np.zeros_like(db1.labels_, dtype=bool) mitian@13: # core_samples_mask1[db1.core_sample_indices_] = True mitian@13: # labels1 = db1.labels_ mitian@13: mitian@13: # # 2. DBSCAN clustering of GMMs mitian@13: # db2 = DBSCAN(eps=0.05, min_samples=10, metric='precomputed').fit(skl_matrix) mitian@13: # core_samples_mask2 = np.zeros_like(db2.labels_, dtype=bool) mitian@13: # core_samples_mask2[db2.core_sample_indices_] = True mitian@13: # labels2 = db2.labels_ mitian@13: mitian@13: # 3. RC clustering of raw GMMs mitian@13: rc = rClustering(eps=1.15, k=5, rank='max_neighbors') mitian@13: rc.set_cache_filename(ao.name+feature_name+"-%s-wl%i-ss%i.txt" %(fr,winlength,stepsize), cache_location=options.CACHE) mitian@13: rc.fit(gmm_list) mitian@13: rc.test() mitian@13: classification = rc.classification mitian@13: print 'classification', classification mitian@13: neighborhood_size, average_div, node_rank = rc.getNodeRank() mitian@13: mitian@13: # centroid_list = self.getInitialCentroids(neighborhood_size, k=10) mitian@13: # print 'initial centroids', centroid_list mitian@13: mitian@13: # k-means clustering of GMMs mitian@13: KmeansClustering = Kmeans(gmm_list, K=5, initial_centroids=set(classification)) mitian@13: labels = KmeansClustering.fit() mitian@13: mitian@13: f1 = np.array(zip(timestamps[:len(labels)], labels)) mitian@13: # f2 = np.array(zip(timestamps[:len(labels2)], labels2)) mitian@13: f3 = np.array(zip(timestamps[:len(classification)], classification)) mitian@13: f4 = np.array(zip(timestamps[:len(neighborhood_size)], neighborhood_size)) mitian@13: f5 = np.array(zip(timestamps[:len(node_rank)], node_rank)) mitian@13: f6 = 
np.array(zip(timestamps[:len(average_div)], average_div)) mitian@13: # mitian@13: np.savetxt(join(options.OUTPUT, 'kmeans')+splitext(audio)[0]+feature_name+splitext(audio)[0]+'.csv', f1, delimiter=',') mitian@13: # np.savetxt(join(options.OUTPUT, 'dbscan_gmm')+splitext(audio)[0]+'.csv', f2, delimiter=',') mitian@13: # np.savetxt(join(options.OUTPUT, 'classification')+splitext(audio)[0]+'.csv', f3, delimiter=',') mitian@13: np.savetxt(join(options.OUTPUT, 'neighborhood_size')+splitext(audio)[0]+'.csv', f4, delimiter=',') mitian@13: np.savetxt(join(options.OUTPUT, 'node_rank')+splitext(audio)[0]+'.csv', f5, delimiter=',') mitian@13: np.savetxt(join(options.OUTPUT, 'average_div')+splitext(audio)[0]+'.csv', f6, delimiter=',') mitian@13: mitian@13: def main(): mitian@13: segmenter = Segmenter() mitian@13: segmenter.process() mitian@13: mitian@13: mitian@13: if __name__ == '__main__': mitian@13: main() mitian@13: