view ProbSegmenter.py @ 17:c01fcb752221

author mitian
date Fri, 21 Aug 2015 10:15:29 +0100
parents 6dae41887406
#!/usr/bin/env python
# encoding: utf-8
"""
ProbSegmenter.py
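
Probabilistic structural segmentation: fit a GMM to each sliding window of
audio features, compute pairwise symmetrised KL divergences between the
models, and cluster them (rank clustering followed by k-means) to obtain
segment labels per timestamp.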
"""

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

import sys, os, optparse, cPickle
import numpy as np
from numpy import abs,log,exp,floor,sum,sqrt,cos,hstack, power
from math import ceil
from scipy.signal import correlate2d, convolve2d
from itertools import combinations
from os.path import join, isdir, isfile, abspath, dirname, basename, split, splitext

from sklearn.decomposition import PCA
from sklearn.mixture import GMM
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.cluster import KMeans, DBSCAN
from scipy.spatial.distance import squareform, pdist

from utils.GmmMetrics import GmmDistance
from utils.RankClustering import rClustering
from utils.kmeans import Kmeans
from utils.ComputationCache import Meta, with_pickle_dump
from utils.SegUtil import normaliseFeature, upSample, getSSM, getMean, getStd, getDelta

def parse_args():
	# define parser
	op = optparse.OptionParser()
	# IO options
	op.add_option('-i', '--input', action="store", dest="INPUT", default='/Volumes/c4dm-scratch/mi/seg/qupujicheng/features1', type="str", help="Loading features from.." )
	op.add_option('-a', '--audioset', action="store", dest="AUDIO", default=None, type="str", help="Select audio datasets ['qupujicheng', 'salami', 'beatles'] ")
	op.add_option('-g', '--groundtruth', action="store", dest="GT", default='/Volumes/c4dm-scratch/mi/seg/qupujicheng/annotations/lowercase', type="str", help="Loading annotation files from.. ")
	op.add_option('-o', '--output', action="store", dest="OUTPUT", default='/Volumes/c4dm-scratch/mi/seg/qupujicheng/clustering', type="str", help="Writing clustering results to.. ")
	op.add_option('-c', '--cache', action="store", dest="CACHE", default='/Volumes/c4dm-scratch/mi/seg/qupujicheng/clustering/cache', type="str", help="Caching intermediate computations in.. ")
	op.add_option('-t', '--test', action="store_true", dest="TEST", help="Select TEST mode.")
	op.add_option('-v', '--verbose', action="store_true", dest="VERBOSE", help="Select VERBOSE mode to save features/SSMs.")

	return op.parse_args()

options, args = parse_args()

class FeatureObj(object):
	__slots__ = ['key', 'audio', 'timestamps', 'features']

class AudioObj(object):
	__slots__ = ['feature_list', 'feature_matrix', 'distance_matrix', 'name', 'gt']

class Segmenter(object):
	'''The main segmentation object'''
	
	meta = Meta()
	
	@classmethod	
	def set_cache_filename(cls, filename, cache = True, cache_location=""):
		cls.meta.cache = cache
		cls.meta.cache_file_base = filename	
		cls.meta.cache_location = cache_location	
	
	@with_pickle_dump(meta)
	def getGMMs(self, feature, filename, gmmWindow=10, stepsize=1, save=True):
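		'''Fit a 2-component GMM to each sliding window (gmmWindow frames wide, stepsize frames apart) of the feature matrix.'''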
		gmm_list = []
		steps = int((feature.shape[0] - gmmWindow + stepsize) / stepsize)
		for i in xrange(steps):
			gmm_list.append(GmmDistance(feature[i*stepsize:(i*stepsize+gmmWindow), :].T, components = 2))
		# if save:
		# 	with open(join('cache', filename), 'w+') as f:
		# 		f.write(cPickle.dumps(gmm_list))
		return gmm_list
	
	def pairwiseSKL(self, gmm_list):
		'''Compute pairwise symmetrised KL divergence of a list of GMMs.'''
		n_GMMs = len(gmm_list)
		distance_matrix = np.zeros((n_GMMs, n_GMMs))
		for i in xrange(n_GMMs):
			for j in xrange(i, n_GMMs):
				distance_matrix[i][j] = gmm_list[i].skl_distance_full(gmm_list[j])
				distance_matrix[j][i] = distance_matrix[i][j]

		np.fill_diagonal(distance_matrix, 0.0)
		distance_matrix[np.isnan(distance_matrix)] = 0
		# distance_matrix[np.isinf(distance_matrix)] = np.finfo(np.float64).max
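		# replace any infinite divergences with the largest finite value in the matrix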
		if np.isinf(distance_matrix).any():
			data = np.sort(np.ndarray.flatten(distance_matrix))
			pos = np.where(data == np.inf)[0][0]
			fMax = data[pos-1]
			print len(data), pos, fMax
			distance_matrix[np.isinf(distance_matrix)] = fMax
		return distance_matrix
	
	def smoothLabels(self, label_list, size=5):
		'''Smooth label list: relabel runs shorter than the given size with the preceding label.'''
		prev_label = -1
		chain = 0
		for i in xrange(size, len(label_list)-size):
			label = label_list[i]
			if label == prev_label:
				chain += 1
			else:
				if chain < size:
					label_list[i] = prev_label
				chain = 0
			prev_label = label_list[i]
		return label_list
	
	def getInitialCentroids(self, neighborhood_size, k=10):
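		'''Pick local maxima of the neighborhood-size curve, rank them by how far they rise above their neighbours, and return the indices of the top k peaks.'''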
		candidates = []
		size = len(neighborhood_size)
		for i in xrange(1, size-1):
			d1 = neighborhood_size[i] - neighborhood_size[i-1]
			d2 = neighborhood_size[i] - neighborhood_size[i+1]
			if d1 > 0 and d2 > 0:
				candidates.append((i, max(d1, d2)))
		print 'candidates', len(candidates), candidates, size
		ranked_nodes = sorted(candidates, key=lambda x: x[1],reverse=True)[:k]
		ranked_nodes = [ranked_nodes[i][0] for i in xrange(len(ranked_nodes))]
		return ranked_nodes
				
		
	def process(self):
		
		audio_files = [x for x in os.listdir(options.GT) if not x.startswith(".") ]
		audio_files.sort()
		if options.TEST:
			# audio_files = ["""17 We Are The Champions.wav"""]
			audio_files = audio_files[:2]
		audio_list = []

		fobj_list = []
		feature_list = [i for i in os.listdir(options.INPUT) if not i.startswith('.')]
		# override the discovered features with a fixed subset
		feature_list = ['pcamean', 'dct', 'contrast6']
		
		feature_list.sort()
		
		winlength = 50
		stepsize = 25
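		# sliding-window GMM parameters, in feature frames: 50-frame windows taken every 25 frames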
		
		if options.AUDIO is None:
			print 'Must specify an audio dataset for evaluation!'
			
		for i, audio in enumerate(audio_files) :
			ao = AudioObj()
			ao.name = splitext(audio)[0]
			
			ao_featureset = []
			for feature in feature_list :
				for f in os.listdir(join(options.INPUT, feature)):
					if f[:f.find('_vamp')]==ao.name: 
						data = np.genfromtxt(join(options.INPUT, feature, f), delimiter=',', filling_values=0.0)[:, 1:]
						ao_featureset.append(data)
						break

			n_features = len(ao_featureset)		
			if n_features == 0: continue 

			if n_features > 1:
				# truncate all features to the smallest frame count (the last few frames are generally empty)
				n_frame = np.min([x.shape[0] for x in ao_featureset])
				ao_featureset = [x[:n_frame,:] for x in ao_featureset]
				feature_matrix = np.hstack((ao_featureset))
			else:
				feature_matrix = ao_featureset[0]	
						
			print "Processing data for audio file:", audio, n_features
			if options.AUDIO == 'salami':
				annotation_file = join(options.GT, ao.name+'.txt') # iso, salami
				ao.gt = np.genfromtxt(annotation_file, usecols=0)	
			elif options.AUDIO == 'qupujicheng':
				annotation_file = join(options.GT, ao.name+'.csv') # qupujicheng
				ao.gt = np.genfromtxt(annotation_file, usecols=0, delimiter=',')	
			elif options.AUDIO == 'beatles':
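				# .lab annotations carry start and end times; flatten both columns into a set of unique boundary times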
				annotation_file = join(options.GT, ao.name+'.lab') # beatles
				ao.gt = np.genfromtxt(annotation_file, usecols=(0,1))
				ao.gt = np.unique(np.ndarray.flatten(ao.gt))
				
			n_frames = feature_matrix.shape[0]
			
			# read the timestamp column from the last feature file loaded above
			timestamps = np.genfromtxt(join(options.INPUT, feature, f), delimiter=',', filling_values=0.0, usecols=0)
			# map timestamps to the reduced (windowed) representation
			timestamps = timestamps[0::stepsize]
			
			# normalise the feature matrix, get rid of negative features, ensure numerical stability by adding a small constant
			feature_matrix = normaliseFeature(feature_matrix)
			
			# np.savetxt('test/feature_maxtrix-'+ao.name+"-wl%i-ss%i.txt" %(winlength,stepsize), feature_matrix, delimiter=',')
			fr = basename(options.INPUT)
			print 'fr', fr, options.INPUT
			feature_name = ''
			for feature in feature_list:
				feature_name += ('-' + feature)
			# np.savetxt(join(options.OUTPUT, test, ao.name+feature_name+"-%s-wl%i-ss%i-ssm.txt" %(fr,winlength,stepsize)), feature_matrix, delimiter=',')
			
			# PCA
			pca = PCA(n_components=6)
			pca.fit(feature_matrix)
			feature_matrix = pca.transform(feature_matrix)
			
			cache_filename = ao.name+feature_name+"-%s-wl%i-ss%i.txt" %(fr,winlength,stepsize)
			self.set_cache_filename(filename=cache_filename, cache_location=options.CACHE)
			gmm_list = self.getGMMs(feature_matrix, filename=cache_filename, gmmWindow=winlength, stepsize=stepsize)
			print 'number of GMMs:', len(gmm_list)
			
			skl_matrix = self.pairwiseSKL(gmm_list)
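			# convert the pairwise divergence matrix into a self-similarity matrix and save it for inspection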
			ssm = getSSM(skl_matrix)
			np.savetxt(join(options.CACHE, ao.name+feature_name+"-%s-wl%i-ss%i-ssm.txt" %(fr,winlength,stepsize)), ssm, delimiter=",")
			
			# # 1. DBSCAN clustering of raw feature
			# db1 = DBSCAN(eps=10, min_samples=10).fit(feature_array)
			# core_samples_mask1 = np.zeros_like(db1.labels_, dtype=bool)
			# core_samples_mask1[db1.core_sample_indices_] = True
			# labels1 = db1.labels_
			
			# # 2. DBSCAN clustering of GMMs
			# db2 = DBSCAN(eps=0.05, min_samples=10, metric='precomputed').fit(skl_matrix)
			# core_samples_mask2 = np.zeros_like(db2.labels_, dtype=bool)
			# core_samples_mask2[db2.core_sample_indices_] = True
			# labels2 = db2.labels_
			
			# 3. RC clustering of raw GMMs
			rc = rClustering(eps=1.15, k=5, rank='max_neighbors')
			rc.set_cache_filename(ao.name+feature_name+"-%s-wl%i-ss%i.txt" %(fr,winlength,stepsize), cache_location=options.CACHE)
			rc.fit(gmm_list)
			rc.test()
			classification = rc.classification
			print 'classification', classification
			neighborhood_size, average_div, node_rank = rc.getNodeRank()
			
			# centroid_list = self.getInitialCentroids(neighborhood_size, k=10)
			# print 'initial centroids', centroid_list
			
			# k-means clustering of GMMs 
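			# seeded with the distinct classes returned by rank clustering (assumed to index candidate centroid GMMs)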
			KmeansClustering = Kmeans(gmm_list, K=5, initial_centroids=set(classification))
			labels = KmeansClustering.fit()
						
			f1 = np.array(zip(timestamps[:len(labels)], labels))
			# f2 = np.array(zip(timestamps[:len(labels2)], labels2))
			f3 = np.array(zip(timestamps[:len(classification)], classification))
			f4 = np.array(zip(timestamps[:len(neighborhood_size)], neighborhood_size))
			f5 = np.array(zip(timestamps[:len(node_rank)], node_rank))
			f6 = np.array(zip(timestamps[:len(average_div)], average_div))
			# 
			np.savetxt(join(options.OUTPUT, 'kmeans')+splitext(audio)[0]+feature_name+'.csv', f1, delimiter=',')
			# np.savetxt(join(options.OUTPUT, 'dbscan_gmm')+splitext(audio)[0]+'.csv', f2, delimiter=',')
			# np.savetxt(join(options.OUTPUT, 'classification')+splitext(audio)[0]+'.csv', f3, delimiter=',')
			np.savetxt(join(options.OUTPUT, 'neighborhood_size')+splitext(audio)[0]+'.csv', f4, delimiter=',')
			np.savetxt(join(options.OUTPUT, 'node_rank')+splitext(audio)[0]+'.csv', f5, delimiter=',')
			np.savetxt(join(options.OUTPUT, 'average_div')+splitext(audio)[0]+'.csv', f6, delimiter=',')
			
def main():
	segmenter = Segmenter()
	segmenter.process()
	

if __name__ == '__main__':
	main()
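
# Example invocation (paths are placeholders for the local dataset layout):
#   python ProbSegmenter.py -a qupujicheng -i <features_dir> -g <annotations_dir> -o <output_dir> -c <cache_dir>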