changeset 14:6dae41887406

Added functions for SSM enhancement in the SegUtil script
author mitian
date Fri, 12 Jun 2015 17:44:39 +0100
parents cc8ceb270e79
children 289a4b2b2b16
files ProbSegmenter.py SegEval.py utils/SegUtil.py utils/plotSSM.py
diffstat 4 files changed, 84 insertions(+), 28 deletions(-) [+]
line wrap: on
line diff
--- a/ProbSegmenter.py	Fri Jun 05 18:02:05 2015 +0100
+++ b/ProbSegmenter.py	Fri Jun 12 17:44:39 2015 +0100
@@ -139,7 +139,7 @@
 		feature_list.sort()
 		
 		winlength = 50
-		stepsize = 50
+		stepsize = 25
 		
 		if options.AUDIO == None:
 			print 'Must specify audio dataset for evaluvation!'
--- a/SegEval.py	Fri Jun 05 18:02:05 2015 +0100
+++ b/SegEval.py	Fri Jun 12 17:44:39 2015 +0100
@@ -22,7 +22,6 @@
 import numpy as np
 import scipy as sp
 from scipy.signal import correlate2d, convolve2d, filtfilt, resample
-from scipy.ndimage.filters import *
 from sklearn.decomposition import PCA
 from sklearn.mixture import GMM
 from sklearn.cluster import KMeans
@@ -49,9 +48,9 @@
 # Algorithm params
 h = 8               # Size of median filter for features in C-NMF
 R = 15              # Size of the median filter for the activation matrix C-NMF
-rank = 4            # Rank of decomposition for the boundaries
-rank_labels = 6     # Rank of decomposition for the labels
-R_labels = 6        # Size of the median filter for the labels
+rank = 3            # Rank of decomposition for the boundaries
+rank_labels = 16     # Rank of decomposition for the labels
+R_labels = 4        # Size of the median filter for the labels
 # Foote
 M = 2           # Median filter for the audio features (in beats)
 Mg = 32         # Gaussian kernel size
--- a/utils/SegUtil.py	Fri Jun 05 18:02:05 2015 +0100
+++ b/utils/SegUtil.py	Fri Jun 12 17:44:39 2015 +0100
@@ -19,9 +19,11 @@
 from scipy.spatial import distance
 from scipy.ndimage import filters, zoom
 from scipy import signal
+from scipy.signal import correlate2d, convolve2d, filtfilt, resample, butter
 import pylab as plt
 from scipy.spatial.distance import squareform, pdist
-from scipy.ndimage.filters import *
+from scipy.ndimage.filters import maximum_filter, minimum_filter, percentile_filter, uniform_filter
+from scipy.ndimage.filters import median_filter as med_filter
 from sklearn.metrics.pairwise import pairwise_distances
 
 
@@ -45,7 +47,6 @@
 	if not os.path.exists(directory):
 		os.makedirs(directory)
 
-
 def median_filter(X, M=8):
 	"""Median filter along the first axis of the feature matrix X."""
 	for i in xrange(X.shape[1]):
@@ -293,6 +294,24 @@
 		return np.min(X)
 	fMin = 0
 	return fMin
+
+def lp(signal, fc=0.34, axis=-1):
+	'''Low pass filter: apply an order-2 Butterworth filter to the signal with filtfilt and return the result.
+	signal: Raw signal to be smoothed.
+	fc: Cutoff frequency of the Butterworth filter. Normalized from 0 to 1, where 1 is the Nyquist frequency.
+	axis: The axis of signal along which the filter is applied. Default is -1.'''
+	bCoeffs, aCoeffs = butter(2, fc)
+	lp_smoothed_signal = filtfilt(bCoeffs, aCoeffs, signal, axis)
+	return lp_smoothed_signal
+
+def hp(signal, fc=0.34, axis=-1):
+	'''High pass filter: apply an order-2 high-pass Butterworth filter to the signal with filtfilt and return the result.
+	signal: Raw signal to be filtered.
+	fc: Cutoff frequency of the Butterworth filter. Normalized from 0 to 1, where 1 is the Nyquist frequency.
+	axis: The axis of signal along which the filter is applied. Default is -1.'''
+	bCoeffs, aCoeffs = butter(2, fc, 'highpass')
+	hp_smoothed_signal = filtfilt(bCoeffs, aCoeffs, signal, axis)
+	return hp_smoothed_signal
 	
 def getMean(feature, winlen, stepsize):
 	means = []
@@ -315,7 +334,7 @@
 	return delta_feature
 
 
-def getSSM(feature_array, metric='cosine', norm='simple', reduce=False):
+def getSSM(feature_array, metric='cosine', norm='exp', reduce=False):
 	'''Compute SSM given input feature array. 
 	args: norm: ['simple', 'remove_noise']
 	'''
@@ -323,13 +342,50 @@
 	dm = np.nan_to_num(dm)
 	if norm == 'simple':
 		ssm = 1 - (dm - np.min(dm)) / (np.max(dm) - np.min(dm))
+	if norm == 'exp': # Use with cosine metric only
+		ssm = np.exp(dm - 1)
 	if reduce:
 		ssm = reduceSSM(ssm)
 	return ssm
 
+def enhanceSSM(ssm, fc=0.34, med_size=(5,5), max_size=(5,5), min_size=(5,5), filter_type='min', axis=-1):
+	'''A series of filtering for SSM enhancement: low-pass, then median, then an optional min/max filter.
+	fc: cutoff frequency for LP filtering.
+	med_size: Median filter window size.
+			  int or tuple. If using an integer for a 2d input, axis must be specified.
+	filter_type: Select either to use maximum filter or minimum filter.
+			str or None ['min', 'max', None]
+	max_size: Maximum filter window size.
+			  int or tuple. If using an integer for a 2d input, axis must be specified.
+			  Use this when homogeneity in the SSM is expressed by LARGE value.
+	min_size: Minimum filter window size.
+			  int or tuple. If using an integer for a 2d input, axis must be specified.
+			  Use this when homogeneity in the SSM is expressed by SMALL value. 
+			  (eg. When cosine metric and exp normalization are used for distance computation.)'''

+	ssm_lp = lp(ssm, fc=fc)  # filter the input ssm; enhanced_ssm is not assigned until the branches below
+	
+	# Use scipy.ndimage.filters.median_filter (imported as med_filter), not the median_filter defined in this module
+	ssm_med = med_filter(ssm_lp, size=med_size)
+	
+	if filter_type == 'min':
+		enhanced_ssm = minimum_filter(ssm_med, size=min_size)
+	elif filter_type == 'max':
+		enhanced_ssm = maximum_filter(ssm_med, size=max_size)
+	else:
+		enhanced_ssm = ssm_med
+	return enhanced_ssm
+	
 def reduceSSM(ssm, maxfilter_size = 2, remove_size=50):
-	reduced_ssm = ssm
+	'''Adaptive thresholding using OTSU method
+	Required package: skimage (0.10+)'''
+	
+	from skimage.morphology import disk
+	# from skimage.filters import threshold_otsu, rank #skimage 0.12
+	from skimage.filter.rank import otsu #skimage 0.10
+	from skimage.filter import threshold_otsu
+	
+	reduced_ssm = copy(ssm)
 	reduced_ssm[reduced_ssm<0.75] = 0
 	# # reduced_ssm = maximum_filter(reduced_ssm,size=maxfilter_size)
 	# # reduced_ssm = morphology.remove_small_objects(reduced_ssm.astype(bool), min_size=remove_size)
--- a/utils/plotSSM.py	Fri Jun 05 18:02:05 2015 +0100
+++ b/utils/plotSSM.py	Fri Jun 12 17:44:39 2015 +0100
@@ -34,6 +34,7 @@
 from skimage.morphology import disk
 
 from PeakPickerUtil import PeakPicker
+from SegUtil import getMean, getStd, getDelta, getSSM, reduceSSM, upSample, normaliseFeature
 
 def parse_args():
 	# define parser
@@ -482,13 +483,13 @@
 		for audio in audio_files:
 			ao = AudioObj()
 			ao.name = splitext(audio)[0]
-			# annotation_file = join(options.GT, ao.name+'.txt') # iso, salami
-			# ao.gt = np.genfromtxt(annotation_file, usecols=0)	
-			# ao.label = np.genfromtxt(annotation_file, usecols=1, dtype=str)
+			annotation_file = join(options.GT, ao.name+'.txt') # iso, salami
+			ao.gt = np.genfromtxt(annotation_file, usecols=0)	
+			ao.label = np.genfromtxt(annotation_file, usecols=1, dtype=str)
 	
-			annotation_file = join(options.GT, ao.name+'.csv') # qupujicheng
-			ao.gt = np.genfromtxt(annotation_file, usecols=0, delimiter=',')	
-			ao.label = np.genfromtxt(annotation_file, usecols=1, delimiter=',', dtype=str)
+			# annotation_file = join(options.GT, ao.name+'.csv') # qupujicheng
+			# ao.gt = np.genfromtxt(annotation_file, usecols=0, delimiter=',')	
+			# ao.label = np.genfromtxt(annotation_file, usecols=1, delimiter=',', dtype=str)
 
 			# annotation_file = join(options.GT, ao.name+'.lab') # beatles
 			# ao.gt = np.genfromtxt(annotation_file, usecols=(0,1))
@@ -572,11 +573,11 @@
 			featureRate = float(self.SampleRate) /self.stepSize
 			pca = PCA(n_components=5)
 			
-			ao.gammatone_features = self.normaliseFeature(ao.gammatone_features)	
+			ao.gammatone_features = normaliseFeature(ao.gammatone_features)	
 			ao.gammatone_features = resample(ao.gammatone_features, step)
 			ao.gammatone_features[np.isnan(ao.gammatone_features)] = 0.0		
 			ao.gammatone_features[np.isinf(ao.gammatone_features)] = 0.0		
-			ao.timbre_features = self.normaliseFeature(ao.timbre_features)
+			ao.timbre_features = normaliseFeature(ao.timbre_features)
 			ao.timbre_features = resample(ao.timbre_features, step)
 			ao.timbre_features[np.isnan(ao.timbre_features)] = 0.0
 			ao.timbre_features[np.isinf(ao.timbre_features)] = 0.0
@@ -584,37 +585,37 @@
 			# ao.lpc_features = resample(ao.lpc_features, step)
 			# ao.lpc_features[np.isnan(ao.lpc_features)] = 0.0
 			# ao.lpc_features[np.isinf(ao.lpc_features)] = 0.0
-			ao.harmonic_features = self.normaliseFeature(ao.harmonic_features)
+			ao.harmonic_features = normaliseFeature(ao.harmonic_features)
 			ao.harmonic_features = resample(ao.harmonic_features, step)
-			ao.tempo_features = self.normaliseFeature(ao.tempo_features)	
+			ao.tempo_features = normaliseFeature(ao.tempo_features)	
 			ao.harmonic_features[np.isinf(ao.harmonic_features)] = 0.0
 			ao.tempo_features[np.isnan(ao.tempo_features)] = 0.0
 			ao.tempo_features[np.isinf(ao.tempo_features)] = 0.0
 			
-			ao.gammatone_features = self.getMean(ao.gammatone_features, winlen=aggregation_window, stepsize=aggregation_step)
+			ao.gammatone_features = getMean(ao.gammatone_features, winlen=aggregation_window, stepsize=aggregation_step)
 			pca.fit(ao.gammatone_features)
 			ao.gammatone_features = pca.transform(ao.gammatone_features)
-			ao.gammatone_ssm = self.getSSM(ao.gammatone_features)
+			ao.gammatone_ssm = getSSM(ao.gammatone_features)
 			
-			ao.tempo_features = self.getMean(ao.tempo_features, winlen=aggregation_window, stepsize=aggregation_step)
+			ao.tempo_features = getMean(ao.tempo_features, winlen=aggregation_window, stepsize=aggregation_step)
 			pca.fit(ao.tempo_features)
 			ao.tempo_features = pca.transform(ao.tempo_features)
-			ao.tempo_ssm = self.getSSM(ao.tempo_features)
+			ao.tempo_ssm = getSSM(ao.tempo_features)
 			
-			ao.timbre_features = self.getMean(ao.timbre_features, winlen=aggregation_window, stepsize=aggregation_step)
+			ao.timbre_features = getMean(ao.timbre_features, winlen=aggregation_window, stepsize=aggregation_step)
 			pca.fit(ao.timbre_features)
 			ao.timbre_features = pca.transform(ao.timbre_features)
-			ao.timbre_ssm = self.getSSM(ao.timbre_features)
+			ao.timbre_ssm = getSSM(ao.timbre_features)
 
 			# ao.lpc_features = self.getMean(ao.lpc_features, winlen=aggregation_window, stepsize=aggregation_step)
 			# pca.fit(ao.lpc_features)
 			# ao.lpc_features = pca.transform(ao.lpc_features)
 			# ao.lpc_ssm = self.getSSM(ao.lpc_features)
 
-			ao.harmonic_features = self.getMean(ao.harmonic_features, winlen=aggregation_window, stepsize=aggregation_step)
+			ao.harmonic_features = getMean(ao.harmonic_features, winlen=aggregation_window, stepsize=aggregation_step)
 			pca.fit(ao.harmonic_features)
 			ao.harmonic_features = pca.transform(ao.harmonic_features)
-			ao.harmonic_ssm = self.getSSM(ao.harmonic_features)
+			ao.harmonic_ssm = getSSM(ao.harmonic_features)
 			
 			ao.ssm_timestamps = np.array(map(lambda x: ao.tempo_timestamps[aggregation_step*x], np.arange(0, ao.gammatone_ssm.shape[0])))
 			
@@ -665,7 +666,7 @@
 				np.savetxt(join(options.OUTPUT, 'ssm_data', ao.name+'-chroma.txt'), np.array(ao.harmonic_ssm), delimiter=',')
 				# np.savetxt(join(options.OUTPUT, 'ssm_data', ao.name+'-hpss_chroma.txt'), np.array(ao.harmonic_ssm), delimiter=',')
 			
-			audio_list.append(ao)
+			# audio_list.append(ao)
 			
 		
 def main():