Mercurial > hg > segmentation
changeset 14:6dae41887406
added funcs for ssm enhancement in segutil script
author | mitian |
---|---|
date | Fri, 12 Jun 2015 17:44:39 +0100 |
parents | cc8ceb270e79 |
children | 289a4b2b2b16 |
files | ProbSegmenter.py SegEval.py utils/SegUtil.py utils/plotSSM.py |
diffstat | 4 files changed, 84 insertions(+), 28 deletions(-) [+] |
line wrap: on
line diff
--- a/ProbSegmenter.py Fri Jun 05 18:02:05 2015 +0100 +++ b/ProbSegmenter.py Fri Jun 12 17:44:39 2015 +0100 @@ -139,7 +139,7 @@ feature_list.sort() winlength = 50 - stepsize = 50 + stepsize = 25 if options.AUDIO == None: print 'Must specify audio dataset for evaluvation!'
--- a/SegEval.py Fri Jun 05 18:02:05 2015 +0100 +++ b/SegEval.py Fri Jun 12 17:44:39 2015 +0100 @@ -22,7 +22,6 @@ import numpy as np import scipy as sp from scipy.signal import correlate2d, convolve2d, filtfilt, resample -from scipy.ndimage.filters import * from sklearn.decomposition import PCA from sklearn.mixture import GMM from sklearn.cluster import KMeans @@ -49,9 +48,9 @@ # Algorithm params h = 8 # Size of median filter for features in C-NMF R = 15 # Size of the median filter for the activation matrix C-NMF -rank = 4 # Rank of decomposition for the boundaries -rank_labels = 6 # Rank of decomposition for the labels -R_labels = 6 # Size of the median filter for the labels +rank = 3 # Rank of decomposition for the boundaries +rank_labels = 16 # Rank of decomposition for the labels +R_labels = 4 # Size of the median filter for the labels # Foote M = 2 # Median filter for the audio features (in beats) Mg = 32 # Gaussian kernel size
--- a/utils/SegUtil.py Fri Jun 05 18:02:05 2015 +0100 +++ b/utils/SegUtil.py Fri Jun 12 17:44:39 2015 +0100 @@ -19,9 +19,11 @@ from scipy.spatial import distance from scipy.ndimage import filters, zoom from scipy import signal +from scipy.signal import correlate2d, convolve2d, filtfilt, resample, butter import pylab as plt from scipy.spatial.distance import squareform, pdist -from scipy.ndimage.filters import * +from scipy.ndimage.filters import maximum_filter, minimum_filter, percentile_filter, uniform_filter +from scipy.ndimage.filters import median_filter as med_filter from sklearn.metrics.pairwise import pairwise_distances @@ -45,7 +47,6 @@ if not os.path.exists(directory): os.makedirs(directory) - def median_filter(X, M=8): """Median filter along the first axis of the feature matrix X.""" for i in xrange(X.shape[1]): @@ -293,6 +294,24 @@ return np.min(X) fMin = 0 return fMin + +def lp(signal, fc=0.34, axis=-1): + '''Low pass filter function + signal: Raw signal to be smoothed. + fc: Cutoff frequency of the butterworth filter. Normalized from 0 to 1, where 1 is the Nyquist frequency. + axis: The axis of x to which the filter is applied. Default is -1.''' + bCoeffs, aCoeffs = butter(2, fc) + lp_smoothed_signal = filtfilt(bCoeffs, aCoeffs, signal, axis) + return lp_smoothed_signal + +def hp(signal, fc=0.34, axis=-1): + '''Low pass filter function + signal: Raw signal to be smoothed. + fc: Cutoff frequency of the butterworth filter. + axis: The axis of x to which the filter is applied. Default is -1.''' + bCoeffs, aCoeffs = butter(2, fc, 'highpass') + hp_smoothed_signal = filtfilt(bCoeffs, aCoeffs, signal, axis) + return hp_smoothed_signal def getMean(feature, winlen, stepsize): means = [] @@ -315,7 +334,7 @@ return delta_feature -def getSSM(feature_array, metric='cosine', norm='simple', reduce=False): +def getSSM(feature_array, metric='cosine', norm='exp', reduce=False): '''Compute SSM given input feature array. 
args: norm: ['simple', 'remove_noise'] ''' @@ -323,13 +342,50 @@ dm = np.nan_to_num(dm) if norm == 'simple': ssm = 1 - (dm - np.min(dm)) / (np.max(dm) - np.min(dm)) + if norm == 'exp': # Use with cosine metric only + ssm = np.exp(dm - 1) if reduce: ssm = reduceSSM(ssm) return ssm +def enhanceSSM(ssm, fc=0.34, med_size=(5,5), max_size=(5,5), min_size=(5,5), filter_type='min', axis=-1): + '''A series of filtering for SSM enhancement + fc: cutoff frequency for LP filtering. + med_size: Median filter window size. + int or tuple. If using an integer for a 2d input, axis must be specified. + filter_type: Select either to use maximum filter or minimum filter. + float ['min', 'max', None] + max_size: Maximum filter window size. + int or tuple. If using an integer for a 2d input, axis must be specified. + Use this when homogeneity in the SSM is expressed by LARGE value. + min_size: Mininum filter window size. + int or tuple. If using an integer for a 2d input, axis must be specified. + Use this when homogeneity in the SSM is expressed by SMALL value. + (eg. 
When cosine metric and exp normalization and used for distance computation.)''' + ssm_lp = lp(enhanced_ssm, fc=fc) + + # Use scipy.ndimage.filters.median_filter instead + ssm_med = med_filter(ssm_lp, size=med_size) + + if filter_type == 'min': + enhanced_ssm = minimum_filter(ssm_med, size=min_size) + elif filter_type == 'max': + enhanced_ssm = maximum_filter(ssm_med, size=max_size) + else: + enhanced_ssm = ssm_med + return enhanced_ssm + def reduceSSM(ssm, maxfilter_size = 2, remove_size=50): - reduced_ssm = ssm + '''Adaptive thresholding using OTSU method + Required package: skimage (0.10+)''' + + from skimage.morphology import disk + # from skimage.filters import threshold_otsu, rank #skimage 0.12 + from skimage.filter.rank import otsu #skimage 0.10 + from skimage.filter import threshold_otsu + + reduced_ssm = copy(ssm) reduced_ssm[reduced_ssm<0.75] = 0 # # reduced_ssm = maximum_filter(reduced_ssm,size=maxfilter_size) # # reduced_ssm = morphology.remove_small_objects(reduced_ssm.astype(bool), min_size=remove_size)
--- a/utils/plotSSM.py Fri Jun 05 18:02:05 2015 +0100 +++ b/utils/plotSSM.py Fri Jun 12 17:44:39 2015 +0100 @@ -34,6 +34,7 @@ from skimage.morphology import disk from PeakPickerUtil import PeakPicker +from SegUtil import getMean, getStd, getDelta, getSSM, reduceSSM, upSample, normaliseFeature def parse_args(): # define parser @@ -482,13 +483,13 @@ for audio in audio_files: ao = AudioObj() ao.name = splitext(audio)[0] - # annotation_file = join(options.GT, ao.name+'.txt') # iso, salami - # ao.gt = np.genfromtxt(annotation_file, usecols=0) - # ao.label = np.genfromtxt(annotation_file, usecols=1, dtype=str) + annotation_file = join(options.GT, ao.name+'.txt') # iso, salami + ao.gt = np.genfromtxt(annotation_file, usecols=0) + ao.label = np.genfromtxt(annotation_file, usecols=1, dtype=str) - annotation_file = join(options.GT, ao.name+'.csv') # qupujicheng - ao.gt = np.genfromtxt(annotation_file, usecols=0, delimiter=',') - ao.label = np.genfromtxt(annotation_file, usecols=1, delimiter=',', dtype=str) + # annotation_file = join(options.GT, ao.name+'.csv') # qupujicheng + # ao.gt = np.genfromtxt(annotation_file, usecols=0, delimiter=',') + # ao.label = np.genfromtxt(annotation_file, usecols=1, delimiter=',', dtype=str) # annotation_file = join(options.GT, ao.name+'.lab') # beatles # ao.gt = np.genfromtxt(annotation_file, usecols=(0,1)) @@ -572,11 +573,11 @@ featureRate = float(self.SampleRate) /self.stepSize pca = PCA(n_components=5) - ao.gammatone_features = self.normaliseFeature(ao.gammatone_features) + ao.gammatone_features = normaliseFeature(ao.gammatone_features) ao.gammatone_features = resample(ao.gammatone_features, step) ao.gammatone_features[np.isnan(ao.gammatone_features)] = 0.0 ao.gammatone_features[np.isinf(ao.gammatone_features)] = 0.0 - ao.timbre_features = self.normaliseFeature(ao.timbre_features) + ao.timbre_features = normaliseFeature(ao.timbre_features) ao.timbre_features = resample(ao.timbre_features, step) 
ao.timbre_features[np.isnan(ao.timbre_features)] = 0.0 ao.timbre_features[np.isinf(ao.timbre_features)] = 0.0 @@ -584,37 +585,37 @@ # ao.lpc_features = resample(ao.lpc_features, step) # ao.lpc_features[np.isnan(ao.lpc_features)] = 0.0 # ao.lpc_features[np.isinf(ao.lpc_features)] = 0.0 - ao.harmonic_features = self.normaliseFeature(ao.harmonic_features) + ao.harmonic_features = normaliseFeature(ao.harmonic_features) ao.harmonic_features = resample(ao.harmonic_features, step) - ao.tempo_features = self.normaliseFeature(ao.tempo_features) + ao.tempo_features = normaliseFeature(ao.tempo_features) ao.harmonic_features[np.isinf(ao.harmonic_features)] = 0.0 ao.tempo_features[np.isnan(ao.tempo_features)] = 0.0 ao.tempo_features[np.isinf(ao.tempo_features)] = 0.0 - ao.gammatone_features = self.getMean(ao.gammatone_features, winlen=aggregation_window, stepsize=aggregation_step) + ao.gammatone_features = getMean(ao.gammatone_features, winlen=aggregation_window, stepsize=aggregation_step) pca.fit(ao.gammatone_features) ao.gammatone_features = pca.transform(ao.gammatone_features) - ao.gammatone_ssm = self.getSSM(ao.gammatone_features) + ao.gammatone_ssm = getSSM(ao.gammatone_features) - ao.tempo_features = self.getMean(ao.tempo_features, winlen=aggregation_window, stepsize=aggregation_step) + ao.tempo_features = getMean(ao.tempo_features, winlen=aggregation_window, stepsize=aggregation_step) pca.fit(ao.tempo_features) ao.tempo_features = pca.transform(ao.tempo_features) - ao.tempo_ssm = self.getSSM(ao.tempo_features) + ao.tempo_ssm = getSSM(ao.tempo_features) - ao.timbre_features = self.getMean(ao.timbre_features, winlen=aggregation_window, stepsize=aggregation_step) + ao.timbre_features = getMean(ao.timbre_features, winlen=aggregation_window, stepsize=aggregation_step) pca.fit(ao.timbre_features) ao.timbre_features = pca.transform(ao.timbre_features) - ao.timbre_ssm = self.getSSM(ao.timbre_features) + ao.timbre_ssm = getSSM(ao.timbre_features) # ao.lpc_features = 
self.getMean(ao.lpc_features, winlen=aggregation_window, stepsize=aggregation_step) # pca.fit(ao.lpc_features) # ao.lpc_features = pca.transform(ao.lpc_features) # ao.lpc_ssm = self.getSSM(ao.lpc_features) - ao.harmonic_features = self.getMean(ao.harmonic_features, winlen=aggregation_window, stepsize=aggregation_step) + ao.harmonic_features = getMean(ao.harmonic_features, winlen=aggregation_window, stepsize=aggregation_step) pca.fit(ao.harmonic_features) ao.harmonic_features = pca.transform(ao.harmonic_features) - ao.harmonic_ssm = self.getSSM(ao.harmonic_features) + ao.harmonic_ssm = getSSM(ao.harmonic_features) ao.ssm_timestamps = np.array(map(lambda x: ao.tempo_timestamps[aggregation_step*x], np.arange(0, ao.gammatone_ssm.shape[0]))) @@ -665,7 +666,7 @@ np.savetxt(join(options.OUTPUT, 'ssm_data', ao.name+'-chroma.txt'), np.array(ao.harmonic_ssm), delimiter=',') # np.savetxt(join(options.OUTPUT, 'ssm_data', ao.name+'-hpss_chroma.txt'), np.array(ao.harmonic_ssm), delimiter=',') - audio_list.append(ao) + # audio_list.append(ao) def main():