segmentation: SegEval.py comparison

comparison SegEval.py @ 0:26838b1f560f

initial commit of a segmenter project

author	mi tian
date	Thu, 02 Apr 2015 18:09:27 +0100
parents
children	c11ea9e0357f

comparison

equal deleted inserted replaced

--1:000000000000
+:26838b1f560f
+#!/usr/bin/env python
+# encoding: utf-8
+"""
+SegEval.py
+The main segmentation program.
+Created by mi tian on 2015-04-02.
+Copyright (c) 2015 __MyCompanyName__. All rights reserved.
+"""
+# Load standard python libs
+import sys, os, optparse, csv
+from itertools import combinations
+from os.path import join, isdir, isfile, abspath, dirname, basename, split, splitext
+from copy import copy
+import matplotlib
+# matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+import matplotlib.gridspec as gridspec
+import numpy as np
+import scipy as sp
+from scipy.signal import correlate2d, convolve2d, filtfilt, resample
+from scipy.ndimage.filters import *
+from sklearn.decomposition import PCA
+from sklearn.mixture import GMM
+from sklearn.cluster import KMeans
+from sklearn.preprocessing import normalize
+from sklearn.metrics.pairwise import pairwise_distances
+# Load dependencies
+from utils.SegUtil import getMean, getStd, getDelta, getSSM, reduceSSM, upSample, normaliseFeature
+from utils.PeakPickerUtil import PeakPicker
+from utils.gmmdist import *
+from utils.GmmMetrics import GmmDistance
+from utils.RankClustering import rClustering
+from utils.kmeans import Kmeans
+from utils.PathTracker import PathTracker
+# Load boundary retrieval utilities
+import cnmf as cnmf_S
+import foote as foote_S
+import sf as sf_S
+import fmc2d as fmc2d_S
+# Define arg parser
+def parse_args():
+	op = optparse.OptionParser()
+	# IO options
+	op.add_option('-g', '--gammatonegram-features', action="store", dest="GF", default='/Volumes/c4dm-03/people/mit/features/gammatonegram/qupujicheng/2048', type="str", help="Loading gammatone features from.." )
+	op.add_option('-s', '--spectrogram-features', action="store", dest="SF", default='/Volumes/c4dm-03/people/mit/features/spectrogram/qupujicheng/2048', type="str", help="Loading spectral features from.." )
+	op.add_option('-t', '--tempogram-features', action="store", dest="TF", default='/Volumes/c4dm-03/people/mit/features/tempogram/qupujicheng/tempo_features_6s', type="str", help="Loading tempogram features from.." )
+	op.add_option('-f', '--featureset', action="store", dest="FEATURES", default='[0, 1, 2, 3]', type="str", help="Choose feature subsets (input a list of integers) used for segmentation -- gammtone, chroma, timbre, tempo -- 0, 1, 2, 3." )
+	op.add_option('-a', '--annotations', action="store", dest="GT", default='/Volumes/c4dm-03/people/mit/annotation/qupujicheng/lowercase', type="str", help="Loading annotation files from.. ")
+	op.add_option('-o', '--ouput', action="store", dest="OUTPUT", default='/Volumes/c4dm-03/people/mit/segmentation/gammatone/qupujicheng', type="str", help="Write segmentation results to ")
+	# boundary retrieval options
+	op.add_option('-b', '--bounrary-method', action="store", dest="BOUNDARY", default=['novelty', 'cnmf', 'sf', 'fmc2d'], help="Choose boundary retrieval algorithm ('novelty', 'cnmf', 'sf', 'fmc2d')." )
+	# Plot/print/mode options
+	op.add_option('-p', '--plot', action="store_true", dest="PLOT", default=False, help="Save plots")
+	op.add_option('-e', '--test-mode', action="store_true", dest="TEST", default=False, help="Test mode")
+	op.add_option('-v', '--verbose-mode', action="store_true", dest="VERBOSE", default=False, help="Print results in verbose mode.")
+	return op.parse_args()
+# Parse the command line once at import time; `options` is then read as a module-level
+# global by SSMseg.process() (input directories, feature selection, boundary method).
+options, args = parse_args()
+class FeatureObj() :
+	__slots__ = ['key', 'audio', 'timestamps', 'gammatone_features', 'tempo_features', 'timbre_features', 'harmonic_features', 'gammatone_ssm', 'tempo_ssm', 'timbre_features', 'harmonic_ssm', 'ssm_timestamps']
+class AudioObj():
+	__slots__ = ['name', 'feature_list', 'gt', 'label', 'gammatone_features', 'tempo_features', 'timbre_features', 'harmonic_features', 'combined_features',\
+	'gammatone_ssm', 'tempo_ssm', 'timbre_ssm', 'harmonic_ssm', 'combined_ssm', 'ssm', 'ssm_timestamps', 'tempo_timestamps']
+class EvalObj():
+	__slots__ = ['TP', 'FP', 'FN', 'P', 'R', 'F', 'AD', 'DA']
+class SSMseg(object):
+	'''The main segmentation object'''
+	def __init__(self):
+		self.SampleRate = 44100
+		self.NqHz = self.SampleRate/2
+		self.timestamp = []
+		self.previousSample = 0.0
+		self.featureWindow = 6.0
+		self.featureStep = 3.0
+		self.kernel_size = 64 # Adjust this param according to the feature resolution.pq
+		self.blockSize = 2048
+		self.stepSize = 1024
+		'''NOTE: Match the following params with those used for feature extraction!'''
+		'''NOTE: Unlike spectrogram ones, Gammatone features are extracted without taking an FFT. The windowing is done under the purpose of chunking
+		the audio to facilitate the gammatone filtering with the specified blockSize and stepSize. The resulting gammatonegram is aggregated every
+		gammatoneLen without overlap.'''
+		self.gammatoneLen = 2048
+		self.gammatoneBandGroups = [0, 2, 6, 10, 13, 17, 20]
+		self.nGammatoneBands = 20
+		self.lowFreq = 100
+		self.highFreq = self.SampleRate / 4
+		'''Settings for extracting tempogram features.'''
+		self.tempoWindow = 6.0
+		self.bpmBands = [30, 45, 60, 80, 100, 120, 180, 240, 400, 600]
+		'''Peak picking settings'''
+		self.threshold = 50
+		self.confidence_threshold = 0.5
+		self.delta_threshold = 0.0
+		self.backtracking_threshold = 1.9
+		self.polyfitting_on = True
+		self.medfilter_on = True
+		self.LPfilter_on = True
+		self.whitening_on = False
+		self.aCoeffs = [1.0000, -0.5949, 0.2348]
+		self.bCoeffs = [0.1600,	 0.3200, 0.1600]
+		self.cutoff = 0.34
+		self.medianWin = 7
+	def pairwiseF(self, annotation, detection, tolerance=3.0, combine=1.0):
+		'''Pairwise F measure evaluation of detection rates.'''
+		# print 'detection', detection
+		detection = np.append(detection, annotation[-1])
+		res = EvalObj()
+		res.TP = 0	# Total number of matched ground truth and experimental data points
+		gt = len(annotation)	# Total number of ground truth data points
+		dt = len(detection) # Total number of experimental data points
+		foundIdx = []
+		D_AD = np.zeros(gt)
+		D_DA = np.zeros(dt)
+		for dtIdx in xrange(dt):
+			D_DA[dtIdx] = np.min(abs(detection[dtIdx] - annotation))
+		for gtIdx in xrange(gt):
+			D_AD[gtIdx] = np.min(abs(annotation[gtIdx] - detection))
+			for dtIdx in xrange(dt):
+				if (annotation[gtIdx] >= detection[dtIdx] - tolerance/2.0) and (annotation[gtIdx] <= detection[dtIdx] + tolerance/2.0):
+					res.TP = res.TP + 1.0
+					foundIdx.append(gtIdx)
+		foundIdx = list(set(foundIdx))
+		res.TP = len(foundIdx)
+		res.FP = max(0, dt - res.TP)
+		res.FN = max(0, gt - res.TP)
+		res.AD = np.mean(D_AD)
+		res.DA = np.mean(D_DA)
+		res.P, res.R, res.F = 0.0, 0.0, 0.0
+		if res.TP == 0:
+			return res
+		res.P = res.TP / float(dt)
+		res.R = res.TP / float(gt)
+		res.F = 2 * res.P * res.R / (res.P + res.R)
+		return res
+	def process(self):
+		'''For the aggregated input features, discard a propertion each time as the pairwise distances within the feature space descending.
+		In the meanwhile evaluate the segmentation result and track the trend of perfomance changing by measuring the feature selection
+		threshold - segmentation f measure curve.
+		'''
+		peak_picker = PeakPicker()
+		peak_picker.params.alpha = 9.0 # Alpha norm
+		peak_picker.params.delta = self.delta_threshold # Adaptive thresholding delta
+		peak_picker.params.QuadThresh_a = (100 - self.threshold) / 1000.0
+		peak_picker.params.QuadThresh_b = 0.0
+		peak_picker.params.QuadThresh_c = (100 - self.threshold) / 1500.0
+		peak_picker.params.rawSensitivity = 20
+		peak_picker.params.aCoeffs = self.aCoeffs
+		peak_picker.params.bCoeffs = self.bCoeffs
+		peak_picker.params.preWin = self.medianWin
+		peak_picker.params.postWin = self.medianWin + 1
+		peak_picker.params.LP_on = self.LPfilter_on
+		peak_picker.params.Medfilt_on = self.medfilter_on
+		peak_picker.params.Polyfit_on = self.polyfitting_on
+		peak_picker.params.isMedianPositive = False
+		# Settings used for feature extraction
+		feature_window_frame = int(self.SampleRate / self.gammatoneLen * self.featureWindow)
+		feature_step_frame = int(0.5 * self.SampleRate / self.gammatoneLen * self.featureStep)
+		aggregation_window, aggregation_step = 100, 50
+		featureRate = float(self.SampleRate) / self.stepSize
+		audio_files = [x for x in os.listdir(options.GT) if not x.startswith(".") ]
+		# audio_files = audio_files[:2]
+		audio_files.sort()
+		audio_list = []
+		gammatone_feature_list = [i for i in os.listdir(options.GF) if not i.startswith('.')]
+		gammatone_feature_list = ['contrast4', 'rolloff', 'dct']
+		tempo_feature_list = [i for i in os.listdir(options.TF) if not i.startswith('.')]
+		tempo_feature_list = ['intensity_bpm', 'loudness_bpm']
+		timbre_feature_list = ['mfcc']
+		harmonic_feature_list = ['nnls']
+		gammatone_feature_list = [join(options.GF, f) for f in gammatone_feature_list]
+		timbre_feature_list = [join(options.SF, f) for f in timbre_feature_list]
+		tempo_feature_list = [join(options.TF, f) for f in tempo_feature_list]
+		harmonic_feature_list = [join(options.SF, f) for f in harmonic_feature_list]
+		fobj_list = []
+		# For each audio file, load specific features
+		for audio in audio_files:
+			ao = AudioObj()
+			ao.name = splitext(audio)[0]
+			print ao.name
+			# annotation_file = join(options.GT, ao.name+'.txt') # iso, salami
+			# ao.gt = np.genfromtxt(annotation_file, usecols=0)
+			# ao.label = np.genfromtxt(annotation_file, usecols=1, dtype=str)
+			annotation_file = join(options.GT, ao.name+'.csv') # qupujicheng
+			ao.gt = np.genfromtxt(annotation_file, usecols=0, delimiter=',')
+			ao.label = np.genfromtxt(annotation_file, usecols=1, delimiter=',', dtype=str)
+			gammatone_featureset, timbre_featureset, tempo_featureset, harmonic_featureset = [], [], [], []
+			for feature in gammatone_feature_list:
+				for f in os.listdir(feature):
+					if f[:f.find('_vamp')]==ao.name:
+						gammatone_featureset.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[:,1:])
+						break
+			if len(gammatone_feature_list) > 1:
+				n_frame = np.min([x.shape[0] for x in gammatone_featureset])
+				gammatone_featureset = [x[:n_frame,:] for x in gammatone_featureset]
+				ao.gammatone_features = np.hstack((gammatone_featureset))
+			else:
+				ao.gammatone_features = gammatone_featureset[0]
+			for feature in timbre_feature_list:
+				for f in os.listdir(feature):
+					if f[:f.find('_vamp')]==ao.name:
+						timbre_featureset.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[:,1:])
+						break
+			if len(timbre_feature_list) > 1:
+				n_frame = np.min([x.shape[0] for x in timbre_featureset])
+				timbre_featureset = [x[:n_frame,:] for x in timbre_featureset]
+				ao.timbre_features = np.hstack((timbre_featureset))
+			else:
+				ao.timbre_features = timbre_featureset[0]
+			for feature in tempo_feature_list:
+				for f in os.listdir(feature):
+					if f[:f.find('_vamp')]==ao.name:
+						tempo_featureset.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[1:,1:])
+						ao.tempo_timestamps = np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[1:,0]
+						break
+			if len(tempo_feature_list) > 1:
+				n_frame = np.min([x.shape[0] for x in tempo_featureset])
+				tempo_featureset = [x[:n_frame,:] for x in tempo_featureset]
+				ao.tempo_features = np.hstack((tempo_featureset))
+			else:
+				ao.tempo_features = tempo_featureset[0]
+			for feature in harmonic_feature_list:
+				for f in os.listdir(feature):
+					if f[:f.find('_vamp')]==ao.name:
+						harmonic_featureset.append(np.genfromtxt(join(feature, f), delimiter=',',filling_values=0.0)[:,1:])
+						break
+			if len(harmonic_feature_list) > 1:
+				n_frame = np.min([x.shape[0] for x in harmonic_featureset])
+				harmonic_featureset = [x[:n_frame,:] for x in harmonic_featureset]
+				ao.harmonic_features = np.hstack((harmonic_featureset))
+			else:
+				ao.harmonic_features = harmonic_featureset[0]
+			# Get aggregated features for computing ssm
+			aggregation_window, aggregation_step = 1,1
+			featureRate = float(self.SampleRate) /self.stepSize
+			pca = PCA(n_components=5)
+			# Resample and normalise features
+			ao.gammatone_features = resample(ao.gammatone_features, step)
+			ao.gammatone_features = normaliseFeature(ao.gammatone_features)
+			ao.timbre_features = resample(ao.timbre_features, step)
+			ao.timbre_features = normaliseFeature(ao.timbre_features)
+			ao.harmonic_features = resample(ao.harmonic_features, step)
+			ao.harmonic_features = normaliseFeature(ao.harmonic_features)
+			ao.tempo_features = normaliseFeature(ao.harmonic_features)
+			pca.fit(ao.gammatone_features)
+			ao.gammatone_features = pca.transform(ao.gammatone_features)
+			ao.gammatone_ssm = getSSM(ao.gammatone_features)
+			pca.fit(ao.tempo_features)
+			ao.tempo_features = pca.transform(ao.tempo_features)
+			ao.tempo_ssm = getSSM(ao.tempo_features)
+			pca.fit(ao.timbre_features)
+			ao.timbre_features = pca.transform(ao.timbre_features)
+			ao.timbre_ssm = getSSM(ao.timbre_features)
+			pca.fit(ao.harmonic_features)
+			ao.harmonic_features = pca.transform(ao.harmonic_features)
+			ao.harmonic_ssm = getSSM(ao.harmonic_features)
+			ao.ssm_timestamps = np.array(map(lambda x: ao.tempo_timestamps[aggregation_step*x], np.arange(0, ao.gammatone_ssm.shape[0])))
+			audio_list.append(ao)
+		# Segment input audio using specified boundary retrieval method.
+		print 'Segmenting using %s method' %options.BOUNDARY
+		for i,ao in enumerate(audio_list):
+			print 'processing: %s' %ao.name
+			ao_featureset = [ao.gammatone_features, ao.harmonic_features, ao.timbre_features, ao.tempo_features]
+			feature_sel = [int(x) for x in options.FEATURES if x.isdigit()]
+			ao_featureset = [ao_featureset[i] for i in feature_sel]
+			gammatone_novelty, smoothed_gammatone_novelty, gammatone_novelty_peaks = getNoveltyPeaks(ao.gammatone_ssm, self.kernel_size, peak_picker)
+			timbre_novelty, smoothed_timbre_novelty, timbre_novelty_peaks = getNoveltyPeaks(ao.timbre_ssm, self.kernel_size, peak_picker)
+			tempo_novelty, smoothed_harmonic_novelty, harmonic_novelty_peaks = getNoveltyPeaks(ao.tempo_ssm, self.kernel_size, peak_picker)
+			harmonic_novelty, smoothed_tempo_novelty, tempo_novelty_peaks = getNoveltyPeaks(ao.harmonic_ssm, self.kernel_size, peak_picker)
+			# Peak picking from the novelty curve
+			smoothed_gammatone_novelty, gammatone_novelty_peaks = peak_picker.process(gammatone_novelty)
+			gammatone_detection = [ao.ssm_timestamps[int(np.rint(i))] for i in gammatone_novelty_peaks]
+			smoothed_timbre_novelty, timbre_novelty_peaks = peak_picker.process(timbre_novelty)
+			timbre_detection = [ao.ssm_timestamps[int(np.rint(i))] for i in timbre_novelty_peaks]
+			smoothed_harmonic_novelty, harmonic_novelty_peaks = peak_picker.process(harmonic_novelty)
+			harmonic_detection = [ao.ssm_timestamps[int(np.rint(i))] for i in harmonic_novelty_peaks]
+			smoothed_tempo_novelty, tempo_novelty_peaks = peak_picker.process(tempo_novelty)
+			tempo_detection = [ao.ssm_timestamps[int(np.rint(i))] for i in tempo_novelty_peaks]
+			if (len(gammatone_novelty_peaks) == 0 or len(harmonic_novelty_peaks)== 0 or len(timbre_novelty_peaks) == 0 or len(tempo_novelty_peaks) == 0):
+				print ao.name, len(gammatone_novelty_peaks), len(harmonic_novelty_peaks), len(timbre_novelty_peaks), len(tempo_novelty_peaks)
+			smoothed_gammatone_novelty -= np.min(smoothed_gammatone_novelty)
+			smoothed_harmonic_novelty -= np.min(smoothed_harmonic_novelty)
+			smoothed_timbre_novelty -= np.min(smoothed_timbre_novelty)
+			smoothed_tempo_novelty -= np.min(smoothed_tempo_novelty)
+			combined_sdf = (np.array(smoothed_gammatone_novelty) + np.array(smoothed_harmonic_novelty) + np.array(smoothed_timbre_novelty) + np.array(smoothed_tempo_novelty))
+def main():
+	segmenter = SSMseg()
+	segmenter.process()
+# Run the pipeline only when this file is executed as a script, not when it is imported.
+if __name__ == '__main__':
+	main()

Mercurial > hg > segmentation

comparison SegEval.py @ 0:26838b1f560f