view sf.py @ 18:b4bf37f94e92

prepared to add another annotation
author mitian
date Wed, 09 Dec 2015 16:27:10 +0000
parents 294f66d285af
children
line wrap: on
line source
#!/usr/bin/env python
# coding: utf-8
"""
This script identifies the boundaries of a given track using the Structural
Features method:

Serrà, J., Müller, M., Grosche, P., & Arcos, J. L. (2012). Unsupervised
Detection of Music Boundaries by Time Series Structure Features.
In Proc. of the 26th AAAI Conference on Artificial Intelligence
(pp. 1613–1619).

Toronto, Canada.
"""

__author__ = "Oriol Nieto"
__copyright__ = "Copyright 2014, Music and Audio Research Lab (MARL)"
__license__ = "GPL"
__version__ = "1.0"
__email__ = "oriol@nyu.edu"


import numpy as np
from scipy.spatial import distance
from scipy import signal
from scipy.ndimage import filters
import pylab as plt

# Local stuff
from utils import SegUtil

def median_filter(X, M=8):
	"""Median filter along the first axis of the feature matrix X."""
	for i in xrange(X.shape[1]):
		X[:, i] = filters.median_filter(X[:, i], size=M)
	return X


def gaussian_filter(X, M=8, axis=0):
	"""Gaussian filter along the first axis of the feature matrix X."""
	for i in xrange(X.shape[axis]):
		if axis == 1:
			X[:, i] = filters.gaussian_filter(X[:, i], sigma=M / 2.)
		elif axis == 0:
			X[i, :] = filters.gaussian_filter(X[i, :], sigma=M / 2.)
	return X


def compute_gaussian_krnl(M):
	"""Creates a gaussian kernel following Serra's paper."""
	g = signal.gaussian(M, M / 3., sym=True)
	G = np.dot(g.reshape(-1, 1), g.reshape(1, -1))
	G[M / 2:, :M / 2] = -G[M / 2:, :M / 2]
	G[:M / 2, M / 1:] = -G[:M / 2, M / 1:]
	return G


def compute_nc(X):
	"""Computes the novelty curve from the structural features."""
	N = X.shape[0]
	# nc = np.sum(np.diff(X, axis=0), axis=1) # Difference between SF's

	nc = np.zeros(N)
	for i in xrange(N - 1):
		nc[i] = distance.euclidean(X[i, :], X[i + 1, :])

	# Normalize
	nc += np.abs(nc.min())
	nc /= nc.max()
	return nc


def pick_peaks(nc, L=16, offset_denom=0.1):
	"""Obtain peaks from a novelty curve using an adaptive threshold."""
	offset = nc.mean() * float(offset_denom)
	th = filters.median_filter(nc, size=L) + offset
	peaks = []
	for i in xrange(1, nc.shape[0] - 1):
		# is it a peak?
		if nc[i - 1] < nc[i] and nc[i] > nc[i + 1]:
			# is it above the threshold?
			if nc[i] > th[i]:
				peaks.append(i)
	#plt.plot(nc)
	#plt.plot(th)
	#for peak in peaks:
		#plt.axvline(peak, color="m")
	#plt.show()
	return peaks


def circular_shift(X):
	"""Shifts circularly the X squre matrix in order to get a
		time-lag matrix."""
	N = X.shape[0]
	L = np.zeros(X.shape)
	for i in xrange(N):
		L[i, :] = np.asarray([X[(i + j) % N, j] for j in xrange(N)])
	return L


def embedded_space(X, m, tau=1):
	"""Time-delay embedding with m dimensions and tau delays."""
	N = X.shape[0] - int(np.ceil(m))
	Y = np.zeros((N, int(np.ceil(X.shape[1] * m))))
	for i in xrange(N):
		rem = int((m % 1) * X.shape[1])	 # Reminder for float m
		Y[i, :] = np.concatenate((X[i:i + int(m), :].flatten(),
								 X[i + int(m), :rem]))
	return Y


def segmentation(F, return_ssm=False):
	"""Main process."""

	# Structural Features params
	Mp = 32			# Size of the adaptive threshold for peak picking
	od = 0.1		# Offset coefficient for adaptive thresholding

	M = 16			# Size of gaussian kernel in beats
	m = 3			# Number of embedded dimensions
	k = 0.06		# k*N-nearest neighbors for the recurrence plot

	# Emedding the feature space (i.e. shingle)
	E = embedded_space(F, m)

	# Recurrence matrix
	R = SegUtil.recurrence_matrix(E.T, k=k * int(F.shape[0]),
								width=0,  # zeros from the diagonal
								metric="seuclidean",
								sym=True).astype(np.float32)

	# Check size in case the track is too short
	if R.shape[0] > 0:
		# Circular shift
		L = circular_shift(R)

		# Obtain structural features by filtering the lag matrix
		SF = gaussian_filter(L.T, M=M, axis=1)
		SF = gaussian_filter(L.T, M=1, axis=0)
		# plt.imshow(SF.T, interpolation="nearest", aspect="auto"); plt.show()

		# Compute the novelty curve
		nc = compute_nc(SF)
		if nc.max == 0:
			return nc, np.asarray([]) 
			
		# Find peaks in the novelty curve
		est_bounds = pick_peaks(nc, L=Mp, offset_denom=od)

		# Re-align embedded space
		est_bound_idxs = np.asarray(est_bounds) + int(np.ceil(m / 2.))
	else:
		est_bound_idxs = []

	if len(est_bound_idxs) == 0:
		est_bound_idxs = np.asarray([0])  # Return first one
	
	if return_ssm==True:
		return E, R, SF, nc, est_bound_idxs

	return nc, est_bound_idxs