mi@0: #!/usr/bin/env python mi@0: # coding: utf-8 mi@0: """ mi@0: This script identifies the boundaries of a given track using the Structural mi@0: Features method: mi@0: mi@0: Serrà, J., Müller, M., Grosche, P., & Arcos, J. L. (2012). Unsupervised mi@0: Detection of Music Boundaries by Time Series Structure Features. mi@0: In Proc. of the 26th AAAI Conference on Artificial Intelligence mi@0: (pp. 1613–1619). mi@0: mi@0: Toronto, Canada. mi@0: """ mi@0: mi@0: __author__ = "Oriol Nieto" mi@0: __copyright__ = "Copyright 2014, Music and Audio Research Lab (MARL)" mi@0: __license__ = "GPL" mi@0: __version__ = "1.0" mi@0: __email__ = "oriol@nyu.edu" mi@0: mi@0: mi@0: import numpy as np mi@0: from scipy.spatial import distance mi@0: from scipy import signal mi@0: from scipy.ndimage import filters mi@0: import pylab as plt mi@0: mi@0: # Local stuff mi@0: from utils import SegUtil mi@0: mi@0: def median_filter(X, M=8): mitian@18: """Median filter along the first axis of the feature matrix X.""" mitian@18: for i in xrange(X.shape[1]): mitian@18: X[:, i] = filters.median_filter(X[:, i], size=M) mitian@18: return X mi@0: mi@0: mi@0: def gaussian_filter(X, M=8, axis=0): mitian@18: """Gaussian filter along the first axis of the feature matrix X.""" mitian@18: for i in xrange(X.shape[axis]): mitian@18: if axis == 1: mitian@18: X[:, i] = filters.gaussian_filter(X[:, i], sigma=M / 2.) mitian@18: elif axis == 0: mitian@18: X[i, :] = filters.gaussian_filter(X[i, :], sigma=M / 2.) mitian@18: return X mi@0: mi@0: mi@0: def compute_gaussian_krnl(M): mitian@18: """Creates a gaussian kernel following Serra's paper.""" mitian@18: g = signal.gaussian(M, M / 3., sym=True) mitian@18: G = np.dot(g.reshape(-1, 1), g.reshape(1, -1)) mitian@18: G[M / 2:, :M / 2] = -G[M / 2:, :M / 2] mitian@18: G[:M / 2, M / 1:] = -G[:M / 2, M / 1:] mitian@18: return G mi@0: mi@0: mi@0: def compute_nc(X): mitian@18: """Computes the novelty curve from the structural features.""" mitian@18: N = X.shape[0] mitian@18: # nc = np.sum(np.diff(X, axis=0), axis=1) # Difference between SF's mi@0: mitian@18: nc = np.zeros(N) mitian@18: for i in xrange(N - 1): mitian@18: nc[i] = distance.euclidean(X[i, :], X[i + 1, :]) mi@0: mitian@18: # Normalize mitian@18: nc += np.abs(nc.min()) mitian@18: nc /= nc.max() mitian@18: return nc mi@0: mi@0: mi@0: def pick_peaks(nc, L=16, offset_denom=0.1): mitian@18: """Obtain peaks from a novelty curve using an adaptive threshold.""" mitian@18: offset = nc.mean() * float(offset_denom) mitian@18: th = filters.median_filter(nc, size=L) + offset mitian@18: peaks = [] mitian@18: for i in xrange(1, nc.shape[0] - 1): mitian@18: # is it a peak? mitian@18: if nc[i - 1] < nc[i] and nc[i] > nc[i + 1]: mitian@18: # is it above the threshold? mitian@18: if nc[i] > th[i]: mitian@18: peaks.append(i) mitian@18: #plt.plot(nc) mitian@18: #plt.plot(th) mitian@18: #for peak in peaks: mitian@18: #plt.axvline(peak, color="m") mitian@18: #plt.show() mitian@18: return peaks mi@0: mi@0: mi@0: def circular_shift(X): mitian@18: """Shifts circularly the X squre matrix in order to get a mitian@18: time-lag matrix.""" mitian@18: N = X.shape[0] mitian@18: L = np.zeros(X.shape) mitian@18: for i in xrange(N): mitian@18: L[i, :] = np.asarray([X[(i + j) % N, j] for j in xrange(N)]) mitian@18: return L mi@0: mi@0: mi@0: def embedded_space(X, m, tau=1): mitian@18: """Time-delay embedding with m dimensions and tau delays.""" mitian@18: N = X.shape[0] - int(np.ceil(m)) mitian@18: Y = np.zeros((N, int(np.ceil(X.shape[1] * m)))) mitian@18: for i in xrange(N): mitian@18: rem = int((m % 1) * X.shape[1]) # Reminder for float m mitian@18: Y[i, :] = np.concatenate((X[i:i + int(m), :].flatten(), mitian@18: X[i + int(m), :rem])) mitian@18: return Y mi@0: mi@0: mitian@18: def segmentation(F, return_ssm=False): mitian@18: """Main process.""" mi@0: mitian@18: # Structural Features params mitian@18: Mp = 32 # Size of the adaptive threshold for peak picking mitian@18: od = 0.1 # Offset coefficient for adaptive thresholding mi@0: mitian@18: M = 16 # Size of gaussian kernel in beats mitian@18: m = 3 # Number of embedded dimensions mitian@18: k = 0.06 # k*N-nearest neighbors for the recurrence plot mi@0: mitian@18: # Emedding the feature space (i.e. shingle) mitian@18: E = embedded_space(F, m) mi@0: mitian@18: # Recurrence matrix mitian@18: R = SegUtil.recurrence_matrix(E.T, k=k * int(F.shape[0]), mitian@18: width=0, # zeros from the diagonal mitian@18: metric="seuclidean", mitian@18: sym=True).astype(np.float32) mi@0: mitian@18: # Check size in case the track is too short mitian@18: if R.shape[0] > 0: mitian@18: # Circular shift mitian@18: L = circular_shift(R) mi@0: mitian@18: # Obtain structural features by filtering the lag matrix mitian@18: SF = gaussian_filter(L.T, M=M, axis=1) mitian@18: SF = gaussian_filter(L.T, M=1, axis=0) mitian@18: # plt.imshow(SF.T, interpolation="nearest", aspect="auto"); plt.show() mi@0: mitian@18: # Compute the novelty curve mitian@18: nc = compute_nc(SF) mitian@18: if nc.max == 0: mitian@18: return nc, np.asarray([]) mitian@18: mitian@18: # Find peaks in the novelty curve mitian@18: est_bounds = pick_peaks(nc, L=Mp, offset_denom=od) mi@0: mitian@18: # Re-align embedded space mitian@18: est_bound_idxs = np.asarray(est_bounds) + int(np.ceil(m / 2.)) mitian@18: else: mitian@18: est_bound_idxs = [] mi@0: mitian@18: if len(est_bound_idxs) == 0: mitian@18: est_bound_idxs = np.asarray([0]) # Return first one mitian@18: mitian@18: if return_ssm==True: mitian@18: return E, R, SF, nc, est_bound_idxs mi@0: mitian@18: return nc, est_bound_idxs