mi@0
|
1 #!/usr/bin/env python
|
mi@0
|
2 # coding: utf-8
|
mi@0
|
3 """
|
mi@0
|
4 This script identifies the boundaries of a given track using the Structural
|
mi@0
|
5 Features method:
|
mi@0
|
6
|
mi@0
|
7 Serrà, J., Müller, M., Grosche, P., & Arcos, J. L. (2012). Unsupervised
|
mi@0
|
8 Detection of Music Boundaries by Time Series Structure Features.
|
mi@0
|
9 In Proc. of the 26th AAAI Conference on Artificial Intelligence
|
mi@0
|
10 (pp. 1613–1619).
|
mi@0
|
11
|
mi@0
|
12 Toronto, Canada.
|
mi@0
|
13 """
|
mi@0
|
14
|
mi@0
|
15 __author__ = "Oriol Nieto"
|
mi@0
|
16 __copyright__ = "Copyright 2014, Music and Audio Research Lab (MARL)"
|
mi@0
|
17 __license__ = "GPL"
|
mi@0
|
18 __version__ = "1.0"
|
mi@0
|
19 __email__ = "oriol@nyu.edu"
|
mi@0
|
20
|
mi@0
|
21
|
mi@0
|
22 import numpy as np
|
mi@0
|
23 from scipy.spatial import distance
|
mi@0
|
24 from scipy import signal
|
mi@0
|
25 from scipy.ndimage import filters
|
mi@0
|
26 import pylab as plt
|
mi@0
|
27
|
mi@0
|
28 # Local stuff
|
mi@0
|
29 from utils import SegUtil
|
mi@0
|
30
|
mi@0
|
31 def median_filter(X, M=8):
|
mitian@18
|
32 """Median filter along the first axis of the feature matrix X."""
|
mitian@18
|
33 for i in xrange(X.shape[1]):
|
mitian@18
|
34 X[:, i] = filters.median_filter(X[:, i], size=M)
|
mitian@18
|
35 return X
|
mi@0
|
36
|
mi@0
|
37
|
mi@0
|
38 def gaussian_filter(X, M=8, axis=0):
|
mitian@18
|
39 """Gaussian filter along the first axis of the feature matrix X."""
|
mitian@18
|
40 for i in xrange(X.shape[axis]):
|
mitian@18
|
41 if axis == 1:
|
mitian@18
|
42 X[:, i] = filters.gaussian_filter(X[:, i], sigma=M / 2.)
|
mitian@18
|
43 elif axis == 0:
|
mitian@18
|
44 X[i, :] = filters.gaussian_filter(X[i, :], sigma=M / 2.)
|
mitian@18
|
45 return X
|
mi@0
|
46
|
mi@0
|
47
|
mi@0
|
48 def compute_gaussian_krnl(M):
|
mitian@18
|
49 """Creates a gaussian kernel following Serra's paper."""
|
mitian@18
|
50 g = signal.gaussian(M, M / 3., sym=True)
|
mitian@18
|
51 G = np.dot(g.reshape(-1, 1), g.reshape(1, -1))
|
mitian@18
|
52 G[M / 2:, :M / 2] = -G[M / 2:, :M / 2]
|
mitian@18
|
53 G[:M / 2, M / 1:] = -G[:M / 2, M / 1:]
|
mitian@18
|
54 return G
|
mi@0
|
55
|
mi@0
|
56
|
mi@0
|
57 def compute_nc(X):
|
mitian@18
|
58 """Computes the novelty curve from the structural features."""
|
mitian@18
|
59 N = X.shape[0]
|
mitian@18
|
60 # nc = np.sum(np.diff(X, axis=0), axis=1) # Difference between SF's
|
mi@0
|
61
|
mitian@18
|
62 nc = np.zeros(N)
|
mitian@18
|
63 for i in xrange(N - 1):
|
mitian@18
|
64 nc[i] = distance.euclidean(X[i, :], X[i + 1, :])
|
mi@0
|
65
|
mitian@18
|
66 # Normalize
|
mitian@18
|
67 nc += np.abs(nc.min())
|
mitian@18
|
68 nc /= nc.max()
|
mitian@18
|
69 return nc
|
mi@0
|
70
|
mi@0
|
71
|
mi@0
|
72 def pick_peaks(nc, L=16, offset_denom=0.1):
|
mitian@18
|
73 """Obtain peaks from a novelty curve using an adaptive threshold."""
|
mitian@18
|
74 offset = nc.mean() * float(offset_denom)
|
mitian@18
|
75 th = filters.median_filter(nc, size=L) + offset
|
mitian@18
|
76 peaks = []
|
mitian@18
|
77 for i in xrange(1, nc.shape[0] - 1):
|
mitian@18
|
78 # is it a peak?
|
mitian@18
|
79 if nc[i - 1] < nc[i] and nc[i] > nc[i + 1]:
|
mitian@18
|
80 # is it above the threshold?
|
mitian@18
|
81 if nc[i] > th[i]:
|
mitian@18
|
82 peaks.append(i)
|
mitian@18
|
83 #plt.plot(nc)
|
mitian@18
|
84 #plt.plot(th)
|
mitian@18
|
85 #for peak in peaks:
|
mitian@18
|
86 #plt.axvline(peak, color="m")
|
mitian@18
|
87 #plt.show()
|
mitian@18
|
88 return peaks
|
mi@0
|
89
|
mi@0
|
90
|
mi@0
|
91 def circular_shift(X):
|
mitian@18
|
92 """Shifts circularly the X squre matrix in order to get a
|
mitian@18
|
93 time-lag matrix."""
|
mitian@18
|
94 N = X.shape[0]
|
mitian@18
|
95 L = np.zeros(X.shape)
|
mitian@18
|
96 for i in xrange(N):
|
mitian@18
|
97 L[i, :] = np.asarray([X[(i + j) % N, j] for j in xrange(N)])
|
mitian@18
|
98 return L
|
mi@0
|
99
|
mi@0
|
100
|
mi@0
|
101 def embedded_space(X, m, tau=1):
|
mitian@18
|
102 """Time-delay embedding with m dimensions and tau delays."""
|
mitian@18
|
103 N = X.shape[0] - int(np.ceil(m))
|
mitian@18
|
104 Y = np.zeros((N, int(np.ceil(X.shape[1] * m))))
|
mitian@18
|
105 for i in xrange(N):
|
mitian@18
|
106 rem = int((m % 1) * X.shape[1]) # Reminder for float m
|
mitian@18
|
107 Y[i, :] = np.concatenate((X[i:i + int(m), :].flatten(),
|
mitian@18
|
108 X[i + int(m), :rem]))
|
mitian@18
|
109 return Y
|
mi@0
|
110
|
mi@0
|
111
|
mitian@18
|
112 def segmentation(F, return_ssm=False):
|
mitian@18
|
113 """Main process."""
|
mi@0
|
114
|
mitian@18
|
115 # Structural Features params
|
mitian@18
|
116 Mp = 32 # Size of the adaptive threshold for peak picking
|
mitian@18
|
117 od = 0.1 # Offset coefficient for adaptive thresholding
|
mi@0
|
118
|
mitian@18
|
119 M = 16 # Size of gaussian kernel in beats
|
mitian@18
|
120 m = 3 # Number of embedded dimensions
|
mitian@18
|
121 k = 0.06 # k*N-nearest neighbors for the recurrence plot
|
mi@0
|
122
|
mitian@18
|
123 # Emedding the feature space (i.e. shingle)
|
mitian@18
|
124 E = embedded_space(F, m)
|
mi@0
|
125
|
mitian@18
|
126 # Recurrence matrix
|
mitian@18
|
127 R = SegUtil.recurrence_matrix(E.T, k=k * int(F.shape[0]),
|
mitian@18
|
128 width=0, # zeros from the diagonal
|
mitian@18
|
129 metric="seuclidean",
|
mitian@18
|
130 sym=True).astype(np.float32)
|
mi@0
|
131
|
mitian@18
|
132 # Check size in case the track is too short
|
mitian@18
|
133 if R.shape[0] > 0:
|
mitian@18
|
134 # Circular shift
|
mitian@18
|
135 L = circular_shift(R)
|
mi@0
|
136
|
mitian@18
|
137 # Obtain structural features by filtering the lag matrix
|
mitian@18
|
138 SF = gaussian_filter(L.T, M=M, axis=1)
|
mitian@18
|
139 SF = gaussian_filter(L.T, M=1, axis=0)
|
mitian@18
|
140 # plt.imshow(SF.T, interpolation="nearest", aspect="auto"); plt.show()
|
mi@0
|
141
|
mitian@18
|
142 # Compute the novelty curve
|
mitian@18
|
143 nc = compute_nc(SF)
|
mitian@18
|
144 if nc.max == 0:
|
mitian@18
|
145 return nc, np.asarray([])
|
mitian@18
|
146
|
mitian@18
|
147 # Find peaks in the novelty curve
|
mitian@18
|
148 est_bounds = pick_peaks(nc, L=Mp, offset_denom=od)
|
mi@0
|
149
|
mitian@18
|
150 # Re-align embedded space
|
mitian@18
|
151 est_bound_idxs = np.asarray(est_bounds) + int(np.ceil(m / 2.))
|
mitian@18
|
152 else:
|
mitian@18
|
153 est_bound_idxs = []
|
mi@0
|
154
|
mitian@18
|
155 if len(est_bound_idxs) == 0:
|
mitian@18
|
156 est_bound_idxs = np.asarray([0]) # Return first one
|
mitian@18
|
157
|
mitian@18
|
158 if return_ssm==True:
|
mitian@18
|
159 return E, R, SF, nc, est_bound_idxs
|
mi@0
|
160
|
mitian@18
|
161 return nc, est_bound_idxs
|