mi@0
|
1 """
|
mi@0
|
2 C-NMF method for segmentation, modified from here:
|
mi@0
|
3
|
mi@0
|
4 Nieto, O., Jehan, T., Convex Non-negative Matrix Factorization For Automatic
|
mi@0
|
5 Music Structure Identification. Proc. of the 38th IEEE International Conference
|
mi@0
|
6 on Acoustics, Speech, and Signal Processing (ICASSP). Vancouver, Canada, 2013.
|
mi@0
|
7 """
|
mi@0
|
8
|
mi@0
|
9 __author__ = "Oriol Nieto"
|
mi@0
|
10 __copyright__ = "Copyright 2014, Music and Audio Research Lab (MARL)"
|
mi@0
|
11 __license__ = "GPL"
|
mi@0
|
12 __version__ = "1.0"
|
mi@0
|
13 __email__ = "oriol@nyu.edu"
|
mi@0
|
14
|
mi@0
|
15 import numpy as np
|
mi@0
|
16 import pymf
|
mi@0
|
17
|
mi@0
|
18 # Local stuff
|
mi@0
|
19 from utils import SegUtil
|
mi@0
|
20
|
mitian@7
|
21 # Algorithm params
|
mitian@17
|
22 h = 8 # Size of median filter for features in C-NMF
|
mitian@17
|
23 R = 15 # Size of the median filter for the activation matrix C-NMF
|
mitian@17
|
24 rank = 4 # Rank of decomposition for the boundaries
|
mitian@17
|
25 rank_labels = 6 # Rank of decomposition for the labels
|
mitian@17
|
26 R_labels = 6 # Size of the median filter for the labels
|
mi@0
|
27
|
mi@0
|
28 def cnmf(S, rank, niter=500):
|
mitian@17
|
29 """(Convex) Non-Negative Matrix Factorization.
|
mi@0
|
30
|
mitian@17
|
31 Parameters
|
mitian@17
|
32 ----------
|
mitian@17
|
33 S: np.array(p, N)
|
mitian@17
|
34 Features matrix. p row features and N column observations.
|
mitian@17
|
35 rank: int
|
mitian@17
|
36 Rank of decomposition
|
mitian@17
|
37 niter: int
|
mitian@17
|
38 Number of iterations to be used
|
mi@0
|
39
|
mitian@17
|
40 Returns
|
mitian@17
|
41 -------
|
mitian@17
|
42 F: np.array
|
mitian@17
|
43 Cluster matrix (decomposed matrix)
|
mitian@17
|
44 G: np.array
|
mitian@17
|
45 Activation matrix (decomposed matrix)
|
mitian@17
|
46 (s.t. S ~= F * G)
|
mitian@17
|
47 """
|
mitian@17
|
48 nmf_mdl = pymf.CNMF(S, num_bases=rank)
|
mitian@17
|
49 nmf_mdl.factorize(niter=niter)
|
mitian@17
|
50 F = np.asarray(nmf_mdl.W)
|
mitian@17
|
51 G = np.asarray(nmf_mdl.H)
|
mitian@17
|
52 return F, G
|
mi@0
|
53
|
mitian@11
|
54 def nmf(S, rank, nither=500):
|
mitian@11
|
55 nmf_mdl = pymf.NMF(S, num_bases=rank, niter=nither)
|
mitian@11
|
56 nmf_mdl.factorize()
|
mitian@17
|
57 F = np.asarray(nmf_mdl.W)
|
mitian@17
|
58 G = np.asarray(nmf_mdl.H)
|
mitian@17
|
59 return F, G
|
mitian@11
|
60
|
mitian@11
|
61
|
mi@0
|
62 def most_frequent(x):
|
mitian@17
|
63 """Returns the most frequent value in x."""
|
mitian@17
|
64 return np.argmax(np.bincount(x))
|
mi@0
|
65
|
mi@0
|
66
|
mi@0
|
67 def compute_labels(X, rank, R, bound_idxs, niter=300):
|
mitian@17
|
68 """Computes the labels using the bounds."""
|
mi@0
|
69
|
mitian@17
|
70 X = X.T
|
mitian@17
|
71 try:
|
mitian@17
|
72 F, G = cnmf(X, rank, niter=niter)
|
mitian@17
|
73 except:
|
mitian@17
|
74 return [1]
|
mi@0
|
75
|
mitian@17
|
76 label_frames = filter_activation_matrix(G.T, R)
|
mitian@17
|
77 label_frames = np.asarray(label_frames, dtype=int)
|
mi@0
|
78
|
mitian@17
|
79 # Get labels from the label frames
|
mitian@17
|
80 labels = []
|
mitian@17
|
81 bound_inters = zip(bound_idxs[:-1], bound_idxs[1:])
|
mitian@17
|
82 for bound_inter in bound_inters:
|
mitian@17
|
83 if bound_inter[1] - bound_inter[0] <= 0:
|
mitian@17
|
84 labels.append(np.max(label_frames) + 1)
|
mitian@17
|
85 else:
|
mitian@17
|
86 labels.append(most_frequent(
|
mitian@17
|
87 label_frames[bound_inter[0]:bound_inter[1]]))
|
mi@0
|
88
|
mitian@17
|
89 return labels
|
mi@0
|
90
|
mi@0
|
91
|
mi@0
|
92 def filter_activation_matrix(G, R):
|
mitian@17
|
93 """Filters the activation matrix G, and returns a flattened copy."""
|
mitian@17
|
94 idx = np.argmax(G, axis=1)
|
mitian@17
|
95 max_idx = np.arange(G.shape[0])
|
mitian@17
|
96 max_idx = (max_idx, idx.flatten())
|
mitian@17
|
97 G[:, :] = 0
|
mitian@17
|
98 G[max_idx] = idx + 1
|
mitian@17
|
99 G = np.sum(G, axis=1)
|
mitian@17
|
100 G = SegUtil.median_filter(G[:, np.newaxis], R)
|
mitian@17
|
101 return G.flatten()
|
mi@0
|
102
|
mi@0
|
103
|
mitian@11
|
104 def segmentation(X, rank=4, R=15, h=8, niter=300, CNMF=True):
|
mitian@17
|
105 """
|
mitian@17
|
106 Gets the segmentation (boundaries and labels) from the factorization
|
mitian@17
|
107 matrices.
|
mi@0
|
108
|
mitian@17
|
109 Parameters
|
mitian@17
|
110 ----------
|
mitian@17
|
111 X: np.array()
|
mitian@17
|
112 Features matrix (e.g. chromagram)
|
mitian@17
|
113 rank: int
|
mitian@17
|
114 Rank of decomposition
|
mitian@17
|
115 R: int
|
mitian@17
|
116 Size of the median filter for activation matrix
|
mitian@17
|
117 niter: int
|
mitian@17
|
118 Number of iterations for k-means
|
mitian@17
|
119 bound_idxs : list
|
mitian@17
|
120 Use previously found boundaries (None to detect them)
|
mitian@11
|
121 CNMF : bool
|
mitian@11
|
122 If True, use CNMF; otherwise use NMF
|
mitian@11
|
123
|
mitian@17
|
124 Returns
|
mitian@17
|
125 -------
|
mitian@17
|
126 bounds_idx: np.array
|
mitian@17
|
127 Bound indeces found
|
mitian@17
|
128 labels: np.array
|
mitian@17
|
129 Indeces of the labels representing the similarity between segments.
|
mitian@17
|
130 """
|
mi@0
|
131
|
mitian@17
|
132 # Filter
|
mitian@17
|
133 X = SegUtil.median_filter(X, M=h)
|
mitian@17
|
134 X = X.T
|
mi@0
|
135
|
mitian@17
|
136 # Find non filtered boundaries
|
mitian@17
|
137 bound_idxs = None
|
mitian@17
|
138 while True:
|
mitian@17
|
139 if bound_idxs is None:
|
mitian@17
|
140 try:
|
mitian@18
|
141 if CNMF: F, G0 = cnmf(X, rank, niter=niter)
|
mitian@18
|
142 else: F, G0 = nmf(X, rank, niter=niter)
|
mitian@17
|
143 except:
|
mitian@18
|
144 return np.empty(0), np.empty(0), [1]
|
mi@0
|
145
|
mitian@17
|
146 # Filter G
|
mitian@18
|
147 G = filter_activation_matrix(G0.T, R)
|
mitian@17
|
148 if bound_idxs is None:
|
mitian@17
|
149 bound_idxs = np.where(np.diff(G) != 0)[0] + 1
|
mi@0
|
150
|
mitian@17
|
151 if len(np.unique(bound_idxs)) <= 2:
|
mitian@17
|
152 rank += 1
|
mitian@17
|
153 bound_idxs = None
|
mitian@17
|
154 else:
|
mitian@17
|
155 break
|
mitian@18
|
156
|
mitian@18
|
157 # Obtain decomposition matrices Rd
|
mitian@18
|
158 R = F.dot(G0)
|
mitian@18
|
159 print 'R, F, G', R.shape, F.shape, G.shape
|
mitian@18
|
160 return F, G0, R, bound_idxs
|