mi@0: """ mi@0: C-NMF method for segmentation, modified from here: mi@0: mi@0: Nieto, O., Jehan, T., Convex Non-negative Matrix Factorization For Automatic mi@0: Music Structure Identification. Proc. of the 38th IEEE International Conference mi@0: on Acoustics, Speech, and Signal Processing (ICASSP). Vancouver, Canada, 2013. mi@0: """ mi@0: mi@0: __author__ = "Oriol Nieto" mi@0: __copyright__ = "Copyright 2014, Music and Audio Research Lab (MARL)" mi@0: __license__ = "GPL" mi@0: __version__ = "1.0" mi@0: __email__ = "oriol@nyu.edu" mi@0: mi@0: import numpy as np mi@0: import pymf mi@0: mi@0: # Local stuff mi@0: from utils import SegUtil mi@0: mitian@7: # Algorithm params mitian@17: h = 8 # Size of median filter for features in C-NMF mitian@17: R = 15 # Size of the median filter for the activation matrix C-NMF mitian@17: rank = 4 # Rank of decomposition for the boundaries mitian@17: rank_labels = 6 # Rank of decomposition for the labels mitian@17: R_labels = 6 # Size of the median filter for the labels mi@0: mi@0: def cnmf(S, rank, niter=500): mitian@17: """(Convex) Non-Negative Matrix Factorization. mi@0: mitian@17: Parameters mitian@17: ---------- mitian@17: S: np.array(p, N) mitian@17: Features matrix. p row features and N column observations. mitian@17: rank: int mitian@17: Rank of decomposition mitian@17: niter: int mitian@17: Number of iterations to be used mi@0: mitian@17: Returns mitian@17: ------- mitian@17: F: np.array mitian@17: Cluster matrix (decomposed matrix) mitian@17: G: np.array mitian@17: Activation matrix (decomposed matrix) mitian@17: (s.t. S ~= F * G) mitian@17: """ mitian@17: nmf_mdl = pymf.CNMF(S, num_bases=rank) mitian@17: nmf_mdl.factorize(niter=niter) mitian@17: F = np.asarray(nmf_mdl.W) mitian@17: G = np.asarray(nmf_mdl.H) mitian@17: return F, G mi@0: mitian@11: def nmf(S, rank, nither=500): mitian@11: nmf_mdl = pymf.NMF(S, num_bases=rank, niter=nither) mitian@11: nmf_mdl.factorize() mitian@17: F = np.asarray(nmf_mdl.W) mitian@17: G = np.asarray(nmf_mdl.H) mitian@17: return F, G mitian@11: mitian@11: mi@0: def most_frequent(x): mitian@17: """Returns the most frequent value in x.""" mitian@17: return np.argmax(np.bincount(x)) mi@0: mi@0: mi@0: def compute_labels(X, rank, R, bound_idxs, niter=300): mitian@17: """Computes the labels using the bounds.""" mi@0: mitian@17: X = X.T mitian@17: try: mitian@17: F, G = cnmf(X, rank, niter=niter) mitian@17: except: mitian@17: return [1] mi@0: mitian@17: label_frames = filter_activation_matrix(G.T, R) mitian@17: label_frames = np.asarray(label_frames, dtype=int) mi@0: mitian@17: # Get labels from the label frames mitian@17: labels = [] mitian@17: bound_inters = zip(bound_idxs[:-1], bound_idxs[1:]) mitian@17: for bound_inter in bound_inters: mitian@17: if bound_inter[1] - bound_inter[0] <= 0: mitian@17: labels.append(np.max(label_frames) + 1) mitian@17: else: mitian@17: labels.append(most_frequent( mitian@17: label_frames[bound_inter[0]:bound_inter[1]])) mi@0: mitian@17: return labels mi@0: mi@0: mi@0: def filter_activation_matrix(G, R): mitian@17: """Filters the activation matrix G, and returns a flattened copy.""" mitian@17: idx = np.argmax(G, axis=1) mitian@17: max_idx = np.arange(G.shape[0]) mitian@17: max_idx = (max_idx, idx.flatten()) mitian@17: G[:, :] = 0 mitian@17: G[max_idx] = idx + 1 mitian@17: G = np.sum(G, axis=1) mitian@17: G = SegUtil.median_filter(G[:, np.newaxis], R) mitian@17: return G.flatten() mi@0: mi@0: mitian@11: def segmentation(X, rank=4, R=15, h=8, niter=300, CNMF=True): mitian@17: """ mitian@17: Gets the segmentation (boundaries and labels) from the factorization mitian@17: matrices. mi@0: mitian@17: Parameters mitian@17: ---------- mitian@17: X: np.array() mitian@17: Features matrix (e.g. chromagram) mitian@17: rank: int mitian@17: Rank of decomposition mitian@17: R: int mitian@17: Size of the median filter for activation matrix mitian@17: niter: int mitian@17: Number of iterations for k-means mitian@17: bound_idxs : list mitian@17: Use previously found boundaries (None to detect them) mitian@11: CNMF : bool mitian@11: If True, use CNMF; otherwise use NMF mitian@11: mitian@17: Returns mitian@17: ------- mitian@17: bounds_idx: np.array mitian@17: Bound indeces found mitian@17: labels: np.array mitian@17: Indeces of the labels representing the similarity between segments. mitian@17: """ mi@0: mitian@17: # Filter mitian@17: X = SegUtil.median_filter(X, M=h) mitian@17: X = X.T mi@0: mitian@17: # Find non filtered boundaries mitian@17: bound_idxs = None mitian@17: while True: mitian@17: if bound_idxs is None: mitian@17: try: mitian@18: if CNMF: F, G0 = cnmf(X, rank, niter=niter) mitian@18: else: F, G0 = nmf(X, rank, niter=niter) mitian@17: except: mitian@18: return np.empty(0), np.empty(0), [1] mi@0: mitian@17: # Filter G mitian@18: G = filter_activation_matrix(G0.T, R) mitian@17: if bound_idxs is None: mitian@17: bound_idxs = np.where(np.diff(G) != 0)[0] + 1 mi@0: mitian@17: if len(np.unique(bound_idxs)) <= 2: mitian@17: rank += 1 mitian@17: bound_idxs = None mitian@17: else: mitian@17: break mitian@18: mitian@18: # Obtain decomposition matrices Rd mitian@18: R = F.dot(G0) mitian@18: print 'R, F, G', R.shape, F.shape, G.shape mitian@18: return F, G0, R, bound_idxs