"""
C-NMF method for segmentation, modified from here:

Nieto, O., Jehan, T., Convex Non-negative Matrix Factorization For Automatic
Music Structure Identification. Proc. of the 38th IEEE International Conference
on Acoustics, Speech, and Signal Processing (ICASSP). Vancouver, Canada, 2013.
"""

__author__ = "Oriol Nieto"
__copyright__ = "Copyright 2014, Music and Audio Research Lab (MARL)"
__license__ = "GPL"
__version__ = "1.0"
__email__ = "oriol@nyu.edu"

import numpy as np
import pymf

# Local stuff
from utils import SegUtil


def cnmf(S, rank, niter=500):
    """(Convex) Non-Negative Matrix Factorization.

    Parameters
    ----------
    S: np.array(p, N)
        Features matrix. p row features and N column observations.
    rank: int
        Rank of decomposition
    niter: int
        Number of iterations to be used

    Returns
    -------
    F: np.array
        Cluster matrix (decomposed matrix)
    G: np.array
        Activation matrix (decomposed matrix)
        (s.t. S ~= F * G)
    """
    nmf_mdl = pymf.CNMF(S, num_bases=rank)
    nmf_mdl.factorize(niter=niter)
    # pymf exposes the factors as W and H; hand them back as plain ndarrays.
    F = np.asarray(nmf_mdl.W)
    G = np.asarray(nmf_mdl.H)
    return F, G


def most_frequent(x):
    """Returns the most frequent value in x.

    x must be a non-empty sequence of non-negative integers (requirement of
    np.bincount / np.argmax). Ties are resolved towards the smallest value.
    """
    return np.argmax(np.bincount(x))


def compute_labels(X, rank, R, bound_idxs, niter=300):
    """Computes the labels using the bounds.

    Parameters
    ----------
    X: np.array
        Features matrix. It is transposed before being handed to `cnmf`.
    rank: int
        Rank of decomposition
    R: int
        Size of the median filter for the activation matrix
    bound_idxs: sequence of int
        Boundary positions (indices into the filtered activation sequence).
        Each pair of consecutive boundaries delimits one segment.
    niter: int
        Number of iterations for the factorization

    Returns
    -------
    labels: list
        One label per pair of consecutive boundaries. If the factorization
        raises, the single-element list [1] is returned instead.
    """
    X = X.T
    try:
        _, G = cnmf(X, rank, niter=niter)
    except Exception:
        # Best-effort fallback: a failed factorization yields one dummy label.
        return [1]

    label_frames = filter_activation_matrix(G.T, R)
    label_frames = np.asarray(label_frames, dtype=int)

    # Get labels from the label frames
    labels = []
    for start, end in zip(bound_idxs[:-1], bound_idxs[1:]):
        if end - start <= 0:
            # Empty (or inverted) interval: give it a label that no frame uses.
            labels.append(np.max(label_frames) + 1)
        else:
            labels.append(most_frequent(label_frames[start:end]))

    return labels


def filter_activation_matrix(G, R):
    """Filters the activation matrix G, and returns a flattened copy.

    Every row of G is reduced to the 1-based column index of its largest
    entry; the resulting sequence is then median filtered with window R.
    The input matrix is left untouched.

    Parameters
    ----------
    G: np.array
        Two-dimensional activation matrix, one row per element of the output.
    R: int
        Size of the median filter

    Returns
    -------
    np.array
        One-dimensional filtered sequence, with as many elements as G has rows.
    """
    G = np.asarray(G)
    # 1-based index of the strongest activation per row, kept in G's dtype.
    winners = (np.argmax(G, axis=1) + 1).astype(G.dtype)
    # NOTE(review): the file imports `SegUtil` from `utils` but the original
    # code called `utils.median_filter`, an unbound name. Assumes SegUtil
    # provides median_filter(X, M) -- confirm against utils/SegUtil.
    filtered = SegUtil.median_filter(winners[:, np.newaxis], R)
    return filtered.flatten()


def segmentation(X, rank, R, h, niter=300):
    """
    Gets the segmentation boundaries from the factorization matrices.

    Parameters
    ----------
    X: np.array()
        Features matrix (e.g. chromagram). It is median filtered and then
        transposed before being handed to `cnmf`.
    rank: int
        Initial rank of decomposition. It is increased by one and the
        factorization repeated for as long as no more than two distinct
        boundaries are found.
    R: int
        Size of the median filter for activation matrix
    h: int
        Size of the median filter applied to the features
    niter: int
        Number of iterations for the factorization

    Returns
    -------
    bound_idxs: np.array
        Bound indeces found.
        NOTE: if the factorization raises, the tuple (np.empty(0), [1]) is
        returned instead of a single array.
    """
    # Filter
    # NOTE(review): assumes SegUtil provides median_filter(X, M) -- the
    # original called the unbound name `utils.median_filter`; confirm.
    X = SegUtil.median_filter(X, M=h)
    X = X.T

    # Find non filtered boundaries
    while True:
        try:
            _, G = cnmf(X, rank, niter=niter)
        except Exception:
            # Best-effort fallback kept from the original implementation.
            return np.empty(0), [1]

        # Filter G
        G = filter_activation_matrix(G.T, R)
        # A boundary sits wherever the filtered winner index changes.
        bound_idxs = np.where(np.diff(G) != 0)[0] + 1

        if len(np.unique(bound_idxs)) > 2:
            return bound_idxs

        # Too few boundaries: retry with a higher rank.
        rank += 1