Mercurial > hg > segmentation
view snmf.py @ 16:8b814fe5781d
added segmentation method applying nmf to ssm
author | mitian |
---|---|
date | Wed, 17 Jun 2015 18:02:33 +0100 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python # encoding: utf-8 """ snmf.py -- A modified version of C-NMF by Nieto. Input is no longer feature matrix but SSM. C-NMF method for segmentation, modified from here: Nieto, O., Jehan, T., Convex Non-negative Matrix Factorization For Automatic Music Structure Identification. Proc. of the 38th IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP). Vancouver, Canada, 2013. """ import sys import os import pymf from utils import SegUtil from utils.SegUtil import median_filter import numpy as np # Algorithm params h = 8 # Size of median filter for SSMs in C-NMF R = 16 # Size of the median filter for the activation matrix C-NMF rank = 3 # Rank of decomposition for the boundaries rank_labels = 16 # Rank of decomposition for the labels R_labels = 4 # Size of the median filter for the labels def cnmf(S, rank, niter=500): """(Convex) Non-Negative Matrix Factorization. Parameters ---------- S: np.array(N, N) SSM. rank: int Rank of decomposition niter: int Number of iterations to be used Returns ------- F: np.array Cluster matrix (decomposed matrix) G: np.array Activation matrix (decomposed matrix) (s.t. S ~= F * G) """ nmf_mdl = pymf.CNMF(S, num_bases=rank) nmf_mdl.factorize(niter=niter) F = np.asarray(nmf_mdl.W) G = np.asarray(nmf_mdl.H) return F, G def nmf(S, rank, nither=500): nmf_mdl = pymf.NMF(S, num_bases=rank, niter=nither) nmf_mdl.factorize() F = np.asarray(nmf_mdl.W) G = np.asarray(nmf_mdl.H) return F, G def most_frequent(x): """Returns the most frequent value in x.""" return np.argmax(np.bincount(x)) def compute_labels(X, rank, R, bound_idxs, niter=300): """Computes the labels using the bounds.""" X = X.T try: F, G = cnmf(X, rank, niter=niter) except: return [1] label_frames = filter_activation_matrix(G.T, R) label_frames = np.asarray(label_frames, dtype=int) # Get labels from the label frames labels = [] bound_inters = zip(bound_idxs[:-1], bound_idxs[1:]) for bound_inter in bound_inters: if bound_inter[1] - bound_inter[0] <= 0: labels.append(np.max(label_frames) + 1) else: labels.append(most_frequent( label_frames[bound_inter[0]:bound_inter[1]])) return labels def filter_activation_matrix(G, R): """Filters the activation matrix G, and returns a flattened copy.""" idx = np.argmax(G, axis=1) max_idx = np.arange(G.shape[0]) max_idx = (max_idx, idx.flatten()) G[:, :] = 0 G[max_idx] = idx + 1 G = np.sum(G, axis=1) G = SegUtil.median_filter(G[:, np.newaxis], R) return G.flatten() def segmentation(X, rank=4, R=15, h=8, niter=300, CNMF=True): """ Gets the segmentation (boundaries and labels) from the factorization matrices. Parameters ---------- X: np.array() Features matrix (e.g. chromagram) rank: int Rank of decomposition R: int Size of the median filter for activation matrix niter: int Number of iterations for k-means bound_idxs : list Use previously found boundaries (None to detect them) CNMF : bool If True, use CNMF; otherwise use NMF Returns ------- bounds_idx: np.array Bound indeces found labels: np.array Indeces of the labels representing the similarity between segments. """ # Filter X = median_filter(X, M=h) X = X.T # Find non filtered boundaries bound_idxs = None while True: if bound_idxs is None: try: if CNMF: F, G = cnmf(X, rank, niter=niter) else: F, G = nmf(X, rank, niter=niter) except: return np.empty(0), [1] # Filter G G = filter_activation_matrix(G.T, R) if bound_idxs is None: bound_idxs = np.where(np.diff(G) != 0)[0] + 1 if len(np.unique(bound_idxs)) <= 2: rank += 1 bound_idxs = None else: break return G, bound_idxs