"""
2D-FMC for segmentation, published here:

Nieto, O., & Bello, J. P. (2014). Music Segment Similarity Using 2D-Fourier
Magnitude Coefficients. In Proc. of the 39th IEEE International Conference on
Acoustics Speech and Signal Processing (pp. 664-668). Florence, Italy.
"""

__author__ = "Oriol Nieto"
__copyright__ = "Copyright 2014, Music and Audio Research Lab (MARL)"
__license__ = "GPL"
__version__ = "1.0"
__email__ = "oriol@nyu.edu"

import logging
import numpy as np
import scipy.cluster.vq as vq

# Local stuff
from utils import utils_2dfmc as utils2d
from utils.xmeans import XMeans


MIN_LEN = 4     # Minimum length (in frames) for the segments


def get_pcp_segments(PCP, bound_idxs):
    """Return the list of PCP slices delimited by consecutive boundaries.

    Parameters
    ----------
    PCP : 2-D array indexed as ``PCP[frame, :]``.
    bound_idxs : sequence of frame indices; every pair of consecutive
        entries ``(bound_idxs[i], bound_idxs[i + 1])`` defines one segment.

    Returns
    -------
    list with ``len(bound_idxs) - 1`` slices of ``PCP`` (empty when fewer
    than two boundaries are given).
    """
    return [PCP[start:end, :]
            for start, end in zip(bound_idxs[:-1], bound_idxs[1:])]


def _pad_to_min_len(pcp_segment):
    """Repeat the last frame until the segment has MIN_LEN frames.

    Returns the (possibly padded) segment, or None when the segment cannot
    be padded (e.g. it has no frame to repeat); a warning is logged then.
    """
    missing = MIN_LEN - pcp_segment.shape[0]
    if missing <= 0:
        return pcp_segment
    try:
        last_frame = pcp_segment[-1][np.newaxis, :]
        return np.vstack([pcp_segment] + [last_frame] * missing)
    except (IndexError, ValueError):
        # IndexError: no last frame to repeat; ValueError: shapes that
        # cannot be stacked.  Anything else is a real bug and propagates.
        logging.warning("Error: Can't stack PCP arrays, "
                        "skipping segment")
        return None


def pcp_segments_to_2dfmc_fixed(pcp_segments, N=75):
    """From a list of PCP segments, return an array of 2D-Fourier Magnitude
    Coefs using a fixed segment size (N) and aggregating.

    Each segment is first padded to at least MIN_LEN frames by repeating its
    last frame.  Then:

    * segments longer than ``N`` are cut into every window of ``N``
      consecutive frames (hop of one frame); the coefficients of all windows
      are aggregated with an element-wise maximum;
    * segments of at most ``N`` frames are zero-padded to ``N`` frames.

    Coefficients come from ``utils2d.compute_ffmc2d`` (defined outside this
    file; presumably it returns a flat coefficient vector -- TODO confirm).

    NOTE: segments that cannot be padded are skipped, so the result may hold
    fewer rows than ``len(pcp_segments)``.

    Returns
    -------
    np.ndarray with one entry per retained segment.
    """
    fmcs = []
    for pcp_segment in pcp_segments:
        # Pad so that we never lose a (short) segment
        pcp_segment = _pad_to_min_len(pcp_segment)
        if pcp_segment is None:
            continue

        curr_len = pcp_segment.shape[0]

        if curr_len > N:
            # Sliding windows of N frames, aggregated with max
            windows = [utils2d.compute_ffmc2d(pcp_segment[i:i + N])
                       for i in range(curr_len - N + 1)]
            fmcs.append(np.max(np.asarray(windows), axis=0))
        else:
            # Zero-pad up to N frames
            padded = np.zeros((N, pcp_segment.shape[1]))
            padded[:curr_len, :] = pcp_segment
            fmcs.append(utils2d.compute_ffmc2d(padded))

    return np.asarray(fmcs)


def compute_labels_kmeans(fmcs, k=6):
    """Cluster the 2D-FMC vectors with k-means and return one label per row.

    Parameters
    ----------
    fmcs : 2-D array, one row per segment.
    k : number of clusters requested.  An integer ``k`` larger than the
        number of rows is clamped to the number of rows.

    Returns
    -------
    1-D integer array with the index of the closest centroid for each row.
    """
    # Only the second half of the columns is kept.
    # Removing the higher frequencies seem to yield better results
    # (integer division so the slice index stays an int on Python 3).
    fmcs = fmcs[:, fmcs.shape[1] // 2:]

    fmcs = np.log1p(fmcs)
    wfmcs = vq.whiten(fmcs)

    # k-means cannot place more centroids than there are observations.
    if np.isscalar(k):
        k = min(k, wfmcs.shape[0])

    codebook, _ = vq.kmeans(wfmcs, k, iter=100)
    labels, _ = vq.vq(wfmcs, codebook)

    return labels


def compute_similarity(PCP, bound_idxs, xmeans=False, k=5, N=32):
    """Main function: label the segments of a track by their similarity.

    Parameters
    ----------
    PCP : 2-D array indexed as ``PCP[frame, :]``.
    bound_idxs : sequence of frame indices delimiting the segments.
    xmeans : when True, ``k`` is ignored and estimated with
        ``XMeans.estimate_K_knee`` instead.
    k : number of clusters for k-means (used when ``xmeans`` is False).
    N : fixed segment size, in frames, used for the 2D-FMC computation.

    Returns
    -------
    1-D array of integer labels.  When no 2D-FMC could be computed, every
    segment gets its own label (``0 .. len(bound_idxs) - 2``).
    NOTE: segments skipped during the 2D-FMC computation get no label, so
    the result may be shorter than ``len(bound_idxs) - 1``.
    """
    # Get PCP segments
    pcp_segments = get_pcp_segments(PCP, bound_idxs)

    # Get the 2d-FMCs segments
    fmcs = pcp_segments_to_2dfmc_fixed(pcp_segments, N=N)
    # ``fmcs == []`` on an ndarray is an element-wise comparison, not an
    # emptiness test; check the length explicitly.
    if fmcs is None or len(fmcs) == 0:
        return np.arange(len(bound_idxs) - 1)

    # Compute the labels using kmeans
    if xmeans:
        xm = XMeans(fmcs, plot=False)
        k = xm.estimate_K_knee(th=0.01, maxK=8)
    est_labels = compute_labels_kmeans(fmcs, k=k)

    return est_labels