mi@0
|
1 """
|
mi@0
|
2 2D-FMC for segmentation, published here:
|
mi@0
|
3
|
mi@0
|
4 Nieto, O., & Bello, J. P. (2014). Music Segment Similarity Using 2D-Fourier
|
mi@0
|
5 Magnitude Coefficients. In Proc. of the 39th IEEE International Conference on
|
mi@0
|
6 Acoustics Speech and Signal Processing (pp. 664-668). Florence, Italy.
|
mi@0
|
7 """
|
mi@0
|
8
|
mi@0
|
9 __author__ = "Oriol Nieto"
|
mi@0
|
10 __copyright__ = "Copyright 2014, Music and Audio Research Lab (MARL)"
|
mi@0
|
11 __license__ = "GPL"
|
mi@0
|
12 __version__ = "1.0"
|
mi@0
|
13 __email__ = "oriol@nyu.edu"
|
mi@0
|
14
|
mi@0
|
15 import numpy as np
|
mi@0
|
16 import scipy.cluster.vq as vq
|
mi@0
|
17 import logging
|
mi@0
|
18
|
mi@0
|
19 # Local stuff
|
mi@0
|
20 from utils import utils_2dfmc as utils2d
|
mi@0
|
21 from utils.xmeans import XMeans
|
mi@0
|
22
|
mi@0
|
23
|
mi@0
|
24 MIN_LEN = 4 # Minimum length (in frames) for the segments
|
mi@0
|
25
|
mi@0
|
26
|
mi@0
|
def get_pcp_segments(PCP, bound_idxs):
    """Split PCP into the segments delimited by consecutive boundaries.

    Parameters
    ----------
    PCP : np.ndarray
        2-D feature matrix; it is sliced along its first axis.
    bound_idxs : list or np.ndarray of int
        Boundary indices into the first axis of ``PCP``.

    Returns
    -------
    list of np.ndarray
        ``PCP[bound_idxs[i]:bound_idxs[i + 1], :]`` for every pair of
        consecutive boundaries. The list is empty when fewer than two
        boundaries are given.
    """
    # Pair each boundary with its successor instead of indexing through
    # ``xrange``, which only exists on Python 2.
    return [PCP[start:end, :]
            for start, end in zip(bound_idxs[:-1], bound_idxs[1:])]
|
mi@0
|
33
|
mi@0
|
34
|
mi@0
|
def pcp_segments_to_2dfmc_fixed(pcp_segments, N=75):
    """Compute one 2D-Fourier Magnitude Coefficient (2D-FMC) entry per segment.

    Every segment is brought to a fixed length of ``N`` rows before calling
    ``utils2d.compute_ffmc2d``:

    * segments shorter than ``MIN_LEN`` rows are first extended by repeating
      their last row;
    * segments longer than ``N`` rows are scanned with a sliding window of
      ``N`` rows (hop of one row) and the per-window results are aggregated
      with an element-wise maximum;
    * all other segments are zero-padded at the end up to ``N`` rows.

    Parameters
    ----------
    pcp_segments : iterable of np.ndarray
        2-D segments, as produced by ``get_pcp_segments``.
    N : int
        Number of rows of every patch handed to ``compute_ffmc2d``.

    Returns
    -------
    np.ndarray
        The per-segment results stacked with ``np.asarray``. Segments that
        cannot be padded (e.g. segments with no rows) are skipped with a
        warning, so the output may hold fewer entries than ``pcp_segments``.
    """
    fmcs = []
    for pcp_segment in pcp_segments:
        # Extend too-short segments by repeating their last row so that
        # they are not lost.
        n_missing = MIN_LEN - pcp_segment.shape[0]
        if n_missing > 0:
            try:
                last_row = pcp_segment[-1][np.newaxis, :]
                pcp_segment = np.vstack(
                    (pcp_segment, np.repeat(last_row, n_missing, axis=0)))
            except (IndexError, ValueError):
                # Raised for segments without rows or with an unexpected
                # number of dimensions; skipping is best-effort on purpose.
                logging.warning("Error: Can't stack PCP arrays, "
                                "skipping segment")
                continue

        curr_len = pcp_segment.shape[0]

        if curr_len > N:
            # Longer than the patch size: slide a window of N rows over the
            # segment and keep the element-wise maximum over all windows.
            patches = [utils2d.compute_ffmc2d(pcp_segment[i:i + N])
                       for i in range(curr_len - N + 1)]
            fmcs.append(np.max(np.asarray(patches), axis=0))
        else:
            # Zero-pad at the end up to N rows.
            padded = np.zeros((N, pcp_segment.shape[1]))
            padded[:curr_len, :] = pcp_segment
            fmcs.append(utils2d.compute_ffmc2d(padded))

    return np.asarray(fmcs)
|
mi@0
|
80
|
mi@0
|
81
|
mi@0
|
def compute_labels_kmeans(fmcs, k=6):
    """Cluster the 2D-FMC vectors with k-means and return a label per row.

    Parameters
    ----------
    fmcs : np.ndarray
        2-D array with one feature vector per row.
    k : int
        Number of centroids requested from ``scipy.cluster.vq.kmeans``.

    Returns
    -------
    np.ndarray
        Index of the closest centroid for every row of ``fmcs``.
    """
    # Only the second half of the columns is kept; the original author
    # reports that discarding part of the coefficients yields better results.
    # NOTE(review): the original comment says the *higher* frequencies are
    # removed; confirm against the coefficient layout of compute_ffmc2d.
    # Floor division keeps the slice index an int on Python 3 as well.
    fmcs = fmcs[:, fmcs.shape[1] // 2:]

    # Log-compress, then scale every column to unit variance.
    wfmcs = vq.whiten(np.log1p(fmcs))

    # Learn the codebook and assign every row to its nearest centroid.
    codebook, _ = vq.kmeans(wfmcs, k, iter=100)
    labels, _ = vq.vq(wfmcs, codebook)

    return labels
|
mi@0
|
93
|
mi@0
|
94
|
mi@0
|
def compute_similarity(PCP, bound_idxs, xmeans=False, k=5, N=32):
    """Label the segments delimited by ``bound_idxs`` by their similarity.

    Parameters
    ----------
    PCP : np.ndarray
        2-D feature matrix, sliced along its first axis.
    bound_idxs : list or np.ndarray of int
        Segment boundaries as indices into the first axis of ``PCP``.
    xmeans : bool
        When True, ``k`` is estimated with ``XMeans.estimate_K_knee``
        instead of using the ``k`` argument.
    k : int
        Number of k-means clusters (ignored when ``xmeans`` is True).
    N : int
        Fixed patch length passed to ``pcp_segments_to_2dfmc_fixed``.

    Returns
    -------
    np.ndarray
        One integer label per clustered segment. If no 2D-FMC could be
        computed, every segment gets its own label:
        ``np.arange(len(bound_idxs) - 1)``.
    """
    # Get PCP segments
    pcp_segments = get_pcp_segments(PCP, bound_idxs)

    # Get the 2D-FMC representation of every segment
    fmcs = pcp_segments_to_2dfmc_fixed(pcp_segments, N=N)

    # ``fmcs`` is a NumPy array, so ``fmcs == []`` would be an element-wise
    # comparison; test for emptiness explicitly instead.
    if fmcs is None or len(fmcs) == 0:
        return np.arange(len(bound_idxs) - 1)

    # Optionally estimate the number of clusters, then label with k-means
    if xmeans:
        xm = XMeans(fmcs, plot=False)
        k = xm.estimate_K_knee(th=0.01, maxK=8)

    return compute_labels_kmeans(fmcs, k=k)
|