e@0
|
1 # -*- coding: utf-8 -*-
|
e@0
|
2 """
|
e@0
|
3 Created on Mon Jun 8 11:19:15 2015
|
e@0
|
4
|
e@0
|
5 @author: mmxgn
|
e@0
|
6 """
|
e@0
|
7 # Codes taken from: https://github.com/urinieto/msaf/blob/master/msaf/algorithms/foote/segmenter.py
|
e@0
|
8
|
e@0
|
9
|
e@0
|
10
|
e@0
|
if __name__ == "__main__":
    # The ``sys`` module itself is required for sys.exit() below; importing
    # only ``argv`` left the name ``sys`` undefined, so the usage path
    # crashed with a NameError instead of exiting cleanly.
    import sys
    from sys import argv

    # Exactly two positional arguments are expected:
    # the input audio file and the output folder.
    if len(argv) != 3:
        print("Incorrect number of arguments:")
        print("Usage: ")
        # Fill in the program name and list both required arguments.
        print("%s <input> <output_folder>" % argv[0])
        print("")
        print("Arguments:")
        print("<input>\tThe input filename. Can be .wav, .mp3, etc...")
        print("<output_folder>\tThe output folders. Segments will be stored under names 'name_segN'")
        sys.exit(-1)
    else:
        print("[II] Applying the method found in: ")
        print("[II] Automatic Audio Segmentation using a measure of Audio Novelty")
        print("[II] - Jonathar Foote ")
        print("[II] Loading libraries")
|
e@0
|
27
|
e@0
|
28 import essentia
|
e@0
|
29 from essentia import Pool
|
e@0
|
30 from essentia.standard import *
|
e@0
|
31 import csv
|
e@0
|
32 import yaml
|
e@0
|
33
|
e@0
|
34 # requires matplotlib
|
e@0
|
35 from pylab import *
|
e@0
|
36
|
e@0
|
37 #requires numpy
|
e@0
|
38 from numpy import *
|
e@0
|
39
|
e@0
|
40 import wave
|
e@0
|
41
|
e@0
|
42
|
e@0
|
43 from scipy.spatial import distance
|
e@0
|
44 from scipy.ndimage import filters
|
e@0
|
# Scratch dictionaries; nothing in the visible part of the script reads them.
d = {}
v = {}

# Positional arguments: input audio file and output folder.
fname = argv[1]
outfdir = argv[2]

# print() with a single pre-formatted string behaves the same on
# Python 2 and Python 3, matching the other print(...) calls in this script.
print("[II] Using filename: %s" % fname)
print("[II] Using output folder: %s" % outfdir)

# Base name of the input without its directory and final extension.
# os.path handles inputs without any extension (the former
# ``split('.')[-2]`` raised IndexError on those) and platform separators.
import os
name = os.path.splitext(os.path.basename(fname))[0]

# Segments are written as .wav files further down, so say so here.
print("[II] Segments will be saved in the form '%s/%s_segN.wav'" % (outfdir, name))

# Text before the first '.' of the path, with directories stripped.
# Not used by the visible remainder of the script; kept as it was.
trackname = fname.split('.')[0].split('/')[-1]
|
e@0
|
60
|
e@0
|
61
|
e@0
|
62 # if outfname.partition('.')[-1].lower() not in ['json', 'yaml']:
|
e@0
|
63 # print("Please choose a .json or .yaml as an output file.")
|
e@0
|
64 # sys.exit(-1)
|
e@0
|
65 # else:
|
e@0
|
66 # if outfname.partition('.')[-1].lower() == 'json':
|
e@0
|
67 # output = YamlOutput(filename = outfname, format='json')
|
e@0
|
68 # else:
|
e@0
|
69 # output = YamlOutput(filename = outfname, format='yaml')
|
e@0
|
70
|
e@0
|
print("Feature extraction of `%s\'" % fname)

# Sampling rate the input is resampled to when loaded.
SR = 21000.0

# Audio loader (mono, resampled to SR).
loader = MonoLoader(filename = fname, sampleRate=SR)

# Low-pass filter with its cutoff at a quarter of the sampling rate.
lp = LowPass(cutoffFrequency=SR/4, sampleRate=SR)

# Low-passed audio signal used for feature extraction.
audio = lp(loader())

# MFCC processing chain: window -> magnitude spectrum -> MFCC.
# NOTE(review): Spectrum() and MFCC() are built with their default
# parameters although the audio is resampled to SR -- confirm the defaults
# (e.g. the sample rate MFCC assumes) are what is intended.
w_hanning = Windowing(type = "hann")
spectrum = Spectrum()
mfcc = MFCC()

frameSize = int(0.2 * SR)  # Change this depending whether it's music or sound

pool = essentia.Pool()

# Frames overlap by half. ``//`` keeps the hop size an integer under both
# Python 2 and Python 3 division rules.
for frame in FrameGenerator(audio, frameSize = frameSize, hopSize = frameSize // 2):
    mfcc_bands, mfcc_coeffs = mfcc(spectrum(w_hanning(frame)))
    pool.add("lowlevel.mfcc_selfsim", mfcc_coeffs)

# One row of MFCC coefficients per frame.
mfcc_coeffs = pool['lowlevel.mfcc_selfsim']

# Self-similarity matrix: pairwise standardized Euclidean distances between
# frames, scaled by the largest distance and flipped so that identical
# frames score 1 and the most distant pair scores 0.
selfsim = distance.pdist(mfcc_coeffs, metric='seuclidean')
selfsim = distance.squareform(selfsim)
selfsim /= selfsim.max()
selfsim = 1 - selfsim

# Minimal 2x2 checkerboard kernel. The visible script passes the gaussian
# kernel ``K`` to ``novel`` instead, so this value is not used there.
C = array([[1,-1],[-1,1]])
|
e@0
|
115
|
e@0
|
def Novelty(S, C = array([[1, -1],[-1, 1]])):
    """Correlate kernel ``C`` along the main diagonal of ``S``.

    ``S`` is padded by prepending copies of its first ``L/2`` rows/columns
    and appending copies of its last ``L/2`` rows/columns, where
    ``L = C.shape[0]``. For every diagonal position ``i`` the ``L x L``
    window of the padded matrix that corresponds to rows and columns
    ``i - L/2 .. i + L/2 - 1`` of ``S`` is multiplied element-wise with
    ``C`` and summed.

    Parameters
    ----------
    S : square 2-D array (e.g. a self-similarity matrix).
    C : 2-D array of shape (L, L); ``L`` is expected to be even.

    Returns
    -------
    1-D float array with one value per row of ``S``.
    """
    L = C.shape[0]
    # Floor division keeps the indices integral on Python 2 and Python 3.
    half = L // 2

    horconcat = concatenate((S[:, 0:half], S, S[:, -half:]), axis=1)
    verconcat = concatenate((horconcat[0:half, :], horconcat, horconcat[-half:, :]), axis=0)

    N = zeros((S.shape[0],))

    for i in range(len(N)):
        # Element (r, c) of S sits at (r + half, c + half) in the padded
        # matrix, so the window for offsets -half..half-1 starts at (i, i).
        # The previous column index subtracted ``half`` instead of adding
        # it, which sampled columns shifted by L and wrapped to negative
        # indices near the start.
        N[i] = (C * verconcat[i:i + L, i:i + L]).sum()

    return N
|
e@0
|
137
|
e@0
|
def novel(S, C = array([[1, -1], [-1, 1]])):
    """Compute a normalised novelty curve from matrix ``S`` and kernel ``C``.

    For every index ``i`` at which the ``M x M`` kernel (``M = C.shape[0]``)
    fits entirely inside ``S``, the window of ``S`` centred on ``(i, i)`` is
    multiplied element-wise with ``C`` and summed. Positions where the
    kernel does not fit stay at zero.

    Parameters
    ----------
    S : square 2-D array (e.g. a self-similarity matrix).
    C : 2-D array of shape (M, M); ``M`` is expected to be even.

    Returns
    -------
    1-D float array of length ``S.shape[0]``, divided by its maximum unless
    that maximum is zero.
    """
    N = S.shape[0]
    M = C.shape[0]
    # Floor division keeps the indices integral on Python 2 and Python 3.
    half = M // 2

    novelty = zeros(N)

    for i in range(half, N - half + 1):
        novelty[i] = (S[i - half:i + half, i - half:i + half] * C).sum()

    # NOTE(review): the minimum is *added*, as in the original script; a
    # conventional shift-to-zero would subtract it -- confirm the intent.
    novelty += novelty.min()

    # An all-zero curve (e.g. S smaller than the kernel) previously turned
    # into NaNs through 0/0; leave it as zeros instead.
    peak = novelty.max()
    if peak != 0:
        novelty /= peak

    return novelty
|
e@0
|
151
|
e@0
|
152
|
e@0
|
153
|
e@0
|
def pick_peaks(nc, L=32):
    """Obtain peaks from a novelty curve using an adaptive threshold.

    Codes taken from:
    https://github.com/urinieto/msaf/blob/master/msaf/algorithms/foote/segmenter.py

    The curve is smoothed with a gaussian filter (sigma=4). A sample is
    reported as a peak when it is strictly greater than both neighbours and
    strictly above the threshold, which is the median-filtered smoothed
    curve plus one twentieth of the mean of the unsmoothed curve.

    Parameters
    ----------
    nc : 1-D numpy array, the novelty curve.
    L : int, window size of the median filter used for the threshold.

    Returns
    -------
    list of int indices into ``nc``, in increasing order.
    """
    # Imported by name from scipy.ndimage: these are the same functions the
    # deprecated ``scipy.ndimage.filters`` namespace exposes.
    from scipy.ndimage import gaussian_filter1d, median_filter

    # Offset is taken from the raw curve, before smoothing.
    offset = nc.mean() / 20.

    nc = gaussian_filter1d(nc, sigma=4)  # Smooth out nc

    th = median_filter(nc, size=L) + offset

    peaks = []
    # First and last samples lack a neighbour on one side and are skipped.
    for i in range(1, nc.shape[0] - 1):
        is_local_max = nc[i - 1] < nc[i] and nc[i] > nc[i + 1]
        if is_local_max and nc[i] > th[i]:
            peaks.append(i)

    return peaks
|
e@0
|
179
|
e@0
|
180 from scipy import signal
|
e@0
|
def compute_gaussian_krnl(M):
    """Creates a gaussian kernel following Foote's paper.

    Builds the outer product of a symmetric gaussian window of length ``M``
    (standard deviation ``M / 3``) with itself, then negates the bottom-left
    and top-right quadrants, giving a gaussian-tapered checkerboard.

    Parameters
    ----------
    M : int, side length of the kernel.

    Returns
    -------
    2-D float array of shape (M, M).
    """
    # ``scipy.signal.gaussian`` is no longer available in recent SciPy
    # releases; the window lives in ``scipy.signal.windows``. Fall back to
    # the old location for installations that only expose it there.
    try:
        from scipy.signal.windows import gaussian
    except ImportError:
        from scipy.signal import gaussian

    # Floor division keeps the slice bounds integral on Python 2 and 3.
    half = M // 2

    g = gaussian(M, M / 3., sym=True)
    G = np.dot(g.reshape(-1, 1), g.reshape(1, -1))
    G[half:, :half] = -G[half:, :half]
    G[:half, half:] = -G[:half, half:]
    return G

# Kernel passed to ``novel`` further down in the script.
K = compute_gaussian_krnl(96)
|
e@0
|
def kernelMatrix(L):
    """Build a +1/-1 checkerboard kernel made of four equal quadrants.

    The top-left and bottom-right quadrants are +1, the other two are -1.

    Parameters
    ----------
    L : int, requested side length. Each quadrant has side ``L // 2``, so
        an odd ``L`` yields a kernel of side ``L - 1``.

    Returns
    -------
    2-D float array of shape (2 * (L // 2), 2 * (L // 2)).
    """
    # Floor division: array shapes must be integers on Python 3 as well.
    half = L // 2
    quadrant = ones((half, half))

    # Left half: +1 block stacked on top of a -1 block.
    k1 = concatenate((quadrant, -1 * quadrant))
    # Right half is the negated left half.
    k1 = concatenate((k1, -k1), axis=1)
    return k1
|
e@0
|
194
|
e@0
|
# Novelty curve of the self-similarity matrix and its peak positions
# (peak indices are frame indices).
N = novel(selfsim, K)
peaks = pick_peaks(N)

# Frame index -> sample index: consecutive frames are frameSize // 2 samples
# apart. An explicit integer dtype and floor division keep the boundaries
# usable as slice indices (true division would produce floats).
boundaries = array(peaks, dtype=int) * (frameSize // 2)

sampleRate = SR

# Reload the input without the low-pass filter for writing the segments.
audio = MonoLoader(filename=fname, sampleRate = sampleRate)()

from scipy.io.wavfile import write as wavwrite

# NOTE(review): only spans between two detected boundaries are considered;
# audio before the first and after the last boundary is never written --
# confirm this is intended.
for b in range(1, len(boundaries)):
    outname = '%s/%s_seg%d.wav' % (outfdir, name, b)
    segment = audio[boundaries[b-1]:boundaries[b]]
    # Keep only segments that are at least five seconds long.
    if len(segment) >= 5*SR:
        # The WAV writer expects an integer sample rate; SR is a float.
        wavwrite(outname, int(SR), segment)
        print("[II] Saving %s" % outname)
|
e@0
|
215
|
e@0
|
216
|
e@0
|
217
|
e@0
|
218
|
e@0
|
219 |