# -*- coding: utf-8 -*-
"""
Created on Mon Jun 8 11:19:15 2015

@author: mmxgn

Split an audio file into segments at points of high audio novelty, after
J. Foote, "Automatic Audio Segmentation using a measure of Audio Novelty".

Usage:
    <script> <input> <output_folder>

Pipeline (see ``main``): load the audio, low-pass it, compute MFCCs on
half-overlapping frames, build a self-similarity matrix, correlate a
Gaussian checkerboard kernel along its diagonal to get a novelty curve,
pick the peaks of that curve and write the audio between consecutive peaks
to ``<output_folder>/<name>_segN.wav``.

The numeric helpers only need numpy/scipy; essentia is imported inside
``main`` so the helpers can be imported without it.
"""
# Codes taken from: https://github.com/urinieto/msaf/blob/master/msaf/algorithms/foote/segmenter.py

import os
import sys

import numpy as np
from scipy.ndimage import gaussian_filter1d, median_filter
from scipy.spatial import distance

try:
    from scipy.signal.windows import gaussian as _gaussian_window
except ImportError:  # SciPy releases that predate scipy.signal.windows
    from scipy.signal import gaussian as _gaussian_window


# Sampling rate (Hz) the input is resampled to when loaded.
SR = 21000.0

# Segments shorter than this many seconds are not written to disk.
MIN_SEGMENT_SECONDS = 5

# Side length of the Gaussian checkerboard kernel, in frames.
KERNEL_SIZE = 96


def _default_kernel():
    """Return a fresh 2x2 checkerboard kernel ``[[1, -1], [-1, 1]]``."""
    return np.array([[1, -1], [-1, 1]])


def Novelty(S, C=None):
    """Correlate kernel ``C`` along the diagonal of ``S`` with edge padding.

    ``S`` is padded on every side with copies of its first/last ``L // 2``
    columns and rows (``L = C.shape[0]``) so that a value is produced for
    every diagonal position, including the borders.

    Parameters
    ----------
    S : 2-D square array (self-similarity matrix).
    C : 2-D square kernel with an even side length of at least 2.
        Defaults to the 2x2 checkerboard.

    Returns
    -------
    1-D float array of length ``S.shape[0]`` (not normalised).

    Raises
    ------
    ValueError
        If the kernel side length is smaller than 2.
    """
    S = np.asarray(S)
    C = _default_kernel() if C is None else np.asarray(C)

    half = C.shape[0] // 2
    if half < 1:
        raise ValueError("kernel must be at least 2x2")
    span = 2 * half

    horconcat = np.concatenate((S[:, :half], S, S[:, -half:]), axis=1)
    padded = np.concatenate(
        (horconcat[:half, :], horconcat, horconcat[-half:, :]), axis=0)

    out = np.zeros((S.shape[0],))
    for i in range(len(out)):
        # Diagonal position i of S sits at (i + half, i + half) in the padded
        # matrix, so the kernel window centred there is [i, i + span) on BOTH
        # axes.  (The column offset used to be ``- L/2``, which wrapped to
        # negative indices and sampled away from the diagonal.)
        out[i] = np.sum(C[:span, :span] * padded[i:i + span, i:i + span])
    return out


def novel(S, C=None):
    """Compute a novelty curve by sliding kernel ``C`` along the diagonal of ``S``.

    Only positions where the kernel fits entirely inside ``S`` are evaluated;
    the remaining entries stay at zero before normalisation.

    Parameters
    ----------
    S : 2-D square array (self-similarity matrix).
    C : 2-D square kernel with an even side length.
        Defaults to the 2x2 checkerboard.

    Returns
    -------
    1-D float array of length ``S.shape[0]``.
    """
    S = np.asarray(S)
    C = _default_kernel() if C is None else np.asarray(C)

    n_frames = S.shape[0]
    half = C.shape[0] // 2

    novelty = np.zeros(n_frames)
    for i in range(half, n_frames - half + 1):
        window = S[i - half:i + half, i - half:i + half]
        novelty[i] = np.sum(window * C)

    # NOTE(review): ``+=`` (rather than ``-=``) is what the upstream msaf code
    # does; kept as-is so the resulting curves do not change.
    novelty += novelty.min()

    # A flat curve (constant input, or a kernel larger than S) has a zero
    # maximum; skip the division instead of filling the curve with NaN.
    peak = novelty.max()
    if peak != 0:
        novelty /= peak

    return novelty


def pick_peaks(nc, L=32):
    # Codes taken from: https://github.com/urinieto/msaf/blob/master/msaf/algorithms/foote/segmenter.py
    """Obtain peaks from a novelty curve using an adaptive threshold.

    The curve is smoothed with a Gaussian (sigma=4); a sample is a peak when
    it is strictly greater than both neighbours and above a running median of
    width ``L`` plus ``mean(nc) / 20``.

    Parameters
    ----------
    nc : 1-D array, the novelty curve.
    L : int, size of the median filter window (in samples).

    Returns
    -------
    list of int
        Indices of the detected peaks, in increasing order.
    """
    nc = np.asarray(nc, dtype=float)
    if nc.size < 3:
        # A peak needs a neighbour on each side.
        return []

    offset = nc.mean() / 20.
    smooth = gaussian_filter1d(nc, sigma=4)  # Smooth out nc
    threshold = median_filter(smooth, size=L) + offset

    centre = smooth[1:-1]
    is_peak = ((smooth[:-2] < centre) & (centre > smooth[2:])
               & (centre > threshold[1:-1]))
    # +1 because ``centre`` starts at index 1 of the curve.
    return (np.flatnonzero(is_peak) + 1).tolist()


def compute_gaussian_krnl(M):
    """Creates a gaussian kernel following Foote's paper.

    The kernel is the outer product of a Gaussian window (std ``M / 3``) with
    itself, with the two off-diagonal quadrants negated.

    Parameters
    ----------
    M : int, side length of the kernel.

    Returns
    -------
    2-D float array of shape ``(M, M)``.
    """
    half = M // 2
    g = _gaussian_window(M, M / 3., sym=True)
    G = np.outer(g, g)
    G[half:, :half] = -G[half:, :half]
    G[:half, half:] = -G[:half, half:]
    return G


def kernelMatrix(L):
    """Return a plain (unweighted) checkerboard kernel.

    The result has side ``2 * (L // 2)``: +1 in the two diagonal quadrants and
    -1 in the two off-diagonal quadrants.
    """
    half = L // 2
    signs = np.array([[1., -1.], [-1., 1.]])
    return np.kron(signs, np.ones((half, half)))


def _self_similarity(features):
    """Return ``1 - d / max(d)`` for the pairwise standardised Euclidean distances."""
    dists = distance.squareform(distance.pdist(features, metric='seuclidean'))
    largest = dists.max()
    # Identical frames give an all-zero distance matrix; avoid 0/0.
    if largest > 0:
        dists /= largest
    return 1 - dists


def _print_usage(prog):
    """Print the command line help."""
    print("Incorrect number of arguments:")
    print("Usage: ")
    print("%s <input> <output_folder>" % prog)
    print("")
    print("Arguments:")
    print("<input>\tThe input filename. Can be .wav, .mp3, etc...")
    print("<output_folder>\tThe output folders. Segments will be stored under names 'name_segN'")


def main(argv=None):
    """Segment the file named in ``argv[1]`` into ``argv[2]``.

    Parameters
    ----------
    argv : list of str, optional
        Full argument vector including the program name; defaults to
        ``sys.argv``.

    Returns
    -------
    int
        Process exit status: 0 on success, -1 on a usage error.
    """
    argv = sys.argv if argv is None else argv

    if len(argv) != 3:
        _print_usage(argv[0] if argv else "segmenter")
        return -1

    print("[II] Applying the method found in: ")
    print("[II] Automatic Audio Segmentation using a measure of Audio Novelty")
    print("[II] - Jonathar Foote ")
    print("[II] Loading libraries")

    # Imported here so that the usage message and the numeric helpers above
    # do not require essentia to be installed.
    import essentia
    from essentia.standard import (FrameGenerator, LowPass, MFCC, MonoLoader,
                                   Spectrum, Windowing)
    from scipy.io.wavfile import write as wavwrite

    fname = argv[1]
    outfdir = argv[2]

    print("[II] Using filename: %s" % fname)
    print("[II] Using output folder: %s" % outfdir)

    # Base name without directory or extension; also works for names with
    # no dot or with several dots.
    name = os.path.splitext(os.path.basename(fname))[0]

    print("[II] Segments will be saved in the form '%s/%s_segN.wav'" % (outfdir, name))

    print("Feature extraction of `%s\'" % fname)

    # Decode once: the raw signal is what gets written out, the low-passed
    # copy is only used for feature extraction.
    raw_audio = MonoLoader(filename=fname, sampleRate=SR)()
    lowpassed = LowPass(cutoffFrequency=SR / 4, sampleRate=SR)(raw_audio)

    # For MFCCs
    w_hanning = Windowing(type="hann")
    spectrum = Spectrum()
    # NOTE(review): MFCC is built with its defaults and is not told about SR
    # or the frame size - confirm that this is intended.
    mfcc = MFCC()

    frame_size = int(0.2 * SR)  # Change this depending whether it's music or sound
    hop_size = frame_size // 2

    pool = essentia.Pool()
    for frame in FrameGenerator(lowpassed, frameSize=frame_size, hopSize=hop_size):
        _bands, coeffs = mfcc(spectrum(w_hanning(frame)))
        pool.add("lowlevel.mfcc_selfsim", coeffs)

    selfsim = _self_similarity(pool['lowlevel.mfcc_selfsim'])

    novelty = novel(selfsim, compute_gaussian_krnl(KERNEL_SIZE))
    peaks = pick_peaks(novelty)

    # Frame index -> sample index (integer, so it can be used for slicing).
    boundaries = np.asarray(peaks, dtype=int) * hop_size

    if not os.path.isdir(outfdir):
        os.makedirs(outfdir)

    # NOTE(review): only the audio between consecutive peaks is written; the
    # parts before the first and after the last peak are skipped, as before.
    for b in range(1, len(boundaries)):
        segment = raw_audio[boundaries[b - 1]:boundaries[b]]
        if len(segment) >= MIN_SEGMENT_SECONDS * SR:
            outname = os.path.join(outfdir, '%s_seg%d.wav' % (name, b))
            # The WAV header stores the sample rate as an integer.
            wavwrite(outname, int(SR), segment)
            print("[II] Saving %s" % outname)

    return 0


if __name__ == "__main__":
    sys.exit(main())