Mercurial > hg > chourdakisreiss2016
view experiment-reverb/code/selfsim.py @ 2:c87a9505f294 tip
Added LICENSE for code, removed .wav files
author | Emmanouil Theofanis Chourdakis <e.t.chourdakis@qmul.ac.uk> |
---|---|
date | Sat, 30 Sep 2017 13:25:50 +0100 |
parents | 246d5546657c |
children |
line wrap: on
line source
# -*- coding: utf-8 -*- """ Created on Mon Jun 8 11:19:15 2015 @author: mmxgn """ # Codes taken from: https://github.com/urinieto/msaf/blob/master/msaf/algorithms/foote/segmenter.py if __name__=="__main__": from sys import argv if len(argv) != 3: print("Incorrect number of arguments:") print("Usage: ") print("%s <input>") print("") print("Arguments:") print("<input>\tThe input filename. Can be .wav, .mp3, etc...") print("<output_folder>\tThe output folders. Segments will be stored under names 'name_segN'") sys.exit(-1) else: print("[II] Applying the method found in: ") print("[II] Automatic Audio Segmentation using a measure of Audio Novelty") print("[II] - Jonathar Foote ") print("[II] Loading libraries") import essentia from essentia import Pool from essentia.standard import * import csv import yaml # reqyures matplotlib from pylab import * #requires numpy from numpy import * import wave from scipy.spatial import distance from scipy.ndimage import filters d = {} v = {} fname = argv[1] outfdir = argv[2] print "[II] Using filename: %s" % fname print "[II] Using output folder: %s" % outfdir name = fname.split('.')[-2].split('/')[-1] print "[II] Segments will be saved in the form '%s/%s_segN.mp3'" % (outfdir, name) trackname = fname.split('.')[0].split('/')[-1] # if outfname.partition('.')[-1].lower() not in ['json', 'yaml']: # print("Please choose a .json or .yaml as an output file.") # sys.exit(-1) # else: # if outfname.partition('.')[-1].lower() == 'json': # output = YamlOutput(filename = outfname, format='json') # else: # output = YamlOutput(filename = outfname, format='yaml') print("Feature extraction of `%s\'" % fname) # Sampling Rate SR = 21000.0 # Audio Loader loader = MonoLoader(filename = fname, sampleRate=SR) # Lowpass audio lp = LowPass(cutoffFrequency=SR/4, sampleRate=SR) # Audio audio = lp(loader()) # For MFCCs w_hanning = Windowing(type = "hann") spectrum = Spectrum() mfcc = MFCC() frameSize = int(0.2 * SR) # Change this depending whether it's music or sound pool = essentia.Pool() for frame in FrameGenerator(audio, frameSize = frameSize, hopSize = frameSize/2): mfcc_bands, mfcc_coeffs = mfcc(spectrum(w_hanning(frame))) pool.add("lowlevel.mfcc_selfsim", mfcc_coeffs) mfcc_coeffs = pool['lowlevel.mfcc_selfsim'] # selfsim = 1 - pairwise_distances(mfcc_coeffs)#, metric = "cosine") selfsim = distance.pdist(mfcc_coeffs, metric='seuclidean') selfsim = distance.squareform(selfsim) selfsim /= selfsim.max() selfsim = 1 - selfsim # Calculating cosine distances as a better metric C = array([[1,-1],[-1,1]]) def Novelty(S, C = array([[1, -1],[-1, 1]])): L = C.shape[0] horconcat = concatenate((S[:, 0:L/2], S, S[:,-L/2:]), axis=1) verconcat = concatenate((horconcat[0:L/2,:], horconcat, horconcat[-L/2:,:]), axis=0) N = zeros((S.shape[0],)) for i in range(0, len(N)): S_ = 0 for m in range(-L/2, L/2): for n in range(-L/2, L/2): # print (m,n), (L/2+m, L/2+n) S_ += C[L/2+m, L/2+n]*verconcat[i+m+L/2, i+n-L/2] # S_ += verconcat[i+m+L/2, i+m-L/2] # print S_ N[i] = S_ return N def novel(S, C = array([[1, -1], [-1, 1]])): N = S.shape[0] M = C.shape[0] novelty = zeros(N) for i in xrange(M/2, N-M/2+1): novelty[i] = sum(S[i-M/2:i+M/2,i-M/2:i+M/2] * C) novelty += novelty.min() novelty /= novelty.max() return novelty def pick_peaks(nc, L=32): # Codes taken from: https://github.com/urinieto/msaf/blob/master/msaf/algorithms/foote/segmenter.py """Obtain peaks from a novelty curve using an adaptive threshold.""" offset = nc.mean() / 20. nc = filters.gaussian_filter1d(nc, sigma=4) # Smooth out nc th = filters.median_filter(nc, size=L) + offset #th = filters.gaussian_filter(nc, sigma=L/2., mode="nearest") + offset peaks = [] for i in xrange(1, nc.shape[0] - 1): # is it a peak? if nc[i - 1] < nc[i] and nc[i] > nc[i + 1]: # is it above the threshold? if nc[i] > th[i]: peaks.append(i) #plt.plot(nc) #plt.plot(th) #for peak in peaks: #plt.axvline(peak) #plt.show() return peaks from scipy import signal def compute_gaussian_krnl(M): """Creates a gaussian kernel following Foote's paper.""" g = signal.gaussian(M, M / 3., sym=True) G = np.dot(g.reshape(-1, 1), g.reshape(1, -1)) G[M / 2:, :M / 2] = -G[M / 2:, :M / 2] G[:M / 2, M / 2:] = -G[:M / 2, M / 2:] return G K = compute_gaussian_krnl(96) def kernelMatrix(L): k1 = concatenate((ones((L/2,L/2)), -1*ones((L/2,L/2)))) k1 = concatenate((k1,-k1),axis=1) return k1 N = novel(selfsim, K) peaks = pick_peaks(N) boundaries = array(peaks)*frameSize/2 sampleRate = SR audio = MonoLoader(filename=fname, sampleRate = sampleRate)() from scipy.io.wavfile import write as wavwrite for b in range(1, len(boundaries)): outname = '%s/%s_seg%d.wav' % (outfdir, name, b) segment = audio[boundaries[b-1]:boundaries[b]] if len(segment) >= 5*SR: #audioout = MonoWriter(sampleRate = SR, filename=outname) #audioout(segment) wavwrite(outname, SR, segment) print "[II] Saving %s" % outname