# -*- coding: utf-8 -*-
"""
Created on Mon Jun  8 11:19:15 2015

@author: mmxgn
"""
# Code taken from: https://github.com/urinieto/msaf/blob/master/msaf/algorithms/foote/segmenter.py



import os
import sys

import numpy as np
from scipy import ndimage
from scipy.io import wavfile
from scipy.signal import windows
from scipy.spatial import distance

# Analysis settings (same values the script has always used).
SAMPLE_RATE = 21000       # Hz; the input file is resampled to this rate on load
FRAME_SECONDS = 0.2       # frame length; change depending on whether it's music or sound
KERNEL_SIZE = 96          # side of the Gaussian checkerboard kernel, in frames
MIN_SEGMENT_SECONDS = 5   # segments shorter than this are not written


def _checkerboard():
    """Return the minimal 2x2 checkerboard kernel used as the default ``C``."""
    return np.array([[1, -1], [-1, 1]])


def _half_width(kernel):
    """Return half the side of ``kernel``.

    Raises:
        ValueError: if the first dimension of ``kernel`` is odd or below 2;
            the windows below are built from two equal halves.
    """
    size = kernel.shape[0]
    if size < 2 or size % 2:
        raise ValueError("kernel size must be even and >= 2, got %d" % size)
    return size // 2


def Novelty(S, C=None):
    """Correlate kernel ``C`` along the diagonal of ``S`` with edge padding.

    ``S`` is padded on every side with a copy of its outermost ``L/2``
    columns/rows (``L`` being the side of ``C``), so a value is produced for
    every index, including those closer than ``L/2`` to the border.

    Args:
        S: square 2-D self-similarity matrix.
        C: square kernel with an even side; defaults to the 2x2 checkerboard.

    Returns:
        1-D float array of length ``S.shape[0]`` (not normalised).

    Raises:
        ValueError: if the kernel side is odd or below 2.
    """
    S = np.asarray(S, dtype=float)
    C = _checkerboard() if C is None else np.asarray(C)
    side = C.shape[0]
    half = _half_width(C)

    padded = np.concatenate((S[:, :half], S, S[:, -half:]), axis=1)
    padded = np.concatenate((padded[:half, :], padded, padded[-half:, :]),
                            axis=0)

    result = np.zeros(S.shape[0])
    for i in range(S.shape[0]):
        # Rows AND columns i .. i+side-1 of the padded matrix are the window
        # centred on index i of S.  (The column index used to be offset by
        # -L/2 instead of +L/2, which read a window shifted by L and wrapped
        # around through negative indices.)
        result[i] = np.sum(C * padded[i:i + side, i:i + side])
    return result


def novel(S, C=None):
    """Compute a normalised novelty curve from a self-similarity matrix.

    For every index where the kernel fits entirely inside ``S`` the kernel is
    multiplied element-wise with the window centred on the diagonal and
    summed; the remaining indices stay at zero.  The curve is then shifted so
    its minimum is 0 and scaled so its maximum is 1.

    Args:
        S: square 2-D self-similarity matrix.
        C: square kernel with an even side; defaults to the 2x2 checkerboard.

    Returns:
        1-D float array of length ``S.shape[0]`` with values in [0, 1]; all
        zeros when the curve is flat (e.g. ``S`` is smaller than ``C``).

    Raises:
        ValueError: if the kernel side is odd or below 2.
    """
    S = np.asarray(S, dtype=float)
    C = _checkerboard() if C is None else np.asarray(C)
    n_frames = S.shape[0]
    half = _half_width(C)

    novelty = np.zeros(n_frames)
    if n_frames == 0:
        return novelty

    for i in range(half, n_frames - half + 1):
        novelty[i] = np.sum(S[i - half:i + half, i - half:i + half] * C)

    # Min-max normalisation.  The minimum has to be subtracted: adding it
    # (as was done before) pushes a curve with negative values further down
    # and can even flip its sign after the division.
    novelty -= novelty.min()
    peak = novelty.max()
    if peak > 0:  # a flat curve would otherwise become 0/0 = NaN
        novelty /= peak
    return novelty


def pick_peaks(nc, L=32):
    """Obtain peaks from a novelty curve using an adaptive threshold.

    Code taken from:
    https://github.com/urinieto/msaf/blob/master/msaf/algorithms/foote/segmenter.py

    The curve is smoothed with a Gaussian (sigma=4); a sample is a peak when
    it is strictly greater than both neighbours and greater than the median
    of the smoothed curve over ``L`` samples plus 1/20 of the mean of the
    unsmoothed curve.

    Args:
        nc: 1-D novelty curve.
        L: size of the median-filter window, in samples.

    Returns:
        List of integer indices into ``nc``, in increasing order.
    """
    nc = np.asarray(nc, dtype=float)
    if nc.size < 3:
        # A peak needs a neighbour on each side.
        return []

    offset = nc.mean() / 20.
    smooth = ndimage.gaussian_filter1d(nc, sigma=4)
    threshold = ndimage.median_filter(smooth, size=L) + offset

    centre = smooth[1:-1]
    is_peak = ((smooth[:-2] < centre) & (centre > smooth[2:])
               & (centre > threshold[1:-1]))
    # +1 because `centre` starts at index 1 of the curve.
    return (np.flatnonzero(is_peak) + 1).tolist()


def compute_gaussian_krnl(M):
    """Creates a gaussian kernel following Foote's paper.

    Args:
        M: side of the kernel, in frames.

    Returns:
        ``M`` x ``M`` float array: the outer product of a Gaussian window
        (std ``M / 3``) with itself, with the two off-diagonal quadrants
        negated.
    """
    half = M // 2
    g = windows.gaussian(M, M / 3., sym=True)
    G = np.outer(g, g)
    G[half:, :half] *= -1
    G[:half, half:] *= -1
    return G


def kernelMatrix(L):
    """Return a plain +1/-1 checkerboard kernel.

    The result has four ``L/2`` x ``L/2`` quadrants: +1 on the diagonal
    quadrants and -1 on the off-diagonal ones.
    """
    half = L // 2
    return np.kron(np.array([[1., -1.], [-1., 1.]]), np.ones((half, half)))


def _self_similarity(features):
    """Return 1 - (standardised Euclidean distance / largest distance)."""
    dist = distance.squareform(distance.pdist(features, metric='seuclidean'))
    largest = dist.max()
    if largest > 0:  # all frames identical: avoid 0/0
        dist /= largest
    return 1 - dist


def _print_usage(prog):
    """Print the command-line help."""
    print("Incorrect number of arguments:")
    print("Usage: ")
    print("%s <input> <output_folder>" % prog)
    print("")
    print("Arguments:")
    print("<input>\tThe input filename. Can be .wav, .mp3, etc...")
    print("<output_folder>\tThe output folders. Segments will be stored under names 'name_segN'")


def main(argv):
    """Segment an audio file and write the segments as WAV files.

    Args:
        argv: ``[program, input_file, output_folder]``.

    Returns:
        0 on success, -1 when the arguments are wrong.
    """
    if len(argv) != 3:
        _print_usage(argv[0] if argv else "segmenter")
        return -1

    fname, outfdir = argv[1], argv[2]

    print("[II] Applying the method found in: ")
    print("[II] Automatic Audio Segmentation using a measure of Audio Novelty")
    print("[II] - Jonathar Foote ")
    print("[II] Loading libraries")

    # Imported only once the arguments are known to be valid, so the usage
    # message does not depend on Essentia being installed.
    from essentia.standard import (FrameGenerator, LowPass, MFCC, MonoLoader,
                                   Spectrum, Windowing)

    print("[II] Using filename: %s" % fname)
    print("[II] Using output folder: %s" % outfdir)

    # Base name without directory or extension; also copes with names that
    # contain no dot or several dots.
    name = os.path.splitext(os.path.basename(fname))[0]
    print("[II] Segments will be saved in the form '%s/%s_segN.wav'"
          % (outfdir, name))

    print("Feature extraction of `%s'" % fname)

    rate = float(SAMPLE_RATE)
    raw_audio = MonoLoader(filename=fname, sampleRate=rate)()
    # Features are computed on a low-passed copy; the segments written to
    # disk are cut from the unfiltered signal.
    filtered = LowPass(cutoffFrequency=rate / 4, sampleRate=rate)(raw_audio)

    window = Windowing(type="hann")
    spectrum = Spectrum()
    # NOTE(review): MFCC() runs with the library defaults although the audio
    # is resampled to SAMPLE_RATE -- confirm those defaults suit this rate.
    mfcc = MFCC()

    frame_size = int(FRAME_SECONDS * SAMPLE_RATE)
    hop_size = frame_size // 2

    coeffs = np.array([
        mfcc(spectrum(window(frame)))[1]
        for frame in FrameGenerator(filtered, frameSize=frame_size,
                                    hopSize=hop_size)
    ])
    if coeffs.ndim != 2 or coeffs.shape[0] < 2:
        print("[II] Input is too short to segment; nothing saved")
        return 0

    selfsim = _self_similarity(coeffs)
    novelty = novel(selfsim, compute_gaussian_krnl(KERNEL_SIZE))
    peaks = pick_peaks(novelty)

    # Peak indices are frame numbers; one frame advances by hop_size samples.
    boundaries = np.asarray(peaks, dtype=int) * hop_size

    if outfdir and not os.path.isdir(outfdir):
        os.makedirs(outfdir)

    min_samples = MIN_SEGMENT_SECONDS * SAMPLE_RATE
    # NOTE(review): only the audio between consecutive boundaries is saved;
    # the parts before the first and after the last boundary are skipped,
    # as in the original script.
    for b in range(1, len(boundaries)):
        segment = raw_audio[boundaries[b - 1]:boundaries[b]]
        if len(segment) >= min_samples:
            outname = os.path.join(outfdir, '%s_seg%d.wav' % (name, b))
            # The WAV header needs an integer sample rate.
            wavfile.write(outname, SAMPLE_RATE, segment)
            print("[II] Saving %s" % outname)

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
e@0: