view scripts/OPMellin.py @ 36:3b67cd634b9a branch-tests

tests for pitch bihist, before refactoring
author Maria Panteli
date Thu, 14 Sep 2017 14:16:29 +0100
parents e50c63cf96be
children e5e8e8a96948
line wrap: on
line source
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 17 13:37:00 2015

@author: mariapanteli
"""

import librosa
import scipy.signal
# import matplotlib.pyplot as plt
import numpy

class OPMellin:
    def __init__(self, win2sec=8):
        self.y = None
        self.sr = None
        self.melspec = None
        self.melsr = None
        self.op = None
        self.opmellin = None
        self.win2sec = win2sec
        
    def load_audiofile(self, filename='test.wav', sr=None, segment=True):
        self.y, self.sr = librosa.load(filename, sr=sr)
        #self.y, self.sr = librosa.load(filename) # default sr=22050
        if segment:
            tracklength = self.y.shape[0]/float(self.sr)
            startSample = 0
            endSample = None
            if tracklength > 90:
                startPointSec = (tracklength/2.)-20
                startSample = round(startPointSec*self.sr)
                endSample = startSample+45*self.sr
            self.y = self.y[startSample:endSample]


    # def mel_spectrogram(self, y=None, sr=None, filename='1_21.wav'):
    def mel_spectrogram(self, y=None, sr=None):
        if self.y is None:
            self.y = y
        if self.sr is None:
            self.sr = sr
        win1 = int(round(0.04*self.sr))
        hop1 = int(round(win1/8.))
        nfft1 = int(2**numpy.ceil(numpy.log2(win1)))
        nmels = 40
        # y, sr = librosa.load(filename, sr=fs)        
        # melspec = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=nfft, win_length=win1, window=scipy.signal.hamming, hop_length=hop1, n_mels=nmels)
        # melspec = librosa.feature.melspectrogram(y=self.y, sr=self.sr, n_fft=nfft, hop_length=hop1, n_mels=nmels)
        D = numpy.abs(librosa.stft(self.y, n_fft=nfft1, hop_length=hop1, win_length=win1, window=scipy.signal.hamming))**2
        #melspec = librosa.feature.melspectrogram(S=D, sr=self.sr, n_mels=nmels)
        melspec = librosa.feature.melspectrogram(S=D, sr=self.sr, n_mels=nmels, fmax=8000)
        melsr = self.sr/float(hop1)
        # librosa.display.specshow(melspec)
        # return melspec, melsr
        self.melspec = melspec
        self.melsr = melsr
 
    # post process spectrogram
    def post_process_spec(self, melspec=None, log=True, medianfilt=False, sqrt=False, smooth=False, diff=True, subtractmean=True, halfwave=True, maxnormal=True):
        if self.melspec is None:
            self.melspec = melspec
        if log:
            self.melspec = librosa.logamplitude(self.melspec)
        if medianfilt:
            ks = int(0.1 * self.melsr) # 100ms kernel size
            if ks % 2 == 0: # ks must be odd
                ks += 1
            nmels = self.melspec.shape[0]
            for i in range(nmels):
                self.melspec[i,:] = scipy.signal.medfilt(self.melspec[i,:],kernel_size = ks)
        if sqrt:
            self.melspec = self.melspec**.5
        if smooth:
            step = 50.0
            nmels = self.melspec.shape[0]
            mm = self.melspec
            for i in range(nmels):
                mm[i,:] = numpy.convolve(mm[i,:], numpy.ones(step)/step, mode='same')
            self.melspec = mm
        if diff:
            self.melspec = numpy.concatenate((self.melspec,self.melspec[:,-1,None]),axis=1)  # append one frame before diff to keep number of frames the same
            self.melspec = numpy.diff(self.melspec, n=1, axis=1)
        if subtractmean:
            mean = self.melspec.mean(axis=1)
            mean.shape = (mean.shape[0], 1)
            self.melspec = self.melspec - mean
        if halfwave:
            self.melspec[numpy.where(self.melspec < 0)] = 0
        if maxnormal:
            if self.melspec.max() != 0:  # avoid division by 0
                self.melspec = self.melspec/self.melspec.max()
        # return melspec


    def onset_patterns(self, melspec=None, melsr=None, fft=True, logfilter=False, center=True, bpmrange=True):
        if self.melspec is None:
            self.melspec = melspec
        if self.melsr is None:
            self.melsr = melsr
        win2 = int(round(self.win2sec*self.melsr))
        hop2 = int(round(0.5*self.melsr))
        nmels, nmelframes = self.melspec.shape
        if fft:  # periodicity via fft
            #nfft2 = int(2**numpy.ceil(numpy.log2(win2)))
            nfft2 = 2048  # nfft2 does not depend on win2??
            melspectemp = self.melspec
            if ((nfft2 > win2) and (center is False)):
                # pad the signal by nfft2-win2 so that frame decomposition (n_frames) 
                # is still based on win2 and not nfft2
                melspectemp = numpy.concatenate([numpy.zeros((nmels,int((nfft2 - win2) // 2))),self.melspec, numpy.zeros((nmels,int((nfft2 - win2) // 2)))],axis=1)
            if melspectemp.shape[1]<nfft2:
                # pad with zeros to have at least one 8-sec window
                nzeros = nfft2 - melspectemp.shape[1]
                melspectemp = numpy.concatenate([numpy.zeros((nmels,int(numpy.ceil(nzeros / 2.)))),melspectemp, numpy.zeros((nmels,int(numpy.ceil(nzeros / 2.))))],axis=1)
                #melspectemp = numpy.concatenate([melspectemp, numpy.zeros((nmels,nzeros))],axis=1)
            # nframes = int(round(numpy.ceil(round(nmelframes/hop2))))
            ff0 = numpy.abs(librosa.stft(y=melspectemp[0, :], win_length=win2, hop_length=hop2, n_fft=nfft2, window=scipy.signal.hamming, center=center))
            nframes = ff0.shape[1]
            # nmags, nframes = ff0.shape
            if logfilter:  # log filterbank
                nbins = 25
                binsperoct = 5
                fft2 = numpy.zeros((nmels, nbins, nframes))     
                logfb = librosa.filters.logfrequency(self.melsr, nfft2, n_bins=nbins, bins_per_octave=binsperoct, fmin=0.5)
                # logfb = librosa.filters.logfrequency(self.melsr, nfft2, n_bins=25, bins_per_octave=5, fmin=0.5)
                for i in range(nmels):
                    fftmags = numpy.abs(librosa.stft(y=melspectemp[i, :], win_length=win2, hop_length=hop2, n_fft=nfft2, window=scipy.signal.hamming, center=center))
                    fftmags = numpy.dot(logfb, fftmags)  # log
                    fft2[i, :, :] = fftmags
            else:
                # nmags = min(round(nfft2/2), 200)
                if bpmrange:
                    # freqresinbpm = float(self.melsr)/float(nfft2)*60.
                    freqresinbpm = float(self.melsr)/float(nfft2/2.)*60.
                    minmag = int(numpy.floor(30./freqresinbpm))  # min tempo 30bpm
                    maxmag = int(numpy.ceil(960./freqresinbpm))  # max tempo 960 bpm
                    magsinds = range(minmag, maxmag)
                else:
                    magsinds = range(0, 200)
                nmags = len(magsinds)
                fft2 = numpy.zeros((nmels, nmags, nframes))
                for i in range(nmels):
                    fftmags = numpy.abs(librosa.stft(y=melspectemp[i, :], win_length=win2, hop_length=hop2, n_fft=nfft2, window=scipy.signal.hamming, center=center))
                    fftmags = fftmags[magsinds, :]
                    fft2[i, :, :] = fftmags
            op = fft2
        else:  # periodicity via autocorrelation
            nmags = min(win2, 1000)
            if nmelframes < win2:
                nframes = 1
            else:
                nframes = int(nmelframes/float(win2))
            autocor = numpy.zeros((nmels, nmags, nframes))
            for i in range(nmels):
                ac = numpy.zeros((nmags, nframes))
                for j in range(nframes):
                    ac[:, j] = librosa.autocorrelate(self.melspec[i, (j*win2):min((j+1)*win2, nmelframes)], max_size=nmags)
                autocor[i, :, :] = ac
            op = autocor
        self.op = op

    def post_process_op(self, medianfiltOP=False):
        if medianfiltOP:
            hop2 = int(round(0.5*self.melsr))
            ssr = self.melsr/float(hop2)
            ks = int(0.5 * ssr)  # 100ms kernel size
            if ks % 2 == 0:  # ks must be odd
                ks += 1
            nmels, nmags, nframes = self.op.shape
            for i in range(nmels):
                for j in range(nframes):
                    #self.op[i,:,j] = scipy.signal.medfilt(self.op[i,:,j],kernel_size = ks)
                    self.op[i,:,j] = numpy.convolve(self.op[i,:,j], numpy.ones(ks)/ks, mode='same')

    def mellin_transform(self, op=None):
        if self.op is None:
            self.op = op
        nmels, nmags, nframes = self.op.shape
        #nmagsout = min(200, nmags)
        nmagsout = 200
        u_max = numpy.log(nmags)
        delta_c = numpy.pi / u_max
        c_max = nmagsout
        c = numpy.arange(delta_c, c_max, delta_c)
        k = range(1, nmags)
        exponent = 0.5 - c*1j

        normMat = 1./(exponent * numpy.sqrt(2*numpy.pi))
        normMat.shape = (normMat.shape[0], 1)
        normMat = numpy.repeat(normMat.T, nmels, axis=0)
        kernelMat = numpy.asarray([numpy.power(ki, exponent) for ki in k])
        opmellin = numpy.zeros((nmels, nmagsout, nframes))
        for i in range(nframes):
            self.op[:, -1, i] = 0
            deltaMat = - numpy.diff(self.op[:, :, i])
            mellin = numpy.abs(numpy.dot(deltaMat, kernelMat) * normMat)
            opmellin[:, :, i] = mellin[:, :nmagsout]
        self.opmellin = opmellin


    def post_process_opmellin(self, opmellin=None, aveFreq=False, normFrame=True):
        if self.opmellin is None:
            self.opmellin = opmellin
        if aveFreq:
            self.opmellin = numpy.mean(self.opmellin, axis=0)
        else:  # just reshape
            nmels, nmags, nframes = self.opmellin.shape
            self.opmellin = self.opmellin.reshape((nmels*nmags, nframes))
        if normFrame:
            min_opmellin = numpy.amin(self.opmellin, axis=0, keepdims=True)
            max_opmellin = numpy.amax(self.opmellin, axis=0, keepdims=True)
            denom = max_opmellin - min_opmellin
            denom[denom==0] = 1 # avoid division by 0 if frame is all 0s-silent
            self.opmellin = (self.opmellin - min_opmellin) / denom


    def get_opmellin(self, filename='test.wav', logfilter=False, center=True, medianfiltspec=False, medianfiltOP=False):
        self.load_audiofile(filename=filename)        
        self.mel_spectrogram()
        #self.post_process_spec(medianfilt=medianfiltspec)
        self.post_process_spec(log=False, sqrt=True, medianfilt=medianfiltspec)  # sqrt seems to work better
        self.onset_patterns(logfilter=logfilter, center=center)
        self.post_process_op(medianfiltOP=medianfiltOP)
        self.mellin_transform()
        self.post_process_opmellin()
        return self.opmellin

    def get_opmellin_from_melspec(self, melspec=[], melsr=[]):
        self.melspec = melspec
        self.melsr = melsr
        self.post_process_spec(log=False, sqrt=True, medianfilt=True)  # sqrt seems to work better
        self.onset_patterns(logfilter=False, center=False)
        self.post_process_op(medianfiltOP=True)
        self.mellin_transform()
        self.post_process_opmellin()
        return self.opmellin

if __name__ == '__main__':
    op = OPMellin()
    op.get_opmellin()