annotate util/scale_transform.py @ 7:b7169083b9ea tip

fix typo in variable name
author Maria Panteli
date Tue, 01 Jan 2019 15:51:38 +0200
parents c4ef4a02fc19
children
rev   line source
Maria@1 1 # -*- coding: utf-8 -*-
Maria@1 2 """
Maria@1 3 Created on Mon Aug 17 13:37:00 2015
Maria@1 4
Maria@1 5 @author: mariapanteli
Maria@1 6 """
Maria@1 7 """Scale transform descriptor from mel spectrogram"""
Maria@1 8
Maria@1 9 import numpy
Maria@1 10 import librosa
Maria@1 11 import scipy.signal
Maria@1 12
Maria@1 13 class ScaleTransform:
Maria@1 14 def __init__(self):
Maria@1 15 self.y = None
Maria@1 16 self.sr = None
Maria@1 17 self.nmels = 40
Maria@1 18 self.melspec = None
Maria@1 19 self.melsr = None
Maria@1 20 self.op = None
Maria@1 21 self.opmellin = None
Maria@1 22
Maria@1 23 def load_audiofile(self, filename='test.wav', sr=None):
Maria@1 24 """Load audio"""
Maria@1 25 self.y, self.sr = librosa.load(filename, sr=sr)
Maria@1 26
Maria@1 27 def mel_spectrogram(self, y=None, sr=None):
Maria@1 28 """Get mel spectrogram"""
Maria@1 29 if self.y is None:
Maria@1 30 self.y = y
Maria@1 31 if self.sr is None:
Maria@1 32 self.sr = sr
Maria@1 33 win1 = int(round(0.04 * self.sr))
Maria@1 34 hop1 = int(round(win1 / 8.))
Maria@1 35 nfft1 = int(2 ** numpy.ceil(numpy.log2(win1)))
Maria@1 36 D = numpy.abs(librosa.stft(self.y, n_fft=nfft1, hop_length=hop1, win_length=win1, window=scipy.signal.hamming)) ** 2
Maria@1 37 melspec = librosa.feature.melspectrogram(S=D, sr=self.sr, n_mels=self.nmels, fmax=8000)
Maria@1 38 melsr = self.sr/float(hop1)
Maria@1 39 self.melspec = melspec
Maria@1 40 self.melsr = melsr
Maria@1 41
Maria@1 42 def post_process_spec(self, melspec=None, medianfilt=True, sqrt=True, diff=True, subtractmean=True, halfwave=True, maxnormal=True):
Maria@1 43 """Some post processing of the mel spectrogram"""
Maria@1 44 if self.melspec is None:
Maria@1 45 self.melspec = melspec
Maria@1 46 if medianfilt:
Maria@1 47 ks = int(0.1 * self.melsr) # 100ms kernel size
Maria@1 48 if ks % 2 == 0: ks += 1 # ks must be odd
Maria@1 49 for i in range(self.nmels):
Maria@1 50 self.melspec[i, :] = scipy.signal.medfilt(self.melspec[i, :], kernel_size=ks)
Maria@1 51 if sqrt:
Maria@1 52 self.melspec = self.melspec ** .5
Maria@1 53 if diff:
Maria@1 54 # append one frame before diff to keep number of frames the same
Maria@1 55 self.melspec = numpy.concatenate((self.melspec, self.melspec[:, -1, None]), axis=1)
Maria@1 56 self.melspec = numpy.diff(self.melspec, n=1, axis=1)
Maria@1 57 if subtractmean:
Maria@1 58 mean = self.melspec.mean(axis=1)
Maria@1 59 mean.shape = (mean.shape[0], 1)
Maria@1 60 self.melspec = self.melspec - mean
Maria@1 61 if halfwave:
Maria@1 62 self.melspec[numpy.where(self.melspec < 0)] = 0
Maria@1 63 if maxnormal:
Maria@1 64 self.melspec = self.melspec / self.melspec.max()
Maria@1 65
Maria@1 66 def onset_patterns(self, melspec=None, melsr=None, center=False):
Maria@1 67 """Get rhythm periodicities by applying stft in each mel band"""
Maria@1 68 if self.melspec is None:
Maria@1 69 self.melspec = melspec
Maria@1 70 if self.melsr is None:
Maria@1 71 self.melsr = melsr
Maria@1 72 win2 = int(round(8 * self.melsr))
Maria@1 73 hop2 = int(round(0.5 * self.melsr))
Maria@1 74 nfft2 = int(2**numpy.ceil(numpy.log2(win2)))
Maria@1 75
Maria@1 76 # some preprocessing for the second frame decomposition
Maria@1 77 melspectemp = self.melspec
Maria@1 78 if melspectemp.shape[1] < nfft2:
Maria@1 79 # if buffer too short pad with zeros to have at least one 8-sec window
Maria@1 80 nzeros = nfft2 - melspectemp.shape[1]
Maria@1 81 melspectemp = numpy.concatenate([numpy.zeros((self.nmels, int(numpy.ceil(nzeros / 2.)))), melspectemp, numpy.zeros((self.nmels,int(numpy.ceil(nzeros / 2.))))], axis=1)
Maria@1 82 temp = numpy.abs(librosa.stft(y=melspectemp[0, :], win_length=win2, hop_length=hop2, n_fft=nfft2, window=scipy.signal.hamming, center=center))
Maria@1 83 nframes = temp.shape[1]
Maria@1 84
Maria@1 85 # filter periodicities in the range 30-960 bpm
Maria@1 86 freqresinbpm = float(self.melsr) / float(nfft2/2.)*60.
Maria@1 87 minmag = int(numpy.floor(30. / freqresinbpm)) # min tempo 30bpm
Maria@1 88 maxmag = int(numpy.ceil(960. / freqresinbpm)) # max tempo 960 bpm
Maria@1 89 magsinds = range(minmag, maxmag) # indices of selected stft magnitudes
Maria@1 90
Maria@1 91 # loop over all mel_bands and get rhythm periodicities (stft magnitudes)
Maria@1 92 nmags = len(magsinds)
Maria@1 93 fft2 = numpy.zeros((self.nmels, nmags, nframes))
Maria@1 94 for i in range(self.nmels):
Maria@1 95 fftmags = numpy.abs(librosa.stft(y=melspectemp[i, :], win_length=win2, hop_length=hop2, n_fft=nfft2, window=scipy.signal.hamming, center=center))
Maria@1 96 fftmags = fftmags[magsinds, :]
Maria@1 97 fft2[i, :, :] = fftmags
Maria@1 98 op = fft2
Maria@1 99 self.op = op
Maria@1 100
Maria@1 101
Maria@1 102 def post_process_op(self, median_filt=True):
Maria@1 103 """Some smoothing of the onset patterns"""
Maria@1 104 if median_filt:
Maria@1 105 hop2 = int(round(0.5 * self.melsr))
Maria@1 106 ssr = self.melsr/float(hop2)
Maria@1 107 ks = int(0.5 * ssr) # 100ms kernel size
Maria@1 108 if ks % 2 == 0: ks += 1 # ks must be odd
Maria@1 109 nmels, nmags, nframes = self.op.shape
Maria@1 110 for i in range(nmels):
Maria@1 111 for j in range(nframes):
Maria@1 112 self.op[i, :, j] = numpy.convolve(self.op[i, :, j], numpy.ones(ks) / ks, mode='same')
Maria@1 113
Maria@1 114
Maria@1 115 def mellin_transform(self, op=None):
Maria@1 116 """
Maria@1 117 Apply mellin transform to remove tempo (scale) information.
Maria@1 118 Code adapted from a MATLAB implementation by Andre Holzapfel.
Maria@1 119 """
Maria@1 120 if self.op is None:
Maria@1 121 self.op = op
Maria@1 122 nmels, nmags, nframes = self.op.shape
Maria@1 123 nmagsout = 200
Maria@1 124 u_max = numpy.log(nmags)
Maria@1 125 delta_c = numpy.pi / u_max
Maria@1 126 c_max = nmagsout
Maria@1 127 c = numpy.arange(delta_c, c_max, delta_c)
Maria@1 128 k = range(1, nmags)
Maria@1 129 exponent = 0.5 - c * 1j
Maria@1 130
Maria@1 131 normMat = 1. / (exponent * numpy.sqrt(2 * numpy.pi))
Maria@1 132 normMat.shape = (normMat.shape[0], 1)
Maria@1 133 normMat = numpy.repeat(normMat.T, nmels, axis=0)
Maria@1 134 kernelMat = numpy.asarray([numpy.power(ki, exponent) for ki in k])
Maria@1 135 opmellin = numpy.zeros((nmels, nmagsout, nframes))
Maria@1 136 for i in range(nframes):
Maria@1 137 self.op[:, -1, i] = 0
Maria@1 138 deltaMat = - numpy.diff(self.op[:, :, i])
Maria@1 139 mellin = numpy.abs(numpy.dot(deltaMat, kernelMat) * normMat)
Maria@1 140 opmellin[:, :, i] = mellin[:, :nmagsout]
Maria@1 141 self.opmellin = opmellin
Maria@1 142
Maria@1 143
Maria@1 144 def post_process_mellin(self, opmellin=None, normFrame=True, aveBands=False):
Maria@1 145 """Some post processing of the scale transform"""
Maria@1 146 if self.opmellin is None:
Maria@1 147 self.opmellin = opmellin
Maria@1 148 if aveBands:
Maria@1 149 self.opmellin = numpy.mean(self.opmellin, axis=0, keepdims=True)
Maria@1 150 nmels, nmags, nframes = self.opmellin.shape
Maria@1 151 self.opmellin = self.opmellin.reshape((nmels*nmags, nframes))
Maria@1 152 if normFrame:
Maria@1 153 min_opmellin = numpy.amin(self.opmellin, axis=0, keepdims=True)
Maria@1 154 max_opmellin = numpy.amax(self.opmellin, axis=0, keepdims=True)
Maria@1 155 denom = max_opmellin - min_opmellin
Maria@1 156 denom[denom==0] = 1 # avoid division by 0 if frame is all 0s-silent
Maria@1 157 self.opmellin = (self.opmellin - min_opmellin) / denom
Maria@1 158
Maria@1 159
Maria@1 160 def get_scale_transform(self, filename='test.wav'):
Maria@1 161 """Return scale transform for filename"""
Maria@1 162 self.load_audiofile(filename=filename)
Maria@1 163 self.mel_spectrogram()
Maria@1 164 self.post_process_spec()
Maria@1 165 self.onset_patterns()
Maria@1 166 self.post_process_op()
Maria@1 167 self.mellin_transform()
Maria@1 168 self.post_process_mellin(aveBands=True)
Maria@1 169 return self.opmellin
Maria@1 170
Maria@1 171
Maria@1 172 if __name__ == '__main__':
Maria@1 173 op = ScaleTransform()
Maria@1 174 op.get_scale_transform()