view scripts/PitchBihist.py @ 23:56cbf155680a branch-tests

on melodia
author mpanteli <m.x.panteli@gmail.com>
date Wed, 13 Sep 2017 13:52:26 +0100
parents e50c63cf96be
children 3b67cd634b9a
line wrap: on
line source
# -*- coding: utf-8 -*-
"""
Created on Tue Feb  2 22:26:10 2016

@author: mariapanteli
"""
import smoothiecore as s
import numpy
import scipy.signal
# import librosa


class PitchBihist:
    """Compute pitch bihistograms from a chroma-like matrix.

    The input is always a 2-D array with pitch bins as rows and time frames
    as columns.  Each bihistogram is an ``nbins x nbins`` matrix; when a
    second (sliding-window) decomposition is used, every window yields one
    bihistogram that is flattened into a column of ``self.bihist``.
    """

    def __init__(self, win2sec=8):
        """Initialise an empty extractor.

        Parameters
        ----------
        win2sec : number
            Length in seconds of the sliding window used by the second
            frame decomposition.
        """
        self.y = None
        self.sr = None
        self.chroma = None  # (nbins, nframes) input matrix
        self.chromasr = None  # frame rate of the input matrix, frames/sec
        self.bihist = None  # last computed result
        self.win2sec = win2sec

    def _framewise_bihist(self, matrix, align=True):
        """Return one flattened bihistogram per sliding window of `matrix`.

        Windows are ``self.win2sec`` seconds long and advance by 0.5 sec.
        If `matrix` has fewer frames than one window, a single bihistogram
        of the whole matrix is returned.

        Returns
        -------
        numpy.ndarray of shape ``(nbins * nbins, nframes)``.
        """
        win2 = int(round(self.win2sec * self.chromasr))
        # At frame rates below 1 Hz the hop rounds to 0, which would make
        # the frame count undefined; advance by at least one frame.
        hop2 = max(1, int(round(0.5 * self.chromasr)))
        nbins, norigframes = matrix.shape
        if norigframes < win2:
            nframes = 1
        else:
            nframes = 1 + (norigframes - win2) // hop2
        bihistframes = numpy.empty((nbins * nbins, nframes))
        for i in range(nframes):
            start = i * hop2
            # Slicing clips at the array end, so a short input simply gives
            # one shorter window.
            frame = matrix[:, start:start + win2]
            bihist = self.bihistogram(frame, align=align)
            bihistframes[:, i] = numpy.reshape(bihist, -1)
        return bihistframes

    def bihist_from_chroma(self, filename='test.wav', secondframedecomp=True):
        """Compute ``self.bihist`` from the chroma of an audio file.

        The chroma and its frame rate are obtained from
        ``smoothiecore.get_smoothie_for_bihist(filename, hopinsec=0.005)``
        and stored in ``self.chroma`` / ``self.chromasr``.

        Parameters
        ----------
        filename : str
            Passed through to ``get_smoothie_for_bihist``.
        secondframedecomp : bool
            If True, ``self.bihist`` holds one flattened bihistogram per
            sliding window (``self.win2sec`` long, 0.5 sec hop).  If False,
            it holds a single flattened bihistogram of the whole chroma.
        """
        self.chroma, self.chromasr = s.get_smoothie_for_bihist(filename=filename, hopinsec=0.005)
        if secondframedecomp:
            self.bihist = self._framewise_bihist(self.chroma)
        else:
            self.bihist = numpy.reshape(self.bihistogram(self.chroma), -1)

    def bihist_from_melodia(self, filename='sample_melodia.csv', secondframedecomp=True, stop_sec=None):
        """Compute ``self.bihist`` from a two-column (time, frequency) CSV.

        Every frame with a positive frequency is mapped to one of 60 bins
        per octave (relative to 32.703 Hz) and turned into a one-hot column;
        frames with frequency <= 0 become all-zero columns.

        Parameters
        ----------
        filename : str
            Comma-separated file read with ``numpy.loadtxt``; column 0 is
            time in seconds, column 1 is frequency in Hz.
        secondframedecomp : bool
            If True, return one flattened bihistogram per sliding window.
            If False, return a single, *unflattened* ``60 x 60`` bihistogram
            of the whole track.
        stop_sec : number or None
            If given, only frames with ``time < stop_sec`` are used.

        Returns
        -------
        numpy.ndarray
            The value also stored in ``self.bihist``.
        """
        def hz_to_cents(freq_Hz, ref_Hz=32.703, n_cents=1200):
            """ convert frequency values from Hz to rounded steps above the
                reference frequency (C1), with `n_cents` steps per octave
            """
            freq_cents = numpy.round(n_cents * numpy.log2(freq_Hz/ref_Hz))
            return freq_cents

        def wrap_to_octave(cents, octave_length=1200):
            """ wrap to a single octave 0-`octave_length`
            """
            octave_cents = cents % octave_length
            return octave_cents

        n_bins = 60
        data = numpy.loadtxt(filename, delimiter=',')
        times, freqs = (data[:, 0], data[:, 1])
        # The frame rate is inferred from the first two time stamps.
        self.chromasr = 1. / (times[1] - times[0])
        if stop_sec is not None:
            stop_idx = numpy.where(times < stop_sec)[0]
            times, freqs = times[stop_idx], freqs[stop_idx]
        # Non-positive frequencies are marked as NaN so that they propagate
        # through the log and can be skipped below.
        freqs[freqs <= 0] = numpy.nan
        melody = freqs
        n_frames = len(melody)
        melody_cents = hz_to_cents(melody, n_cents=n_bins)
        melody_octave = wrap_to_octave(melody_cents, octave_length=n_bins)
        # One-hot encode: row = pitch bin, column = frame index.
        melody_matrix = numpy.zeros((n_bins, n_frames))
        voiced = ~numpy.isnan(melody_octave)
        melody_matrix[melody_octave[voiced].astype(int), numpy.flatnonzero(voiced)] = 1
        if secondframedecomp:
            self.bihist = self._framewise_bihist(melody_matrix)
        else:
            self.bihist = self.bihistogram(melody_matrix)
        return self.bihist

    def bihistogram(self, spec, winsec=0.5, align=True):
        """Compute a single bihistogram of `spec`.

        Parameters
        ----------
        spec : numpy.ndarray
            2-D array, pitch bins as rows and frames as columns.  It is not
            modified; all processing happens on a copy.
        winsec : number
            Look-back length in seconds; converted to frames with
            ``self.chromasr``.
        align : bool
            If True, circularly shift both axes of the result so that the
            bin with the largest summed (thresholded) energy is at index 0.

        Returns
        -------
        numpy.ndarray of shape ``(nbins, nbins)``, min-max normalised to
        [0, 1] unless all entries are equal.
        """
        win = int(round(winsec*self.chromasr))
        # Column kernel of `win` zeros followed by `win + 1` ones: with
        # mode='same' the convolution sums each frame and the `win` frames
        # before it.
        ker = numpy.concatenate([numpy.zeros((win, 1)), numpy.ones((win+1, 1))], axis=0)
        # Copy before transposing (frames as rows for the convolution).
        # `spec` is usually a view into the caller's matrix; thresholding it
        # in place would corrupt that matrix and any overlapping window
        # processed afterwards.
        spec = numpy.array(spec).T

        # energy threshold
        thr = 0.3*numpy.max(spec)
        spec[spec < max(thr, 0)] = 0

        # transitions via convolution
        tra = scipy.signal.convolve2d(spec, ker, mode='same')
        tra[spec > 0] = 0

        # multiply with original
        B = numpy.dot(tra.T, spec)

        # normalize
        mxB = numpy.max(B)
        mnB = numpy.min(B)
        if mxB != mnB:
            B = (B - mnB)/float(mxB-mnB)

        # circular shift so the strongest bin comes first
        if align:
            ref = numpy.argmax(numpy.sum(spec, axis=0))
            B = numpy.roll(B, -ref, axis=0)
            B = numpy.roll(B, -ref, axis=1)
        return B

    def bihist_from_precomp_chroma(self, align=False):
        """Compute ``self.bihist`` from the already stored ``self.chroma``.

        ``self.chroma`` and ``self.chromasr`` must be set beforehand.  One
        flattened bihistogram is produced per sliding window
        (``self.win2sec`` long, 0.5 sec hop); `align` is forwarded to
        ``bihistogram``.
        """
        self.bihist = self._framewise_bihist(self.chroma, align=align)

    def get_pitchbihist(self, filename='test.wav'):
        """Return the framewise bihistogram computed from an audio file."""
        self.bihist_from_chroma(filename=filename)
        return self.bihist

    def get_pitchbihist_from_chroma(self, chroma=[], chromasr=[]):
        """Return the framewise, unaligned bihistogram of a given chroma.

        Parameters
        ----------
        chroma : numpy.ndarray
            2-D array, pitch bins as rows and frames as columns.  The
            default is only a placeholder; a real array is required.
        chromasr : number
            Frame rate of `chroma` in frames per second.  The default is
            only a placeholder; a real value is required.
        """
        self.chroma = chroma
        self.chromasr = chromasr
        self.bihist_from_precomp_chroma(align=False)
        return self.bihist


if __name__ == '__main__':
    # Script entry point: build an extractor with its default settings and
    # run the audio-based bihistogram on the default input file.
    PitchBihist().get_pitchbihist()