segmentation: utils/gtgram.py annotate

annotate utils/gtgram.py @ 19:890cfe424f4a tip

added annotations

author	mitian
date	Fri, 11 Dec 2015 09:47:40 +0000
parents	26838b1f560f
children

rev	line source
mi@0	1 # Copyright 2014 Jason Heeris, jason.heeris@gmail.com
mi@0	2 #
mi@0	3 # This file is part of the gammatone toolkit, and is licensed under the 3-clause
mi@0	4 # BSD license: https://github.com/detly/gammatone/blob/master/COPYING
mi@0	5 from __future__ import division
mi@0	6 import numpy as np
mi@0	7
mi@0	8 from .filters import make_erb_filters, centre_freqs, erb_filterbank
mi@0	9
mi@0	10 """
mi@0	11 This module contains functions for rendering "spectrograms" which use gammatone
mi@0	12 filterbanks instead of Fourier transforms.
mi@0	13 """
mi@0	14 '''
mi@0	15 Modified from the original toolbox. Jan 2015. --Mi
mi@0	16
mi@0	17 '''
mi@0	18
def round_half_away_from_zero(num):
    """ Round to the nearest whole value, resolving exact .5 ties away from
    zero: ties on positive inputs go up, ties on negative inputs go down.
    Works on scalars and on NumPy arrays (element-wise); the result keeps the
    floating-point type produced by ``np.floor``.
    """
    # Round the magnitude half-up, then restore the sign of the input.
    rounded_magnitude = np.floor(np.abs(num) + 0.5)
    return np.sign(num) * rounded_magnitude
mi@0	25
mi@0	26
def gtgram_strides(fs, gammatoneLen, step_rate, filterbank_cols):
    """
    Calculates the hop size and the number of output columns for a
    gammatonegram.

    @param fs sample rate; accepted for interface compatibility, not used in
        the calculation
    @param gammatoneLen analysis window length, in samples
    @param step_rate hop size expressed as a fraction of ``gammatoneLen``
    @param filterbank_cols number of samples (columns) in the filterbank output

    @return a tuple of (step_samples, columns); ``columns`` is 0 when the
        filterbank output is shorter than one full window
    @raise ValueError if the hop size truncates to less than one sample
    """
    step_samples = int(gammatoneLen * step_rate)
    if step_samples < 1:
        # A zero hop would divide by zero below; a negative hop is meaningless.
        raise ValueError(
            "gammatoneLen * step_rate must be at least one sample, got %r"
            % (gammatoneLen * step_rate,))

    # Number of complete windows that fit when advancing by step_samples.
    # Relies on true division (the module imports it from __future__).
    columns = 1 + int(np.floor((filterbank_cols - gammatoneLen) / step_samples))

    # Inputs shorter than a window would otherwise give a negative count,
    # which cannot be used as an array dimension by the caller.
    return (step_samples, max(columns, 0))
mi@0	37
mi@0	38
def gtgram_xe(wave, fs, f_max, channels, f_min):
    """ Calculate the intermediate ERB filterbank processed matrix.

    Builds centre frequencies from (f_max, channels, f_min), makes the ERB
    filter coefficients for sample rate ``fs``, flips them top-to-bottom and
    runs ``wave`` through the resulting filterbank. The filterbank output is
    returned as-is (it is not squared here).
    """
    centre_frequencies = centre_freqs(f_max, channels, f_min)
    coefficients = make_erb_filters(fs, centre_frequencies)
    return erb_filterbank(wave, np.flipud(coefficients))
mi@0	47
mi@0	48
def gtgram(wave, fs, gammatoneLen, step_rate, channels, f_max, f_min):
    """
    Calculate a spectrogram-like time frequency magnitude array based on
    gammatone subband filters. The waveform ``wave`` (at sample rate ``fs``) is
    passed through a multi-channel gammatone auditory model filterbank
    configured with ``channels``, ``f_min`` and ``f_max``. The output of each
    band is then summarised by its root-mean-square value over windows of
    ``gammatoneLen`` samples, advancing by ``int(gammatoneLen * step_rate)``
    samples for successive frames.

    @param wave input waveform
    @param fs sample rate of ``wave``
    @param gammatoneLen window length, in samples
    @param step_rate hop size as a fraction of ``gammatoneLen``
    @param channels number of filterbank channels
    @param f_max upper frequency passed to the filterbank set-up
    @param f_min lower frequency passed to the filterbank set-up

    @return a nonnegative real matrix of shape (frames, channels), i.e. one
        row per analysis window; it has zero rows when the filterbank output
        is shorter than one window

    | 2009-02-23 Dan Ellis dpwe@ee.columbia.edu
    |
    | (c) 2013 Jason Heeris (Python implementation)
    """
    xe = gtgram_xe(wave, fs, f_max, channels, f_min)
    step_samples, ncols = gtgram_strides(fs, gammatoneLen, step_rate, xe.shape[1])
    # A too-short input can yield a negative frame count, which np.zeros
    # rejects; treat it as "no complete frames".
    ncols = max(ncols, 0)
    y = np.zeros((channels, ncols))

    for cnum in range(ncols):
        start = cnum * step_samples
        # Contiguous window: a basic slice selects the same samples as
        # indexing with start + np.arange(gammatoneLen), without the copy.
        segment = xe[:, start:start + gammatoneLen]
        # Root-mean-square of each channel over the window.
        y[:, cnum] = np.sqrt(np.power(segment, 2).mean(1))

    # Transposed so that time runs along the first axis.
    return y.T

Mercurial > hg > segmentation

annotate utils/gtgram.py @ 19:890cfe424f4a tip