annotate utils/gtgram.py @ 19:890cfe424f4a tip

added annotations
author mitian
date Fri, 11 Dec 2015 09:47:40 +0000
parents 26838b1f560f
children
rev   line source
mi@0 1 # Copyright 2014 Jason Heeris, jason.heeris@gmail.com
mi@0 2 #
mi@0 3 # This file is part of the gammatone toolkit, and is licensed under the 3-clause
mi@0 4 # BSD license: https://github.com/detly/gammatone/blob/master/COPYING
mi@0 5 from __future__ import division
mi@0 6 import numpy as np
mi@0 7
mi@0 8 from .filters import make_erb_filters, centre_freqs, erb_filterbank
mi@0 9
mi@0 10 """
mi@0 11 This module contains functions for rendering "spectrograms" which use gammatone
mi@0 12 filterbanks instead of Fourier transforms.
mi@0 13 """
mi@0 14 '''
mi@0 15 Modified from the original toolbox. Jan 2015. --Mi
mi@0 16
mi@0 17 '''
mi@0 18
def round_half_away_from_zero(num):
    """
    Round to the nearest whole number, resolving exact halves away from zero.

    The magnitude of ``num`` is shifted up by 0.5 and floored, then the
    original sign is re-applied, so 2.5 becomes 3.0 and -2.5 becomes -3.0.
    Works element-wise on numpy arrays as well as on scalars.

    @return the rounded value(s), as produced by the numpy operations used
    """
    direction = np.sign(num)
    # Rounding the magnitude (not the signed value) is what makes the
    # tie-break symmetric around zero.
    rounded_magnitude = np.floor(np.abs(num) + 0.5)
    return direction * rounded_magnitude
mi@0 25
mi@0 26
def gtgram_strides(fs, gammatoneLen, step_rate, filterbank_cols):
    """
    Calculate the hop size and the number of output columns for a
    gammatonegram.

    @param fs sample rate; accepted for interface compatibility but not used
              in this calculation
    @param gammatoneLen analysis window length, in samples
    @param step_rate hop size expressed as a fraction of ``gammatoneLen``
    @param filterbank_cols number of columns (samples) in the filterbank
                           output being windowed

    @return a tuple of (step_samples, columns), where ``step_samples`` is the
            hop in samples and ``columns`` is the number of complete windows
            that fit in ``filterbank_cols`` (0 if not even one window fits)
    @raise ValueError if the hop works out to less than one sample
    """
    step_samples = int(gammatoneLen * step_rate)
    if step_samples < 1:
        # A zero hop would divide by zero below; a negative one is meaningless.
        raise ValueError(
            "int(gammatoneLen * step_rate) must be at least 1 sample, got %d"
            % step_samples
        )

    # Floor division rounds towards negative infinity, matching np.floor on
    # the true quotient.
    columns = 1 + int((filterbank_cols - gammatoneLen) // step_samples)

    # When the input is shorter than one window the formula goes negative;
    # report "no columns" instead of a nonsensical negative count.
    return (step_samples, max(columns, 0))
mi@0 37
mi@0 38
def gtgram_xe(wave, fs, f_max, channels, f_min):
    """
    Run ``wave`` through the ERB filterbank and return the filtered signal.

    Centre frequencies come from ``centre_freqs(f_max, channels, f_min)``;
    the coefficients built from them by ``make_erb_filters`` are reversed
    along their first axis before being handed to ``erb_filterbank``.

    The raw filterbank output is returned as-is: it is NOT squared here, so
    any energy/magnitude computation is left to the caller.
    """
    centre_frequencies = centre_freqs(f_max, channels, f_min)
    coefficients = make_erb_filters(fs, centre_frequencies)
    # Reverse the row order of the coefficient matrix before filtering.
    return erb_filterbank(wave, np.flipud(coefficients))
mi@0 47
mi@0 48
def gtgram(wave, fs, gammatoneLen, step_rate, channels, f_max, f_min):
    """
    Calculate a spectrogram-like time-frequency magnitude array based on
    gammatone subband filters. The waveform ``wave`` (at sample rate ``fs``)
    is passed through a multi-channel gammatone filterbank (see
    ``gtgram_xe``) with lowest frequency ``f_min`` and highest frequency
    ``f_max``. The output of each band is then summarised by its
    root-mean-square value over windows of ``gammatoneLen`` samples, with
    successive windows advancing by the hop that ``gtgram_strides`` derives
    from ``gammatoneLen`` and ``step_rate``.

    @param wave input waveform
    @param fs sample rate of ``wave``
    @param gammatoneLen window length, in samples
    @param step_rate hop size as a fraction of ``gammatoneLen``
    @param channels number of filterbank channels
    @param f_max highest filterbank frequency
    @param f_min lowest filterbank frequency

    @return a nonnegative real array with one row per window and
            ``channels`` columns (i.e. shape ``(windows, channels)``); it has
            zero rows when the filtered signal is shorter than one window

    | 2009-02-23 Dan Ellis dpwe@ee.columbia.edu
    |
    | (c) 2013 Jason Heeris (Python implementation)
    """
    filtered = gtgram_xe(wave, fs, f_max, channels, f_min)
    step_samples, ncols = gtgram_strides(
        fs, gammatoneLen, step_rate, filtered.shape[1]
    )
    # A signal shorter than one window can yield a negative column count;
    # clamp it so np.zeros gets a valid shape and an empty result is returned.
    ncols = max(ncols, 0)
    y = np.zeros((channels, ncols))

    for cnum in range(ncols):
        start = cnum * step_samples
        # Contiguous slice selects the same samples as indexing with
        # start + np.arange(gammatoneLen), without building an index array.
        window = filtered[:, start:start + gammatoneLen]
        # RMS over the window for every channel at once.
        y[:, cnum] = np.sqrt(np.square(window).mean(1))

    # Transpose so that time runs along the first axis.
    return y.T