mi@0: # Copyright 2014 Jason Heeris, jason.heeris@gmail.com mi@0: # mi@0: # This file is part of the gammatone toolkit, and is licensed under the 3-clause mi@0: # BSD license: https://github.com/detly/gammatone/blob/master/COPYING mi@0: from __future__ import division mi@0: import numpy as np mi@0: mi@0: from .filters import make_erb_filters, centre_freqs, erb_filterbank mi@0: mi@0: """ mi@0: This module contains functions for rendering "spectrograms" which use gammatone mi@0: filterbanks instead of Fourier transforms. mi@0: """ mi@0: ''' mi@0: Modified from the original toolbox. Jan 2015. --Mi mi@0: mi@0: ''' mi@0: mi@0: def round_half_away_from_zero(num): mi@0: """ Implement the round-half-away-from-zero rule, where fractional parts of mi@0: 0.5 result in rounding up to the nearest positive integer for positive mi@0: numbers, and down to the nearest negative number for negative integers. mi@0: """ mi@0: return np.sign(num) * np.floor(np.abs(num) + 0.5) mi@0: mi@0: mi@0: def gtgram_strides(fs, gammatoneLen, step_rate, filterbank_cols): mi@0: """ mi@0: Calculates the window size for a gammatonegram. mi@0: mi@0: @return a tuple of (window_size, hop_samples, output_columns) mi@0: """ mi@0: step_samples = int(gammatoneLen * step_rate) mi@0: columns = (1 + int(np.floor((filterbank_cols - gammatoneLen)/ step_samples))) mi@0: mi@0: return (step_samples, columns) mi@0: mi@0: mi@0: def gtgram_xe(wave, fs, f_max, channels, f_min): mi@0: """ Calculate the intermediate ERB filterbank processed matrix """ mi@0: cfs = centre_freqs(f_max, channels, f_min) mi@0: fcoefs = np.flipud(make_erb_filters(fs, cfs)) mi@0: xf = erb_filterbank(wave, fcoefs) mi@0: return xf mi@0: # xe = np.power(xf, 2) mi@0: # return xe mi@0: mi@0: mi@0: def gtgram(wave, fs, gammatoneLen, step_rate, channels, f_max, f_min): mi@0: """ mi@0: Calculate a spectrogram-like time frequency magnitude array based on mi@0: gammatone subband filters. The waveform ``wave`` (at sample rate ``fs``) is mi@0: passed through an multi-channel gammatone auditory model filterbank, with mi@0: lowest frequency ``f_min`` and highest frequency ``f_max``. The outputs of mi@0: each band then have their energy integrated over windows of ``window_time`` mi@0: seconds, advancing by ``hop_time`` secs for successive columns. These mi@0: magnitudes are returned as a nonnegative real matrix with ``channels`` rows. mi@0: mi@0: | 2009-02-23 Dan Ellis dpwe@ee.columbia.edu mi@0: | mi@0: | (c) 2013 Jason Heeris (Python implementation) mi@0: """ mi@0: xe = gtgram_xe(wave, fs, f_max, channels, f_min) mi@0: # print 'xe', xe.shape mi@0: step_samples, ncols = gtgram_strides(fs, gammatoneLen, step_rate, xe.shape[1]) mi@0: # print gammatoneLen, step_samples, channels, ncols mi@0: y = np.zeros((channels, ncols)) mi@0: mi@0: for cnum in range(ncols): mi@0: segment = xe[:, cnum * step_samples + np.arange(gammatoneLen)] mi@0: segment = np.power(segment, 2) mi@0: y[:, cnum] = np.sqrt(segment.mean(1)) mi@0: mi@0: return y.T