Repository: detly/gammatone Branch: master Commit: 0626328ef7c3 Files: 52 Total size: 47.3 MB Directory structure: gitextract_wlp1slnq/ ├── COPYING ├── README.md ├── auditory_toolkit/ │ ├── COPYING │ ├── ERBFilterBank.m │ ├── ERBSpace.m │ ├── MakeERBFilters.m │ ├── README │ ├── demo_gammatone.m │ ├── fft2gammatonemx.m │ ├── gammatone_demo.m │ ├── gammatonegram.m │ └── specgram.m ├── doc/ │ ├── Makefile │ ├── conf.py │ ├── details.rst │ ├── fftweight.rst │ ├── filters.rst │ ├── gtgram.rst │ ├── index.rst │ ├── make.bat │ └── plot.rst ├── gammatone/ │ ├── __init__.py │ ├── __main__.py │ ├── fftweight.py │ ├── filters.py │ ├── gtgram.py │ └── plot.py ├── setup.py ├── test_generation/ │ ├── README │ ├── test_ERBFilterBank.m │ ├── test_ERBSpace.m │ ├── test_MakeERBFilters.m │ ├── test_fft2gammatonemx.m │ ├── test_fft_gammatonegram.m │ ├── test_gammatonegram.m │ └── test_specgram.m └── tests/ ├── __init__.py ├── data/ │ ├── test_erb_filter_data.mat │ ├── test_erbspace_data.mat │ ├── test_fft2gtmx_data.mat │ ├── test_fft_gammatonegram_data.mat │ ├── test_filterbank_data.mat │ ├── test_gammatonegram_data.mat │ └── test_specgram_data.mat ├── test_cfs.py ├── test_erb_space.py ├── test_fft_gtgram.py ├── test_fft_weights.py ├── test_filterbank.py ├── test_gammatone_filters.py ├── test_gammatonegram.py └── test_specgram.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: COPYING ================================================ Copyright (c) 1998, Malcolm Slaney Copyright (c) 2009, Dan Ellis Copyright (c) 2014, Jason Heeris All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: README.md ================================================ Gammatone Filterbank Toolkit ============================ *Utilities for analysing sound using perceptual models of human hearing.* Jason Heeris, 2013 Summary ------- This is a port of Malcolm Slaney's and Dan Ellis' gammatone filterbank MATLAB code, detailed below, to Python 2 and 3 using Numpy and Scipy. It analyses signals by running them through banks of gammatone filters, similar to Fourier-based spectrogram analysis. ![Gammatone-based spectrogram of Für Elise](doc/FurElise.png) Installation ------------ You can install directly from this git repository using: ```text pip install git+https://github.com/detly/gammatone.git ``` ...or you can clone the git repository however you prefer, and do: ```text pip install . 
``` ...or: ``` python setup.py install ``` ...from the cloned tree. ### Dependencies - numpy - scipy - nose - mock - matplotlib Using the Code -------------- See the [API documentation](http://detly.github.io/gammatone/). For a demonstration, find a `.wav` file (for example, [Für Elise](http://heeris.id.au/samples/FurElise.wav)) and run: ```text python -m gammatone FurElise.wav -d 10 ``` ...to see a gammatone-gram of the first ten seconds of the track. If you've installed via `pip` or `setup.py install`, you should also be able to just run: ```text gammatone FurElise.wav -d 10 ``` Basis ----- This project is based on research into how humans perceive audio, originally published by Malcolm Slaney: [Malcolm Slaney (1998) "Auditory Toolbox Version 2", Technical Report #1998-010, Interval Research Corporation, 1998.]( http://cobweb.ecn.purdue.edu/~malcolm/interval/1998-010/ ) Slaney's report describes a way of modelling how the human ear perceives, emphasises and separates different frequencies of sound. A series of gammatone filters are constructed whose width increases with increasing centre frequency, and this bank of filters is applied to a time-domain signal. The result of this is a spectrum that should represent the human experience of sound better than, say, a Fourier-domain spectrum would. A gammatone filter has an impulse response that is a sine wave multiplied by a gamma distribution function. It is a common approach to modelling the auditory system. The gammatone filterbank approach can be considered analogous (but not equivalent) to a discrete Fourier transform where the frequency axis is logarithmic. For example, a series of notes spaced an octave apart would appear to be roughly linearly spaced; or a sound that was distributed across the same linear frequency range would appear to have more spread at lower frequencies. 
The real goal of this toolkit is to allow easy computation of the gammatone equivalent of a spectrogram — a time-varying spectrum of energy over audible frequencies based on a gammatone filterbank. Slaney demonstrated his research with an initial implementation in MATLAB. This implementation was later extended by Dan Ellis, who found a way to approximate a "gammatone-gram" by using the fast Fourier transform. Ellis' code calculates a matrix of weights that can be applied to the output of a FFT so that a Fourier-based spectrogram can easily be transformed into such an approximation. Ellis' code and documentation is here: [Gammatone-like spectrograms]( http://labrosa.ee.columbia.edu/matlab/gammatonegram/ ) Interest -------- I became interested in this because of my background in science communication and my general interest in the teaching of signal processing. I find that the spectrogram approach to visualising signals is adequate for illustrating abstract systems or the mathematical properties of transforms, but bears little correspondence to a person's own experience of sound. If someone wants to see what their favourite piece of music "looks like," a normal Fourier transform based spectrogram is actually quite a poor way to visualise it. Features of the audio seem to be oddly spaced or unnaturally emphasised or de-emphasised depending on where they are in the frequency domain. The gammatone filterbank approach seems to be closer to what someone might intuitively expect a visualisation of sound to look like, and can help develop an intuition about alternative representations of signals. Verifying the port ------------------ Since this is a port of existing MATLAB code, I've written tests to verify the Python implementation against the original code. These tests aren't unit tests, but they do generally test single functions. Running the tests has the same workflow: 1. Run the scripts in the `test_generation` directory. 
This will create a `.mat` file containing test data in `tests/data`. 2. Run `nosetests3` in the top-level directory. This will find and run all the tests in the `tests` directory. Although I'm usually loath to check in generated files to version control, I'm willing to make an exception for the `.mat` files containing the test data. My reasoning is that they represent the decoupling of my code from the MATLAB code, and if the two projects were separated, they would be considered a part of the Python code, not the original MATLAB code. ================================================ FILE: auditory_toolkit/COPYING ================================================ Copyright (c) 1998, Malcolm Slaney Copyright (c) 2009, Dan Ellis All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: auditory_toolkit/ERBFilterBank.m ================================================ function output = ERBFilterBank(x, fcoefs) % function output = ERBFilterBank(x, fcoefs) % Process an input waveform with a gammatone filter bank. This function % takes a single sound vector, and returns an array of filter outputs, one % channel per row. % % The fcoefs parameter, which completely specifies the Gammatone filterbank, % should be designed with the MakeERBFilters function. If it is omitted, % the filter coefficients are computed for you assuming a 22050Hz sampling % rate and 64 filters regularly spaced on an ERB scale from fs/2 down to 100Hz. % % Malcolm Slaney @ Interval, June 11, 1998. % (c) 1998 Interval Research Corporation % Thanks to Alain de Cheveigne' for his suggestions and improvements. if nargin < 1 error('Syntax: output_array = ERBFilterBank(input_vector[, fcoefs]);'); end if nargin < 2 fcoefs = MakeERBFilters(22050,64,100); end if size(fcoefs,2) ~= 10 error('fcoefs parameter passed to ERBFilterBank is the wrong size.'); end if size(x,2) < size(x,1) x = x'; end A0 = fcoefs(:,1); A11 = fcoefs(:,2); A12 = fcoefs(:,3); A13 = fcoefs(:,4); A14 = fcoefs(:,5); A2 = fcoefs(:,6); B0 = fcoefs(:,7); B1 = fcoefs(:,8); B2 = fcoefs(:,9); gain= fcoefs(:,10); output = zeros(size(gain,1), length(x)); for chan = 1: size(gain,1) y1=filter([A0(chan)/gain(chan) A11(chan)/gain(chan) ... A2(chan)/gain(chan)], ... 
[B0(chan) B1(chan) B2(chan)], x); y2=filter([A0(chan) A12(chan) A2(chan)], ... [B0(chan) B1(chan) B2(chan)], y1); y3=filter([A0(chan) A13(chan) A2(chan)], ... [B0(chan) B1(chan) B2(chan)], y2); y4=filter([A0(chan) A14(chan) A2(chan)], ... [B0(chan) B1(chan) B2(chan)], y3); output(chan, :) = y4; end if 0 semilogx((0:(length(x)-1))*(fs/length(x)),20*log10(abs(fft(output)))); end ================================================ FILE: auditory_toolkit/ERBSpace.m ================================================ function cfArray = ERBSpace(lowFreq, highFreq, N) % function cfArray = ERBSpace(lowFreq, highFreq, N) % This function computes an array of N frequencies uniformly spaced between % highFreq and lowFreq on an ERB scale. N is set to 100 if not specified; % lowFreq defaults to 100 and highFreq to 44100/4. % % The result is a column vector in descending order. Its last element is % lowFreq; its first element lies one step below highFreq, so highFreq % itself is not included. % % See also linspace, logspace, MakeERBCoeffs, MakeERBFilters. % % For a definition of ERB, see Moore, B. C. J., and Glasberg, B. R. (1983). % "Suggested formulae for calculating auditory-filter bandwidths and % excitation patterns," J. Acoust. Soc. Am. 74, 750-753. if nargin < 1 lowFreq = 100; end if nargin < 2 highFreq = 44100/4; end if nargin < 3 N = 100; end % Change the following three parameters if you wish to use a different % ERB scale. Must change in MakeERBCoeffs and MakeERBFilters too. % NOTE: order is not referenced by the expression below. EarQ = 9.26449; % Glasberg and Moore Parameters minBW = 24.7; order = 1; % All of the following expressions are derived in Apple TR #35, "An % Efficient Implementation of the Patterson-Holdsworth Cochlear % Filter Bank." See pages 33-34. cfArray = -(EarQ*minBW) + exp((1:N)'*(-log(highFreq + EarQ*minBW) + ... log(lowFreq + EarQ*minBW))/N) * (highFreq + EarQ*minBW); ================================================ FILE: auditory_toolkit/MakeERBFilters.m ================================================ function [fcoefs,cf]=MakeERBFilters(fs,numChannels,lowFreq) % function [fcoefs,cf]=MakeERBFilters(fs,numChannels,lowFreq) % This function computes the filter coefficients for a bank of % Gammatone filters. 
These filters were defined by Patterson and % Holdsworth for simulating the cochlea. % % The result is returned as an array of filter coefficients. Each row % of the filter arrays contains the coefficients for four second order % filters. The transfer functions of these four filters share the same % denominator (poles) but have different numerators (zeros). All of these % coefficients are assembled into one vector that the ERBFilterBank % can take apart to implement the filter. % % The filter bank contains "numChannels" channels that extend from % half the sampling rate (fs) to "lowFreq". Alternatively, if the numChannels % input argument is a vector, then the values of this vector are taken to % be the center frequency of each desired filter. (The lowFreq argument is % ignored in this case.) % Note this implementation fixes a problem in the original code by % computing four separate second order filters. This avoids a big % problem with round-off errors in cases of very small cfs (100Hz) and % large sample rates (44kHz). The problem is caused by round-off error % when a number of poles are combined, all very close to the unit % circle. Small errors in the eighth order coefficient are multiplied % when the eighth root is taken to give the pole location. These small % errors lead to poles outside the unit circle and instability. Thanks % to Julius Smith for leading me to the proper explanation. % Execute the following code to evaluate the frequency % response of a 10 channel filterbank. % fcoefs = MakeERBFilters(16000,10,100); % y = ERBFilterBank([1 zeros(1,511)], fcoefs); % resp = 20*log10(abs(fft(y'))); % freqScale = (0:511)/512*16000; % semilogx(freqScale(1:255),resp(1:255,:)); % axis([100 16000 -60 0]) % xlabel('Frequency (Hz)'); ylabel('Filter Response (dB)'); % Rewritten by Malcolm Slaney@Interval. June 11, 1998. 
% (c) 1998 Interval Research Corporation T = 1/fs; if length(numChannels) == 1 cf = ERBSpace(lowFreq, fs/2, numChannels); else cf = numChannels(1:end); if size(cf,2) > size(cf,1) cf = cf'; end end % Change the followFreqing three parameters if you wish to use a different % ERB scale. Must change in ERBSpace too. EarQ = 9.26449; % Glasberg and Moore Parameters minBW = 24.7; order = 1; ERB = ((cf/EarQ).^order + minBW^order).^(1/order); B=1.019*2*pi*ERB; A0 = T; A2 = 0; B0 = 1; B1 = -2*cos(2*cf*pi*T)./exp(B*T); B2 = exp(-2*B*T); A11 = -(2*T*cos(2*cf*pi*T)./exp(B*T) + 2*sqrt(3+2^1.5)*T*sin(2*cf*pi*T)./ ... exp(B*T))/2; A12 = -(2*T*cos(2*cf*pi*T)./exp(B*T) - 2*sqrt(3+2^1.5)*T*sin(2*cf*pi*T)./ ... exp(B*T))/2; A13 = -(2*T*cos(2*cf*pi*T)./exp(B*T) + 2*sqrt(3-2^1.5)*T*sin(2*cf*pi*T)./ ... exp(B*T))/2; A14 = -(2*T*cos(2*cf*pi*T)./exp(B*T) - 2*sqrt(3-2^1.5)*T*sin(2*cf*pi*T)./ ... exp(B*T))/2; gain = abs((-2*exp(4*i*cf*pi*T)*T + ... 2*exp(-(B*T) + 2*i*cf*pi*T).*T.* ... (cos(2*cf*pi*T) - sqrt(3 - 2^(3/2))* ... sin(2*cf*pi*T))) .* ... (-2*exp(4*i*cf*pi*T)*T + ... 2*exp(-(B*T) + 2*i*cf*pi*T).*T.* ... (cos(2*cf*pi*T) + sqrt(3 - 2^(3/2)) * ... sin(2*cf*pi*T))).* ... (-2*exp(4*i*cf*pi*T)*T + ... 2*exp(-(B*T) + 2*i*cf*pi*T).*T.* ... (cos(2*cf*pi*T) - ... sqrt(3 + 2^(3/2))*sin(2*cf*pi*T))) .* ... (-2*exp(4*i*cf*pi*T)*T + 2*exp(-(B*T) + 2*i*cf*pi*T).*T.* ... (cos(2*cf*pi*T) + sqrt(3 + 2^(3/2))*sin(2*cf*pi*T))) ./ ... (-2 ./ exp(2*B*T) - 2*exp(4*i*cf*pi*T) + ... 2*(1 + exp(4*i*cf*pi*T))./exp(B*T)).^4); allfilts = ones(length(cf),1); fcoefs = [A0*allfilts A11 A12 A13 A14 A2*allfilts B0*allfilts B1 B2 gain]; if (0) % Test Code A0 = fcoefs(:,1); A11 = fcoefs(:,2); A12 = fcoefs(:,3); A13 = fcoefs(:,4); A14 = fcoefs(:,5); A2 = fcoefs(:,6); B0 = fcoefs(:,7); B1 = fcoefs(:,8); B2 = fcoefs(:,9); gain= fcoefs(:,10); chan=1; x = [1 zeros(1, 511)]; y1=filter([A0(chan)/gain(chan) A11(chan)/gain(chan) ... 
A2(chan)/gain(chan)],[B0(chan) B1(chan) B2(chan)], x); y2=filter([A0(chan) A12(chan) A2(chan)], ... [B0(chan) B1(chan) B2(chan)], y1); y3=filter([A0(chan) A13(chan) A2(chan)], ... [B0(chan) B1(chan) B2(chan)], y2); y4=filter([A0(chan) A14(chan) A2(chan)], ... [B0(chan) B1(chan) B2(chan)], y3); semilogx((0:(length(x)-1))*(fs/length(x)),20*log10(abs(fft(y4)))); end ================================================ FILE: auditory_toolkit/README ================================================ These files are the original auditory toolkit/gammatone filterbank code created by Malcolm Slaney and Dan Ellis, published at: http://labrosa.ee.columbia.edu/matlab/gammatonegram/ https://engineering.purdue.edu/~malcolm/interval/1998-010/ Any non-code assets (ie. the sample WAV file and associated graphs) have been removed. ================================================ FILE: auditory_toolkit/demo_gammatone.m ================================================ %% Gammatone-like spectrograms % Gammatone filters are a popular linear approximation to the % filtering performed by the ear. This routine provides a simple % wrapper for generating time-frequency surfaces based on a % gammatone analysis, which can be used as a replacement for a % conventional spectrogram. It also provides a fast approximation % to this surface based on weighting the output of a conventional % FFT. %% Introduction % It is very natural to visualize sound as a time-varying % distribution of energy in frequency - not least because this is % one way of describing the information our brains get from our % ears via the auditory nerve. The spectrogram is the traditional % time-frequency visualization, but it actually has some important % differences from how sound is analyzed by the ear, most % significantly that the ear's frequency subbands get wider for % higher frequencies, whereas the spectrogram has a constant % bandwidth across all frequency channels. 
% % There have been many signal-processing approximations proposed % for the frequency analysis performed by the ear; one of the most % popular is the Gammatone filterbank originally proposed by % Roy Patterson and colleagues in 1992. Gammatone filters were % conceived as a simple fit to experimental observations of % the mammalian cochlea, and have a repeated pole structure leading % to an impulse response that is the product of a Gamma envelope % g(t) = t^n e^{-t} and a sinusoid (tone). % % One reason for the popularity of this approach is the % availability of an implementation by Malcolm Slaney, as % described in: % % Malcolm Slaney (1998) "Auditory Toolbox Version 2", % Technical Report #1998-010, Interval Research Corporation, 1998. % http://cobweb.ecn.purdue.edu/~malcolm/interval/1998-010/ % % Malcolm's toolbox includes routines to design a Gammatone % filterbank and to process a signal by every filter in a bank, % but in order to convert this into a time-frequency visualization % it is necessary to sum up the energy within regular time bins. % While this is not complicated, the function here provides a % convenient wrapper to achieve this final step, for applications % that are content to work with time-frequency magnitude % distributions instead of going down to the waveform levels. In % this mode of operation, the routine uses Malcolm's MakeERBFilters % and ERBFilterBank routines. % % This is, however, quite a computationally expensive approach, so % we also provide an alternative algorithm that gives very similar % results. In this mode, the Gammatone-based spectrogram is % constructed by first calculating a conventional, fixed-bandwidth % spectrogram, then combining the fine frequency resolution of the % FFT-based spectra into the coarser, smoother Gammatone responses % via a weighting function. This calculates the time-frequency % distribution some 30-40x faster than the full approach. 
%% Routines % The code consists of a main routine, , % which takes a waveform and other parameters and returns a % spectrogram-like time-frequency matrix, and a helper function % , which constructs the % weighting matrix to convert FFT output spectra into gammatone % approximations. %% Example usage % First, we calculate a Gammatone-based spectrogram-like image of % a speech waveform using the fast approximation. Then we do the % same thing using the full filtering approach, for comparison. % Load a waveform, calculate its gammatone spectrogram, then display: [d,sr] = wavread('sa2.wav'); tic; [D,F] = gammatonegram(d,sr); toc %Elapsed time is 0.140742 seconds. subplot(211) imagesc(20*log10(D)); axis xy caxis([-90 -30]) colorbar % F returns the center frequencies of each band; % display whichever elements were shown by the autoscaling set(gca,'YTickLabel',round(F(get(gca,'YTick')))); ylabel('freq / Hz'); xlabel('time / 10 ms steps'); title('Gammatonegram - fast method') % Now repeat with flag to use actual subband filters. % Since it's the last argument, we have to include all the other % arguments. These are the default values for: summation window % (0.025 sec), hop between successive windows (0.010 sec), % number of gammatone channels (64), lowest frequency (50 Hz), % and highest frequency (sr/2). The last argument as zero % means not to use the FFT approach. tic; [D2,F2] = gammatonegram(d,sr,0.025,0.010,64,50,sr/2,0); toc %Elapsed time is 3.165083 seconds. subplot(212) imagesc(20*log10(D2)); axis xy caxis([-90 -30]) colorbar set(gca,'YTickLabel',round(F(get(gca,'YTick')))); ylabel('freq / Hz'); xlabel('time / 10 ms steps'); title('Gammatonegram - accurate method') % Actual gammatone filters appear somewhat narrower. The fast % version assumes coherence of addition of amplitude from % different channels, whereas the actual subband energies will % depend on how the energy in different frequencies combines. 
% Also notice the visible time smearing in the low frequency % channels that does not occur in the fast version. %% Validation % We can check the frequency responses of the filterbank % simulated with the fast method against the actual filters % from Malcolm's toolbox. They match very closely, but of % course this still doesn't mean the two approaches will give % identical results - because the fast method ignores the phase % of each frequency channel when summing up. % Check the frequency responses to see that they match: % Put an impulse through the Slaney ERB filters, then take the % frequency response of each impulse response. fcfs = flipud(MakeERBFilters(16000,64,50)); gtir = ERBFilterBank([1, zeros(1,1000)],fcfs); H = zeros(64,512); for i = 1:64; H(i,:) = abs(freqz(gtir(i,:),1,512)); end % The weighting matrix for the FFT is the frequency response % of each output filter gtm = fft2gammatonemx(1024,16000,64,1,50,8000,512); % Plot every 5th channel from both. Offset by 3 dB just so we can % see both fs = [0:511]/512*8000; figure plot(fs,20*log10(H(5:5:64,:))','b',fs, -3 + 20*log10(gtm(5:5:64,:))','r') axis([0 8000 -150 0]) grid % Line up pretty well, apart from wiggles below -100 dB % (from truncating the impulse response at 1000 samples?) %% Download % You can download all the code and data for these examples here: % . %% Referencing % If you use this work in a publication, I would be grateful % if you referenced this page as follows: % % D. P. W. Ellis (2009). "Gammatone-like spectrograms", web resource. % http://www.ee.columbia.edu/~dpwe/resources/matlab/gammatonegram/ %% Acknowledgment % This project was supported in part by the NSF under % grant IIS-0535168. Any opinions, findings and conclusions % or recommendations expressed in this material are those of the % authors and do not necessarily reflect the views of the Sponsors. 
% Last updated: $Date: 2009/07/07 14:14:11 $ % Dan Ellis ================================================ FILE: auditory_toolkit/fft2gammatonemx.m ================================================ function [wts,gain] = fft2gammatonemx(nfft, sr, nfilts, width, minfreq, maxfreq, maxlen) % [wts,gain] = fft2gammatonemx(nfft, sr, nfilts, width, minfreq, maxfreq, maxlen) % Generate a matrix of weights to combine FFT bins into % Gammatone bins. nfft defines the source FFT size at % sampling rate sr (default 16000). Optional nfilts specifies the number of % output bands required (default 64), and width is a % scale factor applied to the ERB bandwidth of each band (default 1). % minfreq, maxfreq specify range covered in Hz (100, sr/2). % Rows of wts run from the lowest centre frequency (minfreq) upwards. % While wts has nfft columns, the second half are all zero. % Hence, aud spectrum is % fft2gammatonemx(nfft,sr)*abs(fft(xincols,nfft)); % maxlen truncates the rows to this many bins (default nfft). % In the poles-and-zeros branch (the one taken, because justpoles is set % to 0), gain(i) is the value that row i of wts is divided by. % NOTE(review): the centre frequencies (cfreqs) are computed here but not % returned, while gammatonegram.m documents its F output -- taken from this % function's second output -- as centre frequencies; confirm which is intended. % % 2004-09-05 Dan Ellis dpwe@ee.columbia.edu based on rastamat/audspec.m % Last updated: $Date: 2009/02/22 02:29:25 $ if nargin < 2; sr = 16000; end if nargin < 3; nfilts = 64; end if nargin < 4; width = 1.0; end if nargin < 5; minfreq = 100; end if nargin < 6; maxfreq = sr/2; end if nargin < 7; maxlen = nfft; end wts = zeros(nfilts, nfft); % after Slaney's MakeERBFilters EarQ = 9.26449; minBW = 24.7; order = 1; cfreqs = -(EarQ*minBW) + exp((1:nfilts)'*(-log(maxfreq + EarQ*minBW) + ... 
log(minfreq + EarQ*minBW))/nfilts) * (maxfreq + EarQ*minBW); cfreqs = flipud(cfreqs); GTord = 4; ucirc = exp(j*2*pi*[0:(nfft/2)]/nfft); justpoles = 0; for i = 1:nfilts cf = cfreqs(i); ERB = width*((cf/EarQ).^order + minBW^order).^(1/order); B = 1.019*2*pi*ERB; r = exp(-B/sr); theta = 2*pi*cf/sr; pole = r*exp(j*theta); if justpoles == 1 % point on unit circle of maximum gain, from differentiating magnitude cosomegamax = (1+r*r)/(2*r)*cos(theta); if abs(cosomegamax) > 1 if theta < pi/2; omegamax = 0; else omegamax = pi; end else omegamax = acos(cosomegamax); end center = exp(j*omegamax); gain = abs((pole-center).*(pole'-center)).^GTord; wts(i,1:(nfft/2+1)) = gain * (abs((pole-ucirc).*(pole'- ... ucirc)).^-GTord); else % poles and zeros, following Malcolm's MakeERBFilters T = 1/sr; A11 = -(2*T*cos(2*cf*pi*T)./exp(B*T) + 2*sqrt(3+2^1.5)*T*sin(2* ... cf*pi*T)./exp(B*T))/2; A12 = -(2*T*cos(2*cf*pi*T)./exp(B*T) - 2*sqrt(3+2^1.5)*T*sin(2* ... cf*pi*T)./exp(B*T))/2; A13 = -(2*T*cos(2*cf*pi*T)./exp(B*T) + 2*sqrt(3-2^1.5)*T*sin(2* ... cf*pi*T)./exp(B*T))/2; A14 = -(2*T*cos(2*cf*pi*T)./exp(B*T) - 2*sqrt(3-2^1.5)*T*sin(2* ... cf*pi*T)./exp(B*T))/2; zros = -[A11 A12 A13 A14]/T; gain(i) = abs((-2*exp(4*j*cf*pi*T)*T + ... 2*exp(-(B*T) + 2*j*cf*pi*T).*T.* ... (cos(2*cf*pi*T) - sqrt(3 - 2^(3/2))* ... sin(2*cf*pi*T))) .* ... (-2*exp(4*j*cf*pi*T)*T + ... 2*exp(-(B*T) + 2*j*cf*pi*T).*T.* ... (cos(2*cf*pi*T) + sqrt(3 - 2^(3/2)) * ... sin(2*cf*pi*T))).* ... (-2*exp(4*j*cf*pi*T)*T + ... 2*exp(-(B*T) + 2*j*cf*pi*T).*T.* ... (cos(2*cf*pi*T) - ... sqrt(3 + 2^(3/2))*sin(2*cf*pi*T))) .* ... (-2*exp(4*j*cf*pi*T)*T + 2*exp(-(B*T) + 2*j*cf*pi*T).*T.* ... (cos(2*cf*pi*T) + sqrt(3 + 2^(3/2))*sin(2*cf*pi*T))) ./ ... (-2 ./ exp(2*B*T) - 2*exp(4*j*cf*pi*T) + ... 2*(1 + exp(4*j*cf*pi*T))./exp(B*T)).^4); wts(i,1:(nfft/2+1)) = ((T^4)/gain(i)) ... * abs(ucirc-zros(1)).*abs(ucirc-zros(2))... .*abs(ucirc-zros(3)).*abs(ucirc-zros(4))... 
.*(abs((pole-ucirc).*(pole'-ucirc)).^-GTord); end end wts = wts(:,1:maxlen); ================================================ FILE: auditory_toolkit/gammatone_demo.m ================================================ %% Gammatone-like spectrograms % Gammatone filters are a popular linear approximation to the % filtering performed by the ear. This routine provides a simple % wrapper for generating time-frequency surfaces based on a % gammatone analysis, which can be used as a replacement for a % conventional spectrogram. It also provides a fast approximation % to this surface based on weighting the output of a conventional % FFT. %% Introduction % It is very natural to visualize sound as a time-varying % distribution of energy in frequency - not least because this is % one way of describing the information our brains get from our % ears via the auditory nerve. The spectrogram is the traditional % time-frequency visualization, but it actually has some important % differences from how sound is analyzed by the ear, most % significantly that the ear's frequency subbands get wider for % higher frequencies, whereas the spectrogram has a constant % bandwidth across all frequency channels. % % There have been many signal-processing approximations proposed % for the frequency analysis performed by the ear; one of the most % popular is the Gammatone filterbank originally proposed by % Roy Patterson and colleagues in 1992. Gammatone filters were % conceived as a simple fit to experimental observations of % the mammalian cochlea, and have a repeated pole structure leading % to an impulse response that is the product of a Gamma envelope % g(t) = t^n e^{-t} and a sinusoid (tone). % % One reason for the popularity of this approach is the % availability of an implementation by Malcolm Slaney, as % described in: % % Malcolm Slaney (1998) "Auditory Toolbox Version 2", % Technical Report #1998-010, Interval Research Corporation, 1998. 
% http://cobweb.ecn.purdue.edu/~malcolm/interval/1998-010/ % % Malcolm's toolbox includes routines to design a Gammatone % filterbank and to process a signal by every filter in a bank, % but in order to convert this into a time-frequency visualization % it is necessary to sum up the energy within regular time bins. % While this is not complicated, the function here provides a % convenient wrapper to achieve this final step, for applications % that are content to work with time-frequency magnitude % distributions instead of going down to the waveform levels. In % this mode of operation, the routine uses Malcolm's MakeERBFilters % and ERBFilterBank routines. % % This is, however, quite a computationally expensive approach, so % we also provide an alternative algorithm that gives very similar % results. In this mode, the Gammatone-based spectrogram is % constructed by first calculating a conventional, fixed-bandwidth % spectrogram, then combining the fine frequency resolution of the % FFT-based spectra into the coarser, smoother Gammatone responses % via a weighting function. This calculates the time-frequency % distribution some 30-40x faster than the full approach. %% Routines % The code consists of a main routine, , % which takes a waveform and other parameters and returns a % spectrogram-like time-frequency matrix, and a helper function % , which constructs the % weighting matrix to convert FFT output spectra into gammatone % approximations. %% Example usage % First, we calculate a Gammatone-based spectrogram-like image of % a speech waveform using the fast approximation. Then we do the % same thing using the full filtering approach, for comparison. % Load a waveform, calculate its gammatone spectrogram, then display: [d,sr] = wavread('sa2.wav'); tic; D = gammatonegram(d,sr); toc %Elapsed time is 0.140742 seconds. 
subplot(211) imagesc(20*log10(D)); axis xy caxis([-90 -30]) colorbar title('Gammatonegram - fast method') % Now repeat with flag to use actual subband filters. % Since it's the last argument, we have to include all the other % arguments. These are the default values for: summation window % (0.025 sec), hop between successive windows (0.010 sec), % number of gammatone channels (64), lowest frequency (50 Hz), % and highest frequency (sr/2). The last argument as zero % means not to use the FFT approach. tic; D2 = gammatonegram(d,sr,0.025,0.010,64,50,sr/2,0); toc %Elapsed time is 3.165083 seconds. subplot(212) imagesc(20*log10(D2)); axis xy caxis([-90 -30]) colorbar title('Gammatonegram - accurate method') % Actual gammatone filters appear somewhat narrower. The fast % version assumes coherence of addition of amplitude from % different channels, whereas the actual subband energies will % depend on how the energy in different frequencies combines. % Also notice the visible time smearing in the low frequency % channels that does not occur in the fast version. %% Validation % We can check the frequency responses of the filterbank % simulated with the fast method against the actual filters % from Malcolm's toolbox. They match very closely, but of % course this still doesn't mean the two approaches will give % identical results - because the fast method ignores the phase % of each frequency channel when summing up. % Check the frequency responses to see that they match: % Put an impulse through the Slaney ERB filters, then take the % frequency response of each impulse response. fcfs = flipud(MakeERBFilters(16000,64,50)); gtir = ERBFilterBank([1, zeros(1,1000)],fcfs); H = zeros(64,512); for i = 1:64; H(i,:) = abs(freqz(gtir(i,:),1,512)); end % The weighting matrix for the FFT is the frequency response % of each output filter gtm = fft2gammatonemx(1024,16000,64,1,50,8000,512); % Plot every 5th channel from both. 
Offset by 3 dB just so we can % see both fs = [0:511]/512*8000; figure plot(fs,20*log10(H(5:5:64,:))','b',fs, -3 + 20*log10(gtm(5:5:64,:))','r') axis([0 8000 -150 0]) grid % Line up pretty well, apart from wiggles below -100 dB % (from truncating the impulse response at 1000 samples?) %% Download % You can download all the code and data for these examples here: % . %% Referencing % If you use this work in a publication, I would be grateful % if you referenced this page as follows: % % D. P. W. Ellis (2009). "Gammatone-like spectrograms", web resource, http://www.ee.columbia.edu/~dpwe/resources/matlab/gammatonegram/ . %% Acknowledgment % This project was supported in part by the NSF under % grant IIS-0535168. Any opinions, findings and conclusions % or recommendations expressed in this material are those of the % authors and do not necessarily reflect the views of the Sponsors. % Last updated: $Date: 2009/02/22 01:46:42 $ % Dan Ellis ================================================ FILE: auditory_toolkit/gammatonegram.m ================================================ function [Y,F] = gammatonegram(X,SR,TWIN,THOP,N,FMIN,FMAX,USEFFT,WIDTH) % [Y,F] = gammatonegram(X,SR,N,TWIN,THOP,FMIN,FMAX,USEFFT,WIDTH) % Calculate a spectrogram-like time frequency magnitude array % based on Gammatone subband filters. Waveform X (at sample % rate SR) is passed through an N (default 64) channel gammatone % auditory model filterbank, with lowest frequency FMIN (50) % and highest frequency FMAX (SR/2). The outputs of each band % then have their energy integrated over windows of TWIN secs % (0.025), advancing by THOP secs (0.010) for successive % columns. These magnitudes are returned as an N-row % nonnegative real matrix, Y. % If USEFFT is present and zero, revert to actual filtering and % summing energy within windows. % WIDTH (default 1.0) is how to scale bandwidth of filters % relative to ERB default (for fast method only). 
% F returns the center frequencies in Hz of each row of Y % (uniformly spaced on a Bark scale). % % 2009-02-18 DAn Ellis dpwe@ee.columbia.edu % Last updated: $Date: 2009/02/23 21:07:09 $ if nargin < 2; SR = 16000; end if nargin < 3; TWIN = 0.025; end if nargin < 4; THOP = 0.010; end if nargin < 5; N = 64; end if nargin < 6; FMIN = 50; end if nargin < 7; FMAX = SR/2; end if nargin < 8; USEFFT = 1; end if nargin < 9; WIDTH = 1.0; end if USEFFT == 0 % Use malcolm's function to filter into subbands %%%% IGNORES FMAX! ***** [fcoefs,F] = MakeERBFilters(SR, N, FMIN); fcoefs = flipud(fcoefs); XF = ERBFilterBank(X,fcoefs); nwin = round(TWIN*SR); % Always use rectangular window for now % if USEHANN == 1 window = hann(nwin)'; % else % window = ones(1,nwin); % end % window = window/sum(window); % XE = [zeros(N,round(nwin/2)),XF.^2,zeros(N,round(nwin/2))]; XE = [XF.^2]; hopsamps = round(THOP*SR); ncols = 1 + floor((size(XE,2)-nwin)/hopsamps); Y = zeros(N,ncols); % winmx = repmat(window,N,1); for i = 1:ncols % Y(:,i) = sqrt(sum(winmx.*XE(:,(i-1)*hopsamps + [1:nwin]),2)); Y(:,i) = sqrt(mean(XE(:,(i-1)*hopsamps + [1:nwin]),2)); end else % USEFFT version % How long a window to use relative to the integration window requested winext = 1; twinmod = winext * TWIN; % first spectrogram nfft = 2^(ceil(log(2*twinmod*SR)/log(2))); nhop = round(THOP*SR); nwin = round(twinmod*SR); [gtm,F] = fft2gammatonemx(nfft, SR, N, WIDTH, FMIN, FMAX, nfft/2+1); % perform FFT and weighting in amplitude domain Y = 1/nfft*gtm*abs(specgram(X,nfft,SR,nwin,nwin-nhop)); % or the power domain? 
doesn't match nearly as well %Y = 1/nfft*sqrt(gtm*abs(specgram(X,nfft,SR,nwin,nwin-nhop).^2)); end ================================================ FILE: auditory_toolkit/specgram.m ================================================ function y = specgram(x,n,sr,w,ov) % Y = myspecgram(X,NFFT,SR,W,OV) % Substitute for Matlab's specgram, calculates & displays spectrogram % $Header: /homes/dpwe/tmp/e6820/RCS/myspecgram.m,v 1.1 2002/08/04 19:20:27 dpwe Exp $ if (size(x,1) > size(x,2)) x = x'; end s = length(x); if nargin < 2 n = 256; end if nargin < 3 sr = 1; end if nargin < 4 w = n; end if nargin < 5 ov = w/2; end h = w - ov; halflen = w/2; halff = n/2; % midpoint of win acthalflen = min(halff, halflen); halfwin = 0.5 * ( 1 + cos( pi * (0:halflen)/halflen)); win = zeros(1, n); win((halff+1):(halff+acthalflen)) = halfwin(1:acthalflen); win((halff+1):-1:(halff-acthalflen+2)) = halfwin(1:acthalflen); c = 1; % pre-allocate output array ncols = 1+fix((s-n)/h); d = zeros((1+n/2), ncols); for b = 0:h:(s-n) u = win.*x((b+1):(b+n)); t = fft(u); d(:,c) = t([1:(1+n/2)]'); c = c+1; end; tt = [0:h:(s-n)]/sr; ff = [0:(n/2)]*sr/n; if nargout < 1 imagesc(tt,ff,20*log10(abs(d))); axis xy xlabel('Time / s'); ylabel('Frequency / Hz'); else y = d; end ================================================ FILE: doc/Makefile ================================================ # Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = BUILDDIR = _build # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . # the i18n builder cannot share the environment and doctrees with the others I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext help: @echo "Please use \`make ' where is one of" @echo " html to make standalone HTML files" @echo " dirhtml to make HTML files named index.html in directories" @echo " singlehtml to make a single large HTML file" @echo " pickle to make pickle files" @echo " json to make JSON files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " qthelp to make HTML files and a qthelp project" @echo " devhelp to make HTML files and a Devhelp project" @echo " epub to make an epub" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @echo " latexpdf to make LaTeX files and run them through pdflatex" @echo " text to make text files" @echo " man to make manual pages" @echo " texinfo to make Texinfo files" @echo " info to make Texinfo files and run them through makeinfo" @echo " gettext to make PO message catalogs" @echo " changes to make an overview of all changed/added/deprecated items" @echo " linkcheck to check all external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" clean: -rm -rf $(BUILDDIR)/* html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." singlehtml: $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml @echo @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." 
htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/gammatone.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/gammatone.qhc" devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." @echo "To view the help file:" @echo "# mkdir -p $$HOME/.local/share/devhelp/gammatone" @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/gammatone" @echo "# devhelp" epub: $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub @echo @echo "Build finished. The epub file is in $(BUILDDIR)/epub." latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @echo "Run \`make' in that directory to run these through (pdf)latex" \ "(use \`make latexpdf' here to do that automatically)." latexpdf: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through pdflatex..." $(MAKE) -C $(BUILDDIR)/latex all-pdf @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." text: $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text @echo @echo "Build finished. The text files are in $(BUILDDIR)/text." man: $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo @echo "Build finished. The manual pages are in $(BUILDDIR)/man." texinfo: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." @echo "Run \`make' in that directory to run these through makeinfo" \ "(use \`make info' here to do that automatically)." 
info: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo "Running Texinfo files through makeinfo..." make -C $(BUILDDIR)/texinfo info @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." gettext: $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale @echo @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." ================================================ FILE: doc/conf.py ================================================ # -*- coding: utf-8 -*- # # gammatone documentation build configuration file, created by # sphinx-quickstart on Sat Dec 8 23:21:49 2012. # # This file is execfile()d with the current directory set to its containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. import sys, os # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. #sys.path.insert(0, os.path.abspath('.')) # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. #needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. 
They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = ['sphinx.ext.autodoc'] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix of source filenames. source_suffix = '.rst' # The encoding of source files. #source_encoding = 'utf-8-sig' # The master toctree document. master_doc = 'index' # General information about the project. project = u'Gammatone Filterbank Toolkit' copyright = u'2014, Jason Heeris' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. version = '1.0' # The full version, including alpha/beta/rc tags. release = '1.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. #language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: #today = '' # Else, today_fmt is used as the format for a strftime call. #today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = ['_build'] # The reST default role (used for this markup: `text`) to use for all documents. #default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. #add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). #add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. #show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. 
#modindex_common_prefix = [] # -- Options for HTML output --------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. html_theme = 'haiku' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. #html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. #html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". html_title = u"%s %s" % (project, release) # A shorter title for the navigation bar. Default is the same as html_title. #html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. #html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. #html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. #html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. html_use_smartypants = True # Custom sidebar templates, maps document names to template names. html_sidebars = { '**' : [ 'localtoc.html', 'globaltoc.html', 'relations.html', 'searchbox.html' ], } # Additional templates that should be rendered to pages, maps page names to # template names. #html_additional_pages = {} # If false, no module index is generated. #html_domain_indices = True # If false, no index is generated. 
#html_use_index = True # If true, the index is split into individual pages for each letter. #html_split_index = False # If true, links to the reST sources are added to the pages. html_show_sourcelink = False # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. #html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. #html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. #html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). #html_file_suffix = None # Output file base name for HTML help builder. htmlhelp_basename = 'gammatonedoc' # -- Options for LaTeX output -------------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). #'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). #'pointsize': '10pt', # Additional stuff for the LaTeX preamble. #'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ ('index', 'gammatone.tex', u'Gammatone Documentation', u'Jason Heeris', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. #latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. #latex_use_parts = False # If true, show page references after internal links. #latex_show_pagerefs = False # If true, show URL addresses after external links. #latex_show_urls = False # Documents to append as an appendix to all manuals. #latex_appendices = [] # If false, no module index is generated. 
#latex_domain_indices = True # -- Options for manual page output -------------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ ('index', 'gammatone', u'Gammatone Documentation', [u'Jason Heeris'], 1) ] # If true, show URL addresses after external links. #man_show_urls = False # -- Options for Texinfo output ------------------------------------------------ # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ ('index', 'gammatone', u'Gammatone Documentation', u'Jason Heeris', 'gammatone', 'Gammatone filterbank construction tools.', 'Miscellaneous'), ] # Documents to append as an appendix to all manuals. #texinfo_appendices = [] # If false, no module index is generated. #texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. #texinfo_show_urls = 'footnote' # -- Autodoc configuration ----------------------------------------------------- # autodoc_default_flags = ['members'] ================================================ FILE: doc/details.rst ================================================ About the Gammatone Filterbank Toolkit -------------------------------------- Summary ~~~~~~~ This is a port of Malcolm Slaney's and Dan Ellis' gammatone filterbank MATLAB code, detailed below, to Python 2 and 3 using Numpy and Scipy. It analyses signals by running them through banks of gammatone filters, similar to Fourier-based spectrogram analysis. .. 
figure:: FurElise.png
   :align: center
   :alt: Gammatone-based spectrogram of Für Elise

   Gammatone-based spectrogram of Für Elise

Dependencies
~~~~~~~~~~~~

- numpy
- scipy
- nose
- mock
- matplotlib

Using the Code
~~~~~~~~~~~~~~

For a demonstration, find a ``.wav`` file (for example, `Für Elise `_) and run::

    python -m gammatone FurElise.wav -d 10

...to see a gammatone-gram of the first ten seconds of Beethoven's "Für Elise."
If you've installed via ``pip`` or ``setup.py install``, you should also be able
to just run::

    gammatone FurElise.wav -d 10

Basis
~~~~~

This project is based on research into how humans perceive audio, originally
published by Malcolm Slaney:

`Malcolm Slaney (1998) "Auditory Toolbox Version 2", Technical Report #1998-010, Interval Research Corporation, 1998. <http://cobweb.ecn.purdue.edu/~malcolm/interval/1998-010/>`_

Slaney's report describes a way of modelling how the human ear perceives,
emphasises and separates different frequencies of sound. A series of gammatone
filters is constructed whose width increases with increasing centre frequency,
and this bank of filters is applied to a time-domain signal. The result of this
is a spectrum that should represent the human experience of sound better than,
say, a Fourier-domain spectrum would.

A gammatone filter has an impulse response that is a sine wave multiplied by a
gamma distribution function. It is a common approach to modelling the auditory
system.

The gammatone filterbank approach can be considered analogous (but not
equivalent) to a discrete Fourier transform where the frequency axis is
logarithmic. For example, a series of notes spaced an octave apart would appear
to be roughly linearly spaced; or a sound that was distributed across the same
linear frequency range would appear to have more spread at lower frequencies.

The real goal of this toolkit is to allow easy computation of the gammatone
equivalent of a spectrogram — a time-varying spectrum of energy over audible
frequencies based on a gammatone filterbank.
Slaney demonstrated his research with an initial implementation in MATLAB. This
implementation was later extended by Dan Ellis, who found a way to approximate a
"gammatone-gram" by using the fast Fourier transform. Ellis' code calculates a
matrix of weights that can be applied to the output of an FFT so that a
Fourier-based spectrogram can easily be transformed into such an approximation.

Ellis' code and documentation are here: `Gammatone-like spectrograms <http://www.ee.columbia.edu/~dpwe/resources/matlab/gammatonegram/>`_

Interest
~~~~~~~~

I became interested in this because of my background in science communication
and my general interest in the teaching of signal processing. I find that the
spectrogram approach to visualising signals is adequate for illustrating
abstract systems or the mathematical properties of transforms, but bears little
correspondence to a person's own experience of sound. If someone wants to see
what their favourite piece of music "looks like," a normal Fourier-transform-based
spectrogram is actually quite a poor way to visualise it. Features of the
audio seem to be oddly spaced or unnaturally emphasised or de-emphasised
depending on where they are in the frequency domain.

The gammatone filterbank approach seems to be closer to what someone might
intuitively expect a visualisation of sound to look like, and can help develop
an intuition about alternative representations of signals.

Verifying the port
~~~~~~~~~~~~~~~~~~

Since this is a port of existing MATLAB code, I've written tests to verify the
Python implementation against the original code. These tests aren't unit tests,
but they do generally test single functions. Running any of the tests follows
the same workflow:

1. Run the scripts in the ``test_generation`` directory. This will create a
   ``.mat`` file containing test data in ``tests/data``.

2. Run ``nosetests3`` in the top-level directory. This will find and run all the
   tests in the ``tests`` directory.
Although I'm usually loathe to check in generated files to version control, I'm willing to make an exception for the ``.mat`` files containing the test data. My reasoning is that they represent the decoupling of my code from the MATLAB code, and if the two projects were separated, they would be considered a part of the Python code, not the original MATLAB code. ================================================ FILE: doc/fftweight.rst ================================================ :mod:`gammatone.fftweight` -- FFT weightings for spectrogram-like gammatone analysis ==================================================================================== .. automodule:: gammatone.fftweight :members: ================================================ FILE: doc/filters.rst ================================================ :mod:`gammatone.filters` -- gammatone filterbank construction ============================================================= .. automodule:: gammatone.filters :members: ================================================ FILE: doc/gtgram.rst ================================================ :mod:`gammatone.gtgram` -- spectrogram-like gammatone analysis ============================================================== .. automodule:: gammatone.gtgram :members: ================================================ FILE: doc/index.rst ================================================ .. gammatone documentation master file, created by sphinx-quickstart on Sat Dec 8 23:21:49 2012. Index ===== Modules ------- .. toctree:: :maxdepth: 2 filters gtgram fftweight plot .. include:: details.rst Indices and tables ------------------ * :ref:`genindex` * :ref:`modindex` * :ref:`search` ================================================ FILE: doc/make.bat ================================================ @ECHO OFF REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set BUILDDIR=_build set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 
set I18NSPHINXOPTS=%SPHINXOPTS% . if NOT "%PAPER%" == "" ( set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% ) if "%1" == "" goto help if "%1" == "help" ( :help echo.Please use `make ^` where ^ is one of echo. html to make standalone HTML files echo. dirhtml to make HTML files named index.html in directories echo. singlehtml to make a single large HTML file echo. pickle to make pickle files echo. json to make JSON files echo. htmlhelp to make HTML files and a HTML help project echo. qthelp to make HTML files and a qthelp project echo. devhelp to make HTML files and a Devhelp project echo. epub to make an epub echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter echo. text to make text files echo. man to make manual pages echo. texinfo to make Texinfo files echo. gettext to make PO message catalogs echo. changes to make an overview over all changed/added/deprecated items echo. linkcheck to check all external links for integrity echo. doctest to run all doctests embedded in the documentation if enabled goto end ) if "%1" == "clean" ( for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i del /q /s %BUILDDIR%\* goto end ) if "%1" == "html" ( %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/html. goto end ) if "%1" == "dirhtml" ( %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. goto end ) if "%1" == "singlehtml" ( %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. goto end ) if "%1" == "pickle" ( %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can process the pickle files. 
goto end
)

if "%1" == "json" (
    %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
    if errorlevel 1 exit /b 1
    echo.
    echo.Build finished; now you can process the JSON files.
    goto end
)

if "%1" == "htmlhelp" (
    %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
    if errorlevel 1 exit /b 1
    echo.
    echo.Build finished; now you can run HTML Help Workshop with the ^
.hhp project file in %BUILDDIR%/htmlhelp.
    goto end
)

REM The Qt help collection file has the extension .qhc (generated from the
REM .qhcp project file); the hint below must name it correctly.
if "%1" == "qthelp" (
    %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
    if errorlevel 1 exit /b 1
    echo.
    echo.Build finished; now you can run "qcollectiongenerator" with the ^
.qhcp project file in %BUILDDIR%/qthelp, like this:
    echo.^> qcollectiongenerator %BUILDDIR%\qthelp\gammatone.qhcp
    echo.To view the help file:
    echo.^> assistant -collectionFile %BUILDDIR%\qthelp\gammatone.qhc
    goto end
)

if "%1" == "devhelp" (
    %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
    if errorlevel 1 exit /b 1
    echo.
    echo.Build finished.
    goto end
)

if "%1" == "epub" (
    %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
    if errorlevel 1 exit /b 1
    echo.
    echo.Build finished. The epub file is in %BUILDDIR%/epub.
    goto end
)

if "%1" == "latex" (
    %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
    if errorlevel 1 exit /b 1
    echo.
    echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
    goto end
)

if "%1" == "text" (
    %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
    if errorlevel 1 exit /b 1
    echo.
    echo.Build finished. The text files are in %BUILDDIR%/text.
    goto end
)

if "%1" == "man" (
    %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
    if errorlevel 1 exit /b 1
    echo.
    echo.Build finished. The manual pages are in %BUILDDIR%/man.
    goto end
)

if "%1" == "texinfo" (
    %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
    if errorlevel 1 exit /b 1
    echo.
    echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
    goto end
)

if "%1" == "gettext" (
    %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
    if errorlevel 1 exit /b 1
    echo.
echo.Build finished. The message catalogs are in %BUILDDIR%/locale. goto end ) if "%1" == "changes" ( %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes if errorlevel 1 exit /b 1 echo. echo.The overview file is in %BUILDDIR%/changes. goto end ) if "%1" == "linkcheck" ( %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck if errorlevel 1 exit /b 1 echo. echo.Link check complete; look for any errors in the above output ^ or in %BUILDDIR%/linkcheck/output.txt. goto end ) if "%1" == "doctest" ( %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest if errorlevel 1 exit /b 1 echo. echo.Testing of doctests in the sources finished, look at the ^ results in %BUILDDIR%/doctest/output.txt. goto end ) :end ================================================ FILE: doc/plot.rst ================================================ :mod:`gammatone.plot` -- Plotting utilities for gammatone analysis ================================================================== .. automodule:: gammatone.plot :members: ================================================ FILE: gammatone/__init__.py ================================================ # Copyright 2014 Jason Heeris, jason.heeris@gmail.com # # This file is part of the gammatone toolkit, and is licensed under the 3-clause # BSD license: https://github.com/detly/gammatone/blob/master/COPYING # Designate gammatone module """ Gammatone filterbank toolkit """ ================================================ FILE: gammatone/__main__.py ================================================ # Copyright 2014 Jason Heeris, jason.heeris@gmail.com # # This file is part of the gammatone toolkit, and is licensed under the 3-clause # BSD license: https://github.com/detly/gammatone/blob/master/COPYING from gammatone.plot import main main() ================================================ FILE: gammatone/fftweight.py ================================================ # Copyright 2014 Jason Heeris, jason.heeris@gmail.com # # This file is part of 
the gammatone toolkit, and is licensed under the 3-clause # BSD license: https://github.com/detly/gammatone/blob/master/COPYING """ This module contains functions for calculating weights to approximate a gammatone filterbank-like "spectrogram" from a Fourier transform. """ from __future__ import division import numpy as np import gammatone.filters as filters import gammatone.gtgram as gtgram def specgram_window( nfft, nwin, ): """ Window calculation used in specgram replacement function. Hann window of width `nwin` centred in an array of width `nfft`. """ halflen = nwin // 2 halff = nfft // 2 # midpoint of win acthalflen = int(np.floor(min(halff, halflen))) halfwin = 0.5 * ( 1 + np.cos(np.pi * np.arange(0, halflen+1)/halflen)) win = np.zeros((nfft,)) win[halff:halff+acthalflen] = halfwin[0:acthalflen]; win[halff:halff-acthalflen:-1] = halfwin[0:acthalflen]; return win def specgram(x, n, sr, w, h): """ Substitute for Matlab's specgram, calculates a simple spectrogram. :param x: The signal to analyse :param n: The FFT length :param sr: The sampling rate :param w: The window length (see :func:`specgram_window`) :param h: The hop size (must be greater than zero) """ # Based on Dan Ellis' myspecgram.m,v 1.1 2002/08/04 assert h > 0, "Must have a hop size greater than 0" s = x.shape[0] win = specgram_window(n, w) c = 0 # pre-allocate output array ncols = 1 + int(np.floor((s - n)/h)) d = np.zeros(((1 + n // 2), ncols), np.dtype(complex)) for b in range(0, s - n, h): u = win * x[b : b + n] t = np.fft.fft(u) d[:, c] = t[0 : (1 + n // 2)].T c = c + 1 return d def fft_weights( nfft, fs, nfilts, width, fmin, fmax, maxlen): """ :param nfft: the source FFT size :param sr: sampling rate (Hz) :param nfilts: the number of output bands required (default 64) :param width: the constant width of each band in Bark (default 1) :param fmin: lower limit of frequencies (Hz) :param fmax: upper limit of frequencies (Hz) :param maxlen: number of bins to truncate the rows to :return: a tuple 
def fft_gtgram(wave, fs, window_time, hop_time, channels, f_min):
    """
    Approximate a gammatone "spectrogram" by re-weighting FFT magnitudes.

    A short-time Fourier transform of ``wave`` (sampled at ``fs`` Hz) is
    computed with :func:`specgram`, and each column of magnitudes is combined
    into ``channels`` gammatone-like bands using the weight matrix from
    :func:`fft_weights`. The result approximates full filtering with an ERB
    gammatone filterbank (as per :func:`gammatone.gtgram.gtgram`).

    ``f_min`` is the lower frequency cutoff of the corresponding filterbank;
    the upper limit is always ``fs / 2``. ``window_time`` and ``hop_time``
    (both in seconds) are the analysis window length and the advance between
    successive columns.

    :return: real array with ``channels`` rows and one column per frame

    | 2009-02-23 Dan Ellis dpwe@ee.columbia.edu
    |
    | (c) 2013 Jason Heeris (Python implementation)
    """
    # The filter width was a parameter in the MATLAB code; fixed here.
    width = 1

    # FFT length: the smallest power of two that is at least twice the
    # window length in samples.
    fft_len = int(2 ** (np.ceil(np.log2(2 * window_time * fs))))

    # Only the window and hop sizes are needed from the stride calculation,
    # so the number of filterbank columns is passed as zero.
    strides = gtgram.gtgram_strides(fs, window_time, hop_time, 0)
    win_samples, hop_samples = strides[0], strides[1]

    band_weights, _ = fft_weights(
        fft_len,
        fs,
        channels,
        width,
        f_min,
        fs / 2,
        fft_len / 2 + 1
    )

    magnitudes = np.abs(specgram(wave, fft_len, fs, win_samples, hop_samples))

    return band_weights.dot(magnitudes) / fft_len
def make_erb_filters(fs, centre_freqs, width=1.0):
    """
    This function computes the filter coefficients for a bank of
    Gammatone filters. These filters were defined by Patterson and Holdsworth
    for simulating the cochlea.

    :param fs: sampling rate (Hz)
    :param centre_freqs: centre frequency of each filter (Hz); any array-like
      (NumPy array, list, tuple or a single number) is accepted
    :param width: scaling factor applied to the ERB bandwidth of every filter
    :return: a two dimensional array with one row per centre frequency and
      ten columns, in the order
      ``A0, A11, A12, A13, A14, A2, B0, B1, B2, gain``

    Each row of the result contains the coefficients for four second order
    filters. The transfer function for these four filters share the same
    denominator (poles, ``B0, B1, B2``) but have different numerators (zeros,
    ``A0, A1x, A2``). All of these coefficients are assembled into one row
    that :func:`erb_filterbank` can take apart to implement the filter.

    Note this implementation fixes a problem in the original code by
    computing four separate second order filters. This avoids a big problem
    with round off errors in cases of very small cfs (100Hz) and large sample
    rates (44kHz). The problem is caused by roundoff error when a number of
    poles are combined, all very close to the unit circle. Small errors in the
    eighth order coefficient, are multiplied when the eighth root is taken to
    give the pole location. These small errors lead to poles outside the unit
    circle and instability. Thanks to Julius Smith for leading me to the
    proper explanation.

    The original MATLAB code could be used like this to evaluate the
    frequency response of a 10 channel filterbank::

        fcoefs = MakeERBFilters(16000,10,100);
        y = ERBFilterBank([1 zeros(1,511)], fcoefs);
        resp = 20*log10(abs(fft(y')));
        freqScale = (0:511)/512*16000;
        semilogx(freqScale(1:255),resp(1:255,:));
        axis([100 16000 -60 0])
        xlabel('Frequency (Hz)'); ylabel('Filter Response (dB)');

    | Rewritten by Malcolm Slaney@Interval.  June 11, 1998.
    | (c) 1998 Interval Research Corporation
    |
    | (c) 2012 Jason Heeris (Python implementation)
    """
    # Accept lists/tuples/scalars as well as arrays; everything below relies
    # on element-wise NumPy arithmetic.
    centre_freqs = np.asarray(centre_freqs, dtype=float)

    T = 1 / fs
    # Change the following three parameters if you wish to use a different
    # ERB scale. Must change in erb_point too.
    # TODO: factor these out
    ear_q = 9.26449 # Glasberg and Moore Parameters
    min_bw = 24.7
    order = 1

    # Equivalent rectangular bandwidth of each filter, and the derived
    # bandwidth parameter B.
    erb = width*((centre_freqs / ear_q) ** order + min_bw ** order) ** ( 1 /order)
    B = 1.019 * 2 * np.pi * erb

    # Normalised angular centre frequency of each filter.
    arg = 2 * centre_freqs * np.pi * T
    vec = np.exp(2j * arg)

    # Coefficients shared by all four second order sections.
    A0 = T
    A2 = 0
    B0 = 1
    B1 = -2 * np.cos(arg) / np.exp(B * T)
    B2 = np.exp(-2 * B * T)

    rt_pos = np.sqrt(3 + 2 ** 1.5)
    rt_neg = np.sqrt(3 - 2 ** 1.5)

    common = -T * np.exp(-(B * T))

    # TODO: This could be simplified to a matrix calculation involving the
    # constant first term and the alternating rt_pos/rt_neg and +/-1 second
    # terms
    k11 = np.cos(arg) + rt_pos * np.sin(arg)
    k12 = np.cos(arg) - rt_pos * np.sin(arg)
    k13 = np.cos(arg) + rt_neg * np.sin(arg)
    k14 = np.cos(arg) - rt_neg * np.sin(arg)

    # The middle numerator coefficient is what differs between the sections.
    A11 = common * k11
    A12 = common * k12
    A13 = common * k13
    A14 = common * k14

    gain_arg = np.exp(1j * arg - B * T)

    # Normalisation applied to the cascade output (the filterbank divides
    # each channel by this value).
    gain = np.abs(
            (vec - gain_arg * k11)
          * (vec - gain_arg * k12)
          * (vec - gain_arg * k13)
          * (vec - gain_arg * k14)
          * (  T * np.exp(B * T)
             / (-1 / np.exp(B * T) + 1 + vec * (1 - np.exp(B * T)))
            )**4
        )

    # Broadcast the scalar coefficients to one value per filter.
    allfilts = np.ones_like(centre_freqs)

    fcoefs = np.column_stack([
        A0 * allfilts, A11, A12, A13, A14, A2*allfilts,
        B0 * allfilts, B1, B2,
        gain
    ])

    return fcoefs
def round_half_away_from_zero(num):
    """
    Round to the nearest whole number, with ties going away from zero.

    A fractional part of exactly 0.5 rounds up for positive numbers and down
    (towards more negative values) for negative numbers, so ``0.5`` gives
    ``1`` and ``-0.5`` gives ``-1``.
    """
    magnitude = np.floor(np.abs(num) + 0.5)
    return np.sign(num) * magnitude


def gtgram_strides(fs, window_time, hop_time, filterbank_cols):
    """
    Calculates the window size, hop size and column count for a
    gammatonegram.

    :param fs: sampling rate
    :param window_time: window length in seconds
    :param hop_time: advance between successive windows in seconds
    :param filterbank_cols: number of samples (columns) in the filterbank
      output being windowed
    :return: a tuple of ``(window_size, hop_samples, output_columns)``
    """
    window_samples = int(round_half_away_from_zero(window_time * fs))
    hop_samples = int(round_half_away_from_zero(hop_time * fs))

    # Number of whole hops that fit after the first window.
    whole_hops = np.floor((filterbank_cols - window_samples) / hop_samples)

    return (window_samples, hop_samples, 1 + int(whole_hops))
class ERBFormatter(matplotlib.ticker.EngFormatter):
    """
    Axis formatter for gammatone filterbank analysis. Tick positions are
    mapped onto the ERB spaced frequencies used for analysis and then rendered
    the same way as the engineering axis formatter.

    The scale is changed so that `[0, 1]` corresponds to ERB spaced
    frequencies from ``high_freq`` to ``low_freq`` (note the reversal). It
    should be used with ``imshow`` where the ``extent`` argument is
    ``[a, b, 1, 0]`` (again, note the inversion).
    """

    def __init__(self, low_freq, high_freq, *args, **kwargs):
        """
        Creates a new :class:`ERBFormatter` for use with ``matplotlib`` plots.
        Any further positional or keyword arguments are handed to the
        engineering formatter unchanged; this class does not supply the
        ``unit`` or ``places`` arguments itself (typically these would be
        ``'Hz'`` and ``0``).

        :param low_freq: the low end of the gammatone filterbank frequency
          range
        :param high_freq: the high end of the gammatone filterbank frequency
          range
        """
        self.low_freq = low_freq
        self.high_freq = high_freq
        super(ERBFormatter, self).__init__(*args, **kwargs)

    def _erb_axis_scale(self, fraction):
        # Convert an axis position into the frequency at that point of the
        # ERB scale between the two configured limits.
        return erb_point(self.low_freq, self.high_freq, fraction)

    def __call__(self, val, pos=None):
        # Format the ERB frequency rather than the raw axis position.
        frequency = self._erb_axis_scale(val)
        return super(ERBFormatter, self).__call__(frequency, pos)
def render_audio_from_file(path, duration, function):
    """
    Renders the given ``duration`` of audio from the audio file at ``path``
    using the gammatone spectrogram function ``function``.

    :param path: path of the WAV file to read
    :param duration: number of seconds from the start of the file to plot; a
      false value (``None`` or ``0``) means the whole file is used
    :param function: gammatone spectrogram function, called through
      :func:`gtgram_plot`

    Files with several channels are averaged down to a single signal before
    analysis; single channel files are used as they are. The plot is shown in
    an interactive ``matplotlib`` window.
    """
    samplerate, data = scipy.io.wavfile.read(path)

    # Truncate to the requested duration. Slicing only the first axis works
    # for both mono (1-D) and multi-channel (2-D) data.
    if duration:
        nframes = int(duration * samplerate)
        data = data[0 : nframes]

    # Average the channels of a multi-channel signal. A mono file is read as
    # a one dimensional array, which has no channel axis to average over.
    if data.ndim > 1:
        signal = data.mean(1)
    else:
        signal = data.astype(float)

    # Default gammatone-based spectrogram parameters
    twin = 0.08
    thop = twin / 2
    channels = 1024
    fmin = 20

    # Set up the plot
    fig = matplotlib.pyplot.figure()
    axes = fig.add_axes([0.1, 0.1, 0.8, 0.8])

    gtgram_plot(
        function,
        axes,
        signal,
        samplerate,
        twin, thop, channels, fmin)

    axes.set_title(os.path.basename(path))
    axes.set_xlabel("Time (s)")
    axes.set_ylabel("Frequency")

    matplotlib.pyplot.show()
# Package metadata and installation configuration for the gammatone toolkit.
setup(
    name = "Gammatone",
    version = "1.0",
    packages = find_packages(),

    # NOTE(review): 'nose' and 'mock' are imported by the modules under
    # tests/; they look like test-only requirements -- confirm before moving
    # them out of the runtime dependency list.
    install_requires = [
        'numpy',
        'scipy',
        'nose',
        'mock',
        'matplotlib',
    ],

    # Exposes the plotting entry point (gammatone.plot.main) as a
    # ``gammatone`` console command.
    entry_points = {
        'console_scripts': [
            'gammatone = gammatone.plot:main',
        ]
    }
)
function test_ERBSpace()
    % Generate reference data for the Python ERB spacing tests.
    %
    % Runs ERBSpace over a table of (low frequency, high frequency, number of
    % points) inputs and saves both the inputs and the resulting centre
    % frequency arrays to tests/data/test_erbspace_data.mat.

    % Low freq, high freq, N
    erbspace_inputs = { ...
        100, 11025, 100; ...
        100, 22050, 100; ...
        20, 22050, 100; ...
        20, 44100, 100; ...
        100, 11025, 10; ...
        100, 11025, 1000; ...
        500, 80000, 200; ...
    };

    erbspace_results = {};

    % size(x, dim) is used rather than size(x)(dim): indexing the result of
    % a function call directly is only accepted by Octave, not MATLAB.
    num_tests = size(erbspace_inputs, 1);

    for tnum=1:num_tests
        [f_low, f_high, num_f] = deal(erbspace_inputs{tnum,:});
        % Wrap the result in a cell explicitly so the assignment into the
        % results cell array is valid in both MATLAB and Octave.
        erbspace_results(tnum, :) = {ERBSpace(f_low, f_high, num_f)};
    end

    results_file = fullfile('..', 'tests', 'data', 'test_erbspace_data.mat');
    save(results_file, 'erbspace_inputs', 'erbspace_results');
end
}; num_tests = size(erb_space_inputs)(1) + size(extra_inputs)(1); erb_filter_inputs = {}; erb_filter_results = {}; % This will ONLY generate tests that use the centre frequency inputs % ERBSpace generated inputs for tnum=1:size(erb_space_inputs)(1) [f_low, f_high, num_f] = deal(erb_space_inputs{tnum,:}); fs = f_high*2; cfs = ERBSpace(f_low, f_high, num_f); erb_filter_inputs(tnum, :) = {fs, cfs}; end erb_filter_inputs = cat(1, erb_filter_inputs, extra_inputs); for tnum=1:num_tests fs = erb_filter_inputs{tnum, 1}; cfs = erb_filter_inputs{tnum, 2}; fcoefs = MakeERBFilters(fs, cfs, 0); erb_filter_results(tnum, :) = fcoefs; end results_file = fullfile('..', 'tests', 'data', 'test_erb_filter_data.mat'); save(results_file, 'erb_filter_inputs', 'erb_filter_results'); end ================================================ FILE: test_generation/test_fft2gammatonemx.m ================================================ % Copyright 2014 Jason Heeris, jason.heeris@gmail.com % % This file is part of the gammatone toolkit, and is licensed under the 3-clause % BSD license: https://github.com/detly/gammatone/blob/master/COPYING function test_fft2gtmx() % Arguments: % nfft, sr, nfilts, width, minfreq, maxfreq, maxlen fft2gtmx_inputs = { ... 256 , 48000, 64 , 1 , 100, 48000/2 , 256; ... % Vary the width parameter 256 , 48000, 64 , 2 , 100, 48000/2 , 256; ... 256 , 48000, 64 , 4 , 100, 48000/2 , 256; ... 256 , 48000, 64 , 0.25, 100, 48000/2 , 256; ... % Vary sampling rate 256 , 96000, 64 , 1 , 100, 96000/2 , 256; ... % Vary upper frequency 256 , 48000, 64 , 1 , 100, 48000/2 , 256; ... 256 , 48000, 64 , 1 , 100, 48000/4 , 256; ... 256 , 48000, 64 , 1 , 100, 48000/10, 256; ... % Vary maxlen 256 , 48000, 64 , 1 , 100, 48000/2 , 128; ... 256 , 48000, 64 , 1 , 100, 48000/2 , 16; ... 256 , 48000, 64 , 1 , 100, 48000/2 , 99; ... % Vary sampling rate 1024, 48000, 128, 1 , 100, 48000/2 , 512; ... 1024, 48000, 128, 1 , 100, 48000/2 , 128; ... 64 , 44100, 32 , 1 , 20 , 44100/2 , 64; ... 
}; fft2gtmx_results = {}; for tnum=1:size(fft2gtmx_inputs)(1) [nfft, sr, nfilts, width, minfreq, maxfreq, maxlen] = deal(fft2gtmx_inputs{tnum,:}); [wts, gain] = fft2gammatonemx(nfft, sr, nfilts, width, minfreq, maxfreq, maxlen); fft2gtmx_results(tnum, :) = {wts, gain}; end results_file = fullfile('..', 'tests', 'data', 'test_fft2gtmx_data.mat'); save(results_file, 'fft2gtmx_inputs', 'fft2gtmx_results'); end ================================================ FILE: test_generation/test_fft_gammatonegram.m ================================================ % Copyright 2014 Jason Heeris, jason.heeris@gmail.com % % This file is part of the gammatone toolkit, and is licensed under the 3-clause % BSD license: https://github.com/detly/gammatone/blob/master/COPYING function test_fft_gammatonegram() % Need: % wave % fs % window_time % hop_time % channels % f_min % f_max % Need to mock out: % make_erb_filters output (elide) % centre_freqs (elide) % erb_filterbank (depends on X, SR, N, FMIN) % Ensure reproducible tests rand('state', [3 1 4 1 5 9 2 7]); fft_gammatonegram_inputs = { 'sawtooth_01', sawtooth(2*pi*10100*[0:22050 - 1]'/22050, 0.5), 22050, 0.025, 0.010, 64, 50; ... 'sin220_01' , sin(2*pi*220*[0:4800 - 1]'/48000), 48000, 0.01, 0.01, 64, 50; ... 'sin220_02' , sin(2*pi*220*[0:4800 - 1]'/48000), 48000, 0.025, 0.01, 32, 50; ... 'rand_01' , rand([1, 4410 - 1]), 44100, 0.02, 0.015, 128, 500; ... 'rand_02' , rand([1, 9600 - 1]), 96000, 0.01, 0.005, 256, 20; ... 'rand_03' , rand([1, 4800 - 1]), 48000, 0.01, 0.010, 256, 20; ... 
}; % Mocked intermediate results for unit testing fft_gammatonegram_mocks = {}; % Actual results fft_gammatonegram_results = {}; for tnum=1:size(fft_gammatonegram_inputs)(1) [name, wave, fs, twin, thop, chs, fmin] = deal(fft_gammatonegram_inputs{tnum,:}); % This is for mocking the output of the equivalent Python functions nfft = 2^(ceil(log(2*twin*fs)/log(2))); nwin = round(twin * fs); nhop = round(thop * fs); % Mock out the FFT weights as well wts = fft2gammatonemx( ... nfft, ... fs, ... chs, ... 1, ... % width is always 1 in the Python implementation fmin, ... fs/2, ... nfft/2+1 ... ); % Mock out windowing function window = gtgram_window(nfft, nwin); res = gammatonegram( ... wave, ... fs, ... twin, ... thop, ... chs, ... fmin, ... fs/2, % fmax is always fs/2 in the Python version 1 % Use FFT method ); fft_gammatonegram_mocks(tnum, :) = { ... wts ... }; fft_gammatonegram_results(tnum, :) = { ... res, ... window, ... nfft, ... nwin, ... nhop ... }; end; results_file = fullfile('..', 'tests', 'data', 'test_fft_gammatonegram_data.mat'); save(results_file, 'fft_gammatonegram_inputs', 'fft_gammatonegram_mocks', 'fft_gammatonegram_results'); end; function win = gtgram_window(n, w) % Reproduction of Dan Ellis' windowing function built in to specgram.m halflen = w/2; halff = n/2; % midpoint of win acthalflen = min(halff, halflen); halfwin = 0.5 * ( 1 + cos( pi * (0:halflen)/halflen)); win = zeros(1, n); win((halff+1):(halff+acthalflen)) = halfwin(1:acthalflen); win((halff+1):-1:(halff-acthalflen+2)) = halfwin(1:acthalflen); end; ================================================ FILE: test_generation/test_gammatonegram.m ================================================ % Copyright 2014 Jason Heeris, jason.heeris@gmail.com % % This file is part of the gammatone toolkit, and is licensed under the 3-clause % BSD license: https://github.com/detly/gammatone/blob/master/COPYING function test_gammatonegram() % Need: % wave % fs % window_time % hop_time % channels % f_min % f_max % 
Need to mock out: % make_erb_filters output (elide) % centre_freqs (elide) % erb_filterbank (depends on X, SR, N, FMIN) % Ensure reproducible tests rand('state', [3 1 4 1 5 9 2 7]); gammatonegram_inputs = { 'sawtooth_01', sawtooth(2*pi*10100*[0:22050 - 1]'/22050, 0.5), 22050, 0.025, 0.010, 64, 50; ... 'sin220_01' , sin(2*pi*220*[0:4800 - 1]'/48000), 48000, 0.01, 0.01, 64, 50; ... 'sin220_02' , sin(2*pi*220*[0:4800 - 1]'/48000), 48000, 0.025, 0.01, 32, 50; ... 'rand_01' , rand([1, 4410 - 1]), 44100, 0.02, 0.015, 128, 500; ... 'rand_02' , rand([1, 9600 - 1]), 96000, 0.01, 0.005, 256, 20; ... 'rand_03' , rand([1, 4800 - 1]), 48000, 0.01, 0.010, 256, 20; ... }; % Mocked intermediate results for unit testing gammatonegram_mocks = {}; % Actual results gammatonegram_results = {}; for tnum=1:size(gammatonegram_inputs)(1) [name, wave, fs, twin, thop, chs, fmin] = deal(gammatonegram_inputs{tnum,:}); res = gammatonegram( ... wave, ... fs, ... twin, ... thop, ... chs, ... fmin, ... 0, % fmax is ignored 0 % Don't use FFT method ); % This is for mocking the output of the equivalent Python functions nwin = round(twin * fs); hopsamps = round(thop * fs); f_coefs = flipud(MakeERBFilters(fs, chs, fmin)); x_f = ERBFilterBank(wave, f_coefs); x_e = [x_f .^ 2]; x_e_cols = size(x_e, 2); ncols = 1 + floor((x_e_cols - nwin) / hopsamps); % Mock out the ERB filter functions too fcoefs = flipud(MakeERBFilters(fs, chs, fmin)); erb_fb_output = ERBFilterBank(wave, fcoefs); gammatonegram_mocks(tnum, :) = { ... erb_fb_output, ... x_e_cols ... }; gammatonegram_results(tnum, :) = { ... res, ... nwin, ... hopsamps, ... ncols ... 
}; end; results_file = fullfile('..', 'tests', 'data', 'test_gammatonegram_data.mat'); save(results_file, 'gammatonegram_inputs', 'gammatonegram_mocks', 'gammatonegram_results'); end; ================================================ FILE: test_generation/test_specgram.m ================================================ % Copyright 2014 Jason Heeris, jason.heeris@gmail.com % % This file is part of the gammatone toolkit, and is licensed under the 3-clause % BSD license: https://github.com/detly/gammatone/blob/master/COPYING function test_specgram() % Need: % wave % nfft % fs % window_size % hop (technically the function takes the overlap, but only to recalculate this) % Ensure reproducible tests rand('state', [3 1 4 1 5 9 2 7]); specgram_inputs = { 'sawtooth_01', sawtooth(2*pi*10100*[0:22050 - 1]'/22050, 0.5), 2048, 22050, 551, 221; ... 'sin220_01' , sin(2*pi*220*[0:4800 - 1]'/48000), 1024, 48000, 480, 480; ... 'sin220_02' , sin(2*pi*220*[0:4800 - 1]'/48000), 4096, 48000, 1200, 480; ... 'rand_01' , rand([1, 4410 - 1]), 2048, 44100, 882, 662; ... 'rand_02' , rand([1, 9600 - 1]), 2048, 96000, 960, 480; ... 'rand_03' , rand([1, 4800 - 1]), 1024, 48000, 480, 480; ... }; % Mocked intermediate results for unit testing specgram_mocks = {}; % Actual results specgram_results = {}; for tnum=1:size(specgram_inputs)(1) [name, wave, nfft, fs, nwin, nhop] = deal(specgram_inputs{tnum,:}); % Mock out windowing function window = gtgram_window(nfft, nwin); res = specgram( ... wave, ... nfft, ... fs, ... nwin, ... nwin - nhop ... ); specgram_mocks(tnum, :) = { ... window, ... }; specgram_results(tnum, :) = { ... res, ... 
}; end; results_file = fullfile('..', 'tests', 'data', 'test_specgram_data.mat'); save(results_file, 'specgram_inputs', 'specgram_mocks', 'specgram_results'); end; function win = gtgram_window(n, w) % Reproduction of Dan Ellis' windowing function built in to specgram.m halflen = w/2; halff = n/2; % midpoint of win acthalflen = min(halff, halflen); halfwin = 0.5 * ( 1 + cos( pi * (0:halflen)/halflen)); win = zeros(1, n); win((halff+1):(halff+acthalflen)) = halfwin(1:acthalflen); win((halff+1):-1:(halff-acthalflen+2)) = halfwin(1:acthalflen); end; ================================================ FILE: tests/__init__.py ================================================ # Copyright 2014 Jason Heeris, jason.heeris@gmail.com # # This file is part of the gammatone toolkit, and is licensed under the 3-clause # BSD license: https://github.com/detly/gammatone/blob/master/COPYING # Designate as module ================================================ FILE: tests/data/test_gammatonegram_data.mat ================================================ [File too large to display: 47.2 MB] ================================================ FILE: tests/test_cfs.py ================================================ #!/usr/bin/env python3 # Copyright 2014 Jason Heeris, jason.heeris@gmail.com # # This file is part of the gammatone toolkit, and is licensed under the 3-clause # BSD license: https://github.com/detly/gammatone/blob/master/COPYING import nose from mock import patch import gammatone.filters EXPECTED_PARAMS = ( ((0, 0, 0), (0, 0, 0)), ((22050, 100, 100), (100, 11025, 100)), ((44100, 100, 100), (100, 22050, 100)), ((44100, 100, 20), (20, 22050, 100)), ((88200, 100, 20), (20, 44100, 100)), ((22050, 100, 10), (10, 11025, 100)), ((22050, 1000, 100), (100, 11025, 1000)), ((160000, 500, 200), (200, 80000, 500)), ) def test_centre_freqs(): for args, params in EXPECTED_PARAMS: yield CentreFreqsTester(args, params) class CentreFreqsTester: def __init__(self, args, params): self.args = args 
def load_reference_data():
    """
    Yield ``(inputs, refs)`` dict pairs read from the reference data file.

    The file named by ``REF_DATA_FILENAME`` is loaded with
    ``scipy.io.loadmat`` and the entries stored under ``INPUT_KEY`` and
    ``RESULT_KEY`` are walked in step. Every value is passed through
    ``np.squeeze`` and keyed by the matching name from ``INPUT_COLS`` /
    ``RESULT_COLS``.
    """
    with resource_stream(__name__, REF_DATA_FILENAME) as mat_file:
        contents = scipy.io.loadmat(mat_file, squeeze_me=False)

    for raw_inputs, raw_refs in zip(contents[INPUT_KEY], contents[RESULT_KEY]):
        named_inputs = {
            column: np.squeeze(value)
            for column, value in zip(INPUT_COLS, raw_inputs)
        }
        named_refs = {
            column: np.squeeze(value)
            for column, value in zip(RESULT_COLS, raw_refs)
        }
        yield (named_inputs, named_refs)


def test_ERB_space_known_values():
    """ Yield one ``ERBSpaceTester`` per entry of the reference data """
    for given, reference in load_reference_data():
        call_args = (given['f_low'], given['f_high'], given['num_f'])
        wanted = (reference['cfs'],)
        yield ERBSpaceTester(call_args, wanted)


class ERBSpaceTester:
    """
    Callable test case comparing ``gammatone.filters.erb_space`` against a
    reference result.

    ``args`` are the positional arguments for ``erb_space``; ``expected`` is
    a sequence whose first item is the reference output.
    """

    def __init__(self, args, expected):
        self.args = args
        self.expected = expected[0]

        # Label shown by the test runner for this generated case
        label = "ERB space for {:.1f} {:.1f} {:d}"
        self.description = label.format(
            float(self.args[0]),
            float(self.args[1]),
            int(self.args[2]),
        )

    def __call__(self):
        actual = gammatone.filters.erb_space(*self.args)
        matches = np.allclose(actual, self.expected, rtol=1e-6, atol=1e-10)
        assert matches
def load_reference_data():
    """
    Yield ``(inputs, mocks, refs)`` dict triples read from the reference data.

    The file named by ``REF_DATA_FILENAME`` is loaded with
    ``scipy.io.loadmat`` (``squeeze_me=False``). The entries under
    ``INPUT_KEY``, ``MOCK_KEY`` and ``RESULT_KEY`` are walked in step and
    keyed by ``INPUT_COLS``, ``MOCK_COLS`` and ``RESULT_COLS``. Values are
    handed over exactly as loaded.
    """
    with resource_stream(__name__, REF_DATA_FILENAME) as mat_file:
        contents = scipy.io.loadmat(mat_file, squeeze_me=False)

    rows = zip(contents[INPUT_KEY], contents[MOCK_KEY], contents[RESULT_KEY])

    for raw_inputs, raw_mocks, raw_refs in rows:
        yield (
            dict(zip(INPUT_COLS, raw_inputs)),
            dict(zip(MOCK_COLS, raw_mocks)),
            dict(zip(RESULT_COLS, raw_refs)),
        )


def test_fft_specgram_window():
    """ Yield one window test per entry of the reference data """
    for given, _mocked, reference in load_reference_data():
        sizes = (reference['nfft'], reference['nwin'])
        windows = (reference['window'],)
        yield FFTGtgramWindowTester(given['name'], sizes, windows)


class FFTGtgramWindowTester:
    """
    Callable test case comparing ``gammatone.fftweight.specgram_window``
    against a reference window.

    ``args`` holds ``(nfft, nwin)`` and ``expected`` holds the reference
    window as its first item; each is squeezed on construction. ``name`` is
    accepted but not used.
    """

    def __init__(self, name, args, expected):
        self.nfft = args[0].squeeze()
        self.nwin = args[1].squeeze()
        self.expected = expected[0].squeeze()

        label = "FFT gammatonegram window for nfft = {:f}, nwin = {:f}"
        self.description = label.format(float(self.nfft), float(self.nwin))

    def __call__(self):
        actual = gammatone.fftweight.specgram_window(self.nfft, self.nwin)

        # Largest absolute deviation, reported if the comparison fails
        deviation = np.abs(actual - self.expected)
        diagnostic = "Maximum difference: {:6e}".format(np.max(deviation))

        assert np.allclose(actual, self.expected, rtol=1e-6, atol=1e-12), diagnostic


def test_fft_gtgram():
    """ Yield one FFT gammatonegram test per entry of the reference data """
    for given, mocked, reference in load_reference_data():
        call_args = (
            given['fs'],
            given['twin'],
            given['thop'],
            given['channels'],
            given['fmin']
        )

        yield FFTGammatonegramTester(
            given['name'][0],
            call_args,
            given['wave'],
            mocked['wts'],
            reference['window'],
            reference['res']
        )


class FFTGammatonegramTester:
    """
    Callable test case for ``gammatone.fftweight.fft_gtgram``.

    ``fft_weights`` and ``specgram_window`` are patched to return the stored
    weights and window, so only the final calculation is checked against
    ``expected``.
    """

    def __init__(self, name, args, sig, fft_weights, window, expected):
        self.signal = np.asarray(sig).squeeze()
        self.expected = np.asarray(expected).squeeze()
        self.fft_weights = np.asarray(fft_weights)
        self.args = args
        self.window = window.squeeze()
        self.description = "FFT gammatonegram for {:s}".format(name)

    def __call__(self):
        # Note that the second return value from fft_weights isn't actually used
        stubbed_weights = (self.fft_weights, None)

        with patch('gammatone.fftweight.fft_weights',
                   return_value=stubbed_weights):
            with patch('gammatone.fftweight.specgram_window',
                       return_value=self.window):
                actual = gammatone.fftweight.fft_gtgram(self.signal, *self.args)

        # Largest absolute deviation, reported if the comparison fails
        deviation = np.abs(actual - self.expected)
        diagnostic = "Maximum difference: {:6e}".format(np.max(deviation))

        assert np.allclose(actual, self.expected, rtol=1e-6, atol=1e-12), diagnostic
def load_reference_data():
    """
    Yield ``(inputs, refs)`` dict pairs read from the reference data file.

    The file named by ``REF_DATA_FILENAME`` is loaded with
    ``scipy.io.loadmat`` and the entries stored under ``INPUT_KEY`` and
    ``RESULT_KEY`` are walked in step. Every value is passed through
    ``np.squeeze`` and keyed by the matching name from ``INPUT_COLS`` /
    ``RESULT_COLS``.
    """
    with resource_stream(__name__, REF_DATA_FILENAME) as mat_file:
        contents = scipy.io.loadmat(mat_file, squeeze_me=False)

    for raw_inputs, raw_refs in zip(contents[INPUT_KEY], contents[RESULT_KEY]):
        named_inputs = {
            column: np.squeeze(value)
            for column, value in zip(INPUT_COLS, raw_inputs)
        }
        named_refs = {
            column: np.squeeze(value)
            for column, value in zip(RESULT_COLS, raw_refs)
        }
        yield (named_inputs, named_refs)


def fft_weights_funcs(args, expected):
    """
    Build the two unit tests for one FFT-to-gammatonegram weights case.

    ``args`` are the positional arguments for
    ``gammatone.fftweight.fft_weights``; ``expected`` is
    ``(expected_weights, expected_gains)``. The function under test is called
    once, here, and both returned closures check that single result.

    Returns two functions: test_gains, test_weights.
    """
    call_args = list(args)
    wanted_weights = expected[0]
    wanted_gains = expected[1]

    # Convert nfft, nfilts, maxlen to ints
    for position in (0, 2, 6):
        call_args[position] = int(call_args[position])

    weights, gains = gammatone.fftweight.fft_weights(*call_args)

    # Same label for both tests apart from the leading word
    label = "FFT weights {:s} for nfft = {:d}, fs = {:d}, nfilts = {:d}"
    nfft, fs, nfilts = int(call_args[0]), int(call_args[1]), int(call_args[2])
    weights_label = label.format("weights", nfft, fs, nfilts)
    gains_label = label.format("gains", nfft, fs, nfilts)

    def test_gains():
        assert gains.shape == wanted_gains.shape
        assert np.allclose(gains, wanted_gains, rtol=1e-6, atol=1e-12)

    def test_weights():
        assert weights.shape == wanted_weights.shape
        assert np.allclose(weights, wanted_weights, rtol=1e-6, atol=1e-12)

    test_gains.description = gains_label
    test_weights.description = weights_label

    return test_gains, test_weights


def test_fft_weights():
    """ Yield a gains test and a weights test per reference data entry """
    for given, reference in load_reference_data():
        call_args = tuple(given[column] for column in INPUT_COLS)
        wanted = (reference['weights'], reference['gain'])

        for generated in fft_weights_funcs(call_args, wanted):
            yield generated
def load_reference_data():
    """
    Yield ``(inputs, refs)`` dict pairs read from the reference data file.

    The file named by ``REF_DATA_FILENAME`` is loaded with
    ``scipy.io.loadmat`` and the entries stored under ``INPUT_KEY`` and
    ``RESULT_KEY`` are walked in step. Every value is passed through
    ``np.squeeze`` and keyed by the matching name from ``INPUT_COLS`` /
    ``RESULT_COLS``.
    """
    with resource_stream(__name__, REF_DATA_FILENAME) as mat_file:
        contents = scipy.io.loadmat(mat_file, squeeze_me=False)

    for raw_inputs, raw_refs in zip(contents[INPUT_KEY], contents[RESULT_KEY]):
        named_inputs = {
            column: np.squeeze(value)
            for column, value in zip(INPUT_COLS, raw_inputs)
        }
        named_refs = {
            column: np.squeeze(value)
            for column, value in zip(RESULT_COLS, raw_refs)
        }
        yield (named_inputs, named_refs)


def test_ERB_filterbank_known_values():
    """ Yield one ``ERBFilterBankTester`` per entry of the reference data """
    for given, reference in load_reference_data():
        call_args = (given['wave'], given['fcoefs'])
        wanted = (reference['filterbank'],)
        yield ERBFilterBankTester(call_args, wanted)


class ERBFilterBankTester:
    """
    Callable test case comparing ``gammatone.filters.erb_filterbank`` against
    a reference result.

    ``args`` holds ``(signal, fcoefs)``; ``expected`` is a sequence whose
    first item is the reference output.
    """

    def __init__(self, args, expected):
        self.signal = args[0]
        self.fcoefs = args[1]
        self.expected = expected[0]

        # The label quotes the first two values of the first fcoefs row
        leading_row = self.fcoefs[0]
        label = "Gammatone filterbank result for {:.1f} ... {:.1f}"
        self.description = label.format(leading_row[0], leading_row[1])

    def __call__(self):
        actual = gammatone.filters.erb_filterbank(self.signal, self.fcoefs)
        matches = np.allclose(actual, self.expected, rtol=1e-5, atol=1e-12)
        assert matches
def load_reference_data():
    """
    Yield ``(inputs, mocks, refs)`` dict triples read from the reference data.

    The file named by ``REF_DATA_FILENAME`` is loaded with
    ``scipy.io.loadmat`` (``squeeze_me=True``). The entries under
    ``INPUT_KEY``, ``MOCK_KEY`` and ``RESULT_KEY`` are walked in step and
    keyed by ``INPUT_COLS``, ``MOCK_COLS`` and ``RESULT_COLS``.
    """
    # Load test data
    with resource_stream(__name__, REF_DATA_FILENAME) as test_data:
        data = scipy.io.loadmat(test_data, squeeze_me=True)

    zipped_data = zip(data[INPUT_KEY], data[MOCK_KEY], data[RESULT_KEY])

    for inputs, mocks, refs in zipped_data:
        input_dict = dict(zip(INPUT_COLS, inputs))
        mock_dict = dict(zip(MOCK_COLS, mocks))
        ref_dict = dict(zip(RESULT_COLS, refs))
        yield (input_dict, mock_dict, ref_dict)


def test_nstrides():
    """ Test gammatonegram stride calculations """
    for inputs, mocks, refs in load_reference_data():
        args = (
            inputs['fs'],
            inputs['twin'],
            inputs['thop'],
            mocks['erb_fb_cols']
        )

        expected = (
            refs['nwin'],
            refs['hopsamps'],
            refs['ncols']
        )

        yield GTGramStrideTester(inputs['name'], args, expected)


class GTGramStrideTester:
    """
    Testing class for gammatonegram stride calculation

    ``inputs`` are the positional arguments for
    ``gammatone.gtgram.gtgram_strides``; ``expected`` is the value its result
    must equal.
    """

    def __init__(self, name, inputs, expected):
        self.inputs = inputs
        self.expected = expected
        self.description = "Gammatonegram strides for {:s}".format(name)

    def _check(self, results):
        """ Assert that ``results`` equals the expected strides """
        diagnostic = (
            "result: {:s}, expected: {:s}".format(
                str(results),
                str(self.expected)
            )
        )

        # These are integer values, so use direct equality. The diagnostic is
        # attached so that a failure reports both values.
        assert results == self.expected, diagnostic

    def __call__(self):
        results = gammatone.gtgram.gtgram_strides(*self.inputs)
        self._check(results)


# TODO: possibly mock out gtgram_strides

def test_gtgram():
    """ Yield one gammatonegram test per entry of the reference data """
    for inputs, mocks, refs in load_reference_data():
        args = (
            inputs['fs'],
            inputs['twin'],
            inputs['thop'],
            inputs['channels'],
            inputs['fmin']
        )

        yield GammatonegramTester(
            inputs['name'],
            args,
            inputs['wave'],
            mocks['erb_fb'],
            refs['gtgram']
        )


class GammatonegramTester:
    """
    Testing class for gammatonegram calculation

    ``gammatone.gtgram.erb_filterbank`` is patched to return ``erb_fb_out``,
    so only the calculation performed on the filterbank output is checked
    against ``expected``.
    """

    def __init__(self, name, args, sig, erb_fb_out, expected):
        self.signal = np.asarray(sig)
        self.expected = np.asarray(expected)
        self.erb_fb_out = np.asarray(erb_fb_out)
        self.args = args

        self.description = "Gammatonegram for {:s}".format(name)

    def __call__(self):
        with patch(
                'gammatone.gtgram.erb_filterbank',
                return_value=self.erb_fb_out):
            result = gammatone.gtgram.gtgram(self.signal, *self.args)

        # Largest absolute deviation, reported if the comparison fails
        max_diff = np.max(np.abs(result - self.expected))
        diagnostic = "Maximum difference: {:6e}".format(max_diff)

        assert np.allclose(result, self.expected, rtol=1e-6, atol=1e-12), diagnostic
def load_reference_data():
    """
    Yield ``(inputs, mocks, refs)`` dict triples read from the reference data.

    The file named by ``REF_DATA_FILENAME`` is loaded with
    ``scipy.io.loadmat`` (``squeeze_me=False``). The entries under
    ``INPUT_KEY``, ``MOCK_KEY`` and ``RESULT_KEY`` are walked in step and
    keyed by ``INPUT_COLS``, ``MOCK_COLS`` and ``RESULT_COLS``. Values are
    handed over exactly as loaded.
    """
    with resource_stream(__name__, REF_DATA_FILENAME) as mat_file:
        contents = scipy.io.loadmat(mat_file, squeeze_me=False)

    rows = zip(contents[INPUT_KEY], contents[MOCK_KEY], contents[RESULT_KEY])

    for raw_inputs, raw_mocks, raw_refs in rows:
        yield (
            dict(zip(INPUT_COLS, raw_inputs)),
            dict(zip(MOCK_COLS, raw_mocks)),
            dict(zip(RESULT_COLS, raw_refs)),
        )


def test_specgram():
    """ Yield one ``SpecgramTester`` per entry of the reference data """
    for given, mocked, reference in load_reference_data():
        call_args = (
            given['nfft'],
            given['fs'],
            given['nwin'],
            given['nhop'],
        )

        yield SpecgramTester(
            given['name'][0],
            call_args,
            given['wave'],
            mocked['window'],
            reference['res']
        )


class SpecgramTester:
    """
    Callable test case for the specgram replacement,
    ``gammatone.fftweight.specgram``.

    ``specgram_window`` is patched to return the stored window. The signal,
    window and expected result are squeezed on construction, and every item
    of ``args`` is squeezed and converted to ``int``.
    """

    def __init__(self, name, args, sig, window, expected):
        self.signal = np.asarray(sig).squeeze()
        self.expected = np.asarray(expected).squeeze()
        self.args = [int(value.squeeze()) for value in args]
        self.window = window.squeeze()
        self.description = "Specgram for {:s}".format(name)

    def __call__(self):
        with patch('gammatone.fftweight.specgram_window',
                   return_value=self.window):
            actual = gammatone.fftweight.specgram(self.signal, *self.args)

        # Largest absolute deviation, reported if the comparison fails
        deviation = np.abs(actual - self.expected)
        diagnostic = "Maximum difference: {:6e}".format(np.max(deviation))

        assert np.allclose(actual, self.expected, rtol=1e-6, atol=1e-12), diagnostic