# HG changeset patch # User dicklyon@google.com # Date 1368071324 0 # Node ID ec3a1c74ec5455d1bd3534cf2a47a4ce9fe08108 # Parent 087f3b3c36d3f59525203e055040589adcc86897 Add files for making log-lag SAI from CARFAC's NAP output. The file SAI_RunLayered.m dumps frames to PNG files. The hacking script calls ffmpeg to assemble them with the soundtrack into a movie. diff -r 087f3b3c36d3 -r ec3a1c74ec54 matlab/bmm/carfac/CARFAC_SAI_hacking.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/matlab/bmm/carfac/CARFAC_SAI_hacking.m Thu May 09 03:48:44 2013 +0000 @@ -0,0 +1,72 @@ +% Copyright 2013, Google, Inc. +% Author: Richard F. Lyon +% +% This Matlab file is part of an implementation of Lyon's cochlear model: +% "Cascade of Asymmetric Resonators with Fast-Acting Compression" +% to supplement Lyon's upcoming book "Human and Machine Hearing" +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. + +%% Test/demo hacking for CARFAC_SAI Matlab stuff: + +clear variables + +system('mkdir frames'); + +%% + +dB_list = -40; % -60:20:0 + +wav_fn = 'plan.wav'; + +if ~exist(['./', wav_fn], 'file') + error('wav file not found') +end + +wav_fn +[file_signal, fs] = wavread(wav_fn); + +if fs == 44100 + file_signal = (file_signal(1:2:end-1, :) + file_signal(2:2:end, :)) / 2; + fs = fs / 2; +end + +if fs ~= 22050 + error('unexpected sample rate') +end + +file_signal = file_signal(:, 1); % mono +file_signal = [file_signal; zeros(fs, 1)]; % pad with a second of silence + + +% make a long test signal by repeating at different levels: +test_signal = []; +for dB = dB_list + test_signal = [test_signal; file_signal * 10^(dB/20)]; +end + +%% +CF_struct = CARFAC_Design(1); % default design + +CF_struct = CARFAC_Init(CF_struct); + +[frame_rate, num_frames] = SAI_RunLayered(CF_struct, test_signal); + +%% +png_name_pattern = 'frames/frame%05d.png'; +MakeMovieFromPngsAndWav(round(frame_rate), png_name_pattern, ... + wav_fn, ['CARFAC_SAI_movie_', wav_fn(1:end-4), '.mpg']) + +%% +system('rm -r frames'); + diff -r 087f3b3c36d3 -r ec3a1c74ec54 matlab/bmm/carfac/SAI_BlendFrameIntoComposite.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/matlab/bmm/carfac/SAI_BlendFrameIntoComposite.m Thu May 09 03:48:44 2013 +0000 @@ -0,0 +1,34 @@ +% Copyright 2013, Google, Inc. +% Author: Richard F. Lyon +% +% This Matlab file is part of an implementation of Lyon's cochlear model: +% "Cascade of Asymmetric Resonators with Fast-Acting Compression" +% to supplement Lyon's upcoming book "Human and Machine Hearing" +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. + +function composite_frame = SAI_BlendFrameIntoComposite(new_frame, ... + layer_struct, composite_frame) + +alpha = layer_struct.alpha; +lag_curve = layer_struct.lag_curve; +target_columns = layer_struct.target_indices; + +frame_width = size(new_frame, 2); + +% Lags are measured from 0 at the right. +stretched_frame = interp1(new_frame', frame_width - lag_curve)'; +alpha = repmat(alpha, size(new_frame, 1), 1); +composite_frame(:, target_columns) = ... + (1 - alpha) .* composite_frame(:, target_columns) + ... + alpha .* stretched_frame; diff -r 087f3b3c36d3 -r ec3a1c74ec54 matlab/bmm/carfac/SAI_DesignLayers.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/matlab/bmm/carfac/SAI_DesignLayers.m Thu May 09 03:48:44 2013 +0000 @@ -0,0 +1,173 @@ +% Copyright 2013, Google, Inc. +% Author: Richard F. Lyon +% +% This Matlab file is part of an implementation of Lyon's cochlear model: +% "Cascade of Asymmetric Resonators with Fast-Acting Compression" +% to supplement Lyon's upcoming book "Human and Machine Hearing" +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. + +function [layer_array, total_width] = SAI_DesignLayers( ... + n_layers, width_per_layer) +% function [layer_array, total_width] = SAI_DesignLayers( ... +% n_layers, width_per_layer) +% +% The layer_array is a struct array containing an entry for each layer +% in a layer of power-of-2 decimated pieces of SAI that get composited +% into a log-lag SAI. +% Each struct has the following fields: +% .width - number of pixels occupied in the final composite SAI, +% not counting the overlap into pixels counted for other layers. +% .target_indices - column indices in the final composite SAI, +% counting the overlap region(s). +% .lag_curve - for each point in the final composite SAI, the float index +% in the layer's buffer to interp from. +% .alpha - the blending coefficent, mostly 1, tapering toward 0 in the overlap +% region(s). +% Layer 1 has no overlap to it right, and layer n_layers has none to its +% left, but sizes of the target_indices, lag_curve, and alpha vectors are +% otherwise width + left_overlap + right_overlap. The total width of the +% final composite SAI is the sum of the widths. +% Other fields could be added to hold state, such as history buffers for +% each layer, or those could go in state struct array... + + +% Elevate these to a param struct? +if nargin < 1 + n_layers = 11 +end +if nargin < 2 + width_per_layer = 32; % resolution "half life" in space +end +future_lags = 3 * width_per_layer; +width_first_layer = future_lags + 2 * width_per_layer; +width_extra_last_layer = 2 * width_per_layer; +left_overlap = 15; +right_overlap = 15; +first_window_width = 400; % or maybe use seglen? or 0.020 * fs? +min_window_width = 2*width_per_layer; % or somewhere on that order +window_exponent = 1.4; +alpha_max = 0.5; + +% Start with NAP_samples_per_SAI_sample, declining to 1 from here: +max_samples_per = 2^(n_layers - 1); +% Construct the overall lag-warping function: +NAP_samples_per_SAI_sample = [ ... + max_samples_per * ones(1, width_extra_last_layer), ... + max_samples_per * ... + 2 .^ (-(1:(width_per_layer * (n_layers - 1))) / width_per_layer), ... + ones(1, width_first_layer)]; + +% Each layer needs a lag_warp for a portion of that, divided by +% 2^(layer-1), where the portion includes some overlap into its neighbors +% with higher layer numbers on left, lower on right. + +% Layer 1, rightmost, representing recent, current and near-future (negative +% lag) relative to trigger time, has 1 NAP sample per SAI sample. Other +% layers map more than one NAP sample into 1 SAI sample. Layer 2 is +% computed as 2X decimated, 2 NAP samples per SAI sample, but then gets +% interpolated to between 1 and 2 (and outside that range in the overlap +% regions) to connect up smoothly. Each layer is another 2X decimated. +% The last layer limits out at 1 (representing 2^(n_layers) SAI samples) +% at the width_extra_last_layer SAI samples that extend to the far past. + +layer_array = []; % to hold a struct array +for layer = 1:n_layers + layer_array(layer).width = width_per_layer; + layer_array(layer).left_overlap = left_overlap; + layer_array(layer).right_overlap = right_overlap; + layer_array(layer).future_lags = 0; + % Layer decimation factors: 1 1 1 1 2 2 2 4 4 4 8 ... + layer_array(layer).update_interval = max(1, 2 ^ floor((layer - 2) / 3)); +end +% Patch up the exceptions. +layer_array(1).width = width_first_layer; +layer_array(end).width = layer_array(end).width + width_extra_last_layer; +layer_array(1).right_overlap = 0; +layer_array(end).left_overlap = 0; +layer_array(1).future_lags = future_lags; + +% For each layer, working backwards, from left, find the locations they +% they render into in the final SAI. +offset = 0; +for layer = n_layers:-1:1 + width = layer_array(layer).width; + left = layer_array(layer).left_overlap; + right = layer_array(layer).right_overlap; + + % Size of the vectors needed. + n_final_lags = left + width + right; + layer_array(layer).n_final_lags = n_final_lags; + + % Integer indices into the final composite SAI for this layer. + target_indices = ((1 - left):(width + right)) + offset; + layer_array(layer).target_indices = target_indices; + + % Make a blending coefficient alpha, ramped in the overlap zone. + alpha = ones(1, n_final_lags); + alpha(1:left) = alpha(1:left) .* (1:left)/(left + 1); + alpha(end + 1 - (1:right)) = ... + alpha(end + 1 - (1:right)) .* (1:right)/(right + 1); + layer_array(layer).alpha = alpha * alpha_max; + + offset = offset + width; % total width from left through this layer. +end +total_width = offset; % Return size of SAI this will make. + +% for each layer, fill in its lag-resampling function for interp1: +for layer = 1:n_layers + width = layer_array(layer).width; + left = layer_array(layer).left_overlap; + right = layer_array(layer).right_overlap; + + % Still need to adjust this to make lags match at edges: + target_indices = layer_array(layer).target_indices; + samples_per = NAP_samples_per_SAI_sample(target_indices); + % Accumulate lag backwards from the zero-lag point, convert to units of + % samples in the current layer. + lag_curve = (cumsum(samples_per(end:-1:1))) / 2^(layer-1); + lag_curve = lag_curve(end:-1:1); % Turn it back to corrent order. + % Now adjust it to match the zero-lag point or a lag-point from + % previous layer, and reverse it back into place. + if layer == 1 + lag_adjust = lag_curve(end) - 0; + else + % Align right edge to previous layer's left edge, adjusting for 2X + % scaling factor difference. + lag_adjust = lag_curve(end - right) - last_left_lag / 2; + end + lag_curve = lag_curve - lag_adjust; + % lag_curve is now offsets from right end of layer's frame. + layer_array(layer).lag_curve = lag_curve; + % Specify number of point to generate in pre-warp frame. + layer_array(layer).frame_width = ceil(1 + lag_curve(1)); + if layer < n_layers % to avoid the left = 0 unused end case. + % A point to align next layer to. + last_left_lag = lag_curve(left) - layer_array(layer).future_lags; + end + + % Specify a good window width (in history buffer, for picking triggers) + % in samples for this layer, exponentially approaching minimum. + layer_array(layer).window_width = round(min_window_width + ... + first_window_width / window_exponent^(layer - 1)); + + % Say about how long the history buffer needs to be to shift any trigger + % location in the range of the window to a fixed location. Assume + % using two window placements overlapped 50%. + n_triggers = 2; + layer_array(layer).buffer_width = layer_array(layer).frame_width + ... + ceil((1 + (n_triggers - 1)/2) * layer_array(layer).window_width); +end + +return + diff -r 087f3b3c36d3 -r ec3a1c74ec54 matlab/bmm/carfac/SAI_RunLayered.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/matlab/bmm/carfac/SAI_RunLayered.m Thu May 09 03:48:44 2013 +0000 @@ -0,0 +1,158 @@ +% Copyright 2013, Google, Inc. +% Author: Richard F. Lyon +% +% This Matlab file is part of an implementation of Lyon's cochlear model: +% "Cascade of Asymmetric Resonators with Fast-Acting Compression" +% to supplement Lyon's upcoming book "Human and Machine Hearing" +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. + +function [frame_rate, num_frames] = SAI_RunLayered(CF, input_waves) +% function [CF, SAI_movie] = CARFAC_Run_Layered_SAI(CF, input_waves) +% This function runs the CARFAC and generates an SAI movie, dumped as PNG +% files for now. + +% Layer 1 is not decimated from the 22050 rate; subsequent layers have +% smoothing and 2X decimation each. All layers get composited togehter +% into movie frames. + +n_ch = CF.n_ch; +[n_samp, n_ears] = size(input_waves); +if n_ears ~= CF.n_ears + error('bad number of input_waves channels passed to CARFAC_Run') +end +fs = CF.fs; + +% Design the composite log-lag SAI using these parameters and defaults. +n_layers = 10; +width_per_layer = 40; +[layer_array, total_width] = SAI_DesignLayers(n_layers, width_per_layer); + +% Make the composite SAI image array. +composite_frame = zeros(n_ch, total_width); + +seglen = round(fs * 0.020); % Pick about 20 ms segments +frame_rate = fs / seglen; +n_segs = ceil(n_samp / seglen); + +% Make the history buffers in the layers_array: +for layer = 1:n_layers + layer_array(layer).nap_buffer = zeros(layer_array(layer).buffer_width, n_ch); + layer_array(layer).nap_fraction = 0; % leftover fraction to shift in. +end + +for seg_num = 1:n_segs + % k_range is the range of input sample indices for this segment + if seg_num == n_segs + % The last segment may be short of seglen, but do it anyway: + k_range = (seglen*(seg_num - 1) + 1):n_samp; + else + k_range = seglen*(seg_num - 1) + (1:seglen); + end + % Process a segment to get a slice of decim_naps, and plot AGC state: + [seg_naps, CF] = CARFAC_Run_Segment(CF, input_waves(k_range, :)); + + seg_naps = max(0, seg_naps); % Rectify + + if seg_num == n_segs % pad out the last result + seg_naps = [seg_naps; zeros(seglen - size(seg_naps,1), size(seg_naps, 2))]; + end + + % Shift new data into some or all of the layer buffers: + layer_array = SAI_UpdateBuffers(layer_array, seg_naps, seg_num); + + + for layer = n_layers:-1:1 % blend from coarse to fine + update_interval = layer_array(layer).update_interval; + if 0 == mod(seg_num, update_interval) + nap_buffer = real(layer_array(layer).nap_buffer); + n_buffer_times = size(nap_buffer, 1); + width = layer_array(layer).frame_width; % To render linear SAI to. + new_frame = zeros(n_ch, width); + + % Make the window to use for all the channels at this layer. + layer_factor = 1.5; + window_size = layer_array(layer).window_width; + after_samples = layer_array(layer).future_lags; + + window_range = (1:window_size) + ... + (n_buffer_times - window_size) - after_samples; + window = sin((1:window_size)' * pi / window_size); + % This should not go negative! + offset_range = (1:width) + ... + (n_buffer_times - width - window_size); + % CHECK + if any(offset_range < 0) + error; + end + + % smooth across channels; more in later layers + smoothed_buffer = smooth1d(nap_buffer', 0.25*(layer - 2))'; + + % For each buffer column (channel), pick a trigger and align into SAI_frame + for ch = 1:n_ch + smooth_wave = smoothed_buffer(:, ch); % for the trigger + + [peak_val, trigger_time] = max(smooth_wave(window_range) .* window); + nap_wave = nap_buffer(:, ch); % for the waveform + if peak_val <= 0 % just use window center instead + [peak_val, trigger_time] = max(window); + end + if layer == n_layers % mark the trigger points to display as imaginary. + layer_array(layer).nap_buffer(trigger_time + window_range(1) - 1, ch) = ... + layer_array(layer).nap_buffer(trigger_time + window_range(1) - 1, ch) + 1i; + end + new_frame(ch, :) = nap_wave(trigger_time + offset_range)'; + end + composite_frame = SAI_BlendFrameIntoComposite(new_frame, ... + layer_array(layer), composite_frame); + end + end + + + lag_marginal = mean(composite_frame, 1); % means max out near 1 or 2 + frame_bottom = zeros(size(composite_frame)); % will end up being 1/3 + n_bottom_rows = size(frame_bottom, 1); + for height = 1:n_bottom_rows + big_ones = lag_marginal > 1*height/n_bottom_rows; + frame_bottom(n_bottom_rows - height + 1, big_ones) = 2; % 2 for black + end + + if 0 == mod(seg_num, update_interval) || seg_num == 1 + coc_gram = layer_array(end).nap_buffer'; + [n_ch, n_width] = size(composite_frame); + coc_gram = [coc_gram, zeros(n_ch, n_width - size(coc_gram, 2))]; + trigger_gram = 2 * (imag(coc_gram) ~= 0); + coc_gram = real(coc_gram); + end + + display_frame = [coc_gram; trigger_gram; ... + composite_frame(floor(1:0.5:end), :); frame_bottom]; + + cmap = jet; + cmap = 1 - gray; % jet + figure(10) + image(32*display_frame); + colormap(cmap); + + drawnow + imwrite(32*display_frame, cmap, sprintf('frames/frame%05d.png', seg_num)); +end + +num_frames = seg_num; + +return + + + + diff -r 087f3b3c36d3 -r ec3a1c74ec54 matlab/bmm/carfac/SAI_UpdateBuffers.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/matlab/bmm/carfac/SAI_UpdateBuffers.m Thu May 09 03:48:44 2013 +0000 @@ -0,0 +1,83 @@ +% Copyright 2013, Google, Inc. +% Author: Richard F. Lyon +% +% This Matlab file is part of an implementation of Lyon's cochlear model: +% "Cascade of Asymmetric Resonators with Fast-Acting Compression" +% to supplement Lyon's upcoming book "Human and Machine Hearing" +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. + +function layer_array = SAI_UpdateBuffers(layer_array, seg_naps, seg_num) +% function layer_array = SAI_UpdateBuffers(layer_array, seg_naps, seg_num) +% +% Input/Output: layer_array contains all the coefficients and state for +% the layer of different time scales of SAI; +% we might want to separate these as in CARFAC. +% +% seg_naps is a new segmeent of NAP from the CAR-FAC to shift into the +% first layer. Each subsequent layer gets input off the input end of the +% previous layer, with smoothing and decimation. +% +% The segment index seg_num is used to control sub-sampled updates of +% the larger-scale layers. + +n_layers = length(layer_array); +[seg_len, n_nap_ch] = size(seg_naps); + +% Array of what to shift in to first or next layer. +new_chunk = seg_naps; + +gain = 1.1; % gain from layer to layer; could be layer dependent. + +%% +% Decimate using a 2-3-4-filter and partial differencing emphasize onsets: +kernel = filter([1 1]/2, 1, filter([1 1 1]/3, 1, [1 1 1 1 0 0 0 0]/4)); +kernel = kernel + 2*diff([0, kernel]); +% figure(1) +% plot(kernel) + +%% +for layer = 1:n_layers + [n_lags, n_ch] = size(layer_array(layer).nap_buffer); + if (n_nap_ch ~= n_ch) + error('Wrong number of channels in nap_buffer.'); + end + + interval = layer_array(layer).update_interval; + if (0 == mod(seg_num, interval)) + % Account for 2X decimation and infrequent updates; find number of time + % points to shift in. Tolerate slip of a fraction of a sample. + n_shift = seg_len * interval / (2.0^(layer - 1)); + if layer > 1 + % Add the leftover fraction before floor. + n_shift = n_shift + layer_array(layer).nap_fraction; + layer_array(layer).nap_fraction = n_shift - floor(n_shift); + n_shift = floor(n_shift); + % Grab new stuff from new end (big time indices) of previous layer. + % Take twice as many times as we need, + 5, for decimation, and do + % 343 smoothing to get new points. + new_chunk = ... + layer_array(layer - 1).nap_buffer((end - 2*n_shift - 4):end, :); + new_chunk = filter(kernel, 1, new_chunk); + new_chunk = gain * new_chunk(7:2:end, :); + + end + % Put new stuff in at latest time indices. + layer_array(layer).nap_buffer = ... + [layer_array(layer).nap_buffer((1 + n_shift):end, :); ... + new_chunk]; % this should fit just right if we have n_shift new times. + end +end + +return +