changeset 604:ec3a1c74ec54

Add files for making log-lag SAI from CARFAC's NAP output. The file SAI_RunLayered.m dumps frames to PNG files. The hacking script calls ffmpeg to assemble them with the soundtrack into a movie.
author dicklyon@google.com
date Thu, 09 May 2013 03:48:44 +0000
parents 087f3b3c36d3
children 3875bb31cc10
files matlab/bmm/carfac/CARFAC_SAI_hacking.m matlab/bmm/carfac/SAI_BlendFrameIntoComposite.m matlab/bmm/carfac/SAI_DesignLayers.m matlab/bmm/carfac/SAI_RunLayered.m matlab/bmm/carfac/SAI_UpdateBuffers.m
diffstat 5 files changed, 520 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/matlab/bmm/carfac/CARFAC_SAI_hacking.m	Thu May 09 03:48:44 2013 +0000
@@ -0,0 +1,72 @@
+% Copyright 2013, Google, Inc.
+% Author: Richard F. Lyon
+%
+% This Matlab file is part of an implementation of Lyon's cochlear model:
+% "Cascade of Asymmetric Resonators with Fast-Acting Compression"
+% to supplement Lyon's upcoming book "Human and Machine Hearing"
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+%     http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+
+%% Test/demo hacking for CARFAC_SAI Matlab stuff:
+
+clear variables
+
+system('mkdir frames');
+
+%%
+
+dB_list = -40; %  -60:20:0
+
+wav_fn = 'plan.wav';
+
+if ~exist(['./', wav_fn], 'file')
+  error('wav file not found')
+end
+
+wav_fn
+[file_signal, fs] = wavread(wav_fn);
+
+if fs == 44100
+  file_signal = (file_signal(1:2:end-1, :) + file_signal(2:2:end, :)) / 2;
+  fs = fs / 2;
+end
+
+if fs ~= 22050
+  error('unexpected sample rate')
+end
+
+file_signal = file_signal(:, 1);  % mono
+file_signal = [file_signal; zeros(fs, 1)];  % pad with a second of silence
+
+
+% make a long test signal by repeating at different levels:
+test_signal = [];
+for dB =  dB_list
+  test_signal = [test_signal; file_signal * 10^(dB/20)];
+end
+
+%%
+CF_struct = CARFAC_Design(1);  % default design
+
+CF_struct = CARFAC_Init(CF_struct);
+
+[frame_rate, num_frames] = SAI_RunLayered(CF_struct, test_signal);
+
+%%
+png_name_pattern = 'frames/frame%05d.png';
+MakeMovieFromPngsAndWav(round(frame_rate), png_name_pattern, ...
+  wav_fn, ['CARFAC_SAI_movie_', wav_fn(1:end-4), '.mpg'])
+
+%%
+system('rm -r frames');
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/matlab/bmm/carfac/SAI_BlendFrameIntoComposite.m	Thu May 09 03:48:44 2013 +0000
@@ -0,0 +1,34 @@
+% Copyright 2013, Google, Inc.
+% Author: Richard F. Lyon
+%
+% This Matlab file is part of an implementation of Lyon's cochlear model:
+% "Cascade of Asymmetric Resonators with Fast-Acting Compression"
+% to supplement Lyon's upcoming book "Human and Machine Hearing"
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+%     http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+
+function composite_frame = SAI_BlendFrameIntoComposite(new_frame, ...
+  layer_struct, composite_frame)
+
+alpha = layer_struct.alpha;
+lag_curve = layer_struct.lag_curve;
+target_columns = layer_struct.target_indices;
+
+frame_width = size(new_frame, 2);
+
+% Lags are measured from 0 at the right.
+stretched_frame = interp1(new_frame', frame_width - lag_curve)';
+alpha = repmat(alpha, size(new_frame, 1), 1);
+composite_frame(:, target_columns) = ...
+  (1 - alpha) .* composite_frame(:, target_columns) + ...
+  alpha .* stretched_frame;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/matlab/bmm/carfac/SAI_DesignLayers.m	Thu May 09 03:48:44 2013 +0000
@@ -0,0 +1,173 @@
+% Copyright 2013, Google, Inc.
+% Author: Richard F. Lyon
+%
+% This Matlab file is part of an implementation of Lyon's cochlear model:
+% "Cascade of Asymmetric Resonators with Fast-Acting Compression"
+% to supplement Lyon's upcoming book "Human and Machine Hearing"
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+%     http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+
+function [layer_array, total_width] = SAI_DesignLayers( ...
+  n_layers, width_per_layer)
+% function [layer_array, total_width] = SAI_DesignLayers( ...
+%   n_layers, width_per_layer)
+%
+% The layer_array is a struct array containing an entry for each layer
+% in a layer of power-of-2 decimated pieces of SAI that get composited
+% into a log-lag SAI.
+% Each struct has the following fields:
+%  .width - number of pixels occupied in the final composite SAI,
+%     not counting the overlap into pixels counted for other layers.
+%  .target_indices - column indices in the final composite SAI,
+%     counting the overlap region(s).
+%  .lag_curve - for each point in the final composite SAI, the float index
+%     in the layer's buffer to interp from.
+%  .alpha - the blending coefficent, mostly 1, tapering toward 0 in the overlap
+%     region(s).
+% Layer 1 has no overlap to it right, and layer n_layers has none to its
+% left, but sizes of the target_indices, lag_curve, and alpha vectors are
+% otherwise width + left_overlap + right_overlap.  The total width of the
+% final composite SAI is the sum of the widths.
+% Other fields could be added to hold state, such as history buffers for
+% each layer, or those could go in state struct array...
+
+
+% Elevate these to a param struct?
+if nargin < 1
+  n_layers = 11
+end
+if nargin < 2
+  width_per_layer = 32;  % resolution "half life" in space
+end
+future_lags = 3 * width_per_layer;
+width_first_layer = future_lags + 2 * width_per_layer;
+width_extra_last_layer = 2 * width_per_layer;
+left_overlap = 15;
+right_overlap = 15;
+first_window_width = 400;  % or maybe use seglen?  or 0.020 * fs?
+min_window_width = 2*width_per_layer;  % or somewhere on that order
+window_exponent = 1.4;
+alpha_max = 0.5;
+
+% Start with NAP_samples_per_SAI_sample, declining to 1 from here:
+max_samples_per = 2^(n_layers - 1);
+% Construct the overall lag-warping function:
+NAP_samples_per_SAI_sample = [ ...
+  max_samples_per * ones(1, width_extra_last_layer), ...
+  max_samples_per * ...
+    2 .^ (-(1:(width_per_layer * (n_layers - 1))) / width_per_layer), ...
+  ones(1, width_first_layer)];
+
+% Each layer needs a lag_warp for a portion of that, divided by
+% 2^(layer-1), where the portion includes some overlap into its neighbors
+% with higher layer numbers on left, lower on right.
+
+% Layer 1, rightmost, representing recent, current and near-future (negative
+% lag) relative to trigger time, has 1 NAP sample per SAI sample.  Other
+% layers map more than one NAP sample into 1 SAI sample.  Layer 2 is
+% computed as 2X decimated, 2 NAP samples per SAI sample, but then gets 
+% interpolated to between 1 and 2 (and outside that range in the overlap
+% regions) to connect up smoothly.  Each layer is another 2X decimated.
+% The last layer limits out at 1 (representing 2^(n_layers) SAI samples)
+% at the width_extra_last_layer SAI samples that extend to the far past.
+
+layer_array = [];  % to hold a struct array
+for layer = 1:n_layers
+  layer_array(layer).width = width_per_layer;
+  layer_array(layer).left_overlap = left_overlap;
+  layer_array(layer).right_overlap = right_overlap;
+  layer_array(layer).future_lags = 0;
+  % Layer decimation factors:  1 1 1 1 2 2 2 4 4 4 8 ...
+  layer_array(layer).update_interval = max(1, 2 ^ floor((layer - 2) / 3));
+end
+% Patch up the exceptions.
+layer_array(1).width = width_first_layer;
+layer_array(end).width = layer_array(end).width + width_extra_last_layer;
+layer_array(1).right_overlap = 0;
+layer_array(end).left_overlap = 0;
+layer_array(1).future_lags = future_lags;
+
+% For each layer, working backwards, from left, find the locations they
+% they render into in the final SAI.
+offset = 0;
+for layer = n_layers:-1:1
+  width = layer_array(layer).width;
+  left = layer_array(layer).left_overlap;
+  right = layer_array(layer).right_overlap;
+  
+  % Size of the vectors needed.
+  n_final_lags = left + width + right;
+  layer_array(layer).n_final_lags = n_final_lags;
+  
+  % Integer indices into the final composite SAI for this layer.
+  target_indices = ((1 - left):(width + right)) + offset;
+  layer_array(layer).target_indices = target_indices;
+    
+  % Make a blending coefficient alpha, ramped in the overlap zone.
+  alpha = ones(1, n_final_lags);
+  alpha(1:left) = alpha(1:left) .* (1:left)/(left + 1);
+  alpha(end + 1 - (1:right)) = ...
+    alpha(end + 1 - (1:right)) .* (1:right)/(right + 1);
+  layer_array(layer).alpha = alpha * alpha_max;
+  
+  offset = offset + width;  % total width from left through this layer.
+end
+total_width = offset;  % Return size of SAI this will make.
+
+% for each layer, fill in its lag-resampling function for interp1:
+for layer = 1:n_layers
+  width = layer_array(layer).width;
+  left = layer_array(layer).left_overlap;
+  right = layer_array(layer).right_overlap;
+  
+  % Still need to adjust this to make lags match at edges:
+  target_indices = layer_array(layer).target_indices;
+  samples_per = NAP_samples_per_SAI_sample(target_indices);
+  % Accumulate lag backwards from the zero-lag point, convert to units of
+  % samples in the current layer.
+  lag_curve = (cumsum(samples_per(end:-1:1))) / 2^(layer-1);
+  lag_curve = lag_curve(end:-1:1);  % Turn it back to corrent order.
+  % Now adjust it to match the zero-lag point or a lag-point from
+  % previous layer, and reverse it back into place.
+  if layer == 1
+    lag_adjust = lag_curve(end) - 0;
+  else
+    % Align right edge to previous layer's left edge, adjusting for 2X
+    % scaling factor difference.
+    lag_adjust = lag_curve(end - right) - last_left_lag / 2;
+  end
+  lag_curve = lag_curve - lag_adjust;
+  % lag_curve is now offsets from right end of layer's frame.
+  layer_array(layer).lag_curve = lag_curve;
+  % Specify number of point to generate in pre-warp frame.
+  layer_array(layer).frame_width = ceil(1 + lag_curve(1));
+  if layer < n_layers  % to avoid the left = 0 unused end case.
+    % A point to align next layer to.
+    last_left_lag = lag_curve(left) - layer_array(layer).future_lags;  
+  end
+  
+  % Specify a good window width (in history buffer, for picking triggers) 
+  % in samples for this layer, exponentially approaching minimum.
+  layer_array(layer).window_width = round(min_window_width + ...
+    first_window_width / window_exponent^(layer - 1));
+  
+  % Say about how long the history buffer needs to be to shift any trigger
+  % location in the range of the window to a fixed location.  Assume
+  % using two window placements overlapped 50%.
+  n_triggers = 2;
+  layer_array(layer).buffer_width = layer_array(layer).frame_width + ...
+    ceil((1 + (n_triggers - 1)/2) * layer_array(layer).window_width);
+end
+
+return
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/matlab/bmm/carfac/SAI_RunLayered.m	Thu May 09 03:48:44 2013 +0000
@@ -0,0 +1,158 @@
+% Copyright 2013, Google, Inc.
+% Author: Richard F. Lyon
+%
+% This Matlab file is part of an implementation of Lyon's cochlear model:
+% "Cascade of Asymmetric Resonators with Fast-Acting Compression"
+% to supplement Lyon's upcoming book "Human and Machine Hearing"
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+%     http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+
+function [frame_rate, num_frames] = SAI_RunLayered(CF, input_waves)
+% function [CF, SAI_movie] = CARFAC_Run_Layered_SAI(CF, input_waves)
+% This function runs the CARFAC and generates an SAI movie, dumped as PNG
+% files for now.
+
+% Layer 1 is not decimated from the 22050 rate; subsequent layers have
+% smoothing and 2X decimation each.  All layers get composited togehter
+% into movie frames.
+
+n_ch = CF.n_ch;
+[n_samp, n_ears] = size(input_waves);
+if n_ears ~= CF.n_ears
+  error('bad number of input_waves channels passed to CARFAC_Run')
+end
+fs = CF.fs;
+
+% Design the composite log-lag SAI using these parameters and defaults.
+n_layers = 10;
+width_per_layer = 40;
+[layer_array, total_width] = SAI_DesignLayers(n_layers, width_per_layer);
+
+% Make the composite SAI image array.
+composite_frame = zeros(n_ch, total_width);
+
+seglen = round(fs * 0.020);  % Pick about 20 ms segments
+frame_rate = fs / seglen;
+n_segs = ceil(n_samp / seglen);
+
+% Make the history buffers in the layers_array:
+for layer = 1:n_layers
+  layer_array(layer).nap_buffer = zeros(layer_array(layer).buffer_width, n_ch);
+  layer_array(layer).nap_fraction = 0;  % leftover fraction to shift in.
+end
+
+for seg_num = 1:n_segs
+  % k_range is the range of input sample indices for this segment
+  if seg_num == n_segs
+    % The last segment may be short of seglen, but do it anyway:
+    k_range = (seglen*(seg_num - 1) + 1):n_samp;
+  else
+    k_range = seglen*(seg_num - 1) + (1:seglen);
+  end
+  % Process a segment to get a slice of decim_naps, and plot AGC state:
+  [seg_naps, CF] = CARFAC_Run_Segment(CF, input_waves(k_range, :));
+  
+  seg_naps = max(0, seg_naps);  % Rectify
+  
+  if seg_num == n_segs  % pad out the last result
+    seg_naps = [seg_naps; zeros(seglen - size(seg_naps,1), size(seg_naps, 2))];
+  end
+ 
+  % Shift new data into some or all of the layer buffers:
+  layer_array = SAI_UpdateBuffers(layer_array, seg_naps, seg_num);
+
+
+  for layer = n_layers:-1:1  % blend from coarse to fine
+    update_interval = layer_array(layer).update_interval;
+    if 0 == mod(seg_num, update_interval)
+      nap_buffer = real(layer_array(layer).nap_buffer);
+      n_buffer_times = size(nap_buffer, 1);
+      width = layer_array(layer).frame_width;  % To render linear SAI to.
+      new_frame = zeros(n_ch, width);
+      
+      % Make the window to use for all the channels at this layer.
+      layer_factor = 1.5;
+      window_size = layer_array(layer).window_width;
+      after_samples = layer_array(layer).future_lags;
+
+      window_range = (1:window_size) + ...
+        (n_buffer_times - window_size) - after_samples;
+      window = sin((1:window_size)' * pi / window_size);
+      % This should not go negative!
+      offset_range = (1:width) + ...
+        (n_buffer_times - width - window_size);
+      % CHECK
+      if any(offset_range < 0)
+        error;
+      end
+      
+      % smooth across channels; more in later layers
+      smoothed_buffer =  smooth1d(nap_buffer', 0.25*(layer - 2))';
+      
+      % For each buffer column (channel), pick a trigger and align into SAI_frame
+      for ch = 1:n_ch
+        smooth_wave = smoothed_buffer(:, ch);  % for the trigger
+        
+        [peak_val, trigger_time] = max(smooth_wave(window_range) .* window);
+        nap_wave = nap_buffer(:, ch);  % for the waveform
+        if peak_val <= 0  % just use window center instead
+          [peak_val, trigger_time] = max(window);
+        end
+        if layer == n_layers  % mark the trigger points to display as imaginary.
+          layer_array(layer).nap_buffer(trigger_time + window_range(1) - 1, ch) = ...
+            layer_array(layer).nap_buffer(trigger_time + window_range(1) - 1, ch) + 1i;
+        end
+        new_frame(ch, :) = nap_wave(trigger_time + offset_range)';
+      end
+      composite_frame = SAI_BlendFrameIntoComposite(new_frame, ...
+        layer_array(layer), composite_frame);
+    end
+  end
+  
+  
+  lag_marginal = mean(composite_frame, 1);  % means max out near 1 or 2
+  frame_bottom = zeros(size(composite_frame));  % will end up being 1/3
+  n_bottom_rows = size(frame_bottom, 1);
+  for height = 1:n_bottom_rows
+    big_ones = lag_marginal > 1*height/n_bottom_rows;
+    frame_bottom(n_bottom_rows - height + 1, big_ones) = 2;  % 2 for black
+  end
+    
+  if 0 == mod(seg_num, update_interval) || seg_num == 1
+    coc_gram = layer_array(end).nap_buffer';
+    [n_ch, n_width] = size(composite_frame);
+    coc_gram = [coc_gram, zeros(n_ch, n_width - size(coc_gram, 2))];
+    trigger_gram = 2 * (imag(coc_gram) ~= 0);
+    coc_gram = real(coc_gram);
+  end
+  
+  display_frame = [coc_gram; trigger_gram; ...
+    composite_frame(floor(1:0.5:end), :); frame_bottom];
+  
+  cmap = jet;
+  cmap = 1 - gray;  % jet
+  figure(10)
+  image(32*display_frame);
+  colormap(cmap);
+
+  drawnow
+  imwrite(32*display_frame, cmap, sprintf('frames/frame%05d.png', seg_num));
+end
+
+num_frames = seg_num;
+
+return
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/matlab/bmm/carfac/SAI_UpdateBuffers.m	Thu May 09 03:48:44 2013 +0000
@@ -0,0 +1,83 @@
+% Copyright 2013, Google, Inc.
+% Author: Richard F. Lyon
+%
+% This Matlab file is part of an implementation of Lyon's cochlear model:
+% "Cascade of Asymmetric Resonators with Fast-Acting Compression"
+% to supplement Lyon's upcoming book "Human and Machine Hearing"
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+%     http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+
+function layer_array = SAI_UpdateBuffers(layer_array, seg_naps, seg_num)
+% function layer_array = SAI_UpdateBuffers(layer_array, seg_naps, seg_num)
+%
+% Input/Output: layer_array contains all the coefficients and state for
+% the layer of different time scales of SAI;
+% we might want to separate these as in CARFAC.
+%
+% seg_naps is a new segmeent of NAP from the CAR-FAC to shift into the
+% first layer.  Each subsequent layer gets input off the input end of the
+% previous layer, with smoothing and decimation.
+%
+% The segment index seg_num is used to control sub-sampled updates of
+% the larger-scale layers.
+
+n_layers = length(layer_array);
+[seg_len, n_nap_ch] = size(seg_naps);
+
+% Array of what to shift in to first or next layer.
+new_chunk = seg_naps;
+
+gain = 1.1;  % gain from layer to layer; could be layer dependent.
+
+%% 
+% Decimate using a 2-3-4-filter and partial differencing emphasize onsets:
+kernel = filter([1 1]/2, 1, filter([1 1 1]/3, 1, [1 1 1 1 0 0 0 0]/4));
+kernel = kernel + 2*diff([0, kernel]);
+% figure(1)
+% plot(kernel)
+
+%% 
+for layer = 1:n_layers
+  [n_lags, n_ch] = size(layer_array(layer).nap_buffer);
+  if (n_nap_ch ~= n_ch)
+    error('Wrong number of channels in nap_buffer.');
+  end
+  
+  interval = layer_array(layer).update_interval;
+  if (0 == mod(seg_num, interval))
+    % Account for 2X decimation and infrequent updates; find number of time
+    % points to shift in.  Tolerate slip of a fraction of a sample.
+    n_shift = seg_len * interval / (2.0^(layer - 1));
+    if layer > 1
+      % Add the leftover fraction before floor.
+      n_shift = n_shift + layer_array(layer).nap_fraction;
+      layer_array(layer).nap_fraction = n_shift - floor(n_shift);
+      n_shift = floor(n_shift);
+      % Grab new stuff from new end (big time indices) of previous layer.
+      % Take twice as many times as we need, + 5, for decimation, and do
+      % 343 smoothing to get new points.
+      new_chunk = ...
+        layer_array(layer - 1).nap_buffer((end - 2*n_shift - 4):end, :);
+      new_chunk = filter(kernel, 1, new_chunk);
+      new_chunk = gain * new_chunk(7:2:end, :);
+      
+    end
+    % Put new stuff in at latest time indices.
+    layer_array(layer).nap_buffer = ...
+      [layer_array(layer).nap_buffer((1 + n_shift):end, :); ...
+      new_chunk];  % this should fit just right if we have n_shift new times.
+  end
+end
+
+return
+