diff src/matlab/DataHash.m @ 0:c52bc3e8d3ad tip

user: boblsturm branch 'default' added README.md added assets/.DS_Store added assets/playButton.jpg added assets/stopButton.png added assets/swapButton.jpg added data/.DS_Store added data/fiveoctaves.mp3 added data/glock2.wav added data/sinScale.mp3 added data/speech_female.mp3 added data/sweep.wav added nimfks.m.lnk added src/.DS_Store added src/matlab/.DS_Store added src/matlab/AnalysisCache.m added src/matlab/CSS.m added src/matlab/DataHash.m added src/matlab/ExistsInCache.m added src/matlab/KLDivCost.m added src/matlab/LoadFromCache.m added src/matlab/SA_B_NMF.m added src/matlab/SaveInCache.m added src/matlab/Sound.m added src/matlab/SynthesisCache.m added src/matlab/chromagram_E.m added src/matlab/chromagram_IF.m added src/matlab/chromagram_P.m added src/matlab/chromsynth.m added src/matlab/computeSTFTFeat.m added src/matlab/controller.m added src/matlab/decibelSliderReleaseCallback.m added src/matlab/drawClickCallBack.m added src/matlab/fft2chromamx.m added src/matlab/hz2octs.m added src/matlab/ifgram.m added src/matlab/ifptrack.m added src/matlab/istft.m added src/matlab/nimfks.fig added src/matlab/nimfks.m added src/matlab/nmfFn.m added src/matlab/nmf_beta.m added src/matlab/nmf_divergence.m added src/matlab/nmf_euclidean.m added src/matlab/prune_corpus.m added src/matlab/rot_kernel.m added src/matlab/templateAdditionResynth.m added src/matlab/templateDelCb.m added src/matlab/templateScrollCb.m
author boblsturm
date Sun, 18 Jun 2017 06:26:13 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/matlab/DataHash.m	Sun Jun 18 06:26:13 2017 -0400
@@ -0,0 +1,484 @@
+function Hash = DataHash(Data, Opt)
+% DATAHASH - Checksum for Matlab array of any type
+% This function creates a hash value for an input of any type. The type and
+% dimensions of the input are considered as default, such that UINT8([0,0]) and
+% UINT16(0) have different hash values. Nested STRUCTs and CELLs are parsed
+% recursively.
+%
+% Hash = DataHash(Data, Opt)
+% INPUT:
+%   Data: Array of these built-in types:
+%           (U)INT8/16/32/64, SINGLE, DOUBLE, (real/complex, full/sparse)
+%           CHAR, LOGICAL, CELL (nested), STRUCT (scalar or array, nested),
+%           function_handle.
+%   Opt:  Struct to specify the hashing algorithm and the output format.
+%         Opt and all its fields are optional.
+%         Opt.Method: String, known methods for Java 1.6 (Matlab 2011b):
+%              'SHA-1', 'SHA-256', 'SHA-384', 'SHA-512', 'MD2', 'MD5'.
+%            Call DataHash without inputs to get a list of available methods.
+%            Default: 'MD5'.
+%         Opt.Format: String specifying the output format:
+%            'hex', 'HEX':      Lower/uppercase hexadecimal string.
+%            'double', 'uint8': Numerical vector.
+%            'base64':          Base64 encoded string, only printable ASCII
+%                               characters, shorter than 'hex', no padding.
+%            Default: 'hex'.
+%         Opt.Input: Type of the input as string, not case-sensitive:
+%            'array': The contents, type and size of the input [Data] are
+%                     considered for the creation of the hash. Nested CELLs
+%                     and STRUCT arrays are parsed recursively. Empty arrays of
+%                     different type reply different hashes.
+%            'file':  [Data] is treated as file name and the hash is calculated
+%                     for the files contents.
+%            'bin':   [Data] is a numerical, LOGICAL or CHAR array. Only the
+%                     binary contents of the array is considered, such that
+%                     e.g. empty arrays of different type reply the same hash.
+%            'ascii': Same as 'bin', but only the 8-bit ASCII part of the 16-bit
+%                     Matlab CHARs is considered.
+%            Default: 'array'.
+%
+% OUTPUT:
+%   Hash: String, DOUBLE or UINT8 vector. The length depends on the hashing
+%         method.
+%
+% EXAMPLES:
+% % Default: MD5, hex:
+%   DataHash([])                % 5b302b7b2099a97ba2a276640a192485
+% % MD5, Base64:
+%   Opt = struct('Format', 'base64', 'Method', 'MD5');
+%   DataHash(int32(1:10), Opt)  % +tJN9yeF89h3jOFNN55XLg
+% % SHA-1, HEX:
+%   S.a = uint8([]);
+%   S.b = {{1:10}, struct('q', uint64(415))};
+%   Opt.Method = 'SHA-1';
+%   Opt.Format = 'HEX';
+%   DataHash(S, Opt)            % 18672BE876463B25214CA9241B3C79CC926F3093
+% % SHA-1 of binary values:
+%   Opt = struct('Method', 'SHA-1', 'Input', 'bin');
+%   DataHash(1:8, Opt)          % 826cf9d3a5d74bbe415e97d4cecf03f445f69225
+% % SHA-256, consider ASCII part only (Matlab's CHAR has 16 bits!):
+%   Opt.Method = 'SHA-256';
+%   Opt.Input  = 'ascii';
+%   DataHash('abc', Opt)
+%       % ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad
+%   % Or equivalently:
+%   Opt.Input = 'bin';
+%   DataHash(uint8('abc'), Opt)
+%
+% NOTES:
+%   Function handles and user-defined objects cannot be converted uniquely:
+%   - The subfunction ConvertFuncHandle uses the built-in function FUNCTIONS,
+%     but the replied struct can depend on the Matlab version.
+%   - It is tried to convert objects to UINT8 streams in the subfunction
+%     ConvertObject. A conversion by STRUCT() might be more appropriate.
+%   Adjust these subfunctions on demand.
+%
+%   MATLAB CHARs have 16 bits! Use Opt.Input='ascii' for comparisons with e.g.
+%   online hash generators.
+%
+%   Matt Raum suggested this for e.g. user-defined objects:
+%     DataHash(getByteStreamFromArray(Data))
+%   This works very well, but unfortunately getByteStreamFromArray is
+%   undocumented, such that it might vanish in the future or reply different
+%   output.
+%
+%   For arrays the calculated hash value might be changed in new versions.
+%   Calling this function without inputs replies the version of the hash.
+%
+%   The C-Mex function GetMD5 is 2 to 100 times faster, but obtains MD5 only:
+%   http://www.mathworks.com/matlabcentral/fileexchange/25921
+%
+% Tested: Matlab 7.7, 7.8, 7.13, 8.6, WinXP/32, Win7/64
+% Author: Jan Simon, Heidelberg, (C) 2011-2016 matlab.2010(a)n(MINUS)simon.de
+%
+% See also: TYPECAST, CAST.
+%
+% Michael Kleder, "Compute Hash", no structs and cells:
+%   http://www.mathworks.com/matlabcentral/fileexchange/8944
+% Tim, "Serialize/Deserialize", converts structs and cells to a byte stream:
+%   http://www.mathworks.com/matlabcentral/fileexchange/29457
+
+% $JRev: R-H V:033 Sum:R+m7rAPNLvlw Date:18-Jun-2016 14:33:17 $
+% $License: BSD (use/copy/change/redistribute on own risk, mention the author) $
+% $File: Tools\GLFile\DataHash.m $
+% History:
+% 001: 01-May-2011 21:52, First version.
+% 007: 10-Jun-2011 10:38, [Opt.Input], binary data, complex values considered.
+% 011: 26-May-2012 15:57, Fixed: Failed for binary input and empty data.
+% 014: 04-Nov-2012 11:37, Consider Mex-, MDL- and P-files also.
+%      Thanks to David (author 243360), who found this bug.
+%      Jan Achterhold (author 267816) suggested to consider Java objects.
+% 016: 01-Feb-2015 20:53, Java heap space exhausted for large files.
+%      Now files are processed in chunks to save memory.
+% 017: 15-Feb-2015 19:40, Collisions: Same hash for different data.
+%      Examples: zeros(1,1) and zeros(1,1,0)
+%                complex(0) and zeros(1,1,0,0)
+%      Now the number of dimensions is included, to avoid this.
+% 022: 30-Mar-2015 00:04, Bugfix: Failed for strings and [] without TYPECASTX.
+%      Ross found these 2 bugs, which occur when TYPECASTX is not installed.
+%      If you need the base64 format padded with '=' characters, adjust
+%      fBase64_enc as you like.
+% 026: 29-Jun-2015 00:13, Changed hash for STRUCTs.
+%      Struct arrays are analysed field by field now, which is much faster.
+% 027: 13-Sep-2015 19:03, 'ascii' input as abbrev. for Input='bin' and UINT8().
+% 028: 15-Oct-2015 23:11, Example values in help section updated to v022.
+% 029: 16-Oct-2015 22:32, Use default options for empty input.
+% 031: 28-Feb-2016 15:10, New hash value to get same reply as GetMD5.
+%      New Matlab version (at least 2015b) use a fast method for TYPECAST, such
+%      that calling James Tursa's TYPECASTX is not needed anymore.
+%      Matlab 6.5 not supported anymore: MException for CATCH.
+% 033: 18-Jun-2016 14:28, BUGFIX: Failed on empty files.
+%      Thanks to Christian (AuthorID 2918599).
+
+% OPEN BUGS:
+% Nath wrote:
+% function handle referring to struct containing the function will create
+% infinite loop. Is there any workaround ?
+% Example:
+%   d= dynamicprops();
+%   addprop(d,'f');
+%   d.f= @(varargin) struct2cell(d);
+%   DataHash(d.f) % infinite loop
+% This is caught with an error message concerning the recursion limit now.
+
+% Main function: ===============================================================
+% Default options: -------------------------------------------------------------
+Method    = 'MD5';
+OutFormat = 'hex';
+isFile    = false;
+isBin     = false;
+
+% Check number and type of inputs: ---------------------------------------------
+nArg = nargin;
+if nArg == 2
+   if isa(Opt, 'struct') == 0   % Bad type of 2nd input:
+      Error_L('BadInput2', '2nd input [Opt] must be a struct.');
+   end
+
+   % Specify hash algorithm:
+   if isfield(Opt, 'Method')  && ~isempty(Opt.Method)   % Short-circuiting
+      Method = upper(Opt.Method);
+   end
+
+   % Specify output format:
+   if isfield(Opt, 'Format') && ~isempty(Opt.Format)    % Short-circuiting
+      OutFormat = Opt.Format;
+      % Fail early: Do not hash a large file first and reject the format later:
+      if ~any(strcmp(OutFormat, {'hex', 'HEX', 'double', 'uint8', 'base64'}))
+         Error_L('BadOutFormat', ...
+            '[Opt.Format] must be: HEX, hex, uint8, double, base64.');
+      end
+   end
+
+   % Check if the Input type is specified - default: 'array':
+   if isfield(Opt, 'Input') && ~isempty(Opt.Input)      % Short-circuiting
+      if strcmpi(Opt.Input, 'File')
+         if ischar(Data) == 0
+            Error_L('CannotOpen', '1st input FileName must be a string');
+         end
+         isFile = true;
+
+      elseif strncmpi(Opt.Input, 'bin', 3)  % Accept 'binary' also
+         if (isnumeric(Data) || ischar(Data) || islogical(Data)) == 0 || ...
+               issparse(Data)
+            Error_L('BadDataType', ...
+               '1st input must be numeric, CHAR or LOGICAL for binary input.');
+         end
+         isBin = true;
+
+      elseif strncmpi(Opt.Input, 'asc', 3)  % 8-bit ASCII characters
+         if ~ischar(Data)
+            Error_L('BadDataType', ...
+               '1st input must be a CHAR for the input type ASCII.');
+         end
+         isBin = true;
+         Data  = uint8(Data);
+      end
+   end
+
+elseif nArg == 0  % Reply version of this function:
+   R = Version_L;
+   if nargout == 0
+      disp(R);
+   else
+      Hash = R;
+   end
+   return;
+
+elseif nArg ~= 1  % Bad number of arguments:
+   Error_L('BadNInput', '1 or 2 inputs required.');
+end
+
+% Create the engine: -----------------------------------------------------------
+try
+   Engine = java.security.MessageDigest.getInstance(Method);
+catch
+   Error_L('BadInput2', 'Invalid algorithm: [%s].', Method);
+end
+
+% Create the hash value: -------------------------------------------------------
+if isFile
+   % Open the file:
+   FID = fopen(Data, 'r');
+   if FID < 0
+      % Check existence of file:
+      Found = FileExist_L(Data);
+      if Found
+         Error_L('CantOpenFile', 'Cannot open file: %s.', Data);
+      else
+         Error_L('FileNotFound', 'File not found: %s.', Data);
+      end
+   end
+
+   % The guard closes the file also, if reading or hashing fails or is aborted:
+   Closer = onCleanup(@() fclose(FID));
+
+   % Read file in chunks to save memory and Java heap space:
+   Chunk = 1e6;      % Fastest for 1e6 on Win7/64, HDD
+   Count = Chunk;    % Dummy value to satisfy WHILE condition
+   while Count == Chunk
+      [Data, Count] = fread(FID, Chunk, '*uint8');
+      if Count ~= 0  % Avoid error for empty file
+         Engine.update(Data);
+      end
+   end
+   clear('Closer');  % Destroying the guard closes the file now
+   % Calculate the hash:
+   Hash = typecast(Engine.digest, 'uint8');
+
+elseif isBin             % Contents of an elementary array, type tested already:
+   if isempty(Data)      % Nothing to do, Engine.update fails for empty input!
+   elseif isnumeric(Data)
+      if isreal(Data)
+         Engine.update(typecast(Data(:), 'uint8'));
+      else
+         Engine.update(typecast(real(Data(:)), 'uint8'));
+         Engine.update(typecast(imag(Data(:)), 'uint8'));
+      end
+   elseif islogical(Data)               % TYPECAST cannot handle LOGICAL
+      Engine.update(typecast(uint8(Data(:)), 'uint8'));
+   elseif ischar(Data)                  % TYPECAST cannot handle CHAR
+      Engine.update(typecast(uint16(Data(:)), 'uint8'));
+   end
+   Hash = typecast(Engine.digest, 'uint8');
+
+else                 % Array with type:
+   Engine = CoreHash(Data, Engine);
+   Hash   = typecast(Engine.digest, 'uint8');
+end
+
+% Convert hash specific output format: -----------------------------------------
+switch OutFormat
+   case 'hex'
+      Hash = sprintf('%.2x', double(Hash));
+   case 'HEX'
+      Hash = sprintf('%.2X', double(Hash));
+   case 'double'
+      Hash = double(reshape(Hash, 1, []));
+   case 'uint8'
+      Hash = reshape(Hash, 1, []);
+   case 'base64'
+      Hash = fBase64_enc(double(Hash));
+   otherwise
+      Error_L('BadOutFormat', ...
+         '[Opt.Format] must be: HEX, hex, uint8, double, base64.');
+end
+
+% ******************************************************************************
+function Engine = CoreHash(Data, Engine)
+% Feed the type, the dimensions and the contents of [Data] to the digest
+% [Engine] (java.security.MessageDigest), which is replied after the updates.
+% Sparse arrays, STRUCTs, CELLs, function handles and objects are split or
+% converted and processed recursively.
+
+% The header [class, ndims, size] distinguishes arrays with the same data, but
+% different type or shape: [0 x 0] and [0 x 1], [1,2] and [1;2], DOUBLE(0) and
+% SINGLE([0,0]). Before v016 the header was [class, size] only, such that 0
+% and zeros(1,1,0) had the same hash.
+Dims = uint64([ndims(Data), size(Data)]);
+Engine.update([uint8(class(Data)), typecast(Dims, 'uint8')]);
+
+if issparse(Data)                    % Hash the triplets of the non-zeros:
+   [Row, Col, Val] = find(Data);
+   Triplet = struct('Index1', Row, 'Index2', Col, 'Value', Val);
+   Engine  = CoreHash(Triplet, Engine);
+elseif isstruct(Data)                % Field by field over all elements:
+   Name = sort(fieldnames(Data));    % The order of the fields is ignored
+   for iName = 1:numel(Name)
+      aName = Name{iName};
+      Engine.update(uint8(aName));
+      for iElem = 1:numel(Data)
+         Engine = CoreHash(Data(iElem).(aName), Engine);
+      end
+   end
+elseif iscell(Data)                  % Element by element:
+   for iElem = 1:numel(Data)
+      Engine = CoreHash(Data{iElem}, Engine);
+   end
+elseif isempty(Data)                 % Nothing to add to the header
+elseif isnumeric(Data)
+   if isreal(Data)
+      Engine.update(typecast(Data(:), 'uint8'));
+   else                              % Real parts at first, then the imaginary
+      Engine.update(typecast(real(Data(:)), 'uint8'));
+      Engine.update(typecast(imag(Data(:)), 'uint8'));
+   end
+elseif islogical(Data)               % TYPECAST cannot handle LOGICAL
+   Engine.update(typecast(uint8(Data(:)), 'uint8'));
+elseif ischar(Data)                  % TYPECAST cannot handle CHAR
+   Engine.update(typecast(uint16(Data(:)), 'uint8'));
+elseif isa(Data, 'function_handle')
+   Engine = CoreHash(ConvertFuncHandle(Data), Engine);
+elseif (isobject(Data) || isjava(Data)) && ismethod(Data, 'hashCode')
+   Engine = CoreHash(char(Data.hashCode), Engine);
+else  % Most likely a user-defined object:
+   try
+      BasicData = ConvertObject(Data);
+   catch Cause
+      error(['JSimon:', mfilename, ':BadDataType'], ...
+         '%s: Cannot create elementary array for type: %s\n  %s', ...
+         mfilename, class(Data), Cause.message);
+   end
+   try
+      Engine = CoreHash(BasicData, Engine);
+   catch Cause
+      if ~strcmpi(Cause.identifier, 'MATLAB:recursionLimit')
+         throw(Cause);
+      end
+      throw(MException(['JSimon:', mfilename, ':RecursiveType'], ...
+         '%s: Cannot create hash for recursive data type: %s', ...
+         mfilename, class(Data)));
+   end
+end
+
+% ******************************************************************************
+function FuncKey = ConvertFuncHandle(FuncH)
+% Convert a function handle to a struct, which can be hashed by CoreHash.
+% The struct is created by Matlab's FUNCTIONS. Its output changes with the
+% Matlab version, such that DataHash(@sin) replies different hashes under
+% Matlab 6.5 and 2009a:
+%   1. The format of the struct replied by FUNCTIONS is not fixed,
+%   2. The full paths of toolbox functions, e.g. for @mean, differ.
+% If FUNCTIONS replies a file name, the size and the modification time of
+% this file are appended. This was suggested by Aslak Grinsted.
+%
+%   FuncH:   Function handle.
+%   FuncKey: Struct replied by FUNCTIONS. The fields [filebytes] and
+%            [filedate] are added, if DIR finds the file.
+%
+% ALTERNATIVE: Use the name and the path only. The <matlabroot> part of the
+% toolbox functions is replaced, such that the hash for @mean does not depend
+% on the Matlab version. Drawback: This is not unique for anonymous or nested
+% functions.
+%   funcStruct = functions(FuncH);
+%   funcfile   = strrep(funcStruct.file, matlabroot, '<MATLAB>');
+%   FuncKey    = uint8([funcStruct.function, ' ', funcfile]);
+%
+% Finally I'm afraid there is no unique method to get a hash for a function
+% handle. Please adjust this conversion to your needs.
+
+FuncKey = functions(FuncH);
+if isempty(FuncKey.file)   % No file, nothing to append
+   return;
+end
+
+FileInfo = dir(FuncKey.file);
+% Skip files, which DIR cannot find:
+if ~isempty(FileInfo)
+   FuncKey.filebytes = FileInfo.bytes;
+   FuncKey.filedate  = FileInfo.datenum;
+end
+
+% return;
+
+% ******************************************************************************
+function DataBin = ConvertObject(DataObj)
+% Convert a user-defined object to data, which CoreHash can process.
+% There cannot be a unique solution, so this part is left for the user...
+%   DataObj: Object, which is not handled by the other branches of CoreHash.
+%   DataBin: UINT8 array, if UINT8(DataObj) works, else the result of
+%            STRUCT(DataObj). An error of STRUCT is passed to the caller.
+
+try    % Perhaps a direct conversion is implemented:
+   DataBin = uint8(DataObj);
+   % Matt Raum had this excellent idea - unfortunately this function is
+   % undocumented and might not be supported in the future:
+   % DataBin = getByteStreamFromArray(DataObj);
+catch  % Or perhaps this is better:
+   WarnS   = warning('off', 'MATLAB:structOnObject');
+   Restore = onCleanup(@() warning(WarnS));  % Reset even if STRUCT fails
+   DataBin = struct(DataObj);
+end
+% return;
+
+% ******************************************************************************
+function Out = fBase64_enc(In)
+% Encode a numeric vector of UINT8 values as base64 string without padding.
+% The intention is a shorter hash than the HEX format, therefore the trailing
+% '=' characters are omitted on purpose.
+%   In:  Numeric vector with integer values from 0 to 255.
+%   Out: CHAR row vector with CEIL(8 * numel(In) / 6) characters.
+
+Pool = [65:90, 97:122, 48:57, 43, 47];  % [A:Z, a:z, 0:9, +, /]
+v8   = [128; 64; 32; 16; 8; 4; 2; 1];
+v6   = [32, 16, 8, 4, 2, 1];
+In   = reshape(In, 1, []);
+X    = rem(floor(In(ones(8, 1), :) ./ v8(:, ones(length(In), 1))), 2);
+nPad = mod(-numel(X), 6);  % 0 for complete groups: No spurious trailing 'A'
+Y    = reshape([X(:); zeros(nPad, 1)], 6, []);
+Out  = char(Pool(1 + v6 * Y));
+
+% ******************************************************************************
+function Ex = FileExist_L(FileName)
+% A more reliable version of EXIST(FileName, 'file'):
+% TRUE, if DIR replies exactly one entry for FileName and this is not a folder.
+%   FileName: Name of the file as accepted by DIR.
+%   Ex:       Scalar LOGICAL.
+
+Entry = dir(FileName);
+Ex    = (numel(Entry) == 1) && ~Entry.isdir;
+
+% return;
+
+% ******************************************************************************
+function R = Version_L()
+% Reply the version of the hash and the list of available hashing methods.
+% The output differs between versions of this function. So give the user a
+% chance to recognize the version:
+% 1: 01-May-2011, Initial version
+% 2: 15-Feb-2015, The number of dimensions is considered in addition.
+%    In version 1 these variables had the same hash:
+%    zeros(1,1) and zeros(1,1,0), complex(0) and zeros(1,1,0,0)
+% 3: 29-Jun-2015, Struct arrays are processed field by field and not element
+%    by element, because this is much faster. In consequence the hash value
+%    differs, if the input contains a struct.
+% 4: 28-Feb-2016 15:20, same output as GetMD5 for MD5 sums. Therefore the
+%    dimensions are casted to UINT64 at first.
+%
+%   R: Struct with the fields HashVersion, Date and HashMethod. HashMethod is a
+%      cell string or the string 'error', if the Java providers cannot be read.
+
+R = struct('HashVersion', 4, 'Date', [2016, 2, 28], 'HashMethod', {{}});
+
+try
+   ProviderList = java.security.Security.getProviders;
+   for iProvider = 1:numel(ProviderList)
+      Services = char(ProviderList(iProvider).getServices);
+      for Start = strfind(Services, 'MessageDigest.')
+         % The name of the method ends at the next whitespace:
+         Token = strtok(Services(Start:end));
+         R.HashMethod{end + 1} = strrep(Token, 'MessageDigest.', '');
+      end
+   end
+catch ME   % Show the message, but do not stop:
+   fprintf(2, '%s\n', ME.message);
+   R.HashMethod = 'error';
+end
+
+% ******************************************************************************
+function Error_L(ID, varargin)
+% Throw the error 'JSimon:<FileName>:<ID>'. varargin{1} is the format string of
+% the message, which is prefixed by '*** <FileName>: '; the rest are its values.
+FileName = mfilename;
+error(['JSimon:', FileName, ':', ID], ['*** %s: ', varargin{1}], ...
+   FileName, varargin{2:end});