function [a d] = mircluster(a,varargin)
%   c = mircluster(a,f) clusters the segments in the audio sequence(s)
%       contained in the audio object a, along the analytic feature(s)
%       f, using the k-means strategy. Multiple analytic features have to
%       be grouped into one array of cells.
%       Example:
%           sg = mirsegment(a);
%           mircluster(sg, mirmfcc(sg))
%           mircluster(sg, {mirmfcc(sg), mircentroid(sg)})
%   c = mircluster(d) clusters the frame-decomposed data d into groups
%       using K-means clustering.
%       Example:
%           cc = mirmfcc(a,'Frame');
%           mircluster(cc)
%   Optional argument:
%       mircluster(...,n) indicates the maximal number of clusters.
%           Default value: n = 2.
%       mircluster(...,'Runs',r) indicates the maximal number of runs.
%           Default value: r = 5.
%   Outputs:
%       The first output is the input object with its 'Clusters' field
%           set. In the segment version, adjacent segments assigned to
%           the same cluster are also merged, so 'Data', 'Time' and
%           'FramePos' are updated accordingly.
%       The second output is the first input argument, returned
%           unchanged.
%
%   Requires SOM Toolbox (included in the MIRtoolbox distribution).

        nruns.key = 'Runs';
        nruns.type = 'Integer';
        nruns.default = 5;
    option.nruns = nruns;

        nclust.position = 2;
        nclust.type = 'Integer';
        nclust.default = 2;
    option.nclust = nclust;

specif.option = option;

specif.nochunk = 1;

d = a;
if isa(a,'mirdesign')
    if not(get(a,'Eval'))
        % During bottom-up construction of the general design

        [unused option] = miroptions(@mircluster,a,specif,varargin);
        type = get(a,'Type');
        a = mirdesign(@mircluster,a,option,{},struct,type);
        a = set(a,'NoChunk',1);
    else
        % During top-down evaluation initiation

        e = evaleach(a);
        if iscell(e)
            e = e{1};
        end
        a = mircluster(e,varargin{:});
    end
else
    if not(isa(a,'mirdata'))
        mirerror('mircluster','The input should be either frame- or segment-decomposed.');
    end

    if isempty(varargin) || (not(isa(varargin{1},'mirdata') || ...
                                 (iscell(varargin{1}) && ...
                                  isa(varargin{1}{1},'mirdata'))))
        % mircluster version for frame-decomposed data:
        % frames are clustered into groups using K-means clustering.
        [unused option] = miroptions(@mircluster,a,specif,varargin);
        da = get(a,'Data');
        lva = length(da);       % Number of audio files in the audio object.
        c = cell(1,lva);
        display('Clustering frames...');
        % Remember whether a waitbar was requested with an explicit flag
        % rather than by truth-testing the handle returned by waitbar:
        % graphics objects cannot be used in logical statements in recent
        % MATLAB releases.
        usebar = mirwaitbar;
        if usebar
            handle = waitbar(0,'Clustering frames...');
        end
        for j = 1:lva           % For each audio file,...
            va = [];            % Data transmitted to the kmeans_clusters function.
            v = da{j};
            if iscell(v)
                % NOTE(review): uncell is defined elsewhere; presumably it
                % turns the cell array into a plain numeric array -- confirm.
                v = uncell(v,-Inf);
            end
            if size(v,4)>1
                % Stack the second page of dimension 4 below the first one
                % (along dimension 1), then drop that second page.
                v(end+1:2*end,:,:,1) = v(:,:,:,2);
                v(:,:,:,2) = [];
            end
            % Standardization is disabled in this version; the data is
            % passed as is. Former code kept for reference:
            %   stv = std(v,0,2);
            %   stv(find(stv == 0)) = 1;
            %   (v - repmat(mean(v,2),[1 size(v,2) ])) ...
            %       ./ repmat(stv,[1 size(v,2) ]);
            va(end+1:end+size(v,1),:,:) = v;
            if isa(a,'mirscalar')
                m = get(a,'Mode');
                if not(isempty(m))
                    m = m{j};
                    % nseg was never defined in this branch (undefined
                    % variable error). The loop indexes m{l}, so iterate
                    % over the elements of m.
                    % NOTE(review): confirm that the number of elements of
                    % m matches the number of columns of va.
                    nseg = length(m);
                    val = [];
                    for l = 1:nseg
                        vl = m{l};
                        if iscell(vl)
                            vl = vl{1};
                        end
                        val(:,l) = vl;
                    end
                    % Standardize each row (zero mean, unit deviation);
                    % constant rows keep a divisor of 1.
                    stv = std(val,0,2);
                    stv(find(stv == 0)) = 1;
                    va(end+1:end+size(val,1),:) = ...
                        (val - repmat(mean(val,2),[1 size(val,2) ])) ...
                        ./ repmat(stv,[1 size(val,2) ]);
                end
            end
            if size(va,3)>1
                mel = 1;
                va = reshape(va,size(va,2),size(va,3))';
            else
                mel = 0;
            end
            % kmeans_clusters receives one observation per row.
            [cc, p, err, ind] = kmeans_clusters(va',option.nclust,option.nruns);
            % Keep the clustering whose index value (4th output) is minimal.
            [minind select] = min(ind);
            c{j}.centr = cc{select}';       % One centroid per column.
            c{j}.index = p{select};
            c{j}.weight = zeros(1,size(cc{select},1));
            c{j}.covar = zeros(size(cc{select}'));
            ncentr = size(c{j}.centr,2);
            ii = 1;             % Current cluster, after removal of empty ones.
            for i = 1:ncentr
                members = va(:,c{j}.index == ii);
                if isempty(members)
                    % Empty cluster: remove it and renumber the following ones.
                    higher = find(c{j}.index > ii);
                    c{j}.index(higher) = c{j}.index(higher)-1;
                    c{j}.centr(:,ii) = [];
                    c{j}.weight(ii) = [];
                    c{j}.covar(:,ii) = [];
                else
                    c{j}.weight(ii) = size(members,2)/size(va,2);
                    % Per-dimension variance around the centroid. The
                    % centroid is replicated over the members so that each
                    % dimension is compared with its own centroid
                    % coordinate, and the mean is taken explicitly across
                    % members so that a single-member cluster still yields
                    % one value per dimension.
                    centre = repmat(c{j}.centr(:,ii),1,size(members,2));
                    c{j}.covar(:,ii) = mean((members-centre).^2,2);
                    ii = ii+1;
                end
            end
            if usebar
                waitbar(j/lva,handle);
            end
        end
        if usebar
            delete(handle)
        end
        a = set(a,'Clusters',c);
    else
        % mircluster version for segmented audio:
        % segments are clustered into groups using K-means clustering.
        da = varargin{1};
        varargin(1) = [];
        [unused option] = miroptions(@mircluster,a,specif,varargin);
        display('Clustering segments...');
        if isa(da,'mirdata') || (iscell(da) && isa(da{1},'mirdata'))
            if not(iscell(da))
                da = {da};
            end
            vala = get(a,'Data');   % Data contained in the audio object a.
            lva = length(vala);     % Number of audio files in the audio object.
            clus = cell(1,lva);
            for j = 1:lva           % For each audio file,...
                va = [];            % Data transmitted to the kmeans_clusters function.
                nseg = length(vala{j}); % Number of segments in the audio file.
                for i = 1:length(da)    % For each analytic feature,...
                    v = get(da{i},'Data');
                    v = v{j};
                    if iscell(v)
                        % NOTE(review): uncell is defined elsewhere;
                        % presumably it turns the cell array into a plain
                        % numeric array -- confirm.
                        v = uncell(v,-Inf);
                    end
                    if size(v,4)>1
                        % Stack the second page of dimension 4 below the
                        % first one (along dimension 1), then drop it.
                        v(end+1:2*end,:,:,1) = v(:,:,:,2);
                        v(:,:,:,2) = [];
                    end

                    % Standardization
                    stv = std(v,0,2);
                    stv(find(stv == 0)) = 1;
                    va(end+1:end+size(v,1),:) = ...
                        (v - repmat(mean(v,2),[1 size(v,2) ])) ...
                        ./ repmat(stv,[1 size(v,2) ]);
                    if isa(da{i},'mirscalar')
                        m = get(da{i},'Mode');
                        if not(isempty(m))
                            m = m{j};
                            val = [];
                            for l = 1:nseg
                                vl = m{l};
                                if iscell(vl)
                                    vl = vl{1};
                                end
                                val(:,l) = vl;
                            end
                            stv = std(val,0,2);
                            stv(find(stv == 0)) = 1;
                            va(end+1:end+size(val,1),:) = ...
                                (val - repmat(mean(val,2),[1 size(val,2) ])) ...
                                ./ repmat(stv,[1 size(val,2) ]);
                        end
                    end

                end
                % Never ask for more clusters than there are segments.
                [cc, p, err, ind] = kmeans_clusters(va',min(option.nclust,nseg),option.nruns);
                % Keep the last partition returned by kmeans_clusters.
                clus{j} = p{end};
            end
            a = set(a,'Clusters',clus);
            t = get(a,'Time');
            fp = get(a,'FramePos');
            cl = cell(1,lva);       % Cluster labels after merging.
            for j = 1:lva           % For each audio file,...
                aj = vala{j};
                tj = t{j};
                fpj = fp{j};
                clj = clus{j};
                % Merge each segment into its predecessor when both belong
                % to the same cluster.
                k = 2;
                while k <= length(aj)
                    if clj(k) == clj(k-1)
                        aj{k-1} = [aj{k-1};aj{k}];
                        aj(k) = [];
                        tj{k-1} = [tj{k-1};tj{k}];
                        tj(k) = [];
                        % The merged segment spans from the start of the
                        % first to the end of the second.
                        fpj{k-1} = [fpj{k-1}(1);fpj{k}(2)];
                        fpj(k) = [];
                        clj(k) = [];
                        % Stay on the same position: the next segment has
                        % shifted into index k.
                        k = k-1;
                    end
                    k = k+1;
                end
                vala{j} = aj;
                t{j} = tj;
                fp{j} = fpj;
                cl{j} = clj;
            end
            a = set(a,'Data',vala,'Time',t,'FramePos',fp,'Clusters',cl);
        end
    end
end