function [a d] = mircluster(a,varargin)
%   c = mircluster(a,f) clusters the segments in the audio sequence(s)
%       contained in the audio object a, along the analytic feature(s)
%       f, using the k-means strategy. Multiple analytic features have to
%       be grouped into one array of cells.
%       Example:
%           sg = mirsegment(a);
%           mircluster(sg, mirmfcc(sg))
%           mircluster(sg, {mirmfcc(sg), mircentroid(sg)})
%   c = mircluster(d) clusters the frame-decomposed data d into groups
%       using K-means clustering.
%       Example:
%           cc = mirmfcc(a,'Frame');
%           mircluster(cc)
%   Optional argument:
%       mircluster(...,n) indicates the maximal number of clusters.
%           Default value: n = 2.
%       mircluster(...,'Runs',r) indicates the maximal number of runs.
%           Default value: r = 5.
%
%   The second output is the first input argument exactly as it was
%   received (before any cluster information is attached to it).
%
% Requires SOM Toolbox (included in the MIRtoolbox distribution).


% ---- Option specification handed to miroptions -------------------------
    nruns.key = 'Runs';
    nruns.type = 'Integer';
    nruns.default = 5;
option.nruns = nruns;

    nclust.position = 2;
    nclust.type = 'Integer';
    nclust.default = 2;
option.nclust = nclust;

specif.option = option;

specif.nochunk = 1;

% Keep the untouched input for the second output.
d = a;

if isa(a,'mirdesign')
    if not(get(a,'Eval'))
        % During bottom-up construction of the general design

        [unused, option] = miroptions(@mircluster,a,specif,varargin);
        type = get(a,'Type');
        a = mirdesign(@mircluster,a,option,{},struct,type);
        a = set(a,'NoChunk',1);
    else
        % During top-down evaluation initiation

        e = evaleach(a);
        if iscell(e)
            e = e{1};
        end
        a = mircluster(e,varargin{:});
    end
else
    if not(isa(a,'mirdata'))
        mirerror('mircluster','The input should be either frame- or segment-decomposed.');
    end

    % A feature argument is present when the first optional argument is a
    % mirdata object, or a cell array whose first element is one.
    featuresGiven = not(isempty(varargin)) && ...
                    (isa(varargin{1},'mirdata') || ...
                     (iscell(varargin{1}) && ...
                      isa(varargin{1}{1},'mirdata')));

    if not(featuresGiven)
        % mircluster version for frame-decomposed data:
        % frames are clustered into groups using K-means clustering.
        [unused, option] = miroptions(@mircluster,a,specif,varargin);
        da = get(a,'Data');
        lva = length(da);       % Number of audio files in the audio object.
        c = cell(1,lva);
        display('Clustering frames...');
        % An empty handle means "no waitbar". Testing a graphics object
        % directly in an IF is an error on releases where waitbar returns
        % a figure object, so emptiness/validity is tested instead.
        if mirwaitbar
            handle = waitbar(0,'Clustering frames...');
        else
            handle = [];
        end
        for j = 1:lva           % For each audio file,...
            va = [];            % Data transmitted to the kmeans_cluster function.
            v = da{j};
            if iscell(v)
                v = uncell(v,-Inf); %v{1};
            end
            if size(v,4)>1
                % Stack the second page of the 4th dimension below the first.
                v(end+1:2*end,:,:,1) = v(:,:,:,2);
                v(:,:,:,2) = [];
            end
            % Standardization is disabled in this branch: data is used raw.
            va(end+1:end+size(v,1),:,:) = v;
            if isa(a,'mirscalar')
                m = get(a,'Mode');
                if not(isempty(m))
                    m = m{j};
                    % The loop bound used to be the variable nseg, which is
                    % never assigned in this branch. Iterate over the
                    % entries actually present in the Mode cell instead.
                    % NOTE(review): the assignment into va below requires
                    % val to have as many columns as va - confirm against
                    % callers that supply Mode data.
                    nseg = length(m);
                    val = [];
                    for l = 1:nseg
                        vl = m{l};
                        if iscell(vl)
                            vl = vl{1};
                        end
                        val(:,l) = vl;
                    end
                    stv = std(val,0,2);
                    stv(stv == 0) = 1;  % avoid division by zero
                    va(end+1:end+size(val,1),:) = ...
                        (val - repmat(mean(val,2),[1 size(val,2) ])) ...
                            ./ repmat(stv,[1 size(val,2) ]);
                end
            end
            if size(va,3)>1
                % Collapse the leading dimension so that va is 2-D.
                va = reshape(va,size(va,2),size(va,3))';
            end
            [cc, p, err, ind] = kmeans_clusters(va',option.nclust,option.nruns);
            % Keep the clustering with the smallest value of ind.
            [minind, select] = min(ind);
            c{j}.centr = cc{select}';
            c{j}.index = p{select};
            c{j}.weight = zeros(1,size(cc{select},1));
            c{j}.covar = zeros(size(cc{select}'));
            % ii is the index of the current cluster after empty clusters
            % have been removed; i only counts the original clusters.
            ii = 1;
            for i = 1:size(c{j}.centr,2)
                clus = va(:,c{j}.index == ii);
                if isempty(clus)
                    % Drop the empty cluster and renumber the ones above it.
                    higher = find(c{j}.index > ii);
                    c{j}.index(higher) = c{j}.index(higher)-1;
                    c{j}.centr(:,ii) = [];
                    c{j}.weight(ii) = [];
                    c{j}.covar(:,ii) = [];
                else
                    c{j}.weight(ii) = size(clus,2)/size(va,2);
                    % Mean squared deviation from the centroid, computed
                    % separately for each row of clus. The former
                    % expression subtracted the SUM of the centroid
                    % coordinates (a scalar) from every value, and
                    % collapsed single-point clusters to one scalar.
                    dev = clus - repmat(c{j}.centr(:,ii),[1 size(clus,2)]);
                    c{j}.covar(:,ii) = mean(dev.^2,2);
                    ii = ii+1;
                end
            end
            if not(isempty(handle)) && ishandle(handle)
                waitbar(j/lva,handle);
            end
        end
        if not(isempty(handle)) && ishandle(handle)
            delete(handle)
        end
        a = set(a,'Clusters',c);
    else
        % mircluster version for segmented audio:
        % segments are clustered into groups using K-means clustering.
        da = varargin{1};
        varargin(1) = [];
        [unused, option] = miroptions(@mircluster,a,specif,varargin);
        display('Clustering segments...');
        if not(iscell(da))
            da = {da};
        end
        vala = get(a,'Data');   % Data contained in the audio object a.
        lva = length(vala);     % Number of audio files in the audio object.
        clus = cell(1,lva);
        for j = 1:lva           % For each audio file,...
            va = [];            % Data transmitted to the kmeans_cluster function.
            nseg = length(vala{j}); % Number of segments in the audio file.
            for i = 1:length(da)    % For each analytic feature,...
                v = get(da{i},'Data');
                v = v{j};
                if iscell(v)
                    v = uncell(v,-Inf); %v{1};
                end
                if size(v,4)>1
                    % Stack the second page of the 4th dimension below the first.
                    v(end+1:2*end,:,:,1) = v(:,:,:,2);
                    v(:,:,:,2) = [];
                end

                % Standardization (zero mean, unit deviation along dim 2)
                stv = std(v,0,2);
                stv(stv == 0) = 1;  % avoid division by zero
                va(end+1:end+size(v,1),:) = ...
                    (v - repmat(mean(v,2),[1 size(v,2) ])) ...
                        ./ repmat(stv,[1 size(v,2) ]);
                if isa(da{i},'mirscalar')
                    m = get(da{i},'Mode');
                    if not(isempty(m))
                        m = m{j};
                        val = [];
                        for l = 1:nseg
                            vl = m{l};
                            if iscell(vl)
                                vl = vl{1};
                            end
                            val(:,l) = vl;
                        end
                        stv = std(val,0,2);
                        stv(stv == 0) = 1;
                        va(end+1:end+size(val,1),:) = ...
                            (val - repmat(mean(val,2),[1 size(val,2) ])) ...
                                ./ repmat(stv,[1 size(val,2) ]);
                    end
                end

            end
            [cc, p, err, ind] = kmeans_clusters(va',min(option.nclust,nseg),option.nruns);
            % The last partition returned by kmeans_clusters is used here.
            clus{j} = p{end};
        end
        a = set(a,'Clusters',clus);
        t = get(a,'Time');
        fp = get(a,'FramePos');
        cl = cell(1,lva);   % defined even when there is no audio file
        for j = 1:lva       % For each audio file,...
            aj = vala{j};
            tj = t{j};
            fpj = fp{j};
            clj = clus{j};
            % Merge each segment into its predecessor whenever both carry
            % the same cluster label.
            k = 2;
            while k <= length(aj)
                if clj(k) == clj(k-1)
                    aj{k-1} = [aj{k-1};aj{k}];
                    aj(k) = [];
                    tj{k-1} = [tj{k-1};tj{k}];
                    tj(k) = [];
                    fpj{k-1} = [fpj{k-1}(1);fpj{k}(2)];
                    fpj(k) = [];
                    clj(k) = [];
                    % Stay on the same position: the next segment has
                    % shifted into slot k.
                    k = k-1;
                end
                k = k+1;
            end
            vala{j} = aj;
            t{j} = tj;
            fp{j} = fpj;
            cl{j} = clj;
        end
        a = set(a,'Data',vala,'Time',t,'FramePos',fp,'Clusters',cl);
    end
end