wolffd@0
|
1 /***********************************************************************/
|
wolffd@0
|
2 /* */
|
wolffd@0
|
3 /* svm_learn.c */
|
wolffd@0
|
4 /* */
|
wolffd@0
|
5 /* Learning module of Support Vector Machine. */
|
wolffd@0
|
6 /* */
|
wolffd@0
|
7 /* Author: Thorsten Joachims */
|
wolffd@0
|
8 /* Date: 02.07.02 */
|
wolffd@0
|
9 /* */
|
wolffd@0
|
10 /* Copyright (c) 2002 Thorsten Joachims - All rights reserved */
|
wolffd@0
|
11 /* */
|
wolffd@0
|
12 /* This software is available for non-commercial use only. It must */
|
wolffd@0
|
13 /* not be modified and distributed without prior permission of the */
|
wolffd@0
|
14 /* author. The author is not responsible for implications from the */
|
wolffd@0
|
15 /* use of this software. */
|
wolffd@0
|
16 /* */
|
wolffd@0
|
17 /***********************************************************************/
|
wolffd@0
|
18
|
wolffd@0
|
19
|
wolffd@0
|
20 # include "svm_common.h"
|
wolffd@0
|
21 # include "svm_learn.h"
|
wolffd@0
|
22
|
wolffd@0
|
23
|
wolffd@0
|
24 /* interface to QP-solver */
|
wolffd@0
|
25 double *optimize_qp(QP *, double *, long, double *, LEARN_PARM *);
|
wolffd@0
|
26
|
wolffd@0
|
27 /*---------------------------------------------------------------------------*/
|
wolffd@0
|
28
|
wolffd@0
|
/* Learns an SVM classification model based on the training data in
   docs/label. The resulting model is returned in the structure
   model.

   Side effects (NOTE for reviewers): this function mutates learn_parm
   (svm_c default, eps, svm_cost array, svm_newvarsinqp,
   svm_iter_to_shrink, compute_loo) and allocates the supvec/alpha/index
   arrays inside model. Caller owns model and its arrays afterwards. */

void svm_learn_classification(DOC **docs, double *class, long int
			      totdoc, long int totwords,
			      LEARN_PARM *learn_parm,
			      KERNEL_PARM *kernel_parm,
			      KERNEL_CACHE *kernel_cache,
			      MODEL *model,
			      double *alpha)
     /* docs:        Training vectors (x-part) */
     /* class:       Training labels (y-part, zero if test example for
                     transduction) */
     /* totdoc:      Number of examples in docs/label */
     /* totwords:    Number of features (i.e. highest feature index) */
     /* learn_parm:  Learning parameters */
     /* kernel_parm: Kernel parameters */
     /* kernel_cache:Initialized Cache of size totdoc, if using a kernel.
                     NULL if linear.*/
     /* model:       Returns learning result (assumed empty before called) */
     /* alpha:       Start values for the alpha variables or NULL
                     pointer. The new alpha values are returned after
                     optimization if not NULL. Array must be of size totdoc. */
{
  long *inconsistent,i,*label;
  long inconsistentnum;
  long misclassified,upsupvecnum;
  double loss,model_length,example_length;
  double maxdiff,*lin,*a,*c;
  long runtime_start,runtime_end;
  long iterations;
  long *unlabeled,transduction;
  long heldout;
  long loo_count=0,loo_count_pos=0,loo_count_neg=0,trainpos=0,trainneg=0;
  long loocomputed=0,runtime_start_loo=0,runtime_start_xa=0;
  double heldout_c=0,r_delta_sq=0,r_delta,r_delta_avg;
  long *index,*index2dnum;
  double *weights;
  CFLOAT *aicache;  /* buffer to keep one row of hessian */

  double *xi_fullset; /* buffer for storing xi on full sample in loo */
  double *a_fullset;  /* buffer for storing alpha on full sample in loo */
  TIMING timing_profile;
  SHRINK_STATE shrink_state;

  /* start the cpu timer and reset the per-phase timing counters */
  runtime_start=get_runtime();
  timing_profile.time_kernel=0;
  timing_profile.time_opti=0;
  timing_profile.time_shrink=0;
  timing_profile.time_update=0;
  timing_profile.time_model=0;
  timing_profile.time_check=0;
  timing_profile.time_select=0;
  kernel_cache_statistic=0;

  learn_parm->totwords=totwords;

  /* make sure -n value is reasonable */
  if((learn_parm->svm_newvarsinqp < 2)
     || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
    learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
  }

  init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);

  /* per-example working arrays; model arrays are sized for the worst
     case (every example a support vector, plus reserved element 0) */
  label = (long *)my_malloc(sizeof(long)*totdoc);
  inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
  unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
  c = (double *)my_malloc(sizeof(double)*totdoc);
  a = (double *)my_malloc(sizeof(double)*totdoc);
  a_fullset = (double *)my_malloc(sizeof(double)*totdoc);
  xi_fullset = (double *)my_malloc(sizeof(double)*totdoc);
  lin = (double *)my_malloc(sizeof(double)*totdoc);
  learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
  model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
  model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));

  model->at_upper_bound=0;
  model->b=0;
  model->supvec[0]=0;  /* element 0 reserved and empty for now */
  model->alpha[0]=0;
  model->lin_weights=NULL;
  model->totwords=totwords;
  model->totdoc=totdoc;
  model->kernel_parm=(*kernel_parm);
  model->sv_num=1;
  model->loo_error=-1;           /* -1 marks "not computed" for all */
  model->loo_recall=-1;          /* loo/xa quality estimates below   */
  model->loo_precision=-1;
  model->xa_error=-1;
  model->xa_recall=-1;
  model->xa_precision=-1;
  inconsistentnum=0;
  transduction=0;

  r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
  r_delta_sq=r_delta*r_delta;

  r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
  if(learn_parm->svm_c == 0.0) {  /* default value for C */
    learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
    if(verbosity>=1)
      printf("Setting default regularization parameter C=%.4f\n",
	     learn_parm->svm_c);
  }

  learn_parm->eps=-1.0;   /* equivalent regression epsilon for
			     classification */

  for(i=0;i<totdoc;i++) {  /* various inits */
    docs[i]->docnum=i;
    inconsistent[i]=0;
    a[i]=0;
    lin[i]=0;
    c[i]=0.0;
    unlabeled[i]=0;
    /* class[i]==0 marks an unlabeled (transduction) example; note the
       following if/else chain then assigns it zero cost */
    if(class[i] == 0) {
      unlabeled[i]=1;
      label[i]=0;
      transduction=1;
    }
    if(class[i] > 0) {
      learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
	docs[i]->costfactor;
      label[i]=1;
      trainpos++;
    }
    else if(class[i] < 0) {
      learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor;
      label[i]=-1;
      trainneg++;
    }
    else {
      learn_parm->svm_cost[i]=0;
    }
  }
  if(verbosity>=2) {
    printf("%ld positive, %ld negative, and %ld unlabeled examples.\n",trainpos,trainneg,totdoc-trainpos-trainneg); fflush(stdout);
  }

  /* caching makes no sense for linear kernel */
  if(kernel_parm->kernel_type == LINEAR) {
    kernel_cache = NULL;
  }

  /* compute starting state for initial alpha values */
  if(alpha) {
    if(verbosity>=1) {
      printf("Computing starting state..."); fflush(stdout);
    }
    index = (long *)my_malloc(sizeof(long)*totdoc);
    index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
    weights=(double *)my_malloc(sizeof(double)*(totwords+1));
    aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
    for(i=0;i<totdoc;i++) {  /* create full index and clip alphas */
      index[i]=1;
      alpha[i]=fabs(alpha[i]);
      if(alpha[i]<0) alpha[i]=0;
      if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
    }
    if(kernel_parm->kernel_type != LINEAR) {
      /* prime the cache: free (unbounded) SVs first, then bounded SVs,
	 until the cache runs out of space */
      for(i=0;i<totdoc;i++)     /* fill kernel cache with unbounded SV */
	if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
	   && (kernel_cache_space_available(kernel_cache)))
	  cache_kernel_row(kernel_cache,docs,i,kernel_parm);
      for(i=0;i<totdoc;i++)     /* fill rest of kernel cache with bounded SV */
	if((alpha[i]==learn_parm->svm_cost[i])
	   && (kernel_cache_space_available(kernel_cache)))
	  cache_kernel_row(kernel_cache,docs,i,kernel_parm);
    }
    (void)compute_index(index,totdoc,index2dnum);
    update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
			    totwords,kernel_parm,kernel_cache,lin,aicache,
			    weights);
    (void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c,
			      learn_parm,index2dnum,index2dnum,model);
    for(i=0;i<totdoc;i++) {    /* copy initial alphas */
      a[i]=alpha[i];
    }
    free(index);
    free(index2dnum);
    free(weights);
    free(aicache);
    if(verbosity>=1) {
      printf("done.\n"); fflush(stdout);
    }
  }

  if(transduction) {
    /* effectively disables shrinking by pushing the threshold out */
    learn_parm->svm_iter_to_shrink=99999999;
    if(verbosity >= 1)
      printf("\nDeactivating Shrinking due to an incompatibility with the transductive \nlearner in the current version.\n\n");
  }

  if(transduction && learn_parm->compute_loo) {
    learn_parm->compute_loo=0;
    if(verbosity >= 1)
      printf("\nCannot compute leave-one-out estimates for transductive learner.\n\n");
  }

  if(learn_parm->remove_inconsistent && learn_parm->compute_loo) {
    learn_parm->compute_loo=0;
    printf("\nCannot compute leave-one-out estimates when removing inconsistent examples.\n\n");
  }

  if(learn_parm->compute_loo && ((trainpos == 1) || (trainneg == 1))) {
    learn_parm->compute_loo=0;
    printf("\nCannot compute leave-one-out with only one example in one class.\n\n");
  }


  if(verbosity==1) {
    printf("Optimizing"); fflush(stdout);
  }

  /* train the svm */
  iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
				     kernel_parm,kernel_cache,&shrink_state,model,
				     inconsistent,unlabeled,a,lin,
				     c,&timing_profile,
				     &maxdiff,(long)-1,
				     (long)1);

  if(verbosity>=1) {
    if(verbosity==1) printf("done. (%ld iterations)\n",iterations);

    misclassified=0;
    for(i=0;(i<totdoc);i++) { /* get final statistic */
      /* a training error is a point on the wrong side of (or on)
	 the decision hyperplane */
      if((lin[i]-model->b)*(double)label[i] <= 0.0)
	misclassified++;
    }

    printf("Optimization finished (%ld misclassified, maxdiff=%.5f).\n",
	   misclassified,maxdiff);

    runtime_end=get_runtime();
    if(verbosity>=2) {
      printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
	     ((float)runtime_end-(float)runtime_start)/100.0,
	     (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),
	     (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),
	     (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),
	     (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),
	     (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),
	     (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),
	     (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));
    }
    else {
      printf("Runtime in cpu-seconds: %.2f\n",
	     (runtime_end-runtime_start)/100.0);
    }

    if(learn_parm->remove_inconsistent) {
      inconsistentnum=0;
      for(i=0;i<totdoc;i++)
	if(inconsistent[i])
	  inconsistentnum++;
      printf("Number of SV: %ld (plus %ld inconsistent examples)\n",
	     model->sv_num-1,inconsistentnum);
    }
    else {
      upsupvecnum=0;
      for(i=1;i<model->sv_num;i++) {
	/* count SVs whose alpha sits (within epsilon_a) at the box
	   constraint C_i, i.e. SVs at the upper bound */
	if(fabs(model->alpha[i]) >=
	   (learn_parm->svm_cost[(model->supvec[i])->docnum]-
	    learn_parm->epsilon_a))
	  upsupvecnum++;
      }
      printf("Number of SV: %ld (including %ld at upper bound)\n",
	     model->sv_num-1,upsupvecnum);
    }

    if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
      loss=0;
      model_length=0;
      for(i=0;i<totdoc;i++) {
	if((lin[i]-model->b)*(double)label[i] < 1.0-learn_parm->epsilon_crit)
	  loss+=1.0-(lin[i]-model->b)*(double)label[i];
	model_length+=a[i]*label[i]*lin[i];
      }
      model_length=sqrt(model_length);
      fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
      fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
      example_length=estimate_sphere(model,kernel_parm);
      fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
	      length_of_longest_document_vector(docs,totdoc,kernel_parm));
      fprintf(stdout,"Estimated VCdim of classifier: VCdim<=%.5f\n",
	      estimate_margin_vcdim(model,model_length,example_length,
				    kernel_parm));
      if((!learn_parm->remove_inconsistent) && (!transduction)) {
	runtime_start_xa=get_runtime();
	if(verbosity>=1) {
	  printf("Computing XiAlpha-estimates..."); fflush(stdout);
	}
	compute_xa_estimates(model,label,unlabeled,totdoc,docs,lin,a,
			     kernel_parm,learn_parm,&(model->xa_error),
			     &(model->xa_recall),&(model->xa_precision));
	if(verbosity>=1) {
	  printf("done\n");
	}
	printf("Runtime for XiAlpha-estimates in cpu-seconds: %.2f\n",
	       (get_runtime()-runtime_start_xa)/100.0);

	fprintf(stdout,"XiAlpha-estimate of the error: error<=%.2f%% (rho=%.2f,depth=%ld)\n",
		model->xa_error,learn_parm->rho,learn_parm->xa_depth);
	fprintf(stdout,"XiAlpha-estimate of the recall: recall=>%.2f%% (rho=%.2f,depth=%ld)\n",
		model->xa_recall,learn_parm->rho,learn_parm->xa_depth);
	fprintf(stdout,"XiAlpha-estimate of the precision: precision=>%.2f%% (rho=%.2f,depth=%ld)\n",
		model->xa_precision,learn_parm->rho,learn_parm->xa_depth);
      }
      else if(!learn_parm->remove_inconsistent) {
	estimate_transduction_quality(model,label,unlabeled,totdoc,docs,lin);
      }
    }
    if(verbosity>=1) {
      printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
    }
  }


  /* leave-one-out testing starts now */
  if(learn_parm->compute_loo) {
    /* save results of training on full dataset for leave-one-out */
    runtime_start_loo=get_runtime();
    for(i=0;i<totdoc;i++) {
      /* slack on the full sample: xi_i = max(0, 1 - y_i f(x_i)) */
      xi_fullset[i]=1.0-((lin[i]-model->b)*(double)label[i]);
      if(xi_fullset[i]<0) xi_fullset[i]=0;
      a_fullset[i]=a[i];
    }
    if(verbosity>=1) {
      printf("Computing leave-one-out");
    }

    /* repeat this loop for every held-out example */
    for(heldout=0;(heldout<totdoc);heldout++) {
      /* xi-alpha bound: if rho*alpha_i*R^2 + xi_i < 1, leaving i out
	 cannot produce an error, so no retraining is needed */
      if(learn_parm->rho*a_fullset[heldout]*r_delta_sq+xi_fullset[heldout]
	 < 1.0) {
	/* guaranteed to not produce a leave-one-out error */
	if(verbosity==1) {
	  printf("+"); fflush(stdout);
	}
      }
      else if(xi_fullset[heldout] > 1.0) {
	/* guaranteed to produce a leave-one-out error */
	loo_count++;
	if(label[heldout] > 0)  loo_count_pos++; else loo_count_neg++;
	if(verbosity==1) {
	  printf("-"); fflush(stdout);
	}
      }
      else {
	/* undecided case: actually retrain without this example by
	   temporarily setting its cost (upper bound) to zero */
	loocomputed++;
	heldout_c=learn_parm->svm_cost[heldout]; /* set upper bound to zero */
	learn_parm->svm_cost[heldout]=0;
	/* make sure heldout example is not currently */
	/* shrunk away. Assumes that lin is up to date! */
	shrink_state.active[heldout]=1;
	if(verbosity>=2)
	  printf("\nLeave-One-Out test on example %ld\n",heldout);
	if(verbosity>=1) {
	  printf("(?[%ld]",heldout); fflush(stdout);
	}

	optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
				kernel_parm,
				kernel_cache,&shrink_state,model,inconsistent,unlabeled,
				a,lin,c,&timing_profile,
				&maxdiff,heldout,(long)2);

	/* printf("%.20f\n",(lin[heldout]-model->b)*(double)label[heldout]); */

	if(((lin[heldout]-model->b)*(double)label[heldout]) <= 0.0) {
	  loo_count++;                           /* there was a loo-error */
	  if(label[heldout] > 0)  loo_count_pos++; else loo_count_neg++;
	  if(verbosity>=1) {
	    printf("-)"); fflush(stdout);
	  }
	}
	else {
	  if(verbosity>=1) {
	    printf("+)"); fflush(stdout);
	  }
	}
	/* now we need to restore the original data set*/
	learn_parm->svm_cost[heldout]=heldout_c; /* restore upper bound */
      }
    } /* end of leave-one-out loop */


    if(verbosity>=1) {
      printf("\nRetrain on full problem"); fflush(stdout);
    }
    optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
			    kernel_parm,
			    kernel_cache,&shrink_state,model,inconsistent,unlabeled,
			    a,lin,c,&timing_profile,
			    &maxdiff,(long)-1,(long)1);
    if(verbosity >= 1)
      printf("done.\n");


    /* after all leave-one-out computed */
    model->loo_error=100.0*loo_count/(double)totdoc;
    model->loo_recall=(1.0-(double)loo_count_pos/(double)trainpos)*100.0;
    model->loo_precision=(trainpos-loo_count_pos)/
      (double)(trainpos-loo_count_pos+loo_count_neg)*100.0;
    if(verbosity >= 1) {
      fprintf(stdout,"Leave-one-out estimate of the error: error=%.2f%%\n",
	      model->loo_error);
      fprintf(stdout,"Leave-one-out estimate of the recall: recall=%.2f%%\n",
	      model->loo_recall);
      fprintf(stdout,"Leave-one-out estimate of the precision: precision=%.2f%%\n",
	      model->loo_precision);
      fprintf(stdout,"Actual leave-one-outs computed:  %ld (rho=%.2f)\n",
	      loocomputed,learn_parm->rho);
      printf("Runtime for leave-one-out in cpu-seconds: %.2f\n",
	     (double)(get_runtime()-runtime_start_loo)/100.0);
    }
  }

  if(learn_parm->alphafile[0])
    write_alphas(learn_parm->alphafile,a,label,totdoc);

  shrink_state_cleanup(&shrink_state);
  free(label);
  free(inconsistent);
  free(unlabeled);
  free(c);
  free(a);
  free(a_fullset);
  free(xi_fullset);
  free(lin);
  free(learn_parm->svm_cost);
}
|
wolffd@0
|
465
|
wolffd@0
|
466
|
wolffd@0
|
467 /* Learns an SVM regression model based on the training data in
|
wolffd@0
|
468 docs/label. The resulting model is returned in the structure
|
wolffd@0
|
469 model. */
|
wolffd@0
|
470
|
wolffd@0
|
471 void svm_learn_regression(DOC **docs, double *value, long int totdoc,
|
wolffd@0
|
472 long int totwords, LEARN_PARM *learn_parm,
|
wolffd@0
|
473 KERNEL_PARM *kernel_parm,
|
wolffd@0
|
474 KERNEL_CACHE **kernel_cache, MODEL *model)
|
wolffd@0
|
475 /* docs: Training vectors (x-part) */
|
wolffd@0
|
476 /* class: Training value (y-part) */
|
wolffd@0
|
477 /* totdoc: Number of examples in docs/label */
|
wolffd@0
|
478 /* totwords: Number of features (i.e. highest feature index) */
|
wolffd@0
|
479 /* learn_parm: Learning paramenters */
|
wolffd@0
|
480 /* kernel_parm: Kernel paramenters */
|
wolffd@0
|
481 /* kernel_cache:Initialized Cache, if using a kernel. NULL if
|
wolffd@0
|
482 linear. Note that it will be free'd and reassigned */
|
wolffd@0
|
483 /* model: Returns learning result (assumed empty before called) */
|
wolffd@0
|
484 {
|
wolffd@0
|
485 long *inconsistent,i,j;
|
wolffd@0
|
486 long inconsistentnum;
|
wolffd@0
|
487 long upsupvecnum;
|
wolffd@0
|
488 double loss,model_length,example_length;
|
wolffd@0
|
489 double maxdiff,*lin,*a,*c;
|
wolffd@0
|
490 long runtime_start,runtime_end;
|
wolffd@0
|
491 long iterations,kernel_cache_size;
|
wolffd@0
|
492 long *unlabeled;
|
wolffd@0
|
493 double r_delta_sq=0,r_delta,r_delta_avg;
|
wolffd@0
|
494 double *xi_fullset; /* buffer for storing xi on full sample in loo */
|
wolffd@0
|
495 double *a_fullset; /* buffer for storing alpha on full sample in loo */
|
wolffd@0
|
496 TIMING timing_profile;
|
wolffd@0
|
497 SHRINK_STATE shrink_state;
|
wolffd@0
|
498 DOC **docs_org;
|
wolffd@0
|
499 long *label;
|
wolffd@0
|
500
|
wolffd@0
|
501 /* set up regression problem in standard form */
|
wolffd@0
|
502 docs_org=docs;
|
wolffd@0
|
503 docs = (DOC **)my_malloc(sizeof(DOC)*2*totdoc);
|
wolffd@0
|
504 label = (long *)my_malloc(sizeof(long)*2*totdoc);
|
wolffd@0
|
505 c = (double *)my_malloc(sizeof(double)*2*totdoc);
|
wolffd@0
|
506 for(i=0;i<totdoc;i++) {
|
wolffd@0
|
507 j=2*totdoc-1-i;
|
wolffd@0
|
508 docs[i]=create_example(i,0,0,docs_org[i]->costfactor,docs_org[i]->fvec);
|
wolffd@0
|
509 label[i]=+1;
|
wolffd@0
|
510 c[i]=value[i];
|
wolffd@0
|
511 docs[j]=create_example(j,0,0,docs_org[i]->costfactor,docs_org[i]->fvec);
|
wolffd@0
|
512 label[j]=-1;
|
wolffd@0
|
513 c[j]=value[i];
|
wolffd@0
|
514 }
|
wolffd@0
|
515 totdoc*=2;
|
wolffd@0
|
516
|
wolffd@0
|
517 /* need to get a bigger kernel cache */
|
wolffd@0
|
518 if(*kernel_cache) {
|
wolffd@0
|
519 kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024);
|
wolffd@0
|
520 kernel_cache_cleanup(*kernel_cache);
|
wolffd@0
|
521 (*kernel_cache)=kernel_cache_init(totdoc,kernel_cache_size);
|
wolffd@0
|
522 }
|
wolffd@0
|
523
|
wolffd@0
|
524 runtime_start=get_runtime();
|
wolffd@0
|
525 timing_profile.time_kernel=0;
|
wolffd@0
|
526 timing_profile.time_opti=0;
|
wolffd@0
|
527 timing_profile.time_shrink=0;
|
wolffd@0
|
528 timing_profile.time_update=0;
|
wolffd@0
|
529 timing_profile.time_model=0;
|
wolffd@0
|
530 timing_profile.time_check=0;
|
wolffd@0
|
531 timing_profile.time_select=0;
|
wolffd@0
|
532 kernel_cache_statistic=0;
|
wolffd@0
|
533
|
wolffd@0
|
534 learn_parm->totwords=totwords;
|
wolffd@0
|
535
|
wolffd@0
|
536 /* make sure -n value is reasonable */
|
wolffd@0
|
537 if((learn_parm->svm_newvarsinqp < 2)
|
wolffd@0
|
538 || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
|
wolffd@0
|
539 learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
|
wolffd@0
|
540 }
|
wolffd@0
|
541
|
wolffd@0
|
542 init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);
|
wolffd@0
|
543
|
wolffd@0
|
544 inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
|
wolffd@0
|
545 unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
|
wolffd@0
|
546 a = (double *)my_malloc(sizeof(double)*totdoc);
|
wolffd@0
|
547 a_fullset = (double *)my_malloc(sizeof(double)*totdoc);
|
wolffd@0
|
548 xi_fullset = (double *)my_malloc(sizeof(double)*totdoc);
|
wolffd@0
|
549 lin = (double *)my_malloc(sizeof(double)*totdoc);
|
wolffd@0
|
550 learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
|
wolffd@0
|
551 model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
|
wolffd@0
|
552 model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
|
wolffd@0
|
553 model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
|
wolffd@0
|
554
|
wolffd@0
|
555 model->at_upper_bound=0;
|
wolffd@0
|
556 model->b=0;
|
wolffd@0
|
557 model->supvec[0]=0; /* element 0 reserved and empty for now */
|
wolffd@0
|
558 model->alpha[0]=0;
|
wolffd@0
|
559 model->lin_weights=NULL;
|
wolffd@0
|
560 model->totwords=totwords;
|
wolffd@0
|
561 model->totdoc=totdoc;
|
wolffd@0
|
562 model->kernel_parm=(*kernel_parm);
|
wolffd@0
|
563 model->sv_num=1;
|
wolffd@0
|
564 model->loo_error=-1;
|
wolffd@0
|
565 model->loo_recall=-1;
|
wolffd@0
|
566 model->loo_precision=-1;
|
wolffd@0
|
567 model->xa_error=-1;
|
wolffd@0
|
568 model->xa_recall=-1;
|
wolffd@0
|
569 model->xa_precision=-1;
|
wolffd@0
|
570 inconsistentnum=0;
|
wolffd@0
|
571
|
wolffd@0
|
572 r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
|
wolffd@0
|
573 r_delta_sq=r_delta*r_delta;
|
wolffd@0
|
574
|
wolffd@0
|
575 r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
|
wolffd@0
|
576 if(learn_parm->svm_c == 0.0) { /* default value for C */
|
wolffd@0
|
577 learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
|
wolffd@0
|
578 if(verbosity>=1)
|
wolffd@0
|
579 printf("Setting default regularization parameter C=%.4f\n",
|
wolffd@0
|
580 learn_parm->svm_c);
|
wolffd@0
|
581 }
|
wolffd@0
|
582
|
wolffd@0
|
583 for(i=0;i<totdoc;i++) { /* various inits */
|
wolffd@0
|
584 inconsistent[i]=0;
|
wolffd@0
|
585 a[i]=0;
|
wolffd@0
|
586 lin[i]=0;
|
wolffd@0
|
587 unlabeled[i]=0;
|
wolffd@0
|
588 if(label[i] > 0) {
|
wolffd@0
|
589 learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
|
wolffd@0
|
590 docs[i]->costfactor;
|
wolffd@0
|
591 }
|
wolffd@0
|
592 else if(label[i] < 0) {
|
wolffd@0
|
593 learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor;
|
wolffd@0
|
594 }
|
wolffd@0
|
595 }
|
wolffd@0
|
596
|
wolffd@0
|
597 /* caching makes no sense for linear kernel */
|
wolffd@0
|
598 if((kernel_parm->kernel_type == LINEAR) && (*kernel_cache)) {
|
wolffd@0
|
599 printf("WARNING: Using a kernel cache for linear case will slow optimization down!\n");
|
wolffd@0
|
600 }
|
wolffd@0
|
601
|
wolffd@0
|
602 if(verbosity==1) {
|
wolffd@0
|
603 printf("Optimizing"); fflush(stdout);
|
wolffd@0
|
604 }
|
wolffd@0
|
605
|
wolffd@0
|
606 /* train the svm */
|
wolffd@0
|
607 iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
|
wolffd@0
|
608 kernel_parm,*kernel_cache,&shrink_state,
|
wolffd@0
|
609 model,inconsistent,unlabeled,a,lin,c,
|
wolffd@0
|
610 &timing_profile,&maxdiff,(long)-1,
|
wolffd@0
|
611 (long)1);
|
wolffd@0
|
612
|
wolffd@0
|
613 if(verbosity>=1) {
|
wolffd@0
|
614 if(verbosity==1) printf("done. (%ld iterations)\n",iterations);
|
wolffd@0
|
615
|
wolffd@0
|
616 printf("Optimization finished (maxdiff=%.5f).\n",maxdiff);
|
wolffd@0
|
617
|
wolffd@0
|
618 runtime_end=get_runtime();
|
wolffd@0
|
619 if(verbosity>=2) {
|
wolffd@0
|
620 printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
|
wolffd@0
|
621 ((float)runtime_end-(float)runtime_start)/100.0,
|
wolffd@0
|
622 (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),
|
wolffd@0
|
623 (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),
|
wolffd@0
|
624 (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),
|
wolffd@0
|
625 (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),
|
wolffd@0
|
626 (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),
|
wolffd@0
|
627 (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),
|
wolffd@0
|
628 (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));
|
wolffd@0
|
629 }
|
wolffd@0
|
630 else {
|
wolffd@0
|
631 printf("Runtime in cpu-seconds: %.2f\n",
|
wolffd@0
|
632 (runtime_end-runtime_start)/100.0);
|
wolffd@0
|
633 }
|
wolffd@0
|
634
|
wolffd@0
|
635 if(learn_parm->remove_inconsistent) {
|
wolffd@0
|
636 inconsistentnum=0;
|
wolffd@0
|
637 for(i=0;i<totdoc;i++)
|
wolffd@0
|
638 if(inconsistent[i])
|
wolffd@0
|
639 inconsistentnum++;
|
wolffd@0
|
640 printf("Number of SV: %ld (plus %ld inconsistent examples)\n",
|
wolffd@0
|
641 model->sv_num-1,inconsistentnum);
|
wolffd@0
|
642 }
|
wolffd@0
|
643 else {
|
wolffd@0
|
644 upsupvecnum=0;
|
wolffd@0
|
645 for(i=1;i<model->sv_num;i++) {
|
wolffd@0
|
646 if(fabs(model->alpha[i]) >=
|
wolffd@0
|
647 (learn_parm->svm_cost[(model->supvec[i])->docnum]-
|
wolffd@0
|
648 learn_parm->epsilon_a))
|
wolffd@0
|
649 upsupvecnum++;
|
wolffd@0
|
650 }
|
wolffd@0
|
651 printf("Number of SV: %ld (including %ld at upper bound)\n",
|
wolffd@0
|
652 model->sv_num-1,upsupvecnum);
|
wolffd@0
|
653 }
|
wolffd@0
|
654
|
wolffd@0
|
655 if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
|
wolffd@0
|
656 loss=0;
|
wolffd@0
|
657 model_length=0;
|
wolffd@0
|
658 for(i=0;i<totdoc;i++) {
|
wolffd@0
|
659 if((lin[i]-model->b)*(double)label[i] < (-learn_parm->eps+(double)label[i]*c[i])-learn_parm->epsilon_crit)
|
wolffd@0
|
660 loss+=-learn_parm->eps+(double)label[i]*c[i]-(lin[i]-model->b)*(double)label[i];
|
wolffd@0
|
661 model_length+=a[i]*label[i]*lin[i];
|
wolffd@0
|
662 }
|
wolffd@0
|
663 model_length=sqrt(model_length);
|
wolffd@0
|
664 fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
|
wolffd@0
|
665 fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
|
wolffd@0
|
666 example_length=estimate_sphere(model,kernel_parm);
|
wolffd@0
|
667 fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
|
wolffd@0
|
668 length_of_longest_document_vector(docs,totdoc,kernel_parm));
|
wolffd@0
|
669 }
|
wolffd@0
|
670 if(verbosity>=1) {
|
wolffd@0
|
671 printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
|
wolffd@0
|
672 }
|
wolffd@0
|
673 }
|
wolffd@0
|
674
|
wolffd@0
|
675 if(learn_parm->alphafile[0])
|
wolffd@0
|
676 write_alphas(learn_parm->alphafile,a,label,totdoc);
|
wolffd@0
|
677
|
wolffd@0
|
678 /* this makes sure the model we return does not contain pointers to the
|
wolffd@0
|
679 temporary documents */
|
wolffd@0
|
680 for(i=1;i<model->sv_num;i++) {
|
wolffd@0
|
681 j=model->supvec[i]->docnum;
|
wolffd@0
|
682 if(j >= (totdoc/2)) {
|
wolffd@0
|
683 j=totdoc-j-1;
|
wolffd@0
|
684 }
|
wolffd@0
|
685 model->supvec[i]=docs_org[j];
|
wolffd@0
|
686 }
|
wolffd@0
|
687
|
wolffd@0
|
688 shrink_state_cleanup(&shrink_state);
|
wolffd@0
|
689 for(i=0;i<totdoc;i++)
|
wolffd@0
|
690 free_example(docs[i],0);
|
wolffd@0
|
691 free(docs);
|
wolffd@0
|
692 free(label);
|
wolffd@0
|
693 free(inconsistent);
|
wolffd@0
|
694 free(unlabeled);
|
wolffd@0
|
695 free(c);
|
wolffd@0
|
696 free(a);
|
wolffd@0
|
697 free(a_fullset);
|
wolffd@0
|
698 free(xi_fullset);
|
wolffd@0
|
699 free(lin);
|
wolffd@0
|
700 free(learn_parm->svm_cost);
|
wolffd@0
|
701 }
|
wolffd@0
|
702
|
wolffd@0
|
703 void svm_learn_ranking(DOC **docs, double *rankvalue, long int totdoc,
|
wolffd@0
|
704 long int totwords, LEARN_PARM *learn_parm,
|
wolffd@0
|
705 KERNEL_PARM *kernel_parm, KERNEL_CACHE **kernel_cache,
|
wolffd@0
|
706 MODEL *model)
|
wolffd@0
|
707 /* docs: Training vectors (x-part) */
|
wolffd@0
|
708 /* rankvalue: Training target values that determine the ranking */
|
wolffd@0
|
709 /* totdoc: Number of examples in docs/label */
|
wolffd@0
|
710 /* totwords: Number of features (i.e. highest feature index) */
|
wolffd@0
|
711 /* learn_parm: Learning paramenters */
|
wolffd@0
|
712 /* kernel_parm: Kernel paramenters */
|
wolffd@0
|
713 /* kernel_cache:Initialized pointer to Cache of size 1*totdoc, if
|
wolffd@0
|
714 using a kernel. NULL if linear. NOTE: Cache is
|
wolffd@0
|
715 getting reinitialized in this function */
|
wolffd@0
|
716 /* model: Returns learning result (assumed empty before called) */
|
wolffd@0
|
717 {
|
wolffd@0
|
718 DOC **docdiff;
|
wolffd@0
|
719 long i,j,k,totpair,kernel_cache_size;
|
wolffd@0
|
720 double *target,*alpha,cost;
|
wolffd@0
|
721 long *greater,*lesser;
|
wolffd@0
|
722 MODEL *pairmodel;
|
wolffd@0
|
723 SVECTOR *flow,*fhigh;
|
wolffd@0
|
724
|
wolffd@0
|
725 totpair=0;
|
wolffd@0
|
726 for(i=0;i<totdoc;i++) {
|
wolffd@0
|
727 for(j=i+1;j<totdoc;j++) {
|
wolffd@0
|
728 if((docs[i]->queryid==docs[j]->queryid) && (rankvalue[i] != rankvalue[j])) {
|
wolffd@0
|
729 totpair++;
|
wolffd@0
|
730 }
|
wolffd@0
|
731 }
|
wolffd@0
|
732 }
|
wolffd@0
|
733
|
wolffd@0
|
734 printf("Constructing %ld rank constraints...",totpair); fflush(stdout);
|
wolffd@0
|
735 docdiff=(DOC **)my_malloc(sizeof(DOC)*totpair);
|
wolffd@0
|
736 target=(double *)my_malloc(sizeof(double)*totpair);
|
wolffd@0
|
737 greater=(long *)my_malloc(sizeof(long)*totpair);
|
wolffd@0
|
738 lesser=(long *)my_malloc(sizeof(long)*totpair);
|
wolffd@0
|
739
|
wolffd@0
|
740 k=0;
|
wolffd@0
|
741 for(i=0;i<totdoc;i++) {
|
wolffd@0
|
742 for(j=i+1;j<totdoc;j++) {
|
wolffd@0
|
743 if(docs[i]->queryid == docs[j]->queryid) {
|
wolffd@0
|
744 cost=(docs[i]->costfactor+docs[j]->costfactor)/2.0;
|
wolffd@0
|
745 if(rankvalue[i] > rankvalue[j]) {
|
wolffd@0
|
746 if(kernel_parm->kernel_type == LINEAR)
|
wolffd@0
|
747 docdiff[k]=create_example(k,0,0,cost,
|
wolffd@0
|
748 sub_ss(docs[i]->fvec,docs[j]->fvec));
|
wolffd@0
|
749 else {
|
wolffd@0
|
750 flow=copy_svector(docs[j]->fvec);
|
wolffd@0
|
751 flow->factor=-1.0;
|
wolffd@0
|
752 flow->next=NULL;
|
wolffd@0
|
753 fhigh=copy_svector(docs[i]->fvec);
|
wolffd@0
|
754 fhigh->factor=1.0;
|
wolffd@0
|
755 fhigh->next=flow;
|
wolffd@0
|
756 docdiff[k]=create_example(k,0,0,cost,fhigh);
|
wolffd@0
|
757 }
|
wolffd@0
|
758 target[k]=1;
|
wolffd@0
|
759 greater[k]=i;
|
wolffd@0
|
760 lesser[k]=j;
|
wolffd@0
|
761 k++;
|
wolffd@0
|
762 }
|
wolffd@0
|
763 else if(rankvalue[i] < rankvalue[j]) {
|
wolffd@0
|
764 if(kernel_parm->kernel_type == LINEAR)
|
wolffd@0
|
765 docdiff[k]=create_example(k,0,0,cost,
|
wolffd@0
|
766 sub_ss(docs[i]->fvec,docs[j]->fvec));
|
wolffd@0
|
767 else {
|
wolffd@0
|
768 flow=copy_svector(docs[j]->fvec);
|
wolffd@0
|
769 flow->factor=-1.0;
|
wolffd@0
|
770 flow->next=NULL;
|
wolffd@0
|
771 fhigh=copy_svector(docs[i]->fvec);
|
wolffd@0
|
772 fhigh->factor=1.0;
|
wolffd@0
|
773 fhigh->next=flow;
|
wolffd@0
|
774 docdiff[k]=create_example(k,0,0,cost,fhigh);
|
wolffd@0
|
775 }
|
wolffd@0
|
776 target[k]=-1;
|
wolffd@0
|
777 greater[k]=i;
|
wolffd@0
|
778 lesser[k]=j;
|
wolffd@0
|
779 k++;
|
wolffd@0
|
780 }
|
wolffd@0
|
781 }
|
wolffd@0
|
782 }
|
wolffd@0
|
783 }
|
wolffd@0
|
784 printf("done.\n"); fflush(stdout);
|
wolffd@0
|
785
|
wolffd@0
|
786 /* need to get a bigger kernel cache */
|
wolffd@0
|
787 if(*kernel_cache) {
|
wolffd@0
|
788 kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024);
|
wolffd@0
|
789 kernel_cache_cleanup(*kernel_cache);
|
wolffd@0
|
790 (*kernel_cache)=kernel_cache_init(totpair,kernel_cache_size);
|
wolffd@0
|
791 }
|
wolffd@0
|
792
|
wolffd@0
|
793 /* must use unbiased hyperplane on difference vectors */
|
wolffd@0
|
794 learn_parm->biased_hyperplane=0;
|
wolffd@0
|
795 pairmodel=(MODEL *)my_malloc(sizeof(MODEL));
|
wolffd@0
|
796 svm_learn_classification(docdiff,target,totpair,totwords,learn_parm,
|
wolffd@0
|
797 kernel_parm,(*kernel_cache),pairmodel,NULL);
|
wolffd@0
|
798
|
wolffd@0
|
799 /* Transfer the result into a more compact model. If you would like
|
wolffd@0
|
800 to output the original model on pairs of documents, see below. */
|
wolffd@0
|
801 alpha=(double *)my_malloc(sizeof(double)*totdoc);
|
wolffd@0
|
802 for(i=0;i<totdoc;i++) {
|
wolffd@0
|
803 alpha[i]=0;
|
wolffd@0
|
804 }
|
wolffd@0
|
805 for(i=1;i<pairmodel->sv_num;i++) {
|
wolffd@0
|
806 alpha[lesser[(pairmodel->supvec[i])->docnum]]-=pairmodel->alpha[i];
|
wolffd@0
|
807 alpha[greater[(pairmodel->supvec[i])->docnum]]+=pairmodel->alpha[i];
|
wolffd@0
|
808 }
|
wolffd@0
|
809 model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
|
wolffd@0
|
810 model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
|
wolffd@0
|
811 model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
|
wolffd@0
|
812 model->supvec[0]=0; /* element 0 reserved and empty for now */
|
wolffd@0
|
813 model->alpha[0]=0;
|
wolffd@0
|
814 model->sv_num=1;
|
wolffd@0
|
815 for(i=0;i<totdoc;i++) {
|
wolffd@0
|
816 if(alpha[i]) {
|
wolffd@0
|
817 model->supvec[model->sv_num]=docs[i];
|
wolffd@0
|
818 model->alpha[model->sv_num]=alpha[i];
|
wolffd@0
|
819 model->index[i]=model->sv_num;
|
wolffd@0
|
820 model->sv_num++;
|
wolffd@0
|
821 }
|
wolffd@0
|
822 else {
|
wolffd@0
|
823 model->index[i]=-1;
|
wolffd@0
|
824 }
|
wolffd@0
|
825 }
|
wolffd@0
|
826 model->at_upper_bound=0;
|
wolffd@0
|
827 model->b=0;
|
wolffd@0
|
828 model->lin_weights=NULL;
|
wolffd@0
|
829 model->totwords=totwords;
|
wolffd@0
|
830 model->totdoc=totdoc;
|
wolffd@0
|
831 model->kernel_parm=(*kernel_parm);
|
wolffd@0
|
832 model->loo_error=-1;
|
wolffd@0
|
833 model->loo_recall=-1;
|
wolffd@0
|
834 model->loo_precision=-1;
|
wolffd@0
|
835 model->xa_error=-1;
|
wolffd@0
|
836 model->xa_recall=-1;
|
wolffd@0
|
837 model->xa_precision=-1;
|
wolffd@0
|
838
|
wolffd@0
|
839 free(alpha);
|
wolffd@0
|
840 free(greater);
|
wolffd@0
|
841 free(lesser);
|
wolffd@0
|
842 free(target);
|
wolffd@0
|
843
|
wolffd@0
|
844 /* If you would like to output the original model on pairs of
|
wolffd@0
|
845 document, replace the following lines with '(*model)=(*pairmodel);' */
|
wolffd@0
|
846 for(i=0;i<totpair;i++)
|
wolffd@0
|
847 free_example(docdiff[i],1);
|
wolffd@0
|
848 free(docdiff);
|
wolffd@0
|
849 free_model(pairmodel,0);
|
wolffd@0
|
850 }
|
wolffd@0
|
851
|
wolffd@0
|
852
|
wolffd@0
|
853 /* The following solves a freely defined and given set of
|
wolffd@0
|
854 inequalities. The optimization problem is of the following form:
|
wolffd@0
|
855
|
wolffd@0
|
856 min 0.5 w*w + C sum_i C_i \xi_i
|
wolffd@0
|
857 s.t. x_i * w > rhs_i - \xi_i
|
wolffd@0
|
858
|
wolffd@0
|
859 This corresponds to the -z o option. */
|
wolffd@0
|
860
|
wolffd@0
|
861 void svm_learn_optimization(DOC **docs, double *rhs, long int
|
wolffd@0
|
862 totdoc, long int totwords,
|
wolffd@0
|
863 LEARN_PARM *learn_parm,
|
wolffd@0
|
864 KERNEL_PARM *kernel_parm,
|
wolffd@0
|
865 KERNEL_CACHE *kernel_cache, MODEL *model,
|
wolffd@0
|
866 double *alpha)
|
wolffd@0
|
867 /* docs: Left-hand side of inequalities (x-part) */
|
wolffd@0
|
868 /* rhs: Right-hand side of inequalities */
|
wolffd@0
|
869 /* totdoc: Number of examples in docs/label */
|
wolffd@0
|
870 /* totwords: Number of features (i.e. highest feature index) */
|
wolffd@0
|
871 /* learn_parm: Learning paramenters */
|
wolffd@0
|
872 /* kernel_parm: Kernel paramenters */
|
wolffd@0
|
873 /* kernel_cache:Initialized Cache of size 1*totdoc, if using a kernel.
|
wolffd@0
|
874 NULL if linear.*/
|
wolffd@0
|
875 /* model: Returns solution as SV expansion (assumed empty before called) */
|
wolffd@0
|
876 /* alpha: Start values for the alpha variables or NULL
|
wolffd@0
|
877 pointer. The new alpha values are returned after
|
wolffd@0
|
878 optimization if not NULL. Array must be of size totdoc. */
|
wolffd@0
|
879 {
|
wolffd@0
|
880 long i,*label;
|
wolffd@0
|
881 long misclassified,upsupvecnum;
|
wolffd@0
|
882 double loss,model_length,example_length;
|
wolffd@0
|
883 double maxdiff,*lin,*a,*c;
|
wolffd@0
|
884 long runtime_start,runtime_end;
|
wolffd@0
|
885 long iterations,maxslackid,svsetnum;
|
wolffd@0
|
886 long *unlabeled,*inconsistent;
|
wolffd@0
|
887 double r_delta_sq=0,r_delta,r_delta_avg;
|
wolffd@0
|
888 long *index,*index2dnum;
|
wolffd@0
|
889 double *weights,*slack,*alphaslack;
|
wolffd@0
|
890 CFLOAT *aicache; /* buffer to keep one row of hessian */
|
wolffd@0
|
891
|
wolffd@0
|
892 TIMING timing_profile;
|
wolffd@0
|
893 SHRINK_STATE shrink_state;
|
wolffd@0
|
894
|
wolffd@0
|
895 runtime_start=get_runtime();
|
wolffd@0
|
896 timing_profile.time_kernel=0;
|
wolffd@0
|
897 timing_profile.time_opti=0;
|
wolffd@0
|
898 timing_profile.time_shrink=0;
|
wolffd@0
|
899 timing_profile.time_update=0;
|
wolffd@0
|
900 timing_profile.time_model=0;
|
wolffd@0
|
901 timing_profile.time_check=0;
|
wolffd@0
|
902 timing_profile.time_select=0;
|
wolffd@0
|
903 kernel_cache_statistic=0;
|
wolffd@0
|
904
|
wolffd@0
|
905 learn_parm->totwords=totwords;
|
wolffd@0
|
906
|
wolffd@0
|
907 /* make sure -n value is reasonable */
|
wolffd@0
|
908 if((learn_parm->svm_newvarsinqp < 2)
|
wolffd@0
|
909 || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
|
wolffd@0
|
910 learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
|
wolffd@0
|
911 }
|
wolffd@0
|
912
|
wolffd@0
|
913 init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);
|
wolffd@0
|
914
|
wolffd@0
|
915 label = (long *)my_malloc(sizeof(long)*totdoc);
|
wolffd@0
|
916 unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
|
wolffd@0
|
917 inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
|
wolffd@0
|
918 c = (double *)my_malloc(sizeof(double)*totdoc);
|
wolffd@0
|
919 a = (double *)my_malloc(sizeof(double)*totdoc);
|
wolffd@0
|
920 lin = (double *)my_malloc(sizeof(double)*totdoc);
|
wolffd@0
|
921 learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
|
wolffd@0
|
922 model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
|
wolffd@0
|
923 model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
|
wolffd@0
|
924 model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
|
wolffd@0
|
925
|
wolffd@0
|
926 model->at_upper_bound=0;
|
wolffd@0
|
927 model->b=0;
|
wolffd@0
|
928 model->supvec[0]=0; /* element 0 reserved and empty for now */
|
wolffd@0
|
929 model->alpha[0]=0;
|
wolffd@0
|
930 model->lin_weights=NULL;
|
wolffd@0
|
931 model->totwords=totwords;
|
wolffd@0
|
932 model->totdoc=totdoc;
|
wolffd@0
|
933 model->kernel_parm=(*kernel_parm);
|
wolffd@0
|
934 model->sv_num=1;
|
wolffd@0
|
935 model->loo_error=-1;
|
wolffd@0
|
936 model->loo_recall=-1;
|
wolffd@0
|
937 model->loo_precision=-1;
|
wolffd@0
|
938 model->xa_error=-1;
|
wolffd@0
|
939 model->xa_recall=-1;
|
wolffd@0
|
940 model->xa_precision=-1;
|
wolffd@0
|
941
|
wolffd@0
|
942 r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
|
wolffd@0
|
943 r_delta_sq=r_delta*r_delta;
|
wolffd@0
|
944
|
wolffd@0
|
945 r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
|
wolffd@0
|
946 if(learn_parm->svm_c == 0.0) { /* default value for C */
|
wolffd@0
|
947 learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
|
wolffd@0
|
948 if(verbosity>=1)
|
wolffd@0
|
949 printf("Setting default regularization parameter C=%.4f\n",
|
wolffd@0
|
950 learn_parm->svm_c);
|
wolffd@0
|
951 }
|
wolffd@0
|
952
|
wolffd@0
|
953 learn_parm->biased_hyperplane=0; /* learn an unbiased hyperplane */
|
wolffd@0
|
954
|
wolffd@0
|
955 learn_parm->eps=0.0; /* No margin, unless explicitly handcoded
|
wolffd@0
|
956 in the right-hand side in the training
|
wolffd@0
|
957 set. */
|
wolffd@0
|
958
|
wolffd@0
|
959 for(i=0;i<totdoc;i++) { /* various inits */
|
wolffd@0
|
960 docs[i]->docnum=i;
|
wolffd@0
|
961 a[i]=0;
|
wolffd@0
|
962 lin[i]=0;
|
wolffd@0
|
963 c[i]=rhs[i]; /* set right-hand side */
|
wolffd@0
|
964 unlabeled[i]=0;
|
wolffd@0
|
965 inconsistent[i]=0;
|
wolffd@0
|
966 learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
|
wolffd@0
|
967 docs[i]->costfactor;
|
wolffd@0
|
968 label[i]=1;
|
wolffd@0
|
969 }
|
wolffd@0
|
970 if(learn_parm->sharedslack) /* if shared slacks are used, they must */
|
wolffd@0
|
971 for(i=0;i<totdoc;i++) /* be used on every constraint */
|
wolffd@0
|
972 if(!docs[i]->slackid) {
|
wolffd@0
|
973 perror("Error: Missing shared slacks definitions in some of the examples.");
|
wolffd@0
|
974 exit(0);
|
wolffd@0
|
975 }
|
wolffd@0
|
976
|
wolffd@0
|
977 /* compute starting state for initial alpha values */
|
wolffd@0
|
978 if(alpha) {
|
wolffd@0
|
979 if(verbosity>=1) {
|
wolffd@0
|
980 printf("Computing starting state..."); fflush(stdout);
|
wolffd@0
|
981 }
|
wolffd@0
|
982 index = (long *)my_malloc(sizeof(long)*totdoc);
|
wolffd@0
|
983 index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
|
wolffd@0
|
984 weights=(double *)my_malloc(sizeof(double)*(totwords+1));
|
wolffd@0
|
985 aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
|
wolffd@0
|
986 for(i=0;i<totdoc;i++) { /* create full index and clip alphas */
|
wolffd@0
|
987 index[i]=1;
|
wolffd@0
|
988 alpha[i]=fabs(alpha[i]);
|
wolffd@0
|
989 if(alpha[i]<0) alpha[i]=0;
|
wolffd@0
|
990 if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
|
wolffd@0
|
991 }
|
wolffd@0
|
992 if(kernel_parm->kernel_type != LINEAR) {
|
wolffd@0
|
993 for(i=0;i<totdoc;i++) /* fill kernel cache with unbounded SV */
|
wolffd@0
|
994 if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
|
wolffd@0
|
995 && (kernel_cache_space_available(kernel_cache)))
|
wolffd@0
|
996 cache_kernel_row(kernel_cache,docs,i,kernel_parm);
|
wolffd@0
|
997 for(i=0;i<totdoc;i++) /* fill rest of kernel cache with bounded SV */
|
wolffd@0
|
998 if((alpha[i]==learn_parm->svm_cost[i])
|
wolffd@0
|
999 && (kernel_cache_space_available(kernel_cache)))
|
wolffd@0
|
1000 cache_kernel_row(kernel_cache,docs,i,kernel_parm);
|
wolffd@0
|
1001 }
|
wolffd@0
|
1002 (void)compute_index(index,totdoc,index2dnum);
|
wolffd@0
|
1003 update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
|
wolffd@0
|
1004 totwords,kernel_parm,kernel_cache,lin,aicache,
|
wolffd@0
|
1005 weights);
|
wolffd@0
|
1006 (void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c,
|
wolffd@0
|
1007 learn_parm,index2dnum,index2dnum,model);
|
wolffd@0
|
1008 for(i=0;i<totdoc;i++) { /* copy initial alphas */
|
wolffd@0
|
1009 a[i]=alpha[i];
|
wolffd@0
|
1010 }
|
wolffd@0
|
1011 free(index);
|
wolffd@0
|
1012 free(index2dnum);
|
wolffd@0
|
1013 free(weights);
|
wolffd@0
|
1014 free(aicache);
|
wolffd@0
|
1015 if(verbosity>=1) {
|
wolffd@0
|
1016 printf("done.\n"); fflush(stdout);
|
wolffd@0
|
1017 }
|
wolffd@0
|
1018 }
|
wolffd@0
|
1019
|
wolffd@0
|
1020 /* removing inconsistent does not work for general optimization problem */
|
wolffd@0
|
1021 if(learn_parm->remove_inconsistent) {
|
wolffd@0
|
1022 learn_parm->remove_inconsistent = 0;
|
wolffd@0
|
1023 printf("'remove inconsistent' not available in this mode. Switching option off!"); fflush(stdout);
|
wolffd@0
|
1024 }
|
wolffd@0
|
1025
|
wolffd@0
|
1026 /* caching makes no sense for linear kernel */
|
wolffd@0
|
1027 if(kernel_parm->kernel_type == LINEAR) {
|
wolffd@0
|
1028 kernel_cache = NULL;
|
wolffd@0
|
1029 }
|
wolffd@0
|
1030
|
wolffd@0
|
1031 if(verbosity==1) {
|
wolffd@0
|
1032 printf("Optimizing"); fflush(stdout);
|
wolffd@0
|
1033 }
|
wolffd@0
|
1034
|
wolffd@0
|
1035 /* train the svm */
|
wolffd@0
|
1036 if(learn_parm->sharedslack)
|
wolffd@0
|
1037 iterations=optimize_to_convergence_sharedslack(docs,label,totdoc,
|
wolffd@0
|
1038 totwords,learn_parm,kernel_parm,
|
wolffd@0
|
1039 kernel_cache,&shrink_state,model,
|
wolffd@0
|
1040 a,lin,c,&timing_profile,
|
wolffd@0
|
1041 &maxdiff);
|
wolffd@0
|
1042 else
|
wolffd@0
|
1043 iterations=optimize_to_convergence(docs,label,totdoc,
|
wolffd@0
|
1044 totwords,learn_parm,kernel_parm,
|
wolffd@0
|
1045 kernel_cache,&shrink_state,model,
|
wolffd@0
|
1046 inconsistent,unlabeled,
|
wolffd@0
|
1047 a,lin,c,&timing_profile,
|
wolffd@0
|
1048 &maxdiff,(long)-1,(long)1);
|
wolffd@0
|
1049
|
wolffd@0
|
1050 if(verbosity>=1) {
|
wolffd@0
|
1051 if(verbosity==1) printf("done. (%ld iterations)\n",iterations);
|
wolffd@0
|
1052
|
wolffd@0
|
1053 misclassified=0;
|
wolffd@0
|
1054 for(i=0;(i<totdoc);i++) { /* get final statistic */
|
wolffd@0
|
1055 if((lin[i]-model->b)*(double)label[i] <= 0.0)
|
wolffd@0
|
1056 misclassified++;
|
wolffd@0
|
1057 }
|
wolffd@0
|
1058
|
wolffd@0
|
1059 printf("Optimization finished (maxdiff=%.5f).\n",maxdiff);
|
wolffd@0
|
1060
|
wolffd@0
|
1061 runtime_end=get_runtime();
|
wolffd@0
|
1062 if(verbosity>=2) {
|
wolffd@0
|
1063 printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
|
wolffd@0
|
1064 ((float)runtime_end-(float)runtime_start)/100.0,
|
wolffd@0
|
1065 (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),
|
wolffd@0
|
1066 (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),
|
wolffd@0
|
1067 (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),
|
wolffd@0
|
1068 (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),
|
wolffd@0
|
1069 (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),
|
wolffd@0
|
1070 (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),
|
wolffd@0
|
1071 (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));
|
wolffd@0
|
1072 }
|
wolffd@0
|
1073 else {
|
wolffd@0
|
1074 printf("Runtime in cpu-seconds: %.2f\n",
|
wolffd@0
|
1075 (runtime_end-runtime_start)/100.0);
|
wolffd@0
|
1076 }
|
wolffd@0
|
1077 }
|
wolffd@0
|
1078 if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
|
wolffd@0
|
1079 loss=0;
|
wolffd@0
|
1080 model_length=0;
|
wolffd@0
|
1081 for(i=0;i<totdoc;i++) {
|
wolffd@0
|
1082 if((lin[i]-model->b)*(double)label[i] < c[i]-learn_parm->epsilon_crit)
|
wolffd@0
|
1083 loss+=c[i]-(lin[i]-model->b)*(double)label[i];
|
wolffd@0
|
1084 model_length+=a[i]*label[i]*lin[i];
|
wolffd@0
|
1085 }
|
wolffd@0
|
1086 model_length=sqrt(model_length);
|
wolffd@0
|
1087 fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
|
wolffd@0
|
1088 }
|
wolffd@0
|
1089
|
wolffd@0
|
1090 if(learn_parm->sharedslack) {
|
wolffd@0
|
1091 index = (long *)my_malloc(sizeof(long)*totdoc);
|
wolffd@0
|
1092 index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
|
wolffd@0
|
1093 maxslackid=0;
|
wolffd@0
|
1094 for(i=0;i<totdoc;i++) { /* create full index */
|
wolffd@0
|
1095 index[i]=1;
|
wolffd@0
|
1096 if(maxslackid<docs[i]->slackid)
|
wolffd@0
|
1097 maxslackid=docs[i]->slackid;
|
wolffd@0
|
1098 }
|
wolffd@0
|
1099 (void)compute_index(index,totdoc,index2dnum);
|
wolffd@0
|
1100 slack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
|
wolffd@0
|
1101 alphaslack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
|
wolffd@0
|
1102 for(i=0;i<=maxslackid;i++) { /* init shared slacks */
|
wolffd@0
|
1103 slack[i]=0;
|
wolffd@0
|
1104 alphaslack[i]=0;
|
wolffd@0
|
1105 }
|
wolffd@0
|
1106 compute_shared_slacks(docs,label,a,lin,c,index2dnum,learn_parm,
|
wolffd@0
|
1107 slack,alphaslack);
|
wolffd@0
|
1108 loss=0;
|
wolffd@0
|
1109 model->at_upper_bound=0;
|
wolffd@0
|
1110 svsetnum=0;
|
wolffd@0
|
1111 for(i=0;i<=maxslackid;i++) { /* create full index */
|
wolffd@0
|
1112 loss+=slack[i];
|
wolffd@0
|
1113 if(alphaslack[i] > (learn_parm->svm_c - learn_parm->epsilon_a))
|
wolffd@0
|
1114 model->at_upper_bound++;
|
wolffd@0
|
1115 if(alphaslack[i] > learn_parm->epsilon_a)
|
wolffd@0
|
1116 svsetnum++;
|
wolffd@0
|
1117 }
|
wolffd@0
|
1118 free(index);
|
wolffd@0
|
1119 free(index2dnum);
|
wolffd@0
|
1120 free(slack);
|
wolffd@0
|
1121 free(alphaslack);
|
wolffd@0
|
1122 }
|
wolffd@0
|
1123
|
wolffd@0
|
1124 if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
|
wolffd@0
|
1125 if(learn_parm->sharedslack) {
|
wolffd@0
|
1126 printf("Number of SV: %ld\n",
|
wolffd@0
|
1127 model->sv_num-1);
|
wolffd@0
|
1128 printf("Number of non-zero slack variables: %ld (out of %ld)\n",
|
wolffd@0
|
1129 model->at_upper_bound,svsetnum);
|
wolffd@0
|
1130 fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
|
wolffd@0
|
1131 }
|
wolffd@0
|
1132 else {
|
wolffd@0
|
1133 upsupvecnum=0;
|
wolffd@0
|
1134 for(i=1;i<model->sv_num;i++) {
|
wolffd@0
|
1135 if(fabs(model->alpha[i]) >=
|
wolffd@0
|
1136 (learn_parm->svm_cost[(model->supvec[i])->docnum]-
|
wolffd@0
|
1137 learn_parm->epsilon_a))
|
wolffd@0
|
1138 upsupvecnum++;
|
wolffd@0
|
1139 }
|
wolffd@0
|
1140 printf("Number of SV: %ld (including %ld at upper bound)\n",
|
wolffd@0
|
1141 model->sv_num-1,upsupvecnum);
|
wolffd@0
|
1142 fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
|
wolffd@0
|
1143 }
|
wolffd@0
|
1144 example_length=estimate_sphere(model,kernel_parm);
|
wolffd@0
|
1145 fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
|
wolffd@0
|
1146 length_of_longest_document_vector(docs,totdoc,kernel_parm));
|
wolffd@0
|
1147 }
|
wolffd@0
|
1148 if(verbosity>=1) {
|
wolffd@0
|
1149 printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
|
wolffd@0
|
1150 }
|
wolffd@0
|
1151
|
wolffd@0
|
1152 if(alpha) {
|
wolffd@0
|
1153 for(i=0;i<totdoc;i++) { /* copy final alphas */
|
wolffd@0
|
1154 alpha[i]=a[i];
|
wolffd@0
|
1155 }
|
wolffd@0
|
1156 }
|
wolffd@0
|
1157
|
wolffd@0
|
1158 if(learn_parm->alphafile[0])
|
wolffd@0
|
1159 write_alphas(learn_parm->alphafile,a,label,totdoc);
|
wolffd@0
|
1160
|
wolffd@0
|
1161 shrink_state_cleanup(&shrink_state);
|
wolffd@0
|
1162 free(label);
|
wolffd@0
|
1163 free(unlabeled);
|
wolffd@0
|
1164 free(inconsistent);
|
wolffd@0
|
1165 free(c);
|
wolffd@0
|
1166 free(a);
|
wolffd@0
|
1167 free(lin);
|
wolffd@0
|
1168 free(learn_parm->svm_cost);
|
wolffd@0
|
1169 }
|
wolffd@0
|
1170
|
wolffd@0
|
1171
|
wolffd@0
|
1172 long optimize_to_convergence(DOC **docs, long int *label, long int totdoc,
|
wolffd@0
|
1173 long int totwords, LEARN_PARM *learn_parm,
|
wolffd@0
|
1174 KERNEL_PARM *kernel_parm,
|
wolffd@0
|
1175 KERNEL_CACHE *kernel_cache,
|
wolffd@0
|
1176 SHRINK_STATE *shrink_state, MODEL *model,
|
wolffd@0
|
1177 long int *inconsistent, long int *unlabeled,
|
wolffd@0
|
1178 double *a, double *lin, double *c,
|
wolffd@0
|
1179 TIMING *timing_profile, double *maxdiff,
|
wolffd@0
|
1180 long int heldout, long int retrain)
|
wolffd@0
|
1181 /* docs: Training vectors (x-part) */
|
wolffd@0
|
1182 /* label: Training labels/value (y-part, zero if test example for
|
wolffd@0
|
1183 transduction) */
|
wolffd@0
|
1184 /* totdoc: Number of examples in docs/label */
|
wolffd@0
|
1185 /* totwords: Number of features (i.e. highest feature index) */
|
wolffd@0
|
1186 /* laern_parm: Learning paramenters */
|
wolffd@0
|
1187 /* kernel_parm: Kernel paramenters */
|
wolffd@0
|
1188 /* kernel_cache: Initialized/partly filled Cache, if using a kernel.
|
wolffd@0
|
1189 NULL if linear. */
|
wolffd@0
|
1190 /* shrink_state: State of active variables */
|
wolffd@0
|
1191 /* model: Returns learning result */
|
wolffd@0
|
1192 /* inconsistent: examples thrown out as inconstistent */
|
wolffd@0
|
1193 /* unlabeled: test examples for transduction */
|
wolffd@0
|
1194 /* a: alphas */
|
wolffd@0
|
1195 /* lin: linear component of gradient */
|
wolffd@0
|
1196 /* c: right hand side of inequalities (margin) */
|
wolffd@0
|
1197 /* maxdiff: returns maximum violation of KT-conditions */
|
wolffd@0
|
1198 /* heldout: marks held-out example for leave-one-out (or -1) */
|
wolffd@0
|
1199 /* retrain: selects training mode (1=regular / 2=holdout) */
|
wolffd@0
|
1200 {
|
wolffd@0
|
1201 long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink;
|
wolffd@0
|
1202 long inconsistentnum,choosenum,already_chosen=0,iteration;
|
wolffd@0
|
1203 long misclassified,supvecnum=0,*active2dnum,inactivenum;
|
wolffd@0
|
1204 long *working2dnum,*selexam;
|
wolffd@0
|
1205 long activenum;
|
wolffd@0
|
1206 double criterion,eq;
|
wolffd@0
|
1207 double *a_old;
|
wolffd@0
|
1208 long t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */
|
wolffd@0
|
1209 long transductcycle;
|
wolffd@0
|
1210 long transduction;
|
wolffd@0
|
1211 double epsilon_crit_org;
|
wolffd@0
|
1212 double bestmaxdiff;
|
wolffd@0
|
1213 long bestmaxdiffiter,terminate;
|
wolffd@0
|
1214
|
wolffd@0
|
1215 double *selcrit; /* buffer for sorting */
|
wolffd@0
|
1216 CFLOAT *aicache; /* buffer to keep one row of hessian */
|
wolffd@0
|
1217 double *weights; /* buffer for weight vector in linear case */
|
wolffd@0
|
1218 QP qp; /* buffer for one quadratic program */
|
wolffd@0
|
1219
|
wolffd@0
|
1220 epsilon_crit_org=learn_parm->epsilon_crit; /* save org */
|
wolffd@0
|
1221 if(kernel_parm->kernel_type == LINEAR) {
|
wolffd@0
|
1222 learn_parm->epsilon_crit=2.0;
|
wolffd@0
|
1223 kernel_cache=NULL; /* caching makes no sense for linear kernel */
|
wolffd@0
|
1224 }
|
wolffd@0
|
1225 learn_parm->epsilon_shrink=2;
|
wolffd@0
|
1226 (*maxdiff)=1;
|
wolffd@0
|
1227
|
wolffd@0
|
1228 learn_parm->totwords=totwords;
|
wolffd@0
|
1229
|
wolffd@0
|
1230 chosen = (long *)my_malloc(sizeof(long)*totdoc);
|
wolffd@0
|
1231 last_suboptimal_at = (long *)my_malloc(sizeof(long)*totdoc);
|
wolffd@0
|
1232 key = (long *)my_malloc(sizeof(long)*(totdoc+11));
|
wolffd@0
|
1233 selcrit = (double *)my_malloc(sizeof(double)*totdoc);
|
wolffd@0
|
1234 selexam = (long *)my_malloc(sizeof(long)*totdoc);
|
wolffd@0
|
1235 a_old = (double *)my_malloc(sizeof(double)*totdoc);
|
wolffd@0
|
1236 aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
|
wolffd@0
|
1237 working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
|
wolffd@0
|
1238 active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
|
wolffd@0
|
1239 qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
|
wolffd@0
|
1240 qp.opt_ce0 = (double *)my_malloc(sizeof(double));
|
wolffd@0
|
1241 qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize
|
wolffd@0
|
1242 *learn_parm->svm_maxqpsize);
|
wolffd@0
|
1243 qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
|
wolffd@0
|
1244 qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
|
wolffd@0
|
1245 qp.opt_low=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
|
wolffd@0
|
1246 qp.opt_up=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
|
wolffd@0
|
1247 weights=(double *)my_malloc(sizeof(double)*(totwords+1));
|
wolffd@0
|
1248
|
wolffd@0
|
1249 choosenum=0;
|
wolffd@0
|
1250 inconsistentnum=0;
|
wolffd@0
|
1251 transductcycle=0;
|
wolffd@0
|
1252 transduction=0;
|
wolffd@0
|
1253 if(!retrain) retrain=1;
|
wolffd@0
|
1254 iteration=1;
|
wolffd@0
|
1255 bestmaxdiffiter=1;
|
wolffd@0
|
1256 bestmaxdiff=999999999;
|
wolffd@0
|
1257 terminate=0;
|
wolffd@0
|
1258
|
wolffd@0
|
1259 if(kernel_cache) {
|
wolffd@0
|
1260 kernel_cache->time=iteration; /* for lru cache */
|
wolffd@0
|
1261 kernel_cache_reset_lru(kernel_cache);
|
wolffd@0
|
1262 }
|
wolffd@0
|
1263
|
wolffd@0
|
1264 for(i=0;i<totdoc;i++) { /* various inits */
|
wolffd@0
|
1265 chosen[i]=0;
|
wolffd@0
|
1266 a_old[i]=a[i];
|
wolffd@0
|
1267 last_suboptimal_at[i]=1;
|
wolffd@0
|
1268 if(inconsistent[i])
|
wolffd@0
|
1269 inconsistentnum++;
|
wolffd@0
|
1270 if(unlabeled[i]) {
|
wolffd@0
|
1271 transduction=1;
|
wolffd@0
|
1272 }
|
wolffd@0
|
1273 }
|
wolffd@0
|
1274 activenum=compute_index(shrink_state->active,totdoc,active2dnum);
|
wolffd@0
|
1275 inactivenum=totdoc-activenum;
|
wolffd@0
|
1276 clear_index(working2dnum);
|
wolffd@0
|
1277
|
wolffd@0
|
1278 /* repeat this loop until we have convergence */
|
wolffd@0
|
1279 for(;retrain && (!terminate);iteration++) {
|
wolffd@0
|
1280
|
wolffd@0
|
1281 if(kernel_cache)
|
wolffd@0
|
1282 kernel_cache->time=iteration; /* for lru cache */
|
wolffd@0
|
1283 if(verbosity>=2) {
|
wolffd@0
|
1284 printf(
|
wolffd@0
|
1285 "Iteration %ld: ",iteration); fflush(stdout);
|
wolffd@0
|
1286 }
|
wolffd@0
|
1287 else if(verbosity==1) {
|
wolffd@0
|
1288 printf("."); fflush(stdout);
|
wolffd@0
|
1289 }
|
wolffd@0
|
1290
|
wolffd@0
|
1291 if(verbosity>=2) t0=get_runtime();
|
wolffd@0
|
1292 if(verbosity>=3) {
|
wolffd@0
|
1293 printf("\nSelecting working set... "); fflush(stdout);
|
wolffd@0
|
1294 }
|
wolffd@0
|
1295
|
wolffd@0
|
1296 if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize)
|
wolffd@0
|
1297 learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
|
wolffd@0
|
1298
|
wolffd@0
|
1299 i=0;
|
wolffd@0
|
1300 for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */
|
wolffd@0
|
1301 if((chosen[j]>=(learn_parm->svm_maxqpsize/
|
wolffd@0
|
1302 minl(learn_parm->svm_maxqpsize,
|
wolffd@0
|
1303 learn_parm->svm_newvarsinqp)))
|
wolffd@0
|
1304 || (inconsistent[j])
|
wolffd@0
|
1305 || (j == heldout)) {
|
wolffd@0
|
1306 chosen[j]=0;
|
wolffd@0
|
1307 choosenum--;
|
wolffd@0
|
1308 }
|
wolffd@0
|
1309 else {
|
wolffd@0
|
1310 chosen[j]++;
|
wolffd@0
|
1311 working2dnum[i++]=j;
|
wolffd@0
|
1312 }
|
wolffd@0
|
1313 }
|
wolffd@0
|
1314 working2dnum[i]=-1;
|
wolffd@0
|
1315
|
wolffd@0
|
1316 if(retrain == 2) {
|
wolffd@0
|
1317 choosenum=0;
|
wolffd@0
|
1318 for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* fully clear working set */
|
wolffd@0
|
1319 chosen[j]=0;
|
wolffd@0
|
1320 }
|
wolffd@0
|
1321 clear_index(working2dnum);
|
wolffd@0
|
1322 for(i=0;i<totdoc;i++) { /* set inconsistent examples to zero (-i 1) */
|
wolffd@0
|
1323 if((inconsistent[i] || (heldout==i)) && (a[i] != 0.0)) {
|
wolffd@0
|
1324 chosen[i]=99999;
|
wolffd@0
|
1325 choosenum++;
|
wolffd@0
|
1326 a[i]=0;
|
wolffd@0
|
1327 }
|
wolffd@0
|
1328 }
|
wolffd@0
|
1329 if(learn_parm->biased_hyperplane) {
|
wolffd@0
|
1330 eq=0;
|
wolffd@0
|
1331 for(i=0;i<totdoc;i++) { /* make sure we fulfill equality constraint */
|
wolffd@0
|
1332 eq+=a[i]*label[i];
|
wolffd@0
|
1333 }
|
wolffd@0
|
1334 for(i=0;(i<totdoc) && (fabs(eq) > learn_parm->epsilon_a);i++) {
|
wolffd@0
|
1335 if((eq*label[i] > 0) && (a[i] > 0)) {
|
wolffd@0
|
1336 chosen[i]=88888;
|
wolffd@0
|
1337 choosenum++;
|
wolffd@0
|
1338 if((eq*label[i]) > a[i]) {
|
wolffd@0
|
1339 eq-=(a[i]*label[i]);
|
wolffd@0
|
1340 a[i]=0;
|
wolffd@0
|
1341 }
|
wolffd@0
|
1342 else {
|
wolffd@0
|
1343 a[i]-=(eq*label[i]);
|
wolffd@0
|
1344 eq=0;
|
wolffd@0
|
1345 }
|
wolffd@0
|
1346 }
|
wolffd@0
|
1347 }
|
wolffd@0
|
1348 }
|
wolffd@0
|
1349 compute_index(chosen,totdoc,working2dnum);
|
wolffd@0
|
1350 }
|
wolffd@0
|
1351 else { /* select working set according to steepest gradient */
|
wolffd@0
|
1352 if(iteration % 101) {
|
wolffd@0
|
1353 already_chosen=0;
|
wolffd@0
|
1354 if((minl(learn_parm->svm_newvarsinqp,
|
wolffd@0
|
1355 learn_parm->svm_maxqpsize-choosenum)>=4)
|
wolffd@0
|
1356 && (kernel_parm->kernel_type != LINEAR)) {
|
wolffd@0
|
1357 /* select part of the working set from cache */
|
wolffd@0
|
1358 already_chosen=select_next_qp_subproblem_grad(
|
wolffd@0
|
1359 label,unlabeled,a,lin,c,totdoc,
|
wolffd@0
|
1360 (long)(minl(learn_parm->svm_maxqpsize-choosenum,
|
wolffd@0
|
1361 learn_parm->svm_newvarsinqp)
|
wolffd@0
|
1362 /2),
|
wolffd@0
|
1363 learn_parm,inconsistent,active2dnum,
|
wolffd@0
|
1364 working2dnum,selcrit,selexam,kernel_cache,1,
|
wolffd@0
|
1365 key,chosen);
|
wolffd@0
|
1366 choosenum+=already_chosen;
|
wolffd@0
|
1367 }
|
wolffd@0
|
1368 choosenum+=select_next_qp_subproblem_grad(
|
wolffd@0
|
1369 label,unlabeled,a,lin,c,totdoc,
|
wolffd@0
|
1370 minl(learn_parm->svm_maxqpsize-choosenum,
|
wolffd@0
|
1371 learn_parm->svm_newvarsinqp-already_chosen),
|
wolffd@0
|
1372 learn_parm,inconsistent,active2dnum,
|
wolffd@0
|
1373 working2dnum,selcrit,selexam,kernel_cache,0,key,
|
wolffd@0
|
1374 chosen);
|
wolffd@0
|
1375 }
|
wolffd@0
|
1376 else { /* once in a while, select a somewhat random working set
|
wolffd@0
|
1377 to get unlocked of infinite loops due to numerical
|
wolffd@0
|
1378 inaccuracies in the core qp-solver */
|
wolffd@0
|
1379 choosenum+=select_next_qp_subproblem_rand(
|
wolffd@0
|
1380 label,unlabeled,a,lin,c,totdoc,
|
wolffd@0
|
1381 minl(learn_parm->svm_maxqpsize-choosenum,
|
wolffd@0
|
1382 learn_parm->svm_newvarsinqp),
|
wolffd@0
|
1383 learn_parm,inconsistent,active2dnum,
|
wolffd@0
|
1384 working2dnum,selcrit,selexam,kernel_cache,key,
|
wolffd@0
|
1385 chosen,iteration);
|
wolffd@0
|
1386 }
|
wolffd@0
|
1387 }
|
wolffd@0
|
1388
|
wolffd@0
|
1389 if(verbosity>=2) {
|
wolffd@0
|
1390 printf(" %ld vectors chosen\n",choosenum); fflush(stdout);
|
wolffd@0
|
1391 }
|
wolffd@0
|
1392
|
wolffd@0
|
1393 if(verbosity>=2) t1=get_runtime();
|
wolffd@0
|
1394
|
wolffd@0
|
1395 if(kernel_cache)
|
wolffd@0
|
1396 cache_multiple_kernel_rows(kernel_cache,docs,working2dnum,
|
wolffd@0
|
1397 choosenum,kernel_parm);
|
wolffd@0
|
1398
|
wolffd@0
|
1399 if(verbosity>=2) t2=get_runtime();
|
wolffd@0
|
1400 if(retrain != 2) {
|
wolffd@0
|
1401 optimize_svm(docs,label,unlabeled,inconsistent,0.0,chosen,active2dnum,
|
wolffd@0
|
1402 model,totdoc,working2dnum,choosenum,a,lin,c,learn_parm,
|
wolffd@0
|
1403 aicache,kernel_parm,&qp,&epsilon_crit_org);
|
wolffd@0
|
1404 }
|
wolffd@0
|
1405
|
wolffd@0
|
1406 if(verbosity>=2) t3=get_runtime();
|
wolffd@0
|
1407 update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc,
|
wolffd@0
|
1408 totwords,kernel_parm,kernel_cache,lin,aicache,
|
wolffd@0
|
1409 weights);
|
wolffd@0
|
1410
|
wolffd@0
|
1411 if(verbosity>=2) t4=get_runtime();
|
wolffd@0
|
1412 supvecnum=calculate_svm_model(docs,label,unlabeled,lin,a,a_old,c,
|
wolffd@0
|
1413 learn_parm,working2dnum,active2dnum,model);
|
wolffd@0
|
1414
|
wolffd@0
|
1415 if(verbosity>=2) t5=get_runtime();
|
wolffd@0
|
1416
|
wolffd@0
|
1417 /* The following computation of the objective function works only */
|
wolffd@0
|
1418 /* relative to the active variables */
|
wolffd@0
|
1419 if(verbosity>=3) {
|
wolffd@0
|
1420 criterion=compute_objective_function(a,lin,c,learn_parm->eps,label,
|
wolffd@0
|
1421 active2dnum);
|
wolffd@0
|
1422 printf("Objective function (over active variables): %.16f\n",criterion);
|
wolffd@0
|
1423 fflush(stdout);
|
wolffd@0
|
1424 }
|
wolffd@0
|
1425
|
wolffd@0
|
1426 for(jj=0;(i=working2dnum[jj])>=0;jj++) {
|
wolffd@0
|
1427 a_old[i]=a[i];
|
wolffd@0
|
1428 }
|
wolffd@0
|
1429
|
wolffd@0
|
1430 if(retrain == 2) { /* reset inconsistent unlabeled examples */
|
wolffd@0
|
1431 for(i=0;(i<totdoc);i++) {
|
wolffd@0
|
1432 if(inconsistent[i] && unlabeled[i]) {
|
wolffd@0
|
1433 inconsistent[i]=0;
|
wolffd@0
|
1434 label[i]=0;
|
wolffd@0
|
1435 }
|
wolffd@0
|
1436 }
|
wolffd@0
|
1437 }
|
wolffd@0
|
1438
|
wolffd@0
|
1439 retrain=check_optimality(model,label,unlabeled,a,lin,c,totdoc,learn_parm,
|
wolffd@0
|
1440 maxdiff,epsilon_crit_org,&misclassified,
|
wolffd@0
|
1441 inconsistent,active2dnum,last_suboptimal_at,
|
wolffd@0
|
1442 iteration,kernel_parm);
|
wolffd@0
|
1443
|
wolffd@0
|
1444 if(verbosity>=2) {
|
wolffd@0
|
1445 t6=get_runtime();
|
wolffd@0
|
1446 timing_profile->time_select+=t1-t0;
|
wolffd@0
|
1447 timing_profile->time_kernel+=t2-t1;
|
wolffd@0
|
1448 timing_profile->time_opti+=t3-t2;
|
wolffd@0
|
1449 timing_profile->time_update+=t4-t3;
|
wolffd@0
|
1450 timing_profile->time_model+=t5-t4;
|
wolffd@0
|
1451 timing_profile->time_check+=t6-t5;
|
wolffd@0
|
1452 }
|
wolffd@0
|
1453
|
wolffd@0
|
1454 /* checking whether optimizer got stuck */
|
wolffd@0
|
1455 if((*maxdiff) < bestmaxdiff) {
|
wolffd@0
|
1456 bestmaxdiff=(*maxdiff);
|
wolffd@0
|
1457 bestmaxdiffiter=iteration;
|
wolffd@0
|
1458 }
|
wolffd@0
|
1459 if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) {
|
wolffd@0
|
1460 /* long time no progress? */
|
wolffd@0
|
1461 terminate=1;
|
wolffd@0
|
1462 retrain=0;
|
wolffd@0
|
1463 if(verbosity>=1)
|
wolffd@0
|
1464 printf("\nWARNING: Relaxing KT-Conditions due to slow progress! Terminating!\n");
|
wolffd@0
|
1465 }
|
wolffd@0
|
1466
|
wolffd@0
|
1467 noshrink=0;
|
wolffd@0
|
1468 if((!retrain) && (inactivenum>0)
|
wolffd@0
|
1469 && ((!learn_parm->skip_final_opt_check)
|
wolffd@0
|
1470 || (kernel_parm->kernel_type == LINEAR))) {
|
wolffd@0
|
1471 if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
|
wolffd@0
|
1472 || (verbosity>=2)) {
|
wolffd@0
|
1473 if(verbosity==1) {
|
wolffd@0
|
1474 printf("\n");
|
wolffd@0
|
1475 }
|
wolffd@0
|
1476 printf(" Checking optimality of inactive variables...");
|
wolffd@0
|
1477 fflush(stdout);
|
wolffd@0
|
1478 }
|
wolffd@0
|
1479 t1=get_runtime();
|
wolffd@0
|
1480 reactivate_inactive_examples(label,unlabeled,a,shrink_state,lin,c,totdoc,
|
wolffd@0
|
1481 totwords,iteration,learn_parm,inconsistent,
|
wolffd@0
|
1482 docs,kernel_parm,kernel_cache,model,aicache,
|
wolffd@0
|
1483 weights,maxdiff);
|
wolffd@0
|
1484 /* Update to new active variables. */
|
wolffd@0
|
1485 activenum=compute_index(shrink_state->active,totdoc,active2dnum);
|
wolffd@0
|
1486 inactivenum=totdoc-activenum;
|
wolffd@0
|
1487 /* reset watchdog */
|
wolffd@0
|
1488 bestmaxdiff=(*maxdiff);
|
wolffd@0
|
1489 bestmaxdiffiter=iteration;
|
wolffd@0
|
1490 /* termination criterion */
|
wolffd@0
|
1491 noshrink=1;
|
wolffd@0
|
1492 retrain=0;
|
wolffd@0
|
1493 if((*maxdiff) > learn_parm->epsilon_crit)
|
wolffd@0
|
1494 retrain=1;
|
wolffd@0
|
1495 timing_profile->time_shrink+=get_runtime()-t1;
|
wolffd@0
|
1496 if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
|
wolffd@0
|
1497 || (verbosity>=2)) {
|
wolffd@0
|
1498 printf("done.\n"); fflush(stdout);
|
wolffd@0
|
1499 printf(" Number of inactive variables = %ld\n",inactivenum);
|
wolffd@0
|
1500 }
|
wolffd@0
|
1501 }
|
wolffd@0
|
1502
|
wolffd@0
|
1503 if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff)))
|
wolffd@0
|
1504 learn_parm->epsilon_crit=(*maxdiff);
|
wolffd@0
|
1505 if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) {
|
wolffd@0
|
1506 learn_parm->epsilon_crit/=2.0;
|
wolffd@0
|
1507 retrain=1;
|
wolffd@0
|
1508 noshrink=1;
|
wolffd@0
|
1509 }
|
wolffd@0
|
1510 if(learn_parm->epsilon_crit<epsilon_crit_org)
|
wolffd@0
|
1511 learn_parm->epsilon_crit=epsilon_crit_org;
|
wolffd@0
|
1512
|
wolffd@0
|
1513 if(verbosity>=2) {
|
wolffd@0
|
1514 printf(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n",
|
wolffd@0
|
1515 supvecnum,model->at_upper_bound,(*maxdiff));
|
wolffd@0
|
1516 fflush(stdout);
|
wolffd@0
|
1517 }
|
wolffd@0
|
1518 if(verbosity>=3) {
|
wolffd@0
|
1519 printf("\n");
|
wolffd@0
|
1520 }
|
wolffd@0
|
1521
|
wolffd@0
|
1522 if((!retrain) && (transduction)) {
|
wolffd@0
|
1523 for(i=0;(i<totdoc);i++) {
|
wolffd@0
|
1524 shrink_state->active[i]=1;
|
wolffd@0
|
1525 }
|
wolffd@0
|
1526 activenum=compute_index(shrink_state->active,totdoc,active2dnum);
|
wolffd@0
|
1527 inactivenum=0;
|
wolffd@0
|
1528 if(verbosity==1) printf("done\n");
|
wolffd@0
|
1529 retrain=incorporate_unlabeled_examples(model,label,inconsistent,
|
wolffd@0
|
1530 unlabeled,a,lin,totdoc,
|
wolffd@0
|
1531 selcrit,selexam,key,
|
wolffd@0
|
1532 transductcycle,kernel_parm,
|
wolffd@0
|
1533 learn_parm);
|
wolffd@0
|
1534 epsilon_crit_org=learn_parm->epsilon_crit;
|
wolffd@0
|
1535 if(kernel_parm->kernel_type == LINEAR)
|
wolffd@0
|
1536 learn_parm->epsilon_crit=1;
|
wolffd@0
|
1537 transductcycle++;
|
wolffd@0
|
1538 /* reset watchdog */
|
wolffd@0
|
1539 bestmaxdiff=(*maxdiff);
|
wolffd@0
|
1540 bestmaxdiffiter=iteration;
|
wolffd@0
|
1541 }
|
wolffd@0
|
1542 else if(((iteration % 10) == 0) && (!noshrink)) {
|
wolffd@0
|
1543 activenum=shrink_problem(docs,learn_parm,shrink_state,kernel_parm,
|
wolffd@0
|
1544 active2dnum,last_suboptimal_at,iteration,totdoc,
|
wolffd@0
|
1545 maxl((long)(activenum/10),
|
wolffd@0
|
1546 maxl((long)(totdoc/500),100)),
|
wolffd@0
|
1547 a,inconsistent);
|
wolffd@0
|
1548 inactivenum=totdoc-activenum;
|
wolffd@0
|
1549 if((kernel_cache)
|
wolffd@0
|
1550 && (supvecnum>kernel_cache->max_elems)
|
wolffd@0
|
1551 && ((kernel_cache->activenum-activenum)>maxl((long)(activenum/10),500))) {
|
wolffd@0
|
1552 kernel_cache_shrink(kernel_cache,totdoc,
|
wolffd@0
|
1553 minl((kernel_cache->activenum-activenum),
|
wolffd@0
|
1554 (kernel_cache->activenum-supvecnum)),
|
wolffd@0
|
1555 shrink_state->active);
|
wolffd@0
|
1556 }
|
wolffd@0
|
1557 }
|
wolffd@0
|
1558
|
wolffd@0
|
1559 if((!retrain) && learn_parm->remove_inconsistent) {
|
wolffd@0
|
1560 if(verbosity>=1) {
|
wolffd@0
|
1561 printf(" Moving training errors to inconsistent examples...");
|
wolffd@0
|
1562 fflush(stdout);
|
wolffd@0
|
1563 }
|
wolffd@0
|
1564 if(learn_parm->remove_inconsistent == 1) {
|
wolffd@0
|
1565 retrain=identify_inconsistent(a,label,unlabeled,totdoc,learn_parm,
|
wolffd@0
|
1566 &inconsistentnum,inconsistent);
|
wolffd@0
|
1567 }
|
wolffd@0
|
1568 else if(learn_parm->remove_inconsistent == 2) {
|
wolffd@0
|
1569 retrain=identify_misclassified(lin,label,unlabeled,totdoc,
|
wolffd@0
|
1570 model,&inconsistentnum,inconsistent);
|
wolffd@0
|
1571 }
|
wolffd@0
|
1572 else if(learn_parm->remove_inconsistent == 3) {
|
wolffd@0
|
1573 retrain=identify_one_misclassified(lin,label,unlabeled,totdoc,
|
wolffd@0
|
1574 model,&inconsistentnum,inconsistent);
|
wolffd@0
|
1575 }
|
wolffd@0
|
1576 if(retrain) {
|
wolffd@0
|
1577 if(kernel_parm->kernel_type == LINEAR) { /* reinit shrinking */
|
wolffd@0
|
1578 learn_parm->epsilon_crit=2.0;
|
wolffd@0
|
1579 }
|
wolffd@0
|
1580 }
|
wolffd@0
|
1581 if(verbosity>=1) {
|
wolffd@0
|
1582 printf("done.\n");
|
wolffd@0
|
1583 if(retrain) {
|
wolffd@0
|
1584 printf(" Now %ld inconsistent examples.\n",inconsistentnum);
|
wolffd@0
|
1585 }
|
wolffd@0
|
1586 }
|
wolffd@0
|
1587 }
|
wolffd@0
|
1588 } /* end of loop */
|
wolffd@0
|
1589
|
wolffd@0
|
1590 free(chosen);
|
wolffd@0
|
1591 free(last_suboptimal_at);
|
wolffd@0
|
1592 free(key);
|
wolffd@0
|
1593 free(selcrit);
|
wolffd@0
|
1594 free(selexam);
|
wolffd@0
|
1595 free(a_old);
|
wolffd@0
|
1596 free(aicache);
|
wolffd@0
|
1597 free(working2dnum);
|
wolffd@0
|
1598 free(active2dnum);
|
wolffd@0
|
1599 free(qp.opt_ce);
|
wolffd@0
|
1600 free(qp.opt_ce0);
|
wolffd@0
|
1601 free(qp.opt_g);
|
wolffd@0
|
1602 free(qp.opt_g0);
|
wolffd@0
|
1603 free(qp.opt_xinit);
|
wolffd@0
|
1604 free(qp.opt_low);
|
wolffd@0
|
1605 free(qp.opt_up);
|
wolffd@0
|
1606 free(weights);
|
wolffd@0
|
1607
|
wolffd@0
|
1608 learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */
|
wolffd@0
|
1609 model->maxdiff=(*maxdiff);
|
wolffd@0
|
1610
|
wolffd@0
|
1611 return(iteration);
|
wolffd@0
|
1612 }
|
wolffd@0
|
1613
|
wolffd@0
|
1614 long optimize_to_convergence_sharedslack(DOC **docs, long int *label,
|
wolffd@0
|
1615 long int totdoc,
|
wolffd@0
|
1616 long int totwords, LEARN_PARM *learn_parm,
|
wolffd@0
|
1617 KERNEL_PARM *kernel_parm,
|
wolffd@0
|
1618 KERNEL_CACHE *kernel_cache,
|
wolffd@0
|
1619 SHRINK_STATE *shrink_state, MODEL *model,
|
wolffd@0
|
1620 double *a, double *lin, double *c,
|
wolffd@0
|
1621 TIMING *timing_profile, double *maxdiff)
|
wolffd@0
|
1622 /* docs: Training vectors (x-part) */
|
wolffd@0
|
1623 /* label: Training labels/value (y-part, zero if test example for
|
wolffd@0
|
1624 transduction) */
|
wolffd@0
|
1625 /* totdoc: Number of examples in docs/label */
|
wolffd@0
|
1626 /* totwords: Number of features (i.e. highest feature index) */
|
wolffd@0
|
1627 /* learn_parm: Learning paramenters */
|
wolffd@0
|
1628 /* kernel_parm: Kernel paramenters */
|
wolffd@0
|
1629 /* kernel_cache: Initialized/partly filled Cache, if using a kernel.
|
wolffd@0
|
1630 NULL if linear. */
|
wolffd@0
|
1631 /* shrink_state: State of active variables */
|
wolffd@0
|
1632 /* model: Returns learning result */
|
wolffd@0
|
1633 /* a: alphas */
|
wolffd@0
|
1634 /* lin: linear component of gradient */
|
wolffd@0
|
1635 /* c: right hand side of inequalities (margin) */
|
wolffd@0
|
1636 /* maxdiff: returns maximum violation of KT-conditions */
|
wolffd@0
|
1637 {
|
wolffd@0
|
1638 long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink,*unlabeled;
|
wolffd@0
|
1639 long *inconsistent,choosenum,already_chosen=0,iteration;
|
wolffd@0
|
1640 long misclassified,supvecnum=0,*active2dnum,inactivenum;
|
wolffd@0
|
1641 long *working2dnum,*selexam,*ignore;
|
wolffd@0
|
1642 long activenum,retrain,maxslackid,slackset,jointstep;
|
wolffd@0
|
1643 double criterion,eq_target;
|
wolffd@0
|
1644 double *a_old,*alphaslack;
|
wolffd@0
|
1645 long t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */
|
wolffd@0
|
1646 double epsilon_crit_org,maxsharedviol;
|
wolffd@0
|
1647 double bestmaxdiff;
|
wolffd@0
|
1648 long bestmaxdiffiter,terminate;
|
wolffd@0
|
1649
|
wolffd@0
|
1650 double *selcrit; /* buffer for sorting */
|
wolffd@0
|
1651 CFLOAT *aicache; /* buffer to keep one row of hessian */
|
wolffd@0
|
1652 double *weights; /* buffer for weight vector in linear case */
|
wolffd@0
|
1653 QP qp; /* buffer for one quadratic program */
|
wolffd@0
|
1654 double *slack; /* vector of slack variables for optimization with
|
wolffd@0
|
1655 shared slacks */
|
wolffd@0
|
1656
|
wolffd@0
|
1657 epsilon_crit_org=learn_parm->epsilon_crit; /* save org */
|
wolffd@0
|
1658 if(kernel_parm->kernel_type == LINEAR) {
|
wolffd@0
|
1659 learn_parm->epsilon_crit=2.0;
|
wolffd@0
|
1660 kernel_cache=NULL; /* caching makes no sense for linear kernel */
|
wolffd@0
|
1661 }
|
wolffd@0
|
1662 learn_parm->epsilon_shrink=2;
|
wolffd@0
|
1663 (*maxdiff)=1;
|
wolffd@0
|
1664
|
wolffd@0
|
1665 learn_parm->totwords=totwords;
|
wolffd@0
|
1666
|
wolffd@0
|
1667 chosen = (long *)my_malloc(sizeof(long)*totdoc);
|
wolffd@0
|
1668 unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
|
wolffd@0
|
1669 inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
|
wolffd@0
|
1670 ignore = (long *)my_malloc(sizeof(long)*totdoc);
|
wolffd@0
|
1671 key = (long *)my_malloc(sizeof(long)*(totdoc+11));
|
wolffd@0
|
1672 selcrit = (double *)my_malloc(sizeof(double)*totdoc);
|
wolffd@0
|
1673 selexam = (long *)my_malloc(sizeof(long)*totdoc);
|
wolffd@0
|
1674 a_old = (double *)my_malloc(sizeof(double)*totdoc);
|
wolffd@0
|
1675 aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
|
wolffd@0
|
1676 working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
|
wolffd@0
|
1677 active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
|
wolffd@0
|
1678 qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
|
wolffd@0
|
1679 qp.opt_ce0 = (double *)my_malloc(sizeof(double));
|
wolffd@0
|
1680 qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize
|
wolffd@0
|
1681 *learn_parm->svm_maxqpsize);
|
wolffd@0
|
1682 qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
|
wolffd@0
|
1683 qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
|
wolffd@0
|
1684 qp.opt_low=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
|
wolffd@0
|
1685 qp.opt_up=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
|
wolffd@0
|
1686 weights=(double *)my_malloc(sizeof(double)*(totwords+1));
|
wolffd@0
|
1687 maxslackid=0;
|
wolffd@0
|
1688 for(i=0;i<totdoc;i++) { /* determine size of slack array */
|
wolffd@0
|
1689 if(maxslackid<docs[i]->slackid)
|
wolffd@0
|
1690 maxslackid=docs[i]->slackid;
|
wolffd@0
|
1691 }
|
wolffd@0
|
1692 slack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
|
wolffd@0
|
1693 alphaslack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
|
wolffd@0
|
1694 last_suboptimal_at = (long *)my_malloc(sizeof(long)*(maxslackid+1));
|
wolffd@0
|
1695 for(i=0;i<=maxslackid;i++) { /* init shared slacks */
|
wolffd@0
|
1696 slack[i]=0;
|
wolffd@0
|
1697 alphaslack[i]=0;
|
wolffd@0
|
1698 last_suboptimal_at[i]=1;
|
wolffd@0
|
1699 }
|
wolffd@0
|
1700
|
wolffd@0
|
1701 choosenum=0;
|
wolffd@0
|
1702 retrain=1;
|
wolffd@0
|
1703 iteration=1;
|
wolffd@0
|
1704 bestmaxdiffiter=1;
|
wolffd@0
|
1705 bestmaxdiff=999999999;
|
wolffd@0
|
1706 terminate=0;
|
wolffd@0
|
1707
|
wolffd@0
|
1708 if(kernel_cache) {
|
wolffd@0
|
1709 kernel_cache->time=iteration; /* for lru cache */
|
wolffd@0
|
1710 kernel_cache_reset_lru(kernel_cache);
|
wolffd@0
|
1711 }
|
wolffd@0
|
1712
|
wolffd@0
|
1713 for(i=0;i<totdoc;i++) { /* various inits */
|
wolffd@0
|
1714 chosen[i]=0;
|
wolffd@0
|
1715 unlabeled[i]=0;
|
wolffd@0
|
1716 inconsistent[i]=0;
|
wolffd@0
|
1717 ignore[i]=0;
|
wolffd@0
|
1718 a_old[i]=a[i];
|
wolffd@0
|
1719 }
|
wolffd@0
|
1720 activenum=compute_index(shrink_state->active,totdoc,active2dnum);
|
wolffd@0
|
1721 inactivenum=totdoc-activenum;
|
wolffd@0
|
1722 clear_index(working2dnum);
|
wolffd@0
|
1723
|
wolffd@0
|
1724 /* call to init slack and alphaslack */
|
wolffd@0
|
1725 compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm,
|
wolffd@0
|
1726 slack,alphaslack);
|
wolffd@0
|
1727
|
wolffd@0
|
1728 /* repeat this loop until we have convergence */
|
wolffd@0
|
1729 for(;retrain && (!terminate);iteration++) {
|
wolffd@0
|
1730
|
wolffd@0
|
1731 if(kernel_cache)
|
wolffd@0
|
1732 kernel_cache->time=iteration; /* for lru cache */
|
wolffd@0
|
1733 if(verbosity>=2) {
|
wolffd@0
|
1734 printf(
|
wolffd@0
|
1735 "Iteration %ld: ",iteration); fflush(stdout);
|
wolffd@0
|
1736 }
|
wolffd@0
|
1737 else if(verbosity==1) {
|
wolffd@0
|
1738 printf("."); fflush(stdout);
|
wolffd@0
|
1739 }
|
wolffd@0
|
1740
|
wolffd@0
|
1741 if(verbosity>=2) t0=get_runtime();
|
wolffd@0
|
1742 if(verbosity>=3) {
|
wolffd@0
|
1743 printf("\nSelecting working set... "); fflush(stdout);
|
wolffd@0
|
1744 }
|
wolffd@0
|
1745
|
wolffd@0
|
1746 if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize)
|
wolffd@0
|
1747 learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
|
wolffd@0
|
1748
|
wolffd@0
|
1749 /* select working set according to steepest gradient */
|
wolffd@0
|
1750 jointstep=0;
|
wolffd@0
|
1751 eq_target=0;
|
wolffd@0
|
1752 if(iteration % 101) {
|
wolffd@0
|
1753 slackset=select_next_qp_slackset(docs,label,a,lin,slack,alphaslack,c,
|
wolffd@0
|
1754 learn_parm,active2dnum,&maxsharedviol);
|
wolffd@0
|
1755 if((iteration % 2)
|
wolffd@0
|
1756 || (!slackset) || (maxsharedviol<learn_parm->epsilon_crit)){
|
wolffd@0
|
1757 /* do a step with examples from different slack sets */
|
wolffd@0
|
1758 if(verbosity >= 2) {
|
wolffd@0
|
1759 printf("(i-step)"); fflush(stdout);
|
wolffd@0
|
1760 }
|
wolffd@0
|
1761 i=0;
|
wolffd@0
|
1762 for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear old part of working set */
|
wolffd@0
|
1763 if((chosen[j]>=(learn_parm->svm_maxqpsize/
|
wolffd@0
|
1764 minl(learn_parm->svm_maxqpsize,
|
wolffd@0
|
1765 learn_parm->svm_newvarsinqp)))) {
|
wolffd@0
|
1766 chosen[j]=0;
|
wolffd@0
|
1767 choosenum--;
|
wolffd@0
|
1768 }
|
wolffd@0
|
1769 else {
|
wolffd@0
|
1770 chosen[j]++;
|
wolffd@0
|
1771 working2dnum[i++]=j;
|
wolffd@0
|
1772 }
|
wolffd@0
|
1773 }
|
wolffd@0
|
1774 working2dnum[i]=-1;
|
wolffd@0
|
1775
|
wolffd@0
|
1776 already_chosen=0;
|
wolffd@0
|
1777 if((minl(learn_parm->svm_newvarsinqp,
|
wolffd@0
|
1778 learn_parm->svm_maxqpsize-choosenum)>=4)
|
wolffd@0
|
1779 && (kernel_parm->kernel_type != LINEAR)) {
|
wolffd@0
|
1780 /* select part of the working set from cache */
|
wolffd@0
|
1781 already_chosen=select_next_qp_subproblem_grad(
|
wolffd@0
|
1782 label,unlabeled,a,lin,c,totdoc,
|
wolffd@0
|
1783 (long)(minl(learn_parm->svm_maxqpsize-choosenum,
|
wolffd@0
|
1784 learn_parm->svm_newvarsinqp)
|
wolffd@0
|
1785 /2),
|
wolffd@0
|
1786 learn_parm,inconsistent,active2dnum,
|
wolffd@0
|
1787 working2dnum,selcrit,selexam,kernel_cache,
|
wolffd@0
|
1788 (long)1,key,chosen);
|
wolffd@0
|
1789 choosenum+=already_chosen;
|
wolffd@0
|
1790 }
|
wolffd@0
|
1791 choosenum+=select_next_qp_subproblem_grad(
|
wolffd@0
|
1792 label,unlabeled,a,lin,c,totdoc,
|
wolffd@0
|
1793 minl(learn_parm->svm_maxqpsize-choosenum,
|
wolffd@0
|
1794 learn_parm->svm_newvarsinqp-already_chosen),
|
wolffd@0
|
1795 learn_parm,inconsistent,active2dnum,
|
wolffd@0
|
1796 working2dnum,selcrit,selexam,kernel_cache,
|
wolffd@0
|
1797 (long)0,key,chosen);
|
wolffd@0
|
1798 }
|
wolffd@0
|
1799 else { /* do a step with all examples from same slack set */
|
wolffd@0
|
1800 if(verbosity >= 2) {
|
wolffd@0
|
1801 printf("(j-step on %ld)",slackset); fflush(stdout);
|
wolffd@0
|
1802 }
|
wolffd@0
|
1803 jointstep=1;
|
wolffd@0
|
1804 for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */
|
wolffd@0
|
1805 chosen[j]=0;
|
wolffd@0
|
1806 }
|
wolffd@0
|
1807 working2dnum[0]=-1;
|
wolffd@0
|
1808 eq_target=alphaslack[slackset];
|
wolffd@0
|
1809 for(j=0;j<totdoc;j++) { /* mask all but slackset */
|
wolffd@0
|
1810 /* for(jj=0;(j=active2dnum[jj])>=0;jj++) { */
|
wolffd@0
|
1811 if(docs[j]->slackid != slackset)
|
wolffd@0
|
1812 ignore[j]=1;
|
wolffd@0
|
1813 else {
|
wolffd@0
|
1814 ignore[j]=0;
|
wolffd@0
|
1815 learn_parm->svm_cost[j]=learn_parm->svm_c;
|
wolffd@0
|
1816 /* printf("Inslackset(%ld,%ld)",j,shrink_state->active[j]); */
|
wolffd@0
|
1817 }
|
wolffd@0
|
1818 }
|
wolffd@0
|
1819 learn_parm->biased_hyperplane=1;
|
wolffd@0
|
1820 choosenum=select_next_qp_subproblem_grad(
|
wolffd@0
|
1821 label,unlabeled,a,lin,c,totdoc,
|
wolffd@0
|
1822 learn_parm->svm_maxqpsize,
|
wolffd@0
|
1823 learn_parm,ignore,active2dnum,
|
wolffd@0
|
1824 working2dnum,selcrit,selexam,kernel_cache,
|
wolffd@0
|
1825 (long)0,key,chosen);
|
wolffd@0
|
1826 learn_parm->biased_hyperplane=0;
|
wolffd@0
|
1827 }
|
wolffd@0
|
1828 }
|
wolffd@0
|
1829 else { /* once in a while, select a somewhat random working set
|
wolffd@0
|
1830 to get unlocked of infinite loops due to numerical
|
wolffd@0
|
1831 inaccuracies in the core qp-solver */
|
wolffd@0
|
1832 choosenum+=select_next_qp_subproblem_rand(
|
wolffd@0
|
1833 label,unlabeled,a,lin,c,totdoc,
|
wolffd@0
|
1834 minl(learn_parm->svm_maxqpsize-choosenum,
|
wolffd@0
|
1835 learn_parm->svm_newvarsinqp),
|
wolffd@0
|
1836 learn_parm,inconsistent,active2dnum,
|
wolffd@0
|
1837 working2dnum,selcrit,selexam,kernel_cache,key,
|
wolffd@0
|
1838 chosen,iteration);
|
wolffd@0
|
1839 }
|
wolffd@0
|
1840
|
wolffd@0
|
1841 if(verbosity>=2) {
|
wolffd@0
|
1842 printf(" %ld vectors chosen\n",choosenum); fflush(stdout);
|
wolffd@0
|
1843 }
|
wolffd@0
|
1844
|
wolffd@0
|
1845 if(verbosity>=2) t1=get_runtime();
|
wolffd@0
|
1846
|
wolffd@0
|
1847 if(kernel_cache)
|
wolffd@0
|
1848 cache_multiple_kernel_rows(kernel_cache,docs,working2dnum,
|
wolffd@0
|
1849 choosenum,kernel_parm);
|
wolffd@0
|
1850
|
wolffd@0
|
1851 if(verbosity>=2) t2=get_runtime();
|
wolffd@0
|
1852 if(jointstep) learn_parm->biased_hyperplane=1;
|
wolffd@0
|
1853 optimize_svm(docs,label,unlabeled,ignore,eq_target,chosen,active2dnum,
|
wolffd@0
|
1854 model,totdoc,working2dnum,choosenum,a,lin,c,learn_parm,
|
wolffd@0
|
1855 aicache,kernel_parm,&qp,&epsilon_crit_org);
|
wolffd@0
|
1856 learn_parm->biased_hyperplane=0;
|
wolffd@0
|
1857
|
wolffd@0
|
1858 for(jj=0;(i=working2dnum[jj])>=0;jj++) /* recompute sums of alphas */
|
wolffd@0
|
1859 alphaslack[docs[i]->slackid]+=(a[i]-a_old[i]);
|
wolffd@0
|
1860 for(jj=0;(i=working2dnum[jj])>=0;jj++) { /* reduce alpha to fulfill
|
wolffd@0
|
1861 constraints */
|
wolffd@0
|
1862 if(alphaslack[docs[i]->slackid] > learn_parm->svm_c) {
|
wolffd@0
|
1863 if(a[i] < (alphaslack[docs[i]->slackid]-learn_parm->svm_c)) {
|
wolffd@0
|
1864 alphaslack[docs[i]->slackid]-=a[i];
|
wolffd@0
|
1865 a[i]=0;
|
wolffd@0
|
1866 }
|
wolffd@0
|
1867 else {
|
wolffd@0
|
1868 a[i]-=(alphaslack[docs[i]->slackid]-learn_parm->svm_c);
|
wolffd@0
|
1869 alphaslack[docs[i]->slackid]=learn_parm->svm_c;
|
wolffd@0
|
1870 }
|
wolffd@0
|
1871 }
|
wolffd@0
|
1872 }
|
wolffd@0
|
1873 for(jj=0;(i=active2dnum[jj])>=0;jj++)
|
wolffd@0
|
1874 learn_parm->svm_cost[i]=a[i]+(learn_parm->svm_c
|
wolffd@0
|
1875 -alphaslack[docs[i]->slackid]);
|
wolffd@0
|
1876
|
wolffd@0
|
1877 if(verbosity>=2) t3=get_runtime();
|
wolffd@0
|
1878 update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc,
|
wolffd@0
|
1879 totwords,kernel_parm,kernel_cache,lin,aicache,
|
wolffd@0
|
1880 weights);
|
wolffd@0
|
1881 compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm,
|
wolffd@0
|
1882 slack,alphaslack);
|
wolffd@0
|
1883
|
wolffd@0
|
1884 if(verbosity>=2) t4=get_runtime();
|
wolffd@0
|
1885 supvecnum=calculate_svm_model(docs,label,unlabeled,lin,a,a_old,c,
|
wolffd@0
|
1886 learn_parm,working2dnum,active2dnum,model);
|
wolffd@0
|
1887
|
wolffd@0
|
1888 if(verbosity>=2) t5=get_runtime();
|
wolffd@0
|
1889
|
wolffd@0
|
1890 /* The following computation of the objective function works only */
|
wolffd@0
|
1891 /* relative to the active variables */
|
wolffd@0
|
1892 if(verbosity>=3) {
|
wolffd@0
|
1893 criterion=compute_objective_function(a,lin,c,learn_parm->eps,label,
|
wolffd@0
|
1894 active2dnum);
|
wolffd@0
|
1895 printf("Objective function (over active variables): %.16f\n",criterion);
|
wolffd@0
|
1896 fflush(stdout);
|
wolffd@0
|
1897 }
|
wolffd@0
|
1898
|
wolffd@0
|
1899 for(jj=0;(i=working2dnum[jj])>=0;jj++) {
|
wolffd@0
|
1900 a_old[i]=a[i];
|
wolffd@0
|
1901 }
|
wolffd@0
|
1902
|
wolffd@0
|
1903 retrain=check_optimality_sharedslack(docs,model,label,a,lin,c,
|
wolffd@0
|
1904 slack,alphaslack,totdoc,learn_parm,
|
wolffd@0
|
1905 maxdiff,epsilon_crit_org,&misclassified,
|
wolffd@0
|
1906 active2dnum,last_suboptimal_at,
|
wolffd@0
|
1907 iteration,kernel_parm);
|
wolffd@0
|
1908
|
wolffd@0
|
1909 if(verbosity>=2) {
|
wolffd@0
|
1910 t6=get_runtime();
|
wolffd@0
|
1911 timing_profile->time_select+=t1-t0;
|
wolffd@0
|
1912 timing_profile->time_kernel+=t2-t1;
|
wolffd@0
|
1913 timing_profile->time_opti+=t3-t2;
|
wolffd@0
|
1914 timing_profile->time_update+=t4-t3;
|
wolffd@0
|
1915 timing_profile->time_model+=t5-t4;
|
wolffd@0
|
1916 timing_profile->time_check+=t6-t5;
|
wolffd@0
|
1917 }
|
wolffd@0
|
1918
|
wolffd@0
|
1919 /* checking whether optimizer got stuck */
|
wolffd@0
|
1920 if((*maxdiff) < bestmaxdiff) {
|
wolffd@0
|
1921 bestmaxdiff=(*maxdiff);
|
wolffd@0
|
1922 bestmaxdiffiter=iteration;
|
wolffd@0
|
1923 }
|
wolffd@0
|
1924 if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) {
|
wolffd@0
|
1925 /* long time no progress? */
|
wolffd@0
|
1926 terminate=1;
|
wolffd@0
|
1927 retrain=0;
|
wolffd@0
|
1928 if(verbosity>=1)
|
wolffd@0
|
1929 printf("\nWARNING: Relaxing KT-Conditions due to slow progress! Terminating!\n");
|
wolffd@0
|
1930 }
|
wolffd@0
|
1931
|
wolffd@0
|
1932 noshrink=0;
|
wolffd@0
|
1933
|
wolffd@0
|
1934 if((!retrain) && (inactivenum>0)
|
wolffd@0
|
1935 && ((!learn_parm->skip_final_opt_check)
|
wolffd@0
|
1936 || (kernel_parm->kernel_type == LINEAR))) {
|
wolffd@0
|
1937 if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
|
wolffd@0
|
1938 || (verbosity>=2)) {
|
wolffd@0
|
1939 if(verbosity==1) {
|
wolffd@0
|
1940 printf("\n");
|
wolffd@0
|
1941 }
|
wolffd@0
|
1942 printf(" Checking optimality of inactive variables...");
|
wolffd@0
|
1943 fflush(stdout);
|
wolffd@0
|
1944 }
|
wolffd@0
|
1945 t1=get_runtime();
|
wolffd@0
|
1946 reactivate_inactive_examples(label,unlabeled,a,shrink_state,lin,c,totdoc,
|
wolffd@0
|
1947 totwords,iteration,learn_parm,inconsistent,
|
wolffd@0
|
1948 docs,kernel_parm,kernel_cache,model,aicache,
|
wolffd@0
|
1949 weights,maxdiff);
|
wolffd@0
|
1950 /* Update to new active variables. */
|
wolffd@0
|
1951 activenum=compute_index(shrink_state->active,totdoc,active2dnum);
|
wolffd@0
|
1952 inactivenum=totdoc-activenum;
|
wolffd@0
|
1953 /* check optimality, since check in reactivate does not work for
|
wolffd@0
|
1954 sharedslacks */
|
wolffd@0
|
1955 retrain=check_optimality_sharedslack(docs,model,label,a,lin,c,
|
wolffd@0
|
1956 slack,alphaslack,totdoc,learn_parm,
|
wolffd@0
|
1957 maxdiff,epsilon_crit_org,&misclassified,
|
wolffd@0
|
1958 active2dnum,last_suboptimal_at,
|
wolffd@0
|
1959 iteration,kernel_parm);
|
wolffd@0
|
1960
|
wolffd@0
|
1961 /* reset watchdog */
|
wolffd@0
|
1962 bestmaxdiff=(*maxdiff);
|
wolffd@0
|
1963 bestmaxdiffiter=iteration;
|
wolffd@0
|
1964 /* termination criterion */
|
wolffd@0
|
1965 noshrink=1;
|
wolffd@0
|
1966 retrain=0;
|
wolffd@0
|
1967 if((*maxdiff) > learn_parm->epsilon_crit)
|
wolffd@0
|
1968 retrain=1;
|
wolffd@0
|
1969 timing_profile->time_shrink+=get_runtime()-t1;
|
wolffd@0
|
1970 if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
|
wolffd@0
|
1971 || (verbosity>=2)) {
|
wolffd@0
|
1972 printf("done.\n"); fflush(stdout);
|
wolffd@0
|
1973 printf(" Number of inactive variables = %ld\n",inactivenum);
|
wolffd@0
|
1974 }
|
wolffd@0
|
1975 }
|
wolffd@0
|
1976
|
wolffd@0
|
1977 if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff)))
|
wolffd@0
|
1978 learn_parm->epsilon_crit=(*maxdiff);
|
wolffd@0
|
1979 if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) {
|
wolffd@0
|
1980 learn_parm->epsilon_crit/=2.0;
|
wolffd@0
|
1981 retrain=1;
|
wolffd@0
|
1982 noshrink=1;
|
wolffd@0
|
1983 }
|
wolffd@0
|
1984 if(learn_parm->epsilon_crit<epsilon_crit_org)
|
wolffd@0
|
1985 learn_parm->epsilon_crit=epsilon_crit_org;
|
wolffd@0
|
1986
|
wolffd@0
|
1987 if(verbosity>=2) {
|
wolffd@0
|
1988 printf(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n",
|
wolffd@0
|
1989 supvecnum,model->at_upper_bound,(*maxdiff));
|
wolffd@0
|
1990 fflush(stdout);
|
wolffd@0
|
1991 }
|
wolffd@0
|
1992 if(verbosity>=3) {
|
wolffd@0
|
1993 printf("\n");
|
wolffd@0
|
1994 }
|
wolffd@0
|
1995
|
wolffd@0
|
1996 if(((iteration % 10) == 0) && (!noshrink)) {
|
wolffd@0
|
1997 activenum=shrink_problem(docs,learn_parm,shrink_state,
|
wolffd@0
|
1998 kernel_parm,active2dnum,
|
wolffd@0
|
1999 last_suboptimal_at,iteration,totdoc,
|
wolffd@0
|
2000 maxl((long)(activenum/10),
|
wolffd@0
|
2001 maxl((long)(totdoc/500),100)),
|
wolffd@0
|
2002 a,inconsistent);
|
wolffd@0
|
2003 inactivenum=totdoc-activenum;
|
wolffd@0
|
2004 if((kernel_cache)
|
wolffd@0
|
2005 && (supvecnum>kernel_cache->max_elems)
|
wolffd@0
|
2006 && ((kernel_cache->activenum-activenum)>maxl((long)(activenum/10),500))) {
|
wolffd@0
|
2007 kernel_cache_shrink(kernel_cache,totdoc,
|
wolffd@0
|
2008 minl((kernel_cache->activenum-activenum),
|
wolffd@0
|
2009 (kernel_cache->activenum-supvecnum)),
|
wolffd@0
|
2010 shrink_state->active);
|
wolffd@0
|
2011 }
|
wolffd@0
|
2012 }
|
wolffd@0
|
2013
|
wolffd@0
|
2014 } /* end of loop */
|
wolffd@0
|
2015
|
wolffd@0
|
2016
|
wolffd@0
|
2017 free(alphaslack);
|
wolffd@0
|
2018 free(slack);
|
wolffd@0
|
2019 free(chosen);
|
wolffd@0
|
2020 free(unlabeled);
|
wolffd@0
|
2021 free(inconsistent);
|
wolffd@0
|
2022 free(ignore);
|
wolffd@0
|
2023 free(last_suboptimal_at);
|
wolffd@0
|
2024 free(key);
|
wolffd@0
|
2025 free(selcrit);
|
wolffd@0
|
2026 free(selexam);
|
wolffd@0
|
2027 free(a_old);
|
wolffd@0
|
2028 free(aicache);
|
wolffd@0
|
2029 free(working2dnum);
|
wolffd@0
|
2030 free(active2dnum);
|
wolffd@0
|
2031 free(qp.opt_ce);
|
wolffd@0
|
2032 free(qp.opt_ce0);
|
wolffd@0
|
2033 free(qp.opt_g);
|
wolffd@0
|
2034 free(qp.opt_g0);
|
wolffd@0
|
2035 free(qp.opt_xinit);
|
wolffd@0
|
2036 free(qp.opt_low);
|
wolffd@0
|
2037 free(qp.opt_up);
|
wolffd@0
|
2038 free(weights);
|
wolffd@0
|
2039
|
wolffd@0
|
2040 learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */
|
wolffd@0
|
2041 model->maxdiff=(*maxdiff);
|
wolffd@0
|
2042
|
wolffd@0
|
2043 return(iteration);
|
wolffd@0
|
2044 }
|
wolffd@0
|
2045
|
wolffd@0
|
2046
|
wolffd@0
|
2047 double compute_objective_function(double *a, double *lin, double *c,
|
wolffd@0
|
2048 double eps, long int *label,
|
wolffd@0
|
2049 long int *active2dnum)
|
wolffd@0
|
2050 /* Return value of objective function. */
|
wolffd@0
|
2051 /* Works only relative to the active variables! */
|
wolffd@0
|
2052 {
|
wolffd@0
|
2053 long i,ii;
|
wolffd@0
|
2054 double criterion;
|
wolffd@0
|
2055 /* calculate value of objective function */
|
wolffd@0
|
2056 criterion=0;
|
wolffd@0
|
2057 for(ii=0;active2dnum[ii]>=0;ii++) {
|
wolffd@0
|
2058 i=active2dnum[ii];
|
wolffd@0
|
2059 criterion=criterion+(eps-(double)label[i]*c[i])*a[i]+0.5*a[i]*label[i]*lin[i];
|
wolffd@0
|
2060 }
|
wolffd@0
|
2061 return(criterion);
|
wolffd@0
|
2062 }
|
wolffd@0
|
2063
|
wolffd@0
|
2064 void clear_index(long int *index)
|
wolffd@0
|
2065 /* initializes and empties index */
|
wolffd@0
|
2066 {
|
wolffd@0
|
2067 index[0]=-1;
|
wolffd@0
|
2068 }
|
wolffd@0
|
2069
|
wolffd@0
|
2070 void add_to_index(long int *index, long int elem)
|
wolffd@0
|
2071 /* initializes and empties index */
|
wolffd@0
|
2072 {
|
wolffd@0
|
2073 register long i;
|
wolffd@0
|
2074 for(i=0;index[i] != -1;i++);
|
wolffd@0
|
2075 index[i]=elem;
|
wolffd@0
|
2076 index[i+1]=-1;
|
wolffd@0
|
2077 }
|
wolffd@0
|
2078
|
wolffd@0
|
2079 long compute_index(long int *binfeature, long int range, long int *index)
|
wolffd@0
|
2080 /* create an inverted index of binfeature */
|
wolffd@0
|
2081 {
|
wolffd@0
|
2082 register long i,ii;
|
wolffd@0
|
2083
|
wolffd@0
|
2084 ii=0;
|
wolffd@0
|
2085 for(i=0;i<range;i++) {
|
wolffd@0
|
2086 if(binfeature[i]) {
|
wolffd@0
|
2087 index[ii]=i;
|
wolffd@0
|
2088 ii++;
|
wolffd@0
|
2089 }
|
wolffd@0
|
2090 }
|
wolffd@0
|
2091 for(i=0;i<4;i++) {
|
wolffd@0
|
2092 index[ii+i]=-1;
|
wolffd@0
|
2093 }
|
wolffd@0
|
2094 return(ii);
|
wolffd@0
|
2095 }
|
wolffd@0
|
2096
|
wolffd@0
|
2097
|
wolffd@0
|
2098 void optimize_svm(DOC **docs, long int *label, long int *unlabeled,
|
wolffd@0
|
2099 long int *exclude_from_eq_const, double eq_target,
|
wolffd@0
|
2100 long int *chosen, long int *active2dnum, MODEL *model,
|
wolffd@0
|
2101 long int totdoc, long int *working2dnum, long int varnum,
|
wolffd@0
|
2102 double *a, double *lin, double *c, LEARN_PARM *learn_parm,
|
wolffd@0
|
2103 CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp,
|
wolffd@0
|
2104 double *epsilon_crit_target)
|
wolffd@0
|
2105 /* Do optimization on the working set. */
|
wolffd@0
|
2106 {
|
wolffd@0
|
2107 long i;
|
wolffd@0
|
2108 double *a_v;
|
wolffd@0
|
2109
|
wolffd@0
|
2110 compute_matrices_for_optimization(docs,label,unlabeled,
|
wolffd@0
|
2111 exclude_from_eq_const,eq_target,chosen,
|
wolffd@0
|
2112 active2dnum,working2dnum,model,a,lin,c,
|
wolffd@0
|
2113 varnum,totdoc,learn_parm,aicache,
|
wolffd@0
|
2114 kernel_parm,qp);
|
wolffd@0
|
2115
|
wolffd@0
|
2116 if(verbosity>=3) {
|
wolffd@0
|
2117 printf("Running optimizer..."); fflush(stdout);
|
wolffd@0
|
2118 }
|
wolffd@0
|
2119 /* call the qp-subsolver */
|
wolffd@0
|
2120 a_v=optimize_qp(qp,epsilon_crit_target,
|
wolffd@0
|
2121 learn_parm->svm_maxqpsize,
|
wolffd@0
|
2122 &(model->b), /* in case the optimizer gives us */
|
wolffd@0
|
2123 /* the threshold for free. otherwise */
|
wolffd@0
|
2124 /* b is calculated in calculate_model. */
|
wolffd@0
|
2125 learn_parm);
|
wolffd@0
|
2126 if(verbosity>=3) {
|
wolffd@0
|
2127 printf("done\n");
|
wolffd@0
|
2128 }
|
wolffd@0
|
2129
|
wolffd@0
|
2130 for(i=0;i<varnum;i++) {
|
wolffd@0
|
2131 a[working2dnum[i]]=a_v[i];
|
wolffd@0
|
2132 /*
|
wolffd@0
|
2133 if(a_v[i]<=(0+learn_parm->epsilon_a)) {
|
wolffd@0
|
2134 a[working2dnum[i]]=0;
|
wolffd@0
|
2135 }
|
wolffd@0
|
2136 else if(a_v[i]>=(learn_parm->svm_cost[working2dnum[i]]-learn_parm->epsilon_a)) {
|
wolffd@0
|
2137 a[working2dnum[i]]=learn_parm->svm_cost[working2dnum[i]];
|
wolffd@0
|
2138 }
|
wolffd@0
|
2139 */
|
wolffd@0
|
2140 }
|
wolffd@0
|
2141 }
|
wolffd@0
|
2142
|
wolffd@0
|
2143 void compute_matrices_for_optimization(DOC **docs, long int *label,
|
wolffd@0
|
2144 long int *unlabeled, long *exclude_from_eq_const, double eq_target,
|
wolffd@0
|
2145 long int *chosen, long int *active2dnum,
|
wolffd@0
|
2146 long int *key, MODEL *model, double *a, double *lin, double *c,
|
wolffd@0
|
2147 long int varnum, long int totdoc, LEARN_PARM *learn_parm,
|
wolffd@0
|
2148 CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp)
|
wolffd@0
|
2149 {
|
wolffd@0
|
2150 register long ki,kj,i,j;
|
wolffd@0
|
2151 register double kernel_temp;
|
wolffd@0
|
2152
|
wolffd@0
|
2153 if(verbosity>=3) {
|
wolffd@0
|
2154 fprintf(stdout,"Computing qp-matrices (type %ld kernel [degree %ld, rbf_gamma %f, coef_lin %f, coef_const %f])...",kernel_parm->kernel_type,kernel_parm->poly_degree,kernel_parm->rbf_gamma,kernel_parm->coef_lin,kernel_parm->coef_const);
|
wolffd@0
|
2155 fflush(stdout);
|
wolffd@0
|
2156 }
|
wolffd@0
|
2157
|
wolffd@0
|
2158 qp->opt_n=varnum;
|
wolffd@0
|
2159 qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */
|
wolffd@0
|
2160 for(j=1;j<model->sv_num;j++) { /* start at 1 */
|
wolffd@0
|
2161 if((!chosen[(model->supvec[j])->docnum])
|
wolffd@0
|
2162 && (!exclude_from_eq_const[(model->supvec[j])->docnum])) {
|
wolffd@0
|
2163 qp->opt_ce0[0]+=model->alpha[j];
|
wolffd@0
|
2164 }
|
wolffd@0
|
2165 }
|
wolffd@0
|
2166 if(learn_parm->biased_hyperplane)
|
wolffd@0
|
2167 qp->opt_m=1;
|
wolffd@0
|
2168 else
|
wolffd@0
|
2169 qp->opt_m=0; /* eq-constraint will be ignored */
|
wolffd@0
|
2170
|
wolffd@0
|
2171 /* init linear part of objective function */
|
wolffd@0
|
2172 for(i=0;i<varnum;i++) {
|
wolffd@0
|
2173 qp->opt_g0[i]=lin[key[i]];
|
wolffd@0
|
2174 }
|
wolffd@0
|
2175
|
wolffd@0
|
2176 for(i=0;i<varnum;i++) {
|
wolffd@0
|
2177 ki=key[i];
|
wolffd@0
|
2178
|
wolffd@0
|
2179 /* Compute the matrix for equality constraints */
|
wolffd@0
|
2180 qp->opt_ce[i]=label[ki];
|
wolffd@0
|
2181 qp->opt_low[i]=0;
|
wolffd@0
|
2182 qp->opt_up[i]=learn_parm->svm_cost[ki];
|
wolffd@0
|
2183
|
wolffd@0
|
2184 kernel_temp=(double)kernel(kernel_parm,docs[ki],docs[ki]);
|
wolffd@0
|
2185 /* compute linear part of objective function */
|
wolffd@0
|
2186 qp->opt_g0[i]-=(kernel_temp*a[ki]*(double)label[ki]);
|
wolffd@0
|
2187 /* compute quadratic part of objective function */
|
wolffd@0
|
2188 qp->opt_g[varnum*i+i]=kernel_temp;
|
wolffd@0
|
2189 for(j=i+1;j<varnum;j++) {
|
wolffd@0
|
2190 kj=key[j];
|
wolffd@0
|
2191 kernel_temp=(double)kernel(kernel_parm,docs[ki],docs[kj]);
|
wolffd@0
|
2192 /* compute linear part of objective function */
|
wolffd@0
|
2193 qp->opt_g0[i]-=(kernel_temp*a[kj]*(double)label[kj]);
|
wolffd@0
|
2194 qp->opt_g0[j]-=(kernel_temp*a[ki]*(double)label[ki]);
|
wolffd@0
|
2195 /* compute quadratic part of objective function */
|
wolffd@0
|
2196 qp->opt_g[varnum*i+j]=(double)label[ki]*(double)label[kj]*kernel_temp;
|
wolffd@0
|
2197 qp->opt_g[varnum*j+i]=(double)label[ki]*(double)label[kj]*kernel_temp;
|
wolffd@0
|
2198 }
|
wolffd@0
|
2199
|
wolffd@0
|
2200 if(verbosity>=3) {
|
wolffd@0
|
2201 if(i % 20 == 0) {
|
wolffd@0
|
2202 fprintf(stdout,"%ld..",i); fflush(stdout);
|
wolffd@0
|
2203 }
|
wolffd@0
|
2204 }
|
wolffd@0
|
2205 }
|
wolffd@0
|
2206
|
wolffd@0
|
2207 for(i=0;i<varnum;i++) {
|
wolffd@0
|
2208 /* assure starting at feasible point */
|
wolffd@0
|
2209 qp->opt_xinit[i]=a[key[i]];
|
wolffd@0
|
2210 /* set linear part of objective function */
|
wolffd@0
|
2211 qp->opt_g0[i]=(learn_parm->eps-(double)label[key[i]]*c[key[i]])+qp->opt_g0[i]*(double)label[key[i]];
|
wolffd@0
|
2212 }
|
wolffd@0
|
2213
|
wolffd@0
|
2214 if(verbosity>=3) {
|
wolffd@0
|
2215 fprintf(stdout,"done\n");
|
wolffd@0
|
2216 }
|
wolffd@0
|
2217 }
|
wolffd@0
|
2218
|
wolffd@0
|
2219 long calculate_svm_model(DOC **docs, long int *label, long int *unlabeled,
|
wolffd@0
|
2220 double *lin, double *a, double *a_old, double *c,
|
wolffd@0
|
2221 LEARN_PARM *learn_parm, long int *working2dnum,
|
wolffd@0
|
2222 long int *active2dnum, MODEL *model)
|
wolffd@0
|
2223 /* Compute decision function based on current values */
|
wolffd@0
|
2224 /* of alpha. */
|
wolffd@0
|
2225 {
|
wolffd@0
|
2226 long i,ii,pos,b_calculated=0,first_low,first_high;
|
wolffd@0
|
2227 double ex_c,b_temp,b_low,b_high;
|
wolffd@0
|
2228
|
wolffd@0
|
2229 if(verbosity>=3) {
|
wolffd@0
|
2230 printf("Calculating model..."); fflush(stdout);
|
wolffd@0
|
2231 }
|
wolffd@0
|
2232
|
wolffd@0
|
2233 if(!learn_parm->biased_hyperplane) {
|
wolffd@0
|
2234 model->b=0;
|
wolffd@0
|
2235 b_calculated=1;
|
wolffd@0
|
2236 }
|
wolffd@0
|
2237
|
wolffd@0
|
2238 for(ii=0;(i=working2dnum[ii])>=0;ii++) {
|
wolffd@0
|
2239 if((a_old[i]>0) && (a[i]==0)) { /* remove from model */
|
wolffd@0
|
2240 pos=model->index[i];
|
wolffd@0
|
2241 model->index[i]=-1;
|
wolffd@0
|
2242 (model->sv_num)--;
|
wolffd@0
|
2243 model->supvec[pos]=model->supvec[model->sv_num];
|
wolffd@0
|
2244 model->alpha[pos]=model->alpha[model->sv_num];
|
wolffd@0
|
2245 model->index[(model->supvec[pos])->docnum]=pos;
|
wolffd@0
|
2246 }
|
wolffd@0
|
2247 else if((a_old[i]==0) && (a[i]>0)) { /* add to model */
|
wolffd@0
|
2248 model->supvec[model->sv_num]=docs[i];
|
wolffd@0
|
2249 model->alpha[model->sv_num]=a[i]*(double)label[i];
|
wolffd@0
|
2250 model->index[i]=model->sv_num;
|
wolffd@0
|
2251 (model->sv_num)++;
|
wolffd@0
|
2252 }
|
wolffd@0
|
2253 else if(a_old[i]==a[i]) { /* nothing to do */
|
wolffd@0
|
2254 }
|
wolffd@0
|
2255 else { /* just update alpha */
|
wolffd@0
|
2256 model->alpha[model->index[i]]=a[i]*(double)label[i];
|
wolffd@0
|
2257 }
|
wolffd@0
|
2258
|
wolffd@0
|
2259 ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
|
wolffd@0
|
2260 if((a_old[i]>=ex_c) && (a[i]<ex_c)) {
|
wolffd@0
|
2261 (model->at_upper_bound)--;
|
wolffd@0
|
2262 }
|
wolffd@0
|
2263 else if((a_old[i]<ex_c) && (a[i]>=ex_c)) {
|
wolffd@0
|
2264 (model->at_upper_bound)++;
|
wolffd@0
|
2265 }
|
wolffd@0
|
2266
|
wolffd@0
|
2267 if((!b_calculated)
|
wolffd@0
|
2268 && (a[i]>learn_parm->epsilon_a) && (a[i]<ex_c)) { /* calculate b */
|
wolffd@0
|
2269 model->b=((double)label[i]*learn_parm->eps-c[i]+lin[i]);
|
wolffd@0
|
2270 /* model->b=(-(double)label[i]+lin[i]); */
|
wolffd@0
|
2271 b_calculated=1;
|
wolffd@0
|
2272 }
|
wolffd@0
|
2273 }
|
wolffd@0
|
2274
|
wolffd@0
|
2275 /* No alpha in the working set not at bounds, so b was not
|
wolffd@0
|
2276 calculated in the usual way. The following handles this special
|
wolffd@0
|
2277 case. */
|
wolffd@0
|
2278 if(learn_parm->biased_hyperplane
|
wolffd@0
|
2279 && (!b_calculated)
|
wolffd@0
|
2280 && (model->sv_num-1 == model->at_upper_bound)) {
|
wolffd@0
|
2281 first_low=1;
|
wolffd@0
|
2282 first_high=1;
|
wolffd@0
|
2283 b_low=0;
|
wolffd@0
|
2284 b_high=0;
|
wolffd@0
|
2285 for(ii=0;(i=active2dnum[ii])>=0;ii++) {
|
wolffd@0
|
2286 ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
|
wolffd@0
|
2287 if(a[i]<ex_c) {
|
wolffd@0
|
2288 if(label[i]>0) {
|
wolffd@0
|
2289 b_temp=-(learn_parm->eps-c[i]+lin[i]);
|
wolffd@0
|
2290 if((b_temp>b_low) || (first_low)) {
|
wolffd@0
|
2291 b_low=b_temp;
|
wolffd@0
|
2292 first_low=0;
|
wolffd@0
|
2293 }
|
wolffd@0
|
2294 }
|
wolffd@0
|
2295 else {
|
wolffd@0
|
2296 b_temp=-(-learn_parm->eps-c[i]+lin[i]);
|
wolffd@0
|
2297 if((b_temp<b_high) || (first_high)) {
|
wolffd@0
|
2298 b_high=b_temp;
|
wolffd@0
|
2299 first_high=0;
|
wolffd@0
|
2300 }
|
wolffd@0
|
2301 }
|
wolffd@0
|
2302 }
|
wolffd@0
|
2303 else {
|
wolffd@0
|
2304 if(label[i]<0) {
|
wolffd@0
|
2305 b_temp=-(-learn_parm->eps-c[i]+lin[i]);
|
wolffd@0
|
2306 if((b_temp>b_low) || (first_low)) {
|
wolffd@0
|
2307 b_low=b_temp;
|
wolffd@0
|
2308 first_low=0;
|
wolffd@0
|
2309 }
|
wolffd@0
|
2310 }
|
wolffd@0
|
2311 else {
|
wolffd@0
|
2312 b_temp=-(learn_parm->eps-c[i]+lin[i]);
|
wolffd@0
|
2313 if((b_temp<b_high) || (first_high)) {
|
wolffd@0
|
2314 b_high=b_temp;
|
wolffd@0
|
2315 first_high=0;
|
wolffd@0
|
2316 }
|
wolffd@0
|
2317 }
|
wolffd@0
|
2318 }
|
wolffd@0
|
2319 }
|
wolffd@0
|
2320 if(first_high) {
|
wolffd@0
|
2321 model->b=-b_low;
|
wolffd@0
|
2322 }
|
wolffd@0
|
2323 else if(first_low) {
|
wolffd@0
|
2324 model->b=-b_high;
|
wolffd@0
|
2325 }
|
wolffd@0
|
2326 else {
|
wolffd@0
|
2327 model->b=-(b_high+b_low)/2.0; /* select b as the middle of range */
|
wolffd@0
|
2328 /* printf("\nb_low=%f, b_high=%f,b=%f\n",b_low,b_high,model->b); */
|
wolffd@0
|
2329 }
|
wolffd@0
|
2330 }
|
wolffd@0
|
2331
|
wolffd@0
|
2332 if(verbosity>=3) {
|
wolffd@0
|
2333 printf("done\n"); fflush(stdout);
|
wolffd@0
|
2334 }
|
wolffd@0
|
2335
|
wolffd@0
|
2336 return(model->sv_num-1); /* have to substract one, since element 0 is empty*/
|
wolffd@0
|
2337 }
|
wolffd@0
|
2338
|
wolffd@0
|
2339 long check_optimality(MODEL *model, long int *label, long int *unlabeled,
|
wolffd@0
|
2340 double *a, double *lin, double *c, long int totdoc,
|
wolffd@0
|
2341 LEARN_PARM *learn_parm, double *maxdiff,
|
wolffd@0
|
2342 double epsilon_crit_org, long int *misclassified,
|
wolffd@0
|
2343 long int *inconsistent, long int *active2dnum,
|
wolffd@0
|
2344 long int *last_suboptimal_at,
|
wolffd@0
|
2345 long int iteration, KERNEL_PARM *kernel_parm)
|
wolffd@0
|
2346 /* Check KT-conditions */
|
wolffd@0
|
2347 {
|
wolffd@0
|
2348 long i,ii,retrain;
|
wolffd@0
|
2349 double dist,ex_c,target;
|
wolffd@0
|
2350
|
wolffd@0
|
2351 if(kernel_parm->kernel_type == LINEAR) { /* be optimistic */
|
wolffd@0
|
2352 learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org;
|
wolffd@0
|
2353 }
|
wolffd@0
|
2354 else { /* be conservative */
|
wolffd@0
|
2355 learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3;
|
wolffd@0
|
2356 }
|
wolffd@0
|
2357 retrain=0;
|
wolffd@0
|
2358 (*maxdiff)=0;
|
wolffd@0
|
2359 (*misclassified)=0;
|
wolffd@0
|
2360 for(ii=0;(i=active2dnum[ii])>=0;ii++) {
|
wolffd@0
|
2361 if((!inconsistent[i]) && label[i]) {
|
wolffd@0
|
2362 dist=(lin[i]-model->b)*(double)label[i];/* 'distance' from
|
wolffd@0
|
2363 hyperplane*/
|
wolffd@0
|
2364 target=-(learn_parm->eps-(double)label[i]*c[i]);
|
wolffd@0
|
2365 ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
|
wolffd@0
|
2366 if(dist <= 0) {
|
wolffd@0
|
2367 (*misclassified)++; /* does not work due to deactivation of var */
|
wolffd@0
|
2368 }
|
wolffd@0
|
2369 if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
|
wolffd@0
|
2370 if((dist-target)>(*maxdiff)) /* largest violation */
|
wolffd@0
|
2371 (*maxdiff)=dist-target;
|
wolffd@0
|
2372 }
|
wolffd@0
|
2373 else if((a[i]<ex_c) && (dist < target)) {
|
wolffd@0
|
2374 if((target-dist)>(*maxdiff)) /* largest violation */
|
wolffd@0
|
2375 (*maxdiff)=target-dist;
|
wolffd@0
|
2376 }
|
wolffd@0
|
2377 /* Count how long a variable was at lower/upper bound (and optimal).*/
|
wolffd@0
|
2378 /* Variables, which were at the bound and optimal for a long */
|
wolffd@0
|
2379 /* time are unlikely to become support vectors. In case our */
|
wolffd@0
|
2380 /* cache is filled up, those variables are excluded to save */
|
wolffd@0
|
2381 /* kernel evaluations. (See chapter 'Shrinking').*/
|
wolffd@0
|
2382 if((a[i]>(learn_parm->epsilon_a))
|
wolffd@0
|
2383 && (a[i]<ex_c)) {
|
wolffd@0
|
2384 last_suboptimal_at[i]=iteration; /* not at bound */
|
wolffd@0
|
2385 }
|
wolffd@0
|
2386 else if((a[i]<=(learn_parm->epsilon_a))
|
wolffd@0
|
2387 && (dist < (target+learn_parm->epsilon_shrink))) {
|
wolffd@0
|
2388 last_suboptimal_at[i]=iteration; /* not likely optimal */
|
wolffd@0
|
2389 }
|
wolffd@0
|
2390 else if((a[i]>=ex_c)
|
wolffd@0
|
2391 && (dist > (target-learn_parm->epsilon_shrink))) {
|
wolffd@0
|
2392 last_suboptimal_at[i]=iteration; /* not likely optimal */
|
wolffd@0
|
2393 }
|
wolffd@0
|
2394 }
|
wolffd@0
|
2395 }
|
wolffd@0
|
2396 /* termination criterion */
|
wolffd@0
|
2397 if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) {
|
wolffd@0
|
2398 retrain=1;
|
wolffd@0
|
2399 }
|
wolffd@0
|
2400 return(retrain);
|
wolffd@0
|
2401 }
|
wolffd@0
|
2402
|
wolffd@0
|
2403 long check_optimality_sharedslack(DOC **docs, MODEL *model, long int *label,
|
wolffd@0
|
2404 double *a, double *lin, double *c, double *slack,
|
wolffd@0
|
2405 double *alphaslack,
|
wolffd@0
|
2406 long int totdoc,
|
wolffd@0
|
2407 LEARN_PARM *learn_parm, double *maxdiff,
|
wolffd@0
|
2408 double epsilon_crit_org, long int *misclassified,
|
wolffd@0
|
2409 long int *active2dnum,
|
wolffd@0
|
2410 long int *last_suboptimal_at,
|
wolffd@0
|
2411 long int iteration, KERNEL_PARM *kernel_parm)
|
wolffd@0
|
2412 /* Check KT-conditions */
|
wolffd@0
|
2413 {
|
wolffd@0
|
2414 long i,ii,retrain;
|
wolffd@0
|
2415 double dist,ex_c=0,target;
|
wolffd@0
|
2416
|
wolffd@0
|
2417 if(kernel_parm->kernel_type == LINEAR) { /* be optimistic */
|
wolffd@0
|
2418 learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org;
|
wolffd@0
|
2419 }
|
wolffd@0
|
2420 else { /* be conservative */
|
wolffd@0
|
2421 learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3;
|
wolffd@0
|
2422 }
|
wolffd@0
|
2423
|
wolffd@0
|
2424 retrain=0;
|
wolffd@0
|
2425 (*maxdiff)=0;
|
wolffd@0
|
2426 (*misclassified)=0;
|
wolffd@0
|
2427 for(ii=0;(i=active2dnum[ii])>=0;ii++) {
|
wolffd@0
|
2428 /* 'distance' from hyperplane*/
|
wolffd@0
|
2429 dist=(lin[i]-model->b)*(double)label[i]+slack[docs[i]->slackid];
|
wolffd@0
|
2430 target=-(learn_parm->eps-(double)label[i]*c[i]);
|
wolffd@0
|
2431 ex_c=learn_parm->svm_c-learn_parm->epsilon_a;
|
wolffd@0
|
2432 if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
|
wolffd@0
|
2433 if((dist-target)>(*maxdiff)) { /* largest violation */
|
wolffd@0
|
2434 (*maxdiff)=dist-target;
|
wolffd@0
|
2435 if(verbosity>=5) printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, alphaslack=%f\n",docs[i]->slackid,dist,target,slack[docs[i]->slackid],a[i],alphaslack[docs[i]->slackid]);
|
wolffd@0
|
2436 if(verbosity>=5) printf(" (single %f)\n",(*maxdiff));
|
wolffd@0
|
2437 }
|
wolffd@0
|
2438 }
|
wolffd@0
|
2439 if((alphaslack[docs[i]->slackid]<ex_c) && (slack[docs[i]->slackid]>0)) {
|
wolffd@0
|
2440 if((slack[docs[i]->slackid])>(*maxdiff)) { /* largest violation */
|
wolffd@0
|
2441 (*maxdiff)=slack[docs[i]->slackid];
|
wolffd@0
|
2442 if(verbosity>=5) printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, alphaslack=%f\n",docs[i]->slackid,dist,target,slack[docs[i]->slackid],a[i],alphaslack[docs[i]->slackid]);
|
wolffd@0
|
2443 if(verbosity>=5) printf(" (joint %f)\n",(*maxdiff));
|
wolffd@0
|
2444 }
|
wolffd@0
|
2445 }
|
wolffd@0
|
2446 /* Count how long a variable was at lower/upper bound (and optimal).*/
|
wolffd@0
|
2447 /* Variables, which were at the bound and optimal for a long */
|
wolffd@0
|
2448 /* time are unlikely to become support vectors. In case our */
|
wolffd@0
|
2449 /* cache is filled up, those variables are excluded to save */
|
wolffd@0
|
2450 /* kernel evaluations. (See chapter 'Shrinking').*/
|
wolffd@0
|
2451 if((a[i]>(learn_parm->epsilon_a))
|
wolffd@0
|
2452 && (a[i]<ex_c)) {
|
wolffd@0
|
2453 last_suboptimal_at[docs[i]->slackid]=iteration; /* not at bound */
|
wolffd@0
|
2454 }
|
wolffd@0
|
2455 else if((a[i]<=(learn_parm->epsilon_a))
|
wolffd@0
|
2456 && (dist < (target+learn_parm->epsilon_shrink))) {
|
wolffd@0
|
2457 last_suboptimal_at[docs[i]->slackid]=iteration; /* not likely optimal */
|
wolffd@0
|
2458 }
|
wolffd@0
|
2459 else if((a[i]>=ex_c)
|
wolffd@0
|
2460 && (slack[docs[i]->slackid] < learn_parm->epsilon_shrink)) {
|
wolffd@0
|
2461 last_suboptimal_at[docs[i]->slackid]=iteration; /* not likely optimal */
|
wolffd@0
|
2462 }
|
wolffd@0
|
2463 }
|
wolffd@0
|
2464 /* termination criterion */
|
wolffd@0
|
2465 if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) {
|
wolffd@0
|
2466 retrain=1;
|
wolffd@0
|
2467 }
|
wolffd@0
|
2468 return(retrain);
|
wolffd@0
|
2469 }
|
wolffd@0
|
2470
|
wolffd@0
|
2471 void compute_shared_slacks(DOC **docs, long int *label,
|
wolffd@0
|
2472 double *a, double *lin,
|
wolffd@0
|
2473 double *c, long int *active2dnum,
|
wolffd@0
|
2474 LEARN_PARM *learn_parm,
|
wolffd@0
|
2475 double *slack, double *alphaslack)
|
wolffd@0
|
2476 /* compute the value of shared slacks and the joint alphas */
|
wolffd@0
|
2477 {
|
wolffd@0
|
2478 long jj,i;
|
wolffd@0
|
2479 double dist,target;
|
wolffd@0
|
2480
|
wolffd@0
|
2481 for(jj=0;(i=active2dnum[jj])>=0;jj++) { /* clear slack variables */
|
wolffd@0
|
2482 slack[docs[i]->slackid]=0.0;
|
wolffd@0
|
2483 alphaslack[docs[i]->slackid]=0.0;
|
wolffd@0
|
2484 }
|
wolffd@0
|
2485 for(jj=0;(i=active2dnum[jj])>=0;jj++) { /* recompute slack variables */
|
wolffd@0
|
2486 dist=(lin[i])*(double)label[i];
|
wolffd@0
|
2487 target=-(learn_parm->eps-(double)label[i]*c[i]);
|
wolffd@0
|
2488 if((target-dist) > slack[docs[i]->slackid])
|
wolffd@0
|
2489 slack[docs[i]->slackid]=target-dist;
|
wolffd@0
|
2490 alphaslack[docs[i]->slackid]+=a[i];
|
wolffd@0
|
2491 }
|
wolffd@0
|
2492 }
|
wolffd@0
|
2493
|
wolffd@0
|
2494
|
wolffd@0
|
2495 long identify_inconsistent(double *a, long int *label,
|
wolffd@0
|
2496 long int *unlabeled, long int totdoc,
|
wolffd@0
|
2497 LEARN_PARM *learn_parm,
|
wolffd@0
|
2498 long int *inconsistentnum, long int *inconsistent)
|
wolffd@0
|
2499 {
|
wolffd@0
|
2500 long i,retrain;
|
wolffd@0
|
2501
|
wolffd@0
|
2502 /* Throw out examples with multipliers at upper bound. This */
|
wolffd@0
|
2503 /* corresponds to the -i 1 option. */
|
wolffd@0
|
2504 /* ATTENTION: this is just a heuristic for finding a close */
|
wolffd@0
|
2505 /* to minimum number of examples to exclude to */
|
wolffd@0
|
2506 /* make the problem separable with desired margin */
|
wolffd@0
|
2507 retrain=0;
|
wolffd@0
|
2508 for(i=0;i<totdoc;i++) {
|
wolffd@0
|
2509 if((!inconsistent[i]) && (!unlabeled[i])
|
wolffd@0
|
2510 && (a[i]>=(learn_parm->svm_cost[i]-learn_parm->epsilon_a))) {
|
wolffd@0
|
2511 (*inconsistentnum)++;
|
wolffd@0
|
2512 inconsistent[i]=1; /* never choose again */
|
wolffd@0
|
2513 retrain=2; /* start over */
|
wolffd@0
|
2514 if(verbosity>=3) {
|
wolffd@0
|
2515 printf("inconsistent(%ld)..",i); fflush(stdout);
|
wolffd@0
|
2516 }
|
wolffd@0
|
2517 }
|
wolffd@0
|
2518 }
|
wolffd@0
|
2519 return(retrain);
|
wolffd@0
|
2520 }
|
wolffd@0
|
2521
|
wolffd@0
|
2522 long identify_misclassified(double *lin, long int *label,
|
wolffd@0
|
2523 long int *unlabeled, long int totdoc,
|
wolffd@0
|
2524 MODEL *model, long int *inconsistentnum,
|
wolffd@0
|
2525 long int *inconsistent)
|
wolffd@0
|
2526 {
|
wolffd@0
|
2527 long i,retrain;
|
wolffd@0
|
2528 double dist;
|
wolffd@0
|
2529
|
wolffd@0
|
2530 /* Throw out misclassified examples. This */
|
wolffd@0
|
2531 /* corresponds to the -i 2 option. */
|
wolffd@0
|
2532 /* ATTENTION: this is just a heuristic for finding a close */
|
wolffd@0
|
2533 /* to minimum number of examples to exclude to */
|
wolffd@0
|
2534 /* make the problem separable with desired margin */
|
wolffd@0
|
2535 retrain=0;
|
wolffd@0
|
2536 for(i=0;i<totdoc;i++) {
|
wolffd@0
|
2537 dist=(lin[i]-model->b)*(double)label[i]; /* 'distance' from hyperplane*/
|
wolffd@0
|
2538 if((!inconsistent[i]) && (!unlabeled[i]) && (dist <= 0)) {
|
wolffd@0
|
2539 (*inconsistentnum)++;
|
wolffd@0
|
2540 inconsistent[i]=1; /* never choose again */
|
wolffd@0
|
2541 retrain=2; /* start over */
|
wolffd@0
|
2542 if(verbosity>=3) {
|
wolffd@0
|
2543 printf("inconsistent(%ld)..",i); fflush(stdout);
|
wolffd@0
|
2544 }
|
wolffd@0
|
2545 }
|
wolffd@0
|
2546 }
|
wolffd@0
|
2547 return(retrain);
|
wolffd@0
|
2548 }
|
wolffd@0
|
2549
|
wolffd@0
|
2550 long identify_one_misclassified(double *lin, long int *label,
|
wolffd@0
|
2551 long int *unlabeled,
|
wolffd@0
|
2552 long int totdoc, MODEL *model,
|
wolffd@0
|
2553 long int *inconsistentnum,
|
wolffd@0
|
2554 long int *inconsistent)
|
wolffd@0
|
2555 {
|
wolffd@0
|
2556 long i,retrain,maxex=-1;
|
wolffd@0
|
2557 double dist,maxdist=0;
|
wolffd@0
|
2558
|
wolffd@0
|
2559 /* Throw out the 'most misclassified' example. This */
|
wolffd@0
|
2560 /* corresponds to the -i 3 option. */
|
wolffd@0
|
2561 /* ATTENTION: this is just a heuristic for finding a close */
|
wolffd@0
|
2562 /* to minimum number of examples to exclude to */
|
wolffd@0
|
2563 /* make the problem separable with desired margin */
|
wolffd@0
|
2564 retrain=0;
|
wolffd@0
|
2565 for(i=0;i<totdoc;i++) {
|
wolffd@0
|
2566 if((!inconsistent[i]) && (!unlabeled[i])) {
|
wolffd@0
|
2567 dist=(lin[i]-model->b)*(double)label[i];/* 'distance' from hyperplane*/
|
wolffd@0
|
2568 if(dist<maxdist) {
|
wolffd@0
|
2569 maxdist=dist;
|
wolffd@0
|
2570 maxex=i;
|
wolffd@0
|
2571 }
|
wolffd@0
|
2572 }
|
wolffd@0
|
2573 }
|
wolffd@0
|
2574 if(maxex>=0) {
|
wolffd@0
|
2575 (*inconsistentnum)++;
|
wolffd@0
|
2576 inconsistent[maxex]=1; /* never choose again */
|
wolffd@0
|
2577 retrain=2; /* start over */
|
wolffd@0
|
2578 if(verbosity>=3) {
|
wolffd@0
|
2579 printf("inconsistent(%ld)..",i); fflush(stdout);
|
wolffd@0
|
2580 }
|
wolffd@0
|
2581 }
|
wolffd@0
|
2582 return(retrain);
|
wolffd@0
|
2583 }
|
wolffd@0
|
2584
|
wolffd@0
|
2585 void update_linear_component(DOC **docs, long int *label,
|
wolffd@0
|
2586 long int *active2dnum, double *a,
|
wolffd@0
|
2587 double *a_old, long int *working2dnum,
|
wolffd@0
|
2588 long int totdoc, long int totwords,
|
wolffd@0
|
2589 KERNEL_PARM *kernel_parm,
|
wolffd@0
|
2590 KERNEL_CACHE *kernel_cache,
|
wolffd@0
|
2591 double *lin, CFLOAT *aicache, double *weights)
|
wolffd@0
|
2592 /* keep track of the linear component */
|
wolffd@0
|
2593 /* lin of the gradient etc. by updating */
|
wolffd@0
|
2594 /* based on the change of the variables */
|
wolffd@0
|
2595 /* in the current working set */
|
wolffd@0
|
2596 {
|
wolffd@0
|
2597 register long i,ii,j,jj;
|
wolffd@0
|
2598 register double tec;
|
wolffd@0
|
2599 SVECTOR *f;
|
wolffd@0
|
2600
|
wolffd@0
|
2601 if(kernel_parm->kernel_type==0) { /* special linear case */
|
wolffd@0
|
2602 clear_vector_n(weights,totwords);
|
wolffd@0
|
2603 for(ii=0;(i=working2dnum[ii])>=0;ii++) {
|
wolffd@0
|
2604 if(a[i] != a_old[i]) {
|
wolffd@0
|
2605 for(f=docs[i]->fvec;f;f=f->next)
|
wolffd@0
|
2606 add_vector_ns(weights,f,
|
wolffd@0
|
2607 f->factor*((a[i]-a_old[i])*(double)label[i]));
|
wolffd@0
|
2608 }
|
wolffd@0
|
2609 }
|
wolffd@0
|
2610 for(jj=0;(j=active2dnum[jj])>=0;jj++) {
|
wolffd@0
|
2611 for(f=docs[j]->fvec;f;f=f->next)
|
wolffd@0
|
2612 lin[j]+=f->factor*sprod_ns(weights,f);
|
wolffd@0
|
2613 }
|
wolffd@0
|
2614 }
|
wolffd@0
|
2615 else { /* general case */
|
wolffd@0
|
2616 for(jj=0;(i=working2dnum[jj])>=0;jj++) {
|
wolffd@0
|
2617 if(a[i] != a_old[i]) {
|
wolffd@0
|
2618 get_kernel_row(kernel_cache,docs,i,totdoc,active2dnum,aicache,
|
wolffd@0
|
2619 kernel_parm);
|
wolffd@0
|
2620 for(ii=0;(j=active2dnum[ii])>=0;ii++) {
|
wolffd@0
|
2621 tec=aicache[j];
|
wolffd@0
|
2622 lin[j]+=(((a[i]*tec)-(a_old[i]*tec))*(double)label[i]);
|
wolffd@0
|
2623 }
|
wolffd@0
|
2624 }
|
wolffd@0
|
2625 }
|
wolffd@0
|
2626 }
|
wolffd@0
|
2627 }
|
wolffd@0
|
2628
|
wolffd@0
|
2629
|
wolffd@0
|
2630 long incorporate_unlabeled_examples(MODEL *model, long int *label,
|
wolffd@0
|
2631 long int *inconsistent,
|
wolffd@0
|
2632 long int *unlabeled,
|
wolffd@0
|
2633 double *a, double *lin,
|
wolffd@0
|
2634 long int totdoc, double *selcrit,
|
wolffd@0
|
2635 long int *select, long int *key,
|
wolffd@0
|
2636 long int transductcycle,
|
wolffd@0
|
2637 KERNEL_PARM *kernel_parm,
|
wolffd@0
|
2638 LEARN_PARM *learn_parm)
|
wolffd@0
|
2639 {
|
wolffd@0
|
2640 long i,j,k,j1,j2,j3,j4,unsupaddnum1=0,unsupaddnum2=0;
|
wolffd@0
|
2641 long pos,neg,upos,uneg,orgpos,orgneg,nolabel,newpos,newneg,allunlab;
|
wolffd@0
|
2642 double dist,model_length,posratio,negratio;
|
wolffd@0
|
2643 long check_every=2;
|
wolffd@0
|
2644 double loss;
|
wolffd@0
|
2645 static double switchsens=0.0,switchsensorg=0.0;
|
wolffd@0
|
2646 double umin,umax,sumalpha;
|
wolffd@0
|
2647 long imin=0,imax=0;
|
wolffd@0
|
2648 static long switchnum=0;
|
wolffd@0
|
2649
|
wolffd@0
|
2650 switchsens/=1.2;
|
wolffd@0
|
2651
|
wolffd@0
|
2652 /* assumes that lin[] is up to date -> no inactive vars */
|
wolffd@0
|
2653
|
wolffd@0
|
2654 orgpos=0;
|
wolffd@0
|
2655 orgneg=0;
|
wolffd@0
|
2656 newpos=0;
|
wolffd@0
|
2657 newneg=0;
|
wolffd@0
|
2658 nolabel=0;
|
wolffd@0
|
2659 allunlab=0;
|
wolffd@0
|
2660 for(i=0;i<totdoc;i++) {
|
wolffd@0
|
2661 if(!unlabeled[i]) {
|
wolffd@0
|
2662 if(label[i] > 0) {
|
wolffd@0
|
2663 orgpos++;
|
wolffd@0
|
2664 }
|
wolffd@0
|
2665 else {
|
wolffd@0
|
2666 orgneg++;
|
wolffd@0
|
2667 }
|
wolffd@0
|
2668 }
|
wolffd@0
|
2669 else {
|
wolffd@0
|
2670 allunlab++;
|
wolffd@0
|
2671 if(unlabeled[i]) {
|
wolffd@0
|
2672 if(label[i] > 0) {
|
wolffd@0
|
2673 newpos++;
|
wolffd@0
|
2674 }
|
wolffd@0
|
2675 else if(label[i] < 0) {
|
wolffd@0
|
2676 newneg++;
|
wolffd@0
|
2677 }
|
wolffd@0
|
2678 }
|
wolffd@0
|
2679 }
|
wolffd@0
|
2680 if(label[i]==0) {
|
wolffd@0
|
2681 nolabel++;
|
wolffd@0
|
2682 }
|
wolffd@0
|
2683 }
|
wolffd@0
|
2684
|
wolffd@0
|
2685 if(learn_parm->transduction_posratio >= 0) {
|
wolffd@0
|
2686 posratio=learn_parm->transduction_posratio;
|
wolffd@0
|
2687 }
|
wolffd@0
|
2688 else {
|
wolffd@0
|
2689 posratio=(double)orgpos/(double)(orgpos+orgneg); /* use ratio of pos/neg */
|
wolffd@0
|
2690 } /* in training data */
|
wolffd@0
|
2691 negratio=1.0-posratio;
|
wolffd@0
|
2692
|
wolffd@0
|
2693 learn_parm->svm_costratio=1.0; /* global */
|
wolffd@0
|
2694 if(posratio>0) {
|
wolffd@0
|
2695 learn_parm->svm_costratio_unlab=negratio/posratio;
|
wolffd@0
|
2696 }
|
wolffd@0
|
2697 else {
|
wolffd@0
|
2698 learn_parm->svm_costratio_unlab=1.0;
|
wolffd@0
|
2699 }
|
wolffd@0
|
2700
|
wolffd@0
|
2701 pos=0;
|
wolffd@0
|
2702 neg=0;
|
wolffd@0
|
2703 upos=0;
|
wolffd@0
|
2704 uneg=0;
|
wolffd@0
|
2705 for(i=0;i<totdoc;i++) {
|
wolffd@0
|
2706 dist=(lin[i]-model->b); /* 'distance' from hyperplane*/
|
wolffd@0
|
2707 if(dist>0) {
|
wolffd@0
|
2708 pos++;
|
wolffd@0
|
2709 }
|
wolffd@0
|
2710 else {
|
wolffd@0
|
2711 neg++;
|
wolffd@0
|
2712 }
|
wolffd@0
|
2713 if(unlabeled[i]) {
|
wolffd@0
|
2714 if(dist>0) {
|
wolffd@0
|
2715 upos++;
|
wolffd@0
|
2716 }
|
wolffd@0
|
2717 else {
|
wolffd@0
|
2718 uneg++;
|
wolffd@0
|
2719 }
|
wolffd@0
|
2720 }
|
wolffd@0
|
2721 if((!unlabeled[i]) && (a[i]>(learn_parm->svm_cost[i]-learn_parm->epsilon_a))) {
|
wolffd@0
|
2722 /* printf("Ubounded %ld (class %ld, unlabeled %ld)\n",i,label[i],unlabeled[i]); */
|
wolffd@0
|
2723 }
|
wolffd@0
|
2724 }
|
wolffd@0
|
2725 if(verbosity>=2) {
|
wolffd@0
|
2726 printf("POS=%ld, ORGPOS=%ld, ORGNEG=%ld\n",pos,orgpos,orgneg);
|
wolffd@0
|
2727 printf("POS=%ld, NEWPOS=%ld, NEWNEG=%ld\n",pos,newpos,newneg);
|
wolffd@0
|
2728 printf("pos ratio = %f (%f).\n",(double)(upos)/(double)(allunlab),posratio);
|
wolffd@0
|
2729 fflush(stdout);
|
wolffd@0
|
2730 }
|
wolffd@0
|
2731
|
wolffd@0
|
2732 if(transductcycle == 0) {
|
wolffd@0
|
2733 j1=0;
|
wolffd@0
|
2734 j2=0;
|
wolffd@0
|
2735 j4=0;
|
wolffd@0
|
2736 for(i=0;i<totdoc;i++) {
|
wolffd@0
|
2737 dist=(lin[i]-model->b); /* 'distance' from hyperplane*/
|
wolffd@0
|
2738 if((label[i]==0) && (unlabeled[i])) {
|
wolffd@0
|
2739 selcrit[j4]=dist;
|
wolffd@0
|
2740 key[j4]=i;
|
wolffd@0
|
2741 j4++;
|
wolffd@0
|
2742 }
|
wolffd@0
|
2743 }
|
wolffd@0
|
2744 unsupaddnum1=0;
|
wolffd@0
|
2745 unsupaddnum2=0;
|
wolffd@0
|
2746 select_top_n(selcrit,j4,select,(long)(allunlab*posratio+0.5));
|
wolffd@0
|
2747 for(k=0;(k<(long)(allunlab*posratio+0.5));k++) {
|
wolffd@0
|
2748 i=key[select[k]];
|
wolffd@0
|
2749 label[i]=1;
|
wolffd@0
|
2750 unsupaddnum1++;
|
wolffd@0
|
2751 j1++;
|
wolffd@0
|
2752 }
|
wolffd@0
|
2753 for(i=0;i<totdoc;i++) {
|
wolffd@0
|
2754 if((label[i]==0) && (unlabeled[i])) {
|
wolffd@0
|
2755 label[i]=-1;
|
wolffd@0
|
2756 j2++;
|
wolffd@0
|
2757 unsupaddnum2++;
|
wolffd@0
|
2758 }
|
wolffd@0
|
2759 }
|
wolffd@0
|
2760 for(i=0;i<totdoc;i++) { /* set upper bounds on vars */
|
wolffd@0
|
2761 if(unlabeled[i]) {
|
wolffd@0
|
2762 if(label[i] == 1) {
|
wolffd@0
|
2763 learn_parm->svm_cost[i]=learn_parm->svm_c*
|
wolffd@0
|
2764 learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound;
|
wolffd@0
|
2765 }
|
wolffd@0
|
2766 else if(label[i] == -1) {
|
wolffd@0
|
2767 learn_parm->svm_cost[i]=learn_parm->svm_c*
|
wolffd@0
|
2768 learn_parm->svm_unlabbound;
|
wolffd@0
|
2769 }
|
wolffd@0
|
2770 }
|
wolffd@0
|
2771 }
|
wolffd@0
|
2772 if(verbosity>=1) {
|
wolffd@0
|
2773 /* printf("costratio %f, costratio_unlab %f, unlabbound %f\n",
|
wolffd@0
|
2774 learn_parm->svm_costratio,learn_parm->svm_costratio_unlab,
|
wolffd@0
|
2775 learn_parm->svm_unlabbound); */
|
wolffd@0
|
2776 printf("Classifying unlabeled data as %ld POS / %ld NEG.\n",
|
wolffd@0
|
2777 unsupaddnum1,unsupaddnum2);
|
wolffd@0
|
2778 fflush(stdout);
|
wolffd@0
|
2779 }
|
wolffd@0
|
2780 if(verbosity >= 1)
|
wolffd@0
|
2781 printf("Retraining.");
|
wolffd@0
|
2782 if(verbosity >= 2) printf("\n");
|
wolffd@0
|
2783 return((long)3);
|
wolffd@0
|
2784 }
|
wolffd@0
|
2785 if((transductcycle % check_every) == 0) {
|
wolffd@0
|
2786 if(verbosity >= 1)
|
wolffd@0
|
2787 printf("Retraining.");
|
wolffd@0
|
2788 if(verbosity >= 2) printf("\n");
|
wolffd@0
|
2789 j1=0;
|
wolffd@0
|
2790 j2=0;
|
wolffd@0
|
2791 unsupaddnum1=0;
|
wolffd@0
|
2792 unsupaddnum2=0;
|
wolffd@0
|
2793 for(i=0;i<totdoc;i++) {
|
wolffd@0
|
2794 if((unlabeled[i] == 2)) {
|
wolffd@0
|
2795 unlabeled[i]=1;
|
wolffd@0
|
2796 label[i]=1;
|
wolffd@0
|
2797 j1++;
|
wolffd@0
|
2798 unsupaddnum1++;
|
wolffd@0
|
2799 }
|
wolffd@0
|
2800 else if((unlabeled[i] == 3)) {
|
wolffd@0
|
2801 unlabeled[i]=1;
|
wolffd@0
|
2802 label[i]=-1;
|
wolffd@0
|
2803 j2++;
|
wolffd@0
|
2804 unsupaddnum2++;
|
wolffd@0
|
2805 }
|
wolffd@0
|
2806 }
|
wolffd@0
|
2807 for(i=0;i<totdoc;i++) { /* set upper bounds on vars */
|
wolffd@0
|
2808 if(unlabeled[i]) {
|
wolffd@0
|
2809 if(label[i] == 1) {
|
wolffd@0
|
2810 learn_parm->svm_cost[i]=learn_parm->svm_c*
|
wolffd@0
|
2811 learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound;
|
wolffd@0
|
2812 }
|
wolffd@0
|
2813 else if(label[i] == -1) {
|
wolffd@0
|
2814 learn_parm->svm_cost[i]=learn_parm->svm_c*
|
wolffd@0
|
2815 learn_parm->svm_unlabbound;
|
wolffd@0
|
2816 }
|
wolffd@0
|
2817 }
|
wolffd@0
|
2818 }
|
wolffd@0
|
2819
|
wolffd@0
|
2820 if(verbosity>=2) {
|
wolffd@0
|
2821 /* printf("costratio %f, costratio_unlab %f, unlabbound %f\n",
|
wolffd@0
|
2822 learn_parm->svm_costratio,learn_parm->svm_costratio_unlab,
|
wolffd@0
|
2823 learn_parm->svm_unlabbound); */
|
wolffd@0
|
2824 printf("%ld positive -> Added %ld POS / %ld NEG unlabeled examples.\n",
|
wolffd@0
|
2825 upos,unsupaddnum1,unsupaddnum2);
|
wolffd@0
|
2826 fflush(stdout);
|
wolffd@0
|
2827 }
|
wolffd@0
|
2828
|
wolffd@0
|
2829 if(learn_parm->svm_unlabbound == 1) {
|
wolffd@0
|
2830 learn_parm->epsilon_crit=0.001; /* do the last run right */
|
wolffd@0
|
2831 }
|
wolffd@0
|
2832 else {
|
wolffd@0
|
2833 learn_parm->epsilon_crit=0.01; /* otherwise, no need to be so picky */
|
wolffd@0
|
2834 }
|
wolffd@0
|
2835
|
wolffd@0
|
2836 return((long)3);
|
wolffd@0
|
2837 }
|
wolffd@0
|
2838 else if(((transductcycle % check_every) < check_every)) {
|
wolffd@0
|
2839 model_length=0;
|
wolffd@0
|
2840 sumalpha=0;
|
wolffd@0
|
2841 loss=0;
|
wolffd@0
|
2842 for(i=0;i<totdoc;i++) {
|
wolffd@0
|
2843 model_length+=a[i]*label[i]*lin[i];
|
wolffd@0
|
2844 sumalpha+=a[i];
|
wolffd@0
|
2845 dist=(lin[i]-model->b); /* 'distance' from hyperplane*/
|
wolffd@0
|
2846 if((label[i]*dist)<(1.0-learn_parm->epsilon_crit)) {
|
wolffd@0
|
2847 loss+=(1.0-(label[i]*dist))*learn_parm->svm_cost[i];
|
wolffd@0
|
2848 }
|
wolffd@0
|
2849 }
|
wolffd@0
|
2850 model_length=sqrt(model_length);
|
wolffd@0
|
2851 if(verbosity>=2) {
|
wolffd@0
|
2852 printf("Model-length = %f (%f), loss = %f, objective = %f\n",
|
wolffd@0
|
2853 model_length,sumalpha,loss,loss+0.5*model_length*model_length);
|
wolffd@0
|
2854 fflush(stdout);
|
wolffd@0
|
2855 }
|
wolffd@0
|
2856 j1=0;
|
wolffd@0
|
2857 j2=0;
|
wolffd@0
|
2858 j3=0;
|
wolffd@0
|
2859 j4=0;
|
wolffd@0
|
2860 unsupaddnum1=0;
|
wolffd@0
|
2861 unsupaddnum2=0;
|
wolffd@0
|
2862 umin=99999;
|
wolffd@0
|
2863 umax=-99999;
|
wolffd@0
|
2864 j4=1;
|
wolffd@0
|
2865 while(j4) {
|
wolffd@0
|
2866 umin=99999;
|
wolffd@0
|
2867 umax=-99999;
|
wolffd@0
|
2868 for(i=0;(i<totdoc);i++) {
|
wolffd@0
|
2869 dist=(lin[i]-model->b);
|
wolffd@0
|
2870 if((label[i]>0) && (unlabeled[i]) && (!inconsistent[i])
|
wolffd@0
|
2871 && (dist<umin)) {
|
wolffd@0
|
2872 umin=dist;
|
wolffd@0
|
2873 imin=i;
|
wolffd@0
|
2874 }
|
wolffd@0
|
2875 if((label[i]<0) && (unlabeled[i]) && (!inconsistent[i])
|
wolffd@0
|
2876 && (dist>umax)) {
|
wolffd@0
|
2877 umax=dist;
|
wolffd@0
|
2878 imax=i;
|
wolffd@0
|
2879 }
|
wolffd@0
|
2880 }
|
wolffd@0
|
2881 if((umin < (umax+switchsens-1E-4))) {
|
wolffd@0
|
2882 j1++;
|
wolffd@0
|
2883 j2++;
|
wolffd@0
|
2884 unsupaddnum1++;
|
wolffd@0
|
2885 unlabeled[imin]=3;
|
wolffd@0
|
2886 inconsistent[imin]=1;
|
wolffd@0
|
2887 unsupaddnum2++;
|
wolffd@0
|
2888 unlabeled[imax]=2;
|
wolffd@0
|
2889 inconsistent[imax]=1;
|
wolffd@0
|
2890 }
|
wolffd@0
|
2891 else
|
wolffd@0
|
2892 j4=0;
|
wolffd@0
|
2893 j4=0;
|
wolffd@0
|
2894 }
|
wolffd@0
|
2895 for(j=0;(j<totdoc);j++) {
|
wolffd@0
|
2896 if(unlabeled[j] && (!inconsistent[j])) {
|
wolffd@0
|
2897 if(label[j]>0) {
|
wolffd@0
|
2898 unlabeled[j]=2;
|
wolffd@0
|
2899 }
|
wolffd@0
|
2900 else if(label[j]<0) {
|
wolffd@0
|
2901 unlabeled[j]=3;
|
wolffd@0
|
2902 }
|
wolffd@0
|
2903 /* inconsistent[j]=1; */
|
wolffd@0
|
2904 j3++;
|
wolffd@0
|
2905 }
|
wolffd@0
|
2906 }
|
wolffd@0
|
2907 switchnum+=unsupaddnum1+unsupaddnum2;
|
wolffd@0
|
2908
|
wolffd@0
|
2909 /* stop and print out current margin
|
wolffd@0
|
2910 printf("switchnum %ld %ld\n",switchnum,kernel_parm->poly_degree);
|
wolffd@0
|
2911 if(switchnum == 2*kernel_parm->poly_degree) {
|
wolffd@0
|
2912 learn_parm->svm_unlabbound=1;
|
wolffd@0
|
2913 }
|
wolffd@0
|
2914 */
|
wolffd@0
|
2915
|
wolffd@0
|
2916 if((!unsupaddnum1) && (!unsupaddnum2)) {
|
wolffd@0
|
2917 if((learn_parm->svm_unlabbound>=1) && ((newpos+newneg) == allunlab)) {
|
wolffd@0
|
2918 for(j=0;(j<totdoc);j++) {
|
wolffd@0
|
2919 inconsistent[j]=0;
|
wolffd@0
|
2920 if(unlabeled[j]) unlabeled[j]=1;
|
wolffd@0
|
2921 }
|
wolffd@0
|
2922 write_prediction(learn_parm->predfile,model,lin,a,unlabeled,label,
|
wolffd@0
|
2923 totdoc,learn_parm);
|
wolffd@0
|
2924 if(verbosity>=1)
|
wolffd@0
|
2925 printf("Number of switches: %ld\n",switchnum);
|
wolffd@0
|
2926 return((long)0);
|
wolffd@0
|
2927 }
|
wolffd@0
|
2928 switchsens=switchsensorg;
|
wolffd@0
|
2929 learn_parm->svm_unlabbound*=1.5;
|
wolffd@0
|
2930 if(learn_parm->svm_unlabbound>1) {
|
wolffd@0
|
2931 learn_parm->svm_unlabbound=1;
|
wolffd@0
|
2932 }
|
wolffd@0
|
2933 model->at_upper_bound=0; /* since upper bound increased */
|
wolffd@0
|
2934 if(verbosity>=1)
|
wolffd@0
|
2935 printf("Increasing influence of unlabeled examples to %f%% .",
|
wolffd@0
|
2936 learn_parm->svm_unlabbound*100.0);
|
wolffd@0
|
2937 }
|
wolffd@0
|
2938 else if(verbosity>=1) {
|
wolffd@0
|
2939 printf("%ld positive -> Switching labels of %ld POS / %ld NEG unlabeled examples.",
|
wolffd@0
|
2940 upos,unsupaddnum1,unsupaddnum2);
|
wolffd@0
|
2941 fflush(stdout);
|
wolffd@0
|
2942 }
|
wolffd@0
|
2943
|
wolffd@0
|
2944 if(verbosity >= 2) printf("\n");
|
wolffd@0
|
2945
|
wolffd@0
|
2946 learn_parm->epsilon_crit=0.5; /* don't need to be so picky */
|
wolffd@0
|
2947
|
wolffd@0
|
2948 for(i=0;i<totdoc;i++) { /* set upper bounds on vars */
|
wolffd@0
|
2949 if(unlabeled[i]) {
|
wolffd@0
|
2950 if(label[i] == 1) {
|
wolffd@0
|
2951 learn_parm->svm_cost[i]=learn_parm->svm_c*
|
wolffd@0
|
2952 learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound;
|
wolffd@0
|
2953 }
|
wolffd@0
|
2954 else if(label[i] == -1) {
|
wolffd@0
|
2955 learn_parm->svm_cost[i]=learn_parm->svm_c*
|
wolffd@0
|
2956 learn_parm->svm_unlabbound;
|
wolffd@0
|
2957 }
|
wolffd@0
|
2958 }
|
wolffd@0
|
2959 }
|
wolffd@0
|
2960
|
wolffd@0
|
2961 return((long)2);
|
wolffd@0
|
2962 }
|
wolffd@0
|
2963
|
wolffd@0
|
2964 return((long)0);
|
wolffd@0
|
2965 }
|
wolffd@0
|
2966
|
wolffd@0
|
2967 /*************************** Working set selection ***************************/
|
wolffd@0
|
2968
|
wolffd@0
|
long select_next_qp_subproblem_grad(long int *label, 
				    long int *unlabeled, 
				    double *a, double *lin, 
				    double *c, long int totdoc, 
				    long int qp_size, 
				    LEARN_PARM *learn_parm, 
				    long int *inconsistent, 
				    long int *active2dnum, 
				    long int *working2dnum, 
				    double *selcrit, 
				    long int *select, 
				    KERNEL_CACHE *kernel_cache, 
				    long int cache_only,
				    long int *key, long int *chosen)
     /* Use the feasible direction approach to select the next
	qp-subproblem (see chapter 'Selecting a good working set'). If
	'cache_only' is true, then the variables are selected only among
	those for which the kernel evaluations are cached. */
     /* Appends up to qp_size variable indices to working2dnum (which
	is -1-terminated on return) and returns how many were added. */
{
  long choosenum,i,j,k,activedoc,inum,valid;
  double s;

  for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
  choosenum=0;
  activedoc=0;
  /* Pass 1: descent direction s = -y_j.  A variable qualifies only if
     alpha_j is not stuck at the bound that blocks a step in this
     direction, it is not already chosen, carries a label (label 0
     marks unlabeled/test examples), and is not marked inconsistent. */
  for(i=0;(j=active2dnum[i])>=0;i++) {
    s=-label[j];
    if(kernel_cache && cache_only) 
      valid=(kernel_cache->index[j]>=0);  /* only cached rows qualify */
    else
      valid=1;
    if(valid
       && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))  /* at lower bound */
       && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) 
	     && (s>0)))                                    /* at upper bound */
       && (!chosen[j]) 
       && (label[j])
       && (!inconsistent[j]))
      {
      /* selection criterion: size of the KKT violation in this
	 direction (dual gradient w.r.t. alpha_j) */
      selcrit[activedoc]=(double)label[j]*(learn_parm->eps-(double)label[j]*c[j]+(double)label[j]*lin[j]);
      /* selcrit[activedoc]=(double)label[j]*(-1.0+(double)label[j]*lin[j]); */
      key[activedoc]=j;
      activedoc++;
      }
  }
  select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
  /* take at most qp_size/2 of the best pass-1 candidates */
  for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
    /* if(learn_parm->biased_hyperplane || (selcrit[select[k]] > 0)) { */
    i=key[select[k]];
    chosen[i]=1;
    working2dnum[inum+choosenum]=i;
    choosenum+=1;
    if(kernel_cache)
      kernel_cache_touch(kernel_cache,i); /* make sure it does not get
					     kicked out of cache */
    /* } */
  }

  activedoc=0;
  /* Pass 2: opposite direction s = +y_j, with the criterion's sign
     flipped accordingly; fills the working set up to qp_size. */
  for(i=0;(j=active2dnum[i])>=0;i++) {
    s=label[j];
    if(kernel_cache && cache_only) 
      valid=(kernel_cache->index[j]>=0);  /* only cached rows qualify */
    else
      valid=1;
    if(valid
       && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))  /* at lower bound */
       && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) 
	     && (s>0)))                                    /* at upper bound */
       && (!chosen[j]) 
       && (label[j])
       && (!inconsistent[j])) 
      {
      selcrit[activedoc]=-(double)label[j]*(learn_parm->eps-(double)label[j]*c[j]+(double)label[j]*lin[j]);
      /* selcrit[activedoc]=-(double)(label[j]*(-1.0+(double)label[j]*lin[j])); */
      key[activedoc]=j;
      activedoc++;
      }
  }
  select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
  for(k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
    /* if(learn_parm->biased_hyperplane || (selcrit[select[k]] > 0)) { */
    i=key[select[k]];
    chosen[i]=1;
    working2dnum[inum+choosenum]=i;
    choosenum+=1;
    if(kernel_cache)
      kernel_cache_touch(kernel_cache,i); /* make sure it does not get
					     kicked out of cache */
    /* } */
  } 
  working2dnum[inum+choosenum]=-1; /* complete index */
  return(choosenum);
}
|
wolffd@0
|
3063
|
wolffd@0
|
3064 long select_next_qp_subproblem_rand(long int *label,
|
wolffd@0
|
3065 long int *unlabeled,
|
wolffd@0
|
3066 double *a, double *lin,
|
wolffd@0
|
3067 double *c, long int totdoc,
|
wolffd@0
|
3068 long int qp_size,
|
wolffd@0
|
3069 LEARN_PARM *learn_parm,
|
wolffd@0
|
3070 long int *inconsistent,
|
wolffd@0
|
3071 long int *active2dnum,
|
wolffd@0
|
3072 long int *working2dnum,
|
wolffd@0
|
3073 double *selcrit,
|
wolffd@0
|
3074 long int *select,
|
wolffd@0
|
3075 KERNEL_CACHE *kernel_cache,
|
wolffd@0
|
3076 long int *key,
|
wolffd@0
|
3077 long int *chosen,
|
wolffd@0
|
3078 long int iteration)
|
wolffd@0
|
3079 /* Use the feasible direction approach to select the next
|
wolffd@0
|
3080 qp-subproblem (see section 'Selecting a good working set'). Chooses
|
wolffd@0
|
3081 a feasible direction at (pseudo) random to help jump over numerical
|
wolffd@0
|
3082 problem. */
|
wolffd@0
|
3083 {
|
wolffd@0
|
3084 long choosenum,i,j,k,activedoc,inum;
|
wolffd@0
|
3085 double s;
|
wolffd@0
|
3086
|
wolffd@0
|
3087 for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
|
wolffd@0
|
3088 choosenum=0;
|
wolffd@0
|
3089 activedoc=0;
|
wolffd@0
|
3090 for(i=0;(j=active2dnum[i])>=0;i++) {
|
wolffd@0
|
3091 s=-label[j];
|
wolffd@0
|
3092 if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
|
wolffd@0
|
3093 && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
|
wolffd@0
|
3094 && (s>0)))
|
wolffd@0
|
3095 && (!inconsistent[j])
|
wolffd@0
|
3096 && (label[j])
|
wolffd@0
|
3097 && (!chosen[j])) {
|
wolffd@0
|
3098 selcrit[activedoc]=(j+iteration) % totdoc;
|
wolffd@0
|
3099 key[activedoc]=j;
|
wolffd@0
|
3100 activedoc++;
|
wolffd@0
|
3101 }
|
wolffd@0
|
3102 }
|
wolffd@0
|
3103 select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
|
wolffd@0
|
3104 for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
|
wolffd@0
|
3105 i=key[select[k]];
|
wolffd@0
|
3106 chosen[i]=1;
|
wolffd@0
|
3107 working2dnum[inum+choosenum]=i;
|
wolffd@0
|
3108 choosenum+=1;
|
wolffd@0
|
3109 kernel_cache_touch(kernel_cache,i); /* make sure it does not get kicked */
|
wolffd@0
|
3110 /* out of cache */
|
wolffd@0
|
3111 }
|
wolffd@0
|
3112
|
wolffd@0
|
3113 activedoc=0;
|
wolffd@0
|
3114 for(i=0;(j=active2dnum[i])>=0;i++) {
|
wolffd@0
|
3115 s=label[j];
|
wolffd@0
|
3116 if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
|
wolffd@0
|
3117 && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
|
wolffd@0
|
3118 && (s>0)))
|
wolffd@0
|
3119 && (!inconsistent[j])
|
wolffd@0
|
3120 && (label[j])
|
wolffd@0
|
3121 && (!chosen[j])) {
|
wolffd@0
|
3122 selcrit[activedoc]=(j+iteration) % totdoc;
|
wolffd@0
|
3123 key[activedoc]=j;
|
wolffd@0
|
3124 activedoc++;
|
wolffd@0
|
3125 }
|
wolffd@0
|
3126 }
|
wolffd@0
|
3127 select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
|
wolffd@0
|
3128 for(k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
|
wolffd@0
|
3129 i=key[select[k]];
|
wolffd@0
|
3130 chosen[i]=1;
|
wolffd@0
|
3131 working2dnum[inum+choosenum]=i;
|
wolffd@0
|
3132 choosenum+=1;
|
wolffd@0
|
3133 kernel_cache_touch(kernel_cache,i); /* make sure it does not get kicked */
|
wolffd@0
|
3134 /* out of cache */
|
wolffd@0
|
3135 }
|
wolffd@0
|
3136 working2dnum[inum+choosenum]=-1; /* complete index */
|
wolffd@0
|
3137 return(choosenum);
|
wolffd@0
|
3138 }
|
wolffd@0
|
3139
|
wolffd@0
|
3140 long select_next_qp_slackset(DOC **docs, long int *label,
|
wolffd@0
|
3141 double *a, double *lin,
|
wolffd@0
|
3142 double *slack, double *alphaslack,
|
wolffd@0
|
3143 double *c,
|
wolffd@0
|
3144 LEARN_PARM *learn_parm,
|
wolffd@0
|
3145 long int *active2dnum, double *maxviol)
|
wolffd@0
|
3146 /* returns the slackset with the largest internal violation */
|
wolffd@0
|
3147 {
|
wolffd@0
|
3148 long i,ii,maxdiffid;
|
wolffd@0
|
3149 double dist,target,maxdiff,ex_c;
|
wolffd@0
|
3150
|
wolffd@0
|
3151 maxdiff=0;
|
wolffd@0
|
3152 maxdiffid=0;
|
wolffd@0
|
3153 for(ii=0;(i=active2dnum[ii])>=0;ii++) {
|
wolffd@0
|
3154 ex_c=learn_parm->svm_c-learn_parm->epsilon_a;
|
wolffd@0
|
3155 if(alphaslack[docs[i]->slackid] >= ex_c) {
|
wolffd@0
|
3156 dist=(lin[i])*(double)label[i]+slack[docs[i]->slackid]; /* distance */
|
wolffd@0
|
3157 target=-(learn_parm->eps-(double)label[i]*c[i]); /* rhs of constraint */
|
wolffd@0
|
3158 if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
|
wolffd@0
|
3159 if((dist-target)>maxdiff) { /* largest violation */
|
wolffd@0
|
3160 maxdiff=dist-target;
|
wolffd@0
|
3161 maxdiffid=docs[i]->slackid;
|
wolffd@0
|
3162 }
|
wolffd@0
|
3163 }
|
wolffd@0
|
3164 }
|
wolffd@0
|
3165 }
|
wolffd@0
|
3166 (*maxviol)=maxdiff;
|
wolffd@0
|
3167 return(maxdiffid);
|
wolffd@0
|
3168 }
|
wolffd@0
|
3169
|
wolffd@0
|
3170
|
wolffd@0
|
void select_top_n(double *selcrit, long int range, long int *select, 
		  long int n)
     /* Fill select[0..n-1] with the indices of the n largest entries
	of selcrit[0..range-1], ordered by decreasing criterion value.
	Partial insertion sort; ties keep the earlier-seen index ahead. */
{
  register long cand,slot;

  /* Seed the list with the first min(n,range) indices, inserting each
     at its sorted position. */
  for(cand=0;(cand<n) && (cand<range);cand++) {
    slot=cand;
    while((slot>0) && (selcrit[select[slot-1]]<selcrit[cand])) {
      select[slot]=select[slot-1];   /* shift smaller entries down */
      slot--;
    }
    select[slot]=cand;
  }
  /* Scan the rest; any index beating the current minimum is inserted
     and the old minimum drops off the end of the list. */
  if(n>0) {
    for(cand=n;cand<range;cand++) {
      if(selcrit[cand]>selcrit[select[n-1]]) {
	slot=n-1;
	while((slot>0) && (selcrit[select[slot-1]]<selcrit[cand])) {
	  select[slot]=select[slot-1];
	  slot--;
	}
	select[slot]=cand;
      }
    }
  }
}
|
wolffd@0
|
3203
|
wolffd@0
|
3204
|
wolffd@0
|
3205 /******************************** Shrinking *********************************/
|
wolffd@0
|
3206
|
wolffd@0
|
3207 void init_shrink_state(SHRINK_STATE *shrink_state, long int totdoc,
|
wolffd@0
|
3208 long int maxhistory)
|
wolffd@0
|
3209 {
|
wolffd@0
|
3210 long i;
|
wolffd@0
|
3211
|
wolffd@0
|
3212 shrink_state->deactnum=0;
|
wolffd@0
|
3213 shrink_state->active = (long *)my_malloc(sizeof(long)*totdoc);
|
wolffd@0
|
3214 shrink_state->inactive_since = (long *)my_malloc(sizeof(long)*totdoc);
|
wolffd@0
|
3215 shrink_state->a_history = (double **)my_malloc(sizeof(double *)*maxhistory);
|
wolffd@0
|
3216 shrink_state->maxhistory=maxhistory;
|
wolffd@0
|
3217 shrink_state->last_lin = (double *)my_malloc(sizeof(double)*totdoc);
|
wolffd@0
|
3218 shrink_state->last_a = (double *)my_malloc(sizeof(double)*totdoc);
|
wolffd@0
|
3219
|
wolffd@0
|
3220 for(i=0;i<totdoc;i++) {
|
wolffd@0
|
3221 shrink_state->active[i]=1;
|
wolffd@0
|
3222 shrink_state->inactive_since[i]=0;
|
wolffd@0
|
3223 shrink_state->last_a[i]=0;
|
wolffd@0
|
3224 shrink_state->last_lin[i]=0;
|
wolffd@0
|
3225 }
|
wolffd@0
|
3226 }
|
wolffd@0
|
3227
|
wolffd@0
|
3228 void shrink_state_cleanup(SHRINK_STATE *shrink_state)
|
wolffd@0
|
3229 {
|
wolffd@0
|
3230 free(shrink_state->active);
|
wolffd@0
|
3231 free(shrink_state->inactive_since);
|
wolffd@0
|
3232 if(shrink_state->deactnum > 0)
|
wolffd@0
|
3233 free(shrink_state->a_history[shrink_state->deactnum-1]);
|
wolffd@0
|
3234 free(shrink_state->a_history);
|
wolffd@0
|
3235 free(shrink_state->last_a);
|
wolffd@0
|
3236 free(shrink_state->last_lin);
|
wolffd@0
|
3237 }
|
wolffd@0
|
3238
|
wolffd@0
|
long shrink_problem(DOC **docs,
		    LEARN_PARM *learn_parm, 
		    SHRINK_STATE *shrink_state, 
		    KERNEL_PARM *kernel_parm,
		    long int *active2dnum, 
		    long int *last_suboptimal_at, 
		    long int iteration, 
		    long int totdoc, 
		    long int minshrink, 
		    double *a, 
		    long int *inconsistent)
     /* Shrink some variables away.  Do the shrinking only if at least
        minshrink variables can be removed. */
     /* Returns the number of variables that remain active. */
{
  long i,ii,change,activenum,lastiter;
  double *a_old;
  
  activenum=0;
  change=0;
  /* First pass: count shrink candidates, i.e. active variables that
     have been suboptimal at a bound for more than svm_iter_to_shrink
     iterations, or are marked inconsistent. */
  for(ii=0;active2dnum[ii]>=0;ii++) {
    i=active2dnum[ii];
    activenum++;
    if(learn_parm->sharedslack)
      lastiter=last_suboptimal_at[docs[i]->slackid]; /* bookkeeping is per slack set */
    else
      lastiter=last_suboptimal_at[i];
    if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink) 
       || (inconsistent[i])) {
      change++;
    }
  }
  if((change>=minshrink) /* shrink only if sufficiently many candidates */
     && (shrink_state->deactnum<shrink_state->maxhistory)) { /* and enough memory */
    /* Shrink problem by removing those variables which are */
    /* optimal at a bound for a minimum number of iterations */
    if(verbosity>=2) {
      printf(" Shrinking..."); fflush(stdout);
    }
    if(kernel_parm->kernel_type != LINEAR) { /* non-linear case save alphas */
      /* Snapshot the full alpha vector so lin[] of deactivated
	 variables can be reconstructed incrementally on reactivation. */
      a_old=(double *)my_malloc(sizeof(double)*totdoc);
      shrink_state->a_history[shrink_state->deactnum]=a_old;
      for(i=0;i<totdoc;i++) {
	a_old[i]=a[i];
      }
    }
    /* Second pass: deactivate the candidates found above and record
       the history epoch in which they were removed. */
    for(ii=0;active2dnum[ii]>=0;ii++) {
      i=active2dnum[ii];
      if(learn_parm->sharedslack)
	lastiter=last_suboptimal_at[docs[i]->slackid];
      else
	lastiter=last_suboptimal_at[i];
      if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink) 
	 || (inconsistent[i])) {
	shrink_state->active[i]=0;
	shrink_state->inactive_since[i]=shrink_state->deactnum;
      }
    }
    activenum=compute_index(shrink_state->active,totdoc,active2dnum);
    shrink_state->deactnum++;
    if(kernel_parm->kernel_type == LINEAR) { 
      shrink_state->deactnum=0; /* linear case keeps no alpha history
				   (last_a/last_lin are used instead) */
    }
    if(verbosity>=2) {
      printf("done.\n"); fflush(stdout);
      printf(" Number of inactive variables = %ld\n",totdoc-activenum);
    }
  }
  return(activenum);
}
|
wolffd@0
|
3308
|
wolffd@0
|
3309
|
wolffd@0
|
void reactivate_inactive_examples(long int *label, 
				  long int *unlabeled, 
				  double *a, 
				  SHRINK_STATE *shrink_state, 
				  double *lin, 
				  double *c, 
				  long int totdoc, 
				  long int totwords, 
				  long int iteration, 
				  LEARN_PARM *learn_parm, 
				  long int *inconsistent, 
				  DOC **docs, 
				  KERNEL_PARM *kernel_parm, 
				  KERNEL_CACHE *kernel_cache, 
				  MODEL *model, 
				  CFLOAT *aicache, 
				  double *weights, 
				  double *maxdiff)
     /* Make all variables active again which had been removed by
        shrinking. */
     /* Computes lin for those variables from scratch. */
     /* On return, *maxdiff holds the largest KKT violation found among
	the non-inconsistent variables. */
{
  register long i,j,ii,jj,t,*changed2dnum,*inactive2dnum;
  long *changed,*inactive;
  register double kernel_val,*a_old,dist;
  double ex_c,target;
  SVECTOR *f;

  if(kernel_parm->kernel_type == LINEAR) { /* special linear case */
    /* Fold all alpha changes since the last snapshot into the explicit
       weight vector, then refresh lin[] of the inactive examples by a
       single dot product each. */
    a_old=shrink_state->last_a;    
    clear_vector_n(weights,totwords);
    for(i=0;i<totdoc;i++) {
      if(a[i] != a_old[i]) {
	for(f=docs[i]->fvec;f;f=f->next)  
	  add_vector_ns(weights,f,
			f->factor*((a[i]-a_old[i])*(double)label[i]));
	a_old[i]=a[i];   /* snapshot is now current */
      }
    }
    for(i=0;i<totdoc;i++) {
      if(!shrink_state->active[i]) {
	for(f=docs[i]->fvec;f;f=f->next)  
	  lin[i]=shrink_state->last_lin[i]+f->factor*sprod_ns(weights,f);
      }
      shrink_state->last_lin[i]=lin[i];
    }
  }
  else {
    /* Non-linear case: walk the alpha history from the newest epoch to
       the oldest and, for the examples deactivated in each epoch,
       update lin[] with the kernel contributions of every alpha that
       changed since that epoch. */
    changed=(long *)my_malloc(sizeof(long)*totdoc);
    changed2dnum=(long *)my_malloc(sizeof(long)*(totdoc+11));
    inactive=(long *)my_malloc(sizeof(long)*totdoc);
    inactive2dnum=(long *)my_malloc(sizeof(long)*(totdoc+11));
    for(t=shrink_state->deactnum-1;(t>=0) && shrink_state->a_history[t];t--) {
      if(verbosity>=2) {
	printf("%ld..",t); fflush(stdout);
      }
      a_old=shrink_state->a_history[t];    
      for(i=0;i<totdoc;i++) {
	inactive[i]=((!shrink_state->active[i]) 
		     && (shrink_state->inactive_since[i] == t));
	changed[i]= (a[i] != a_old[i]);
      }
      compute_index(inactive,totdoc,inactive2dnum);
      compute_index(changed,totdoc,changed2dnum);
      
      for(ii=0;(i=changed2dnum[ii])>=0;ii++) {
	get_kernel_row(kernel_cache,docs,i,totdoc,inactive2dnum,aicache,
		       kernel_parm);
	for(jj=0;(j=inactive2dnum[jj])>=0;jj++) {
	  kernel_val=aicache[j];
	  /* delta of alpha_i, weighted by label and kernel value */
	  lin[j]+=(((a[i]*kernel_val)-(a_old[i]*kernel_val))*(double)label[i]);
	}
      }
    }
    free(changed);
    free(changed2dnum);
    free(inactive);
    free(inactive2dnum);
  }
  /* With lin[] up to date, re-check optimality of every variable and
     reactivate those that are no longer safely optimal at a bound. */
  (*maxdiff)=0;
  for(i=0;i<totdoc;i++) {
    shrink_state->inactive_since[i]=shrink_state->deactnum-1;
    if(!inconsistent[i]) {
      dist=(lin[i]-model->b)*(double)label[i];
      target=-(learn_parm->eps-(double)label[i]*c[i]);
      ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
      if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
	if((dist-target)>(*maxdiff))  /* largest violation */
	  (*maxdiff)=dist-target;
      }
      else if((a[i]<ex_c) && (dist < target)) {
	if((target-dist)>(*maxdiff))  /* largest violation */
	  (*maxdiff)=target-dist;
      }
      if((a[i]>(0+learn_parm->epsilon_a)) 
	 && (a[i]<ex_c)) { 
	shrink_state->active[i]=1;                         /* not at bound */
      }
      else if((a[i]<=(0+learn_parm->epsilon_a)) && (dist < (target+learn_parm->epsilon_shrink))) {
	shrink_state->active[i]=1;  /* at lower bound but near violation */
      }
      else if((a[i]>=ex_c)
	      && (dist > (target-learn_parm->epsilon_shrink))) {
	shrink_state->active[i]=1;  /* at upper bound but near violation */
      }
      else if(learn_parm->sharedslack) { /* make all active when sharedslack */
	shrink_state->active[i]=1;
      }
    }
  }
  if(kernel_parm->kernel_type != LINEAR) { /* update history for non-linear */
    /* Keep only the newest snapshot (refreshed to the current alphas);
       free all older epochs. */
    for(i=0;i<totdoc;i++) {
      (shrink_state->a_history[shrink_state->deactnum-1])[i]=a[i];
    }
    for(t=shrink_state->deactnum-2;(t>=0) && shrink_state->a_history[t];t--) {
      free(shrink_state->a_history[t]);
      shrink_state->a_history[t]=0;
    }
  }
}
|
wolffd@0
|
3430
|
wolffd@0
|
3431 /****************************** Cache handling *******************************/
|
wolffd@0
|
3432
|
wolffd@0
|
3433 void get_kernel_row(KERNEL_CACHE *kernel_cache, DOC **docs,
|
wolffd@0
|
3434 long int docnum, long int totdoc,
|
wolffd@0
|
3435 long int *active2dnum, CFLOAT *buffer,
|
wolffd@0
|
3436 KERNEL_PARM *kernel_parm)
|
wolffd@0
|
3437 /* Get's a row of the matrix of kernel values This matrix has the
|
wolffd@0
|
3438 same form as the Hessian, just that the elements are not
|
wolffd@0
|
3439 multiplied by */
|
wolffd@0
|
3440 /* y_i * y_j * a_i * a_j */
|
wolffd@0
|
3441 /* Takes the values from the cache if available. */
|
wolffd@0
|
3442 {
|
wolffd@0
|
3443 register long i,j,start;
|
wolffd@0
|
3444 DOC *ex;
|
wolffd@0
|
3445
|
wolffd@0
|
3446 ex=docs[docnum];
|
wolffd@0
|
3447
|
wolffd@0
|
3448 if(kernel_cache->index[docnum] != -1) { /* row is cached? */
|
wolffd@0
|
3449 kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */
|
wolffd@0
|
3450 start=kernel_cache->activenum*kernel_cache->index[docnum];
|
wolffd@0
|
3451 for(i=0;(j=active2dnum[i])>=0;i++) {
|
wolffd@0
|
3452 if(kernel_cache->totdoc2active[j] >= 0) { /* column is cached? */
|
wolffd@0
|
3453 buffer[j]=kernel_cache->buffer[start+kernel_cache->totdoc2active[j]];
|
wolffd@0
|
3454 }
|
wolffd@0
|
3455 else {
|
wolffd@0
|
3456 buffer[j]=(CFLOAT)kernel(kernel_parm,ex,docs[j]);
|
wolffd@0
|
3457 }
|
wolffd@0
|
3458 }
|
wolffd@0
|
3459 }
|
wolffd@0
|
3460 else {
|
wolffd@0
|
3461 for(i=0;(j=active2dnum[i])>=0;i++) {
|
wolffd@0
|
3462 buffer[j]=(CFLOAT)kernel(kernel_parm,ex,docs[j]);
|
wolffd@0
|
3463 }
|
wolffd@0
|
3464 }
|
wolffd@0
|
3465 }
|
wolffd@0
|
3466
|
wolffd@0
|
3467
|
wolffd@0
|
3468 void cache_kernel_row(KERNEL_CACHE *kernel_cache, DOC **docs,
|
wolffd@0
|
3469 long int m, KERNEL_PARM *kernel_parm)
|
wolffd@0
|
3470 /* Fills cache for the row m */
|
wolffd@0
|
3471 {
|
wolffd@0
|
3472 register DOC *ex;
|
wolffd@0
|
3473 register long j,k,l;
|
wolffd@0
|
3474 register CFLOAT *cache;
|
wolffd@0
|
3475
|
wolffd@0
|
3476 if(!kernel_cache_check(kernel_cache,m)) { /* not cached yet*/
|
wolffd@0
|
3477 cache = kernel_cache_clean_and_malloc(kernel_cache,m);
|
wolffd@0
|
3478 if(cache) {
|
wolffd@0
|
3479 l=kernel_cache->totdoc2active[m];
|
wolffd@0
|
3480 ex=docs[m];
|
wolffd@0
|
3481 for(j=0;j<kernel_cache->activenum;j++) { /* fill cache */
|
wolffd@0
|
3482 k=kernel_cache->active2totdoc[j];
|
wolffd@0
|
3483 if((kernel_cache->index[k] != -1) && (l != -1) && (k != m)) {
|
wolffd@0
|
3484 cache[j]=kernel_cache->buffer[kernel_cache->activenum
|
wolffd@0
|
3485 *kernel_cache->index[k]+l];
|
wolffd@0
|
3486 }
|
wolffd@0
|
3487 else {
|
wolffd@0
|
3488 cache[j]=kernel(kernel_parm,ex,docs[k]);
|
wolffd@0
|
3489 }
|
wolffd@0
|
3490 }
|
wolffd@0
|
3491 }
|
wolffd@0
|
3492 else {
|
wolffd@0
|
3493 perror("Error: Kernel cache full! => increase cache size");
|
wolffd@0
|
3494 }
|
wolffd@0
|
3495 }
|
wolffd@0
|
3496 }
|
wolffd@0
|
3497
|
wolffd@0
|
3498
|
wolffd@0
|
3499 void cache_multiple_kernel_rows(KERNEL_CACHE *kernel_cache, DOC **docs,
|
wolffd@0
|
3500 long int *key, long int varnum,
|
wolffd@0
|
3501 KERNEL_PARM *kernel_parm)
|
wolffd@0
|
3502 /* Fills cache for the rows in key */
|
wolffd@0
|
3503 {
|
wolffd@0
|
3504 register long i;
|
wolffd@0
|
3505
|
wolffd@0
|
3506 for(i=0;i<varnum;i++) { /* fill up kernel cache */
|
wolffd@0
|
3507 cache_kernel_row(kernel_cache,docs,key[i],kernel_parm);
|
wolffd@0
|
3508 }
|
wolffd@0
|
3509 }
|
wolffd@0
|
3510
|
wolffd@0
|
3511
|
wolffd@0
|
3512 void kernel_cache_shrink(KERNEL_CACHE *kernel_cache, long int totdoc,
|
wolffd@0
|
3513 long int numshrink, long int *after)
|
wolffd@0
|
3514 /* Remove numshrink columns in the cache which correspond to
|
wolffd@0
|
3515 examples marked 0 in after. */
|
wolffd@0
|
3516 {
|
wolffd@0
|
3517 register long i,j,jj,from=0,to=0,scount;
|
wolffd@0
|
3518 long *keep;
|
wolffd@0
|
3519
|
wolffd@0
|
3520 if(verbosity>=2) {
|
wolffd@0
|
3521 printf(" Reorganizing cache..."); fflush(stdout);
|
wolffd@0
|
3522 }
|
wolffd@0
|
3523
|
wolffd@0
|
3524 keep=(long *)my_malloc(sizeof(long)*totdoc);
|
wolffd@0
|
3525 for(j=0;j<totdoc;j++) {
|
wolffd@0
|
3526 keep[j]=1;
|
wolffd@0
|
3527 }
|
wolffd@0
|
3528 scount=0;
|
wolffd@0
|
3529 for(jj=0;(jj<kernel_cache->activenum) && (scount<numshrink);jj++) {
|
wolffd@0
|
3530 j=kernel_cache->active2totdoc[jj];
|
wolffd@0
|
3531 if(!after[j]) {
|
wolffd@0
|
3532 scount++;
|
wolffd@0
|
3533 keep[j]=0;
|
wolffd@0
|
3534 }
|
wolffd@0
|
3535 }
|
wolffd@0
|
3536
|
wolffd@0
|
3537 for(i=0;i<kernel_cache->max_elems;i++) {
|
wolffd@0
|
3538 for(jj=0;jj<kernel_cache->activenum;jj++) {
|
wolffd@0
|
3539 j=kernel_cache->active2totdoc[jj];
|
wolffd@0
|
3540 if(!keep[j]) {
|
wolffd@0
|
3541 from++;
|
wolffd@0
|
3542 }
|
wolffd@0
|
3543 else {
|
wolffd@0
|
3544 kernel_cache->buffer[to]=kernel_cache->buffer[from];
|
wolffd@0
|
3545 to++;
|
wolffd@0
|
3546 from++;
|
wolffd@0
|
3547 }
|
wolffd@0
|
3548 }
|
wolffd@0
|
3549 }
|
wolffd@0
|
3550
|
wolffd@0
|
3551 kernel_cache->activenum=0;
|
wolffd@0
|
3552 for(j=0;j<totdoc;j++) {
|
wolffd@0
|
3553 if((keep[j]) && (kernel_cache->totdoc2active[j] != -1)) {
|
wolffd@0
|
3554 kernel_cache->active2totdoc[kernel_cache->activenum]=j;
|
wolffd@0
|
3555 kernel_cache->totdoc2active[j]=kernel_cache->activenum;
|
wolffd@0
|
3556 kernel_cache->activenum++;
|
wolffd@0
|
3557 }
|
wolffd@0
|
3558 else {
|
wolffd@0
|
3559 kernel_cache->totdoc2active[j]=-1;
|
wolffd@0
|
3560 }
|
wolffd@0
|
3561 }
|
wolffd@0
|
3562
|
wolffd@0
|
3563 kernel_cache->max_elems=(long)(kernel_cache->buffsize/kernel_cache->activenum);
|
wolffd@0
|
3564 if(kernel_cache->max_elems>totdoc) {
|
wolffd@0
|
3565 kernel_cache->max_elems=totdoc;
|
wolffd@0
|
3566 }
|
wolffd@0
|
3567
|
wolffd@0
|
3568 free(keep);
|
wolffd@0
|
3569
|
wolffd@0
|
3570 if(verbosity>=2) {
|
wolffd@0
|
3571 printf("done.\n"); fflush(stdout);
|
wolffd@0
|
3572 printf(" Cache-size in rows = %ld\n",kernel_cache->max_elems);
|
wolffd@0
|
3573 }
|
wolffd@0
|
3574 }
|
wolffd@0
|
3575
|
wolffd@0
|
3576 KERNEL_CACHE *kernel_cache_init(long int totdoc, long int buffsize)
|
wolffd@0
|
3577 {
|
wolffd@0
|
3578 long i;
|
wolffd@0
|
3579 KERNEL_CACHE *kernel_cache;
|
wolffd@0
|
3580
|
wolffd@0
|
3581 kernel_cache=(KERNEL_CACHE *)my_malloc(sizeof(KERNEL_CACHE));
|
wolffd@0
|
3582 kernel_cache->index = (long *)my_malloc(sizeof(long)*totdoc);
|
wolffd@0
|
3583 kernel_cache->occu = (long *)my_malloc(sizeof(long)*totdoc);
|
wolffd@0
|
3584 kernel_cache->lru = (long *)my_malloc(sizeof(long)*totdoc);
|
wolffd@0
|
3585 kernel_cache->invindex = (long *)my_malloc(sizeof(long)*totdoc);
|
wolffd@0
|
3586 kernel_cache->active2totdoc = (long *)my_malloc(sizeof(long)*totdoc);
|
wolffd@0
|
3587 kernel_cache->totdoc2active = (long *)my_malloc(sizeof(long)*totdoc);
|
wolffd@0
|
3588 kernel_cache->buffer = (CFLOAT *)my_malloc((size_t)(buffsize)*1024*1024);
|
wolffd@0
|
3589
|
wolffd@0
|
3590 kernel_cache->buffsize=(long)(buffsize/sizeof(CFLOAT)*1024*1024);
|
wolffd@0
|
3591
|
wolffd@0
|
3592 kernel_cache->max_elems=(long)(kernel_cache->buffsize/totdoc);
|
wolffd@0
|
3593 if(kernel_cache->max_elems>totdoc) {
|
wolffd@0
|
3594 kernel_cache->max_elems=totdoc;
|
wolffd@0
|
3595 }
|
wolffd@0
|
3596
|
wolffd@0
|
3597 if(verbosity>=2) {
|
wolffd@0
|
3598 printf(" Cache-size in rows = %ld\n",kernel_cache->max_elems);
|
wolffd@0
|
3599 printf(" Kernel evals so far: %ld\n",kernel_cache_statistic);
|
wolffd@0
|
3600 }
|
wolffd@0
|
3601
|
wolffd@0
|
3602 kernel_cache->elems=0; /* initialize cache */
|
wolffd@0
|
3603 for(i=0;i<totdoc;i++) {
|
wolffd@0
|
3604 kernel_cache->index[i]=-1;
|
wolffd@0
|
3605 kernel_cache->lru[i]=0;
|
wolffd@0
|
3606 }
|
wolffd@0
|
3607 for(i=0;i<totdoc;i++) {
|
wolffd@0
|
3608 kernel_cache->occu[i]=0;
|
wolffd@0
|
3609 kernel_cache->invindex[i]=-1;
|
wolffd@0
|
3610 }
|
wolffd@0
|
3611
|
wolffd@0
|
3612 kernel_cache->activenum=totdoc;;
|
wolffd@0
|
3613 for(i=0;i<totdoc;i++) {
|
wolffd@0
|
3614 kernel_cache->active2totdoc[i]=i;
|
wolffd@0
|
3615 kernel_cache->totdoc2active[i]=i;
|
wolffd@0
|
3616 }
|
wolffd@0
|
3617
|
wolffd@0
|
3618 kernel_cache->time=0;
|
wolffd@0
|
3619
|
wolffd@0
|
3620 return(kernel_cache);
|
wolffd@0
|
3621 }
|
wolffd@0
|
3622
|
wolffd@0
|
3623 void kernel_cache_reset_lru(KERNEL_CACHE *kernel_cache)
|
wolffd@0
|
3624 {
|
wolffd@0
|
3625 long maxlru=0,k;
|
wolffd@0
|
3626
|
wolffd@0
|
3627 for(k=0;k<kernel_cache->max_elems;k++) {
|
wolffd@0
|
3628 if(maxlru < kernel_cache->lru[k])
|
wolffd@0
|
3629 maxlru=kernel_cache->lru[k];
|
wolffd@0
|
3630 }
|
wolffd@0
|
3631 for(k=0;k<kernel_cache->max_elems;k++) {
|
wolffd@0
|
3632 kernel_cache->lru[k]-=maxlru;
|
wolffd@0
|
3633 }
|
wolffd@0
|
3634 }
|
wolffd@0
|
3635
|
wolffd@0
|
3636 void kernel_cache_cleanup(KERNEL_CACHE *kernel_cache)
|
wolffd@0
|
3637 {
|
wolffd@0
|
3638 free(kernel_cache->index);
|
wolffd@0
|
3639 free(kernel_cache->occu);
|
wolffd@0
|
3640 free(kernel_cache->lru);
|
wolffd@0
|
3641 free(kernel_cache->invindex);
|
wolffd@0
|
3642 free(kernel_cache->active2totdoc);
|
wolffd@0
|
3643 free(kernel_cache->totdoc2active);
|
wolffd@0
|
3644 free(kernel_cache->buffer);
|
wolffd@0
|
3645 free(kernel_cache);
|
wolffd@0
|
3646 }
|
wolffd@0
|
3647
|
wolffd@0
|
3648 long kernel_cache_malloc(KERNEL_CACHE *kernel_cache)
|
wolffd@0
|
3649 {
|
wolffd@0
|
3650 long i;
|
wolffd@0
|
3651
|
wolffd@0
|
3652 if(kernel_cache_space_available(kernel_cache)) {
|
wolffd@0
|
3653 for(i=0;i<kernel_cache->max_elems;i++) {
|
wolffd@0
|
3654 if(!kernel_cache->occu[i]) {
|
wolffd@0
|
3655 kernel_cache->occu[i]=1;
|
wolffd@0
|
3656 kernel_cache->elems++;
|
wolffd@0
|
3657 return(i);
|
wolffd@0
|
3658 }
|
wolffd@0
|
3659 }
|
wolffd@0
|
3660 }
|
wolffd@0
|
3661 return(-1);
|
wolffd@0
|
3662 }
|
wolffd@0
|
3663
|
wolffd@0
|
3664 void kernel_cache_free(KERNEL_CACHE *kernel_cache, long int i)
|
wolffd@0
|
3665 {
|
wolffd@0
|
3666 kernel_cache->occu[i]=0;
|
wolffd@0
|
3667 kernel_cache->elems--;
|
wolffd@0
|
3668 }
|
wolffd@0
|
3669
|
wolffd@0
|
3670 long kernel_cache_free_lru(KERNEL_CACHE *kernel_cache)
|
wolffd@0
|
3671 /* remove least recently used cache element */
|
wolffd@0
|
3672 {
|
wolffd@0
|
3673 register long k,least_elem=-1,least_time;
|
wolffd@0
|
3674
|
wolffd@0
|
3675 least_time=kernel_cache->time+1;
|
wolffd@0
|
3676 for(k=0;k<kernel_cache->max_elems;k++) {
|
wolffd@0
|
3677 if(kernel_cache->invindex[k] != -1) {
|
wolffd@0
|
3678 if(kernel_cache->lru[k]<least_time) {
|
wolffd@0
|
3679 least_time=kernel_cache->lru[k];
|
wolffd@0
|
3680 least_elem=k;
|
wolffd@0
|
3681 }
|
wolffd@0
|
3682 }
|
wolffd@0
|
3683 }
|
wolffd@0
|
3684 if(least_elem != -1) {
|
wolffd@0
|
3685 kernel_cache_free(kernel_cache,least_elem);
|
wolffd@0
|
3686 kernel_cache->index[kernel_cache->invindex[least_elem]]=-1;
|
wolffd@0
|
3687 kernel_cache->invindex[least_elem]=-1;
|
wolffd@0
|
3688 return(1);
|
wolffd@0
|
3689 }
|
wolffd@0
|
3690 return(0);
|
wolffd@0
|
3691 }
|
wolffd@0
|
3692
|
wolffd@0
|
3693
|
wolffd@0
|
3694 CFLOAT *kernel_cache_clean_and_malloc(KERNEL_CACHE *kernel_cache,
|
wolffd@0
|
3695 long int docnum)
|
wolffd@0
|
3696 /* Get a free cache entry. In case cache is full, the lru element
|
wolffd@0
|
3697 is removed. */
|
wolffd@0
|
3698 {
|
wolffd@0
|
3699 long result;
|
wolffd@0
|
3700 if((result = kernel_cache_malloc(kernel_cache)) == -1) {
|
wolffd@0
|
3701 if(kernel_cache_free_lru(kernel_cache)) {
|
wolffd@0
|
3702 result = kernel_cache_malloc(kernel_cache);
|
wolffd@0
|
3703 }
|
wolffd@0
|
3704 }
|
wolffd@0
|
3705 kernel_cache->index[docnum]=result;
|
wolffd@0
|
3706 if(result == -1) {
|
wolffd@0
|
3707 return(0);
|
wolffd@0
|
3708 }
|
wolffd@0
|
3709 kernel_cache->invindex[result]=docnum;
|
wolffd@0
|
3710 kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */
|
wolffd@0
|
3711 return((CFLOAT *)((long)kernel_cache->buffer
|
wolffd@0
|
3712 +(kernel_cache->activenum*sizeof(CFLOAT)*
|
wolffd@0
|
3713 kernel_cache->index[docnum])));
|
wolffd@0
|
3714 }
|
wolffd@0
|
3715
|
wolffd@0
|
3716 long kernel_cache_touch(KERNEL_CACHE *kernel_cache, long int docnum)
|
wolffd@0
|
3717 /* Update lru time to avoid removal from cache. */
|
wolffd@0
|
3718 {
|
wolffd@0
|
3719 if(kernel_cache && kernel_cache->index[docnum] != -1) {
|
wolffd@0
|
3720 kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */
|
wolffd@0
|
3721 return(1);
|
wolffd@0
|
3722 }
|
wolffd@0
|
3723 return(0);
|
wolffd@0
|
3724 }
|
wolffd@0
|
3725
|
wolffd@0
|
3726 long kernel_cache_check(KERNEL_CACHE *kernel_cache, long int docnum)
|
wolffd@0
|
3727 /* Is that row cached? */
|
wolffd@0
|
3728 {
|
wolffd@0
|
3729 return(kernel_cache->index[docnum] != -1);
|
wolffd@0
|
3730 }
|
wolffd@0
|
3731
|
wolffd@0
|
3732 long kernel_cache_space_available(KERNEL_CACHE *kernel_cache)
|
wolffd@0
|
3733 /* Is there room for one more row? */
|
wolffd@0
|
3734 {
|
wolffd@0
|
3735 return(kernel_cache->elems < kernel_cache->max_elems);
|
wolffd@0
|
3736 }
|
wolffd@0
|
3737
|
wolffd@0
|
3738 /************************** Compute estimates ******************************/
|
wolffd@0
|
3739
|
wolffd@0
|
void compute_xa_estimates(MODEL *model, long int *label,
			  long int *unlabeled, long int totdoc,
			  DOC **docs, double *lin, double *a,
			  KERNEL_PARM *kernel_parm,
			  LEARN_PARM *learn_parm, double *error,
			  double *recall, double *precision)
/* Computes xa-estimate of error rate, recall, and precision. See
   T. Joachims, Estimating the Generalization Performance of an SVM
   Efficiently, IMCL, 2000.
   Results are written (as percentages) through *error, *recall and
   *precision. Unlabeled examples (transduction) are skipped.
   NOTE(review): if there are no labeled examples, or no positive
   labeled examples, the divisions by totex/totposex below divide by
   zero -- callers presumably guarantee both are nonzero; confirm. */
{
  long i,looerror,looposerror,loonegerror;
  long totex,totposex;
  double xi,r_delta,r_delta_sq,sim=0;
  long *sv2dnum=NULL,*sv=NULL,svnum;

  /* radius of a ball containing the data, squared */
  r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
  r_delta_sq=r_delta*r_delta;

  looerror=0;
  looposerror=0;
  loonegerror=0;
  totex=0;
  totposex=0;
  svnum=0;

  if(learn_parm->xa_depth > 0) {
    /* mark the unbounded support vectors (alpha below cost-epsilon_a)
       and build a dense index over them for the greedy refinement */
    sv = (long *)my_malloc(sizeof(long)*(totdoc+11));
    for(i=0;i<totdoc;i++)
      sv[i]=0;
    for(i=1;i<model->sv_num;i++)
      if(a[model->supvec[i]->docnum]
	 < (learn_parm->svm_cost[model->supvec[i]->docnum]
	    -learn_parm->epsilon_a)) {
	sv[model->supvec[i]->docnum]=1;
	svnum++;
      }
    sv2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
    clear_index(sv2dnum);
    compute_index(sv,totdoc,sv2dnum);
  }

  for(i=0;i<totdoc;i++) {
    if(unlabeled[i]) {
      /* ignore it */
    }
    else {
      /* slack of example i: xi = max(0, 1 - y_i*(w*x_i - b)) */
      xi=1.0-((lin[i]-model->b)*(double)label[i]);
      if(xi<0) xi=0;
      if(label[i]>0) {
	totposex++;
      }
      /* xi-alpha criterion: count example as potential loo error */
      if((learn_parm->rho*a[i]*r_delta_sq+xi) >= 1.0) {
	if(learn_parm->xa_depth > 0) { /* makes assumptions */
	  /* refine the bound by greedily redistributing alpha_i */
	  sim=distribute_alpha_t_greedily(sv2dnum,svnum,docs,a,i,label,
					  kernel_parm,learn_parm,
			  (double)((1.0-xi-a[i]*r_delta_sq)/(2.0*a[i])));
	}
	if((learn_parm->xa_depth == 0) ||
	   ((a[i]*kernel(kernel_parm,docs[i],docs[i])+a[i]*2.0*sim+xi) >= 1.0)) {
	  looerror++;
	  if(label[i]>0) {
	    looposerror++;
	  }
	  else {
	    loonegerror++;
	  }
	}
      }
      totex++;
    }
  }

  /* convert loo error counts into percentage estimates */
  (*error)=((double)looerror/(double)totex)*100.0;
  (*recall)=(1.0-(double)looposerror/(double)totposex)*100.0;
  (*precision)=(((double)totposex-(double)looposerror)
		/((double)totposex-(double)looposerror+(double)loonegerror))*100.0;

  free(sv);
  free(sv2dnum);
}
|
wolffd@0
|
3820
|
wolffd@0
|
3821
|
wolffd@0
|
double distribute_alpha_t_greedily(long int *sv2dnum, long int svnum,
				   DOC **docs, double *a,
				   long int docnum,
				   long int *label,
				   KERNEL_PARM *kernel_parm,
				   LEARN_PARM *learn_parm, double thresh)
/* Experimental Code improving plain XiAlpha Estimates by
   computing a better bound using a greedy optimzation strategy.
   Greedily picks up to learn_parm->xa_depth support vectors (from the
   index sv2dnum[0..svnum-1], excluding docnum itself) that minimize
   the objective value `val`, stopping early once the best value drops
   below thresh. Returns the best value found.
   NOTE(review): best_val/best_ex are fixed at 101 entries, so this
   assumes learn_parm->xa_depth <= 100 -- confirm the option parser
   enforces that bound. */
{
  long best_depth=0;
  long i,j,k,d,skip,allskip;
  double best,best_val[101],val,init_val_sq,init_val_lin;
  long best_ex[101];
  CFLOAT *cache,*trow;

  /* cache: one kernel row per chosen depth; trow: kernels of docnum
     against all candidate support vectors */
  cache=(CFLOAT *)my_malloc(sizeof(CFLOAT)*learn_parm->xa_depth*svnum);
  trow = (CFLOAT *)my_malloc(sizeof(CFLOAT)*svnum);

  for(k=0;k<svnum;k++) {
    trow[k]=kernel(kernel_parm,docs[docnum],docs[sv2dnum[k]]);
  }

  init_val_sq=0;
  init_val_lin=0;
  best=0;

  for(d=0;d<learn_parm->xa_depth;d++) {
    allskip=1;
    if(d>=1) {
      /* fold the previously chosen example into the running sums */
      init_val_sq+=cache[best_ex[d-1]+svnum*(d-1)];
      for(k=0;k<d-1;k++) {
	init_val_sq+=2.0*cache[best_ex[k]+svnum*(d-1)];
      }
      init_val_lin+=trow[best_ex[d-1]];
    }
    for(i=0;i<svnum;i++) {
      /* skip docnum itself and anything already chosen */
      skip=0;
      if(sv2dnum[i] == docnum) skip=1;
      for(j=0;j<d;j++) {
	if(i == best_ex[j]) skip=1;
      }

      if(!skip) {
	val=init_val_sq;
	if(kernel_parm->kernel_type == LINEAR)
	  val+=docs[sv2dnum[i]]->fvec->twonorm_sq;  /* K(x,x) precomputed */
	else
	  val+=kernel(kernel_parm,docs[sv2dnum[i]],docs[sv2dnum[i]]);
	for(j=0;j<d;j++) {
	  val+=2.0*cache[i+j*svnum];
	}
	val*=(1.0/(2.0*(d+1.0)*(d+1.0)));
	val-=((init_val_lin+trow[i])/(d+1.0));

	if(allskip || (val < best_val[d])) {
	  best_val[d]=val;
	  best_ex[d]=i;
	}
	allskip=0;
	if(val < thresh) {
	  i=svnum;  /* early exit from candidate scan */
	  /* printf("EARLY"); */
	}
      }
    }
    if(!allskip) {
      /* cache the kernel row of this depth's chosen example */
      for(k=0;k<svnum;k++) {
	cache[d*svnum+k]=kernel(kernel_parm,
				docs[sv2dnum[best_ex[d]]],
				docs[sv2dnum[k]]);
      }
    }
    if((!allskip) && ((best_val[d] < best) || (d == 0))) {
      best=best_val[d];
      best_depth=d;
    }
    if(allskip || (best < thresh)) {
      d=learn_parm->xa_depth;  /* terminate the depth loop */
    }
  }

  free(cache);
  free(trow);

  /* printf("Distribute[%ld](%ld)=%f, ",docnum,best_depth,best); */
  return(best);
}
|
wolffd@0
|
3909
|
wolffd@0
|
3910
|
wolffd@0
|
void estimate_transduction_quality(MODEL *model, long int *label,
				   long int *unlabeled,
				   long int totdoc, DOC **docs, double *lin)
/* Loo-bound based on observation that loo-errors must have an
   equal distribution in both training and test examples, given
   that the test examples are classified correctly. Compare
   chapter "Constraints on the Transductive Hyperplane" in my
   Dissertation.
   Prints xi-alpha statistics split by labeled/unlabeled examples.
   NOTE(review): the printfs divide by totlab and totulab -- assumes
   both labeled and unlabeled examples exist; confirm callers. */
{
  long i,j,l=0,ulab=0,lab=0,labpos=0,labneg=0,ulabpos=0,ulabneg=0,totulab=0;
  double totlab=0,totlabpos=0,totlabneg=0,labsum=0,ulabsum=0;
  double r_delta,r_delta_sq,xi,xisum=0,asum=0;

  r_delta=estimate_r_delta(docs,totdoc,&(model->kernel_parm));
  r_delta_sq=r_delta*r_delta;

  /* count labeled vs. unlabeled examples and label polarity */
  for(j=0;j<totdoc;j++) {
    if(unlabeled[j]) {
      totulab++;
    }
    else {
      totlab++;
      if(label[j] > 0)
	totlabpos++;
      else
	totlabneg++;
    }
  }
  /* accumulate the xi-alpha criterion over all support vectors */
  for(j=1;j<model->sv_num;j++) {
    i=model->supvec[j]->docnum;
    /* slack: xi = max(0, 1 - y_i*(w*x_i - b)) */
    xi=1.0-((lin[i]-model->b)*(double)label[i]);
    if(xi<0) xi=0;

    xisum+=xi;
    asum+=fabs(model->alpha[j]);
    if(unlabeled[i]) {
      ulabsum+=(fabs(model->alpha[j])*r_delta_sq+xi);
    }
    else {
      labsum+=(fabs(model->alpha[j])*r_delta_sq+xi);
    }
    /* criterion >= 1 marks a potential leave-one-out error */
    if((fabs(model->alpha[j])*r_delta_sq+xi) >= 1) {
      l++;
      if(unlabeled[model->supvec[j]->docnum]) {
	ulab++;
	if(model->alpha[j] > 0)
	  ulabpos++;
	else
	  ulabneg++;
      }
      else {
	lab++;
	if(model->alpha[j] > 0)
	  labpos++;
	else
	  labneg++;
      }
    }
  }
  printf("xacrit>=1: labeledpos=%.5f labeledneg=%.5f default=%.5f\n",(double)labpos/(double)totlab*100.0,(double)labneg/(double)totlab*100.0,(double)totlabpos/(double)(totlab)*100.0);
  printf("xacrit>=1: unlabelpos=%.5f unlabelneg=%.5f\n",(double)ulabpos/(double)totulab*100.0,(double)ulabneg/(double)totulab*100.0);
  printf("xacrit>=1: labeled=%.5f unlabled=%.5f all=%.5f\n",(double)lab/(double)totlab*100.0,(double)ulab/(double)totulab*100.0,(double)l/(double)(totdoc)*100.0);
  printf("xacritsum: labeled=%.5f unlabled=%.5f all=%.5f\n",(double)labsum/(double)totlab*100.0,(double)ulabsum/(double)totulab*100.0,(double)(labsum+ulabsum)/(double)(totdoc)*100.0);
  printf("r_delta_sq=%.5f xisum=%.5f asum=%.5f\n",r_delta_sq,xisum,asum);
}
|
wolffd@0
|
3976
|
wolffd@0
|
3977 double estimate_margin_vcdim(MODEL *model, double w, double R,
|
wolffd@0
|
3978 KERNEL_PARM *kernel_parm)
|
wolffd@0
|
3979 /* optional: length of model vector in feature space */
|
wolffd@0
|
3980 /* optional: radius of ball containing the data */
|
wolffd@0
|
3981 {
|
wolffd@0
|
3982 double h;
|
wolffd@0
|
3983
|
wolffd@0
|
3984 /* follows chapter 5.6.4 in [Vapnik/95] */
|
wolffd@0
|
3985
|
wolffd@0
|
3986 if(w<0) {
|
wolffd@0
|
3987 w=model_length_s(model,kernel_parm);
|
wolffd@0
|
3988 }
|
wolffd@0
|
3989 if(R<0) {
|
wolffd@0
|
3990 R=estimate_sphere(model,kernel_parm);
|
wolffd@0
|
3991 }
|
wolffd@0
|
3992 h = w*w * R*R +1;
|
wolffd@0
|
3993 return(h);
|
wolffd@0
|
3994 }
|
wolffd@0
|
3995
|
wolffd@0
|
3996 double estimate_sphere(MODEL *model, KERNEL_PARM *kernel_parm)
|
wolffd@0
|
3997 /* Approximates the radius of the ball containing */
|
wolffd@0
|
3998 /* the support vectors by bounding it with the */
|
wolffd@0
|
3999 { /* length of the longest support vector. This is */
|
wolffd@0
|
4000 register long j; /* pretty good for text categorization, since all */
|
wolffd@0
|
4001 double xlen,maxxlen=0; /* documents have feature vectors of length 1. It */
|
wolffd@0
|
4002 DOC *nulldoc; /* assumes that the center of the ball is at the */
|
wolffd@0
|
4003 WORD nullword; /* origin of the space. */
|
wolffd@0
|
4004
|
wolffd@0
|
4005 nullword.wnum=0;
|
wolffd@0
|
4006 nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
|
wolffd@0
|
4007
|
wolffd@0
|
4008 for(j=1;j<model->sv_num;j++) {
|
wolffd@0
|
4009 xlen=sqrt(kernel(kernel_parm,model->supvec[j],model->supvec[j])
|
wolffd@0
|
4010 -2*kernel(kernel_parm,model->supvec[j],nulldoc)
|
wolffd@0
|
4011 +kernel(kernel_parm,nulldoc,nulldoc));
|
wolffd@0
|
4012 if(xlen>maxxlen) {
|
wolffd@0
|
4013 maxxlen=xlen;
|
wolffd@0
|
4014 }
|
wolffd@0
|
4015 }
|
wolffd@0
|
4016
|
wolffd@0
|
4017 free_example(nulldoc,1);
|
wolffd@0
|
4018 return(maxxlen);
|
wolffd@0
|
4019 }
|
wolffd@0
|
4020
|
wolffd@0
|
4021 double estimate_r_delta(DOC **docs, long int totdoc, KERNEL_PARM *kernel_parm)
|
wolffd@0
|
4022 {
|
wolffd@0
|
4023 long i;
|
wolffd@0
|
4024 double maxxlen,xlen;
|
wolffd@0
|
4025 DOC *nulldoc; /* assumes that the center of the ball is at the */
|
wolffd@0
|
4026 WORD nullword; /* origin of the space. */
|
wolffd@0
|
4027
|
wolffd@0
|
4028 nullword.wnum=0;
|
wolffd@0
|
4029 nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
|
wolffd@0
|
4030
|
wolffd@0
|
4031 maxxlen=0;
|
wolffd@0
|
4032 for(i=0;i<totdoc;i++) {
|
wolffd@0
|
4033 xlen=sqrt(kernel(kernel_parm,docs[i],docs[i])
|
wolffd@0
|
4034 -2*kernel(kernel_parm,docs[i],nulldoc)
|
wolffd@0
|
4035 +kernel(kernel_parm,nulldoc,nulldoc));
|
wolffd@0
|
4036 if(xlen>maxxlen) {
|
wolffd@0
|
4037 maxxlen=xlen;
|
wolffd@0
|
4038 }
|
wolffd@0
|
4039 }
|
wolffd@0
|
4040
|
wolffd@0
|
4041 free_example(nulldoc,1);
|
wolffd@0
|
4042 return(maxxlen);
|
wolffd@0
|
4043 }
|
wolffd@0
|
4044
|
wolffd@0
|
4045 double estimate_r_delta_average(DOC **docs, long int totdoc,
|
wolffd@0
|
4046 KERNEL_PARM *kernel_parm)
|
wolffd@0
|
4047 {
|
wolffd@0
|
4048 long i;
|
wolffd@0
|
4049 double avgxlen;
|
wolffd@0
|
4050 DOC *nulldoc; /* assumes that the center of the ball is at the */
|
wolffd@0
|
4051 WORD nullword; /* origin of the space. */
|
wolffd@0
|
4052
|
wolffd@0
|
4053 nullword.wnum=0;
|
wolffd@0
|
4054 nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
|
wolffd@0
|
4055
|
wolffd@0
|
4056 avgxlen=0;
|
wolffd@0
|
4057 for(i=0;i<totdoc;i++) {
|
wolffd@0
|
4058 avgxlen+=sqrt(kernel(kernel_parm,docs[i],docs[i])
|
wolffd@0
|
4059 -2*kernel(kernel_parm,docs[i],nulldoc)
|
wolffd@0
|
4060 +kernel(kernel_parm,nulldoc,nulldoc));
|
wolffd@0
|
4061 }
|
wolffd@0
|
4062
|
wolffd@0
|
4063 free_example(nulldoc,1);
|
wolffd@0
|
4064 return(avgxlen/totdoc);
|
wolffd@0
|
4065 }
|
wolffd@0
|
4066
|
wolffd@0
|
4067 double length_of_longest_document_vector(DOC **docs, long int totdoc,
|
wolffd@0
|
4068 KERNEL_PARM *kernel_parm)
|
wolffd@0
|
4069 {
|
wolffd@0
|
4070 long i;
|
wolffd@0
|
4071 double maxxlen,xlen;
|
wolffd@0
|
4072
|
wolffd@0
|
4073 maxxlen=0;
|
wolffd@0
|
4074 for(i=0;i<totdoc;i++) {
|
wolffd@0
|
4075 xlen=sqrt(kernel(kernel_parm,docs[i],docs[i]));
|
wolffd@0
|
4076 if(xlen>maxxlen) {
|
wolffd@0
|
4077 maxxlen=xlen;
|
wolffd@0
|
4078 }
|
wolffd@0
|
4079 }
|
wolffd@0
|
4080
|
wolffd@0
|
4081 return(maxxlen);
|
wolffd@0
|
4082 }
|
wolffd@0
|
4083
|
wolffd@0
|
4084 /****************************** IO-handling **********************************/
|
wolffd@0
|
4085
|
wolffd@0
|
void write_prediction(char *predfile, MODEL *model, double *lin,
		      double *a, long int *unlabeled,
		      long int *label, long int totdoc,
		      LEARN_PARM *learn_parm)
/* Writes one prediction line per unlabeled (transduction) example to
   predfile. Each line holds "dist:+1 -dist:-1" (or the mirrored form
   for negative dist). Exits the program if the file cannot be opened.
   For examples with nonzero alpha the distance is synthesized from the
   transductive label and alpha magnitude rather than from lin[i]. */
{
  FILE *predfl;
  long i;
  double dist,a_max;

  if(verbosity>=1) {
    printf("Writing prediction file..."); fflush(stdout);
  }
  if ((predfl = fopen (predfile, "w")) == NULL)
  { perror (predfile); exit (1); }
  /* largest alpha among unlabeled examples, used for scaling below */
  a_max=learn_parm->epsilon_a;
  for(i=0;i<totdoc;i++) {
    if((unlabeled[i]) && (a[i]>a_max)) {
      a_max=a[i];
    }
  }
  for(i=0;i<totdoc;i++) {
    if(unlabeled[i]) {
      if((a[i]>(learn_parm->epsilon_a))) {
	/* support vector: confidence derived from label and alpha */
	dist=(double)label[i]*(1.0-learn_parm->epsilon_crit-a[i]/(a_max*2.0));
      }
      else {
	/* non-SV: plain decision value */
	dist=(lin[i]-model->b);
      }
      if(dist>0) {
	fprintf(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist);
      }
      else {
	fprintf(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist);
      }
    }
  }
  fclose(predfl);
  if(verbosity>=1) {
    printf("done\n");
  }
}
|
wolffd@0
|
4127
|
wolffd@0
|
4128 void write_alphas(char *alphafile, double *a,
|
wolffd@0
|
4129 long int *label, long int totdoc)
|
wolffd@0
|
4130 {
|
wolffd@0
|
4131 FILE *alphafl;
|
wolffd@0
|
4132 long i;
|
wolffd@0
|
4133
|
wolffd@0
|
4134 if(verbosity>=1) {
|
wolffd@0
|
4135 printf("Writing alpha file..."); fflush(stdout);
|
wolffd@0
|
4136 }
|
wolffd@0
|
4137 if ((alphafl = fopen (alphafile, "w")) == NULL)
|
wolffd@0
|
4138 { perror (alphafile); exit (1); }
|
wolffd@0
|
4139 for(i=0;i<totdoc;i++) {
|
wolffd@0
|
4140 fprintf(alphafl,"%.18g\n",a[i]*(double)label[i]);
|
wolffd@0
|
4141 }
|
wolffd@0
|
4142 fclose(alphafl);
|
wolffd@0
|
4143 if(verbosity>=1) {
|
wolffd@0
|
4144 printf("done\n");
|
wolffd@0
|
4145 }
|
wolffd@0
|
4146 }
|
wolffd@0
|
4147
|