comparison toolboxes/SVM-light/src/svm_learn.c @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
1 /***********************************************************************/
2 /* */
3 /* svm_learn.c */
4 /* */
5 /* Learning module of Support Vector Machine. */
6 /* */
7 /* Author: Thorsten Joachims */
8 /* Date: 02.07.02 */
9 /* */
10 /* Copyright (c) 2002 Thorsten Joachims - All rights reserved */
11 /* */
12 /* This software is available for non-commercial use only. It must */
13 /* not be modified and distributed without prior permission of the */
14 /* author. The author is not responsible for implications from the */
15 /* use of this software. */
16 /* */
17 /***********************************************************************/
18
19
20 # include "svm_common.h"
21 # include "svm_learn.h"
22
23
24 /* interface to QP-solver */
25 double *optimize_qp(QP *, double *, long, double *, LEARN_PARM *);
26
27 /*---------------------------------------------------------------------------*/
28
29 /* Learns an SVM classification model based on the training data in
30 docs/label. The resulting model is returned in the structure
31 model. */
32
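/* --- Editor's usage sketch (not in the original source): a minimal,
   hedged example of calling svm_learn_classification() directly on a
   two-example toy problem with a linear kernel. It initializes only the
   LEARN_PARM/KERNEL_PARM fields this module visibly reads, and assumes
   svm_common.h's create_svector(words,userdefined,factor) and
   create_example(docnum,queryid,slackid,costfactor,fvec) helpers plus
   <string.h> being pulled in via svm_common.h; a real caller should set
   every parameter field the way svm_learn_main.c does. */
static MODEL *toy_classification_sketch(void)
{
  WORD words[2];                      /* one feature, zero-terminated */
  DOC *docs[2];
  double label[2]={+1.0,-1.0};
  MODEL *model=(MODEL *)my_malloc(sizeof(MODEL));
  LEARN_PARM learn_parm;
  KERNEL_PARM kernel_parm;

  memset(&learn_parm,0,sizeof(learn_parm));
  memset(&kernel_parm,0,sizeof(kernel_parm));
  learn_parm.svm_c=1.0;               /* fixed C, skips the default heuristic */
  learn_parm.svm_costratio=1.0;
  learn_parm.biased_hyperplane=1;
  learn_parm.svm_maxqpsize=10;
  learn_parm.svm_newvarsinqp=0;       /* gets clipped to svm_maxqpsize below */
  learn_parm.epsilon_crit=0.001;
  learn_parm.epsilon_a=1e-15;
  learn_parm.maxiter=100000;
  learn_parm.svm_iter_to_shrink=100;
  learn_parm.skip_final_opt_check=1;  /* skip the xa/vcdim statistics here */
  kernel_parm.kernel_type=LINEAR;     /* linear => kernel cache stays NULL */

  words[0].wnum=1; words[0].weight=1.0;
  words[1].wnum=0;                    /* terminator */
  docs[0]=create_example(0,0,0,1.0,create_svector(words,"",1.0));
  words[0].weight=-1.0;
  docs[1]=create_example(1,0,0,1.0,create_svector(words,"",1.0));

  svm_learn_classification(docs,label,2,1,&learn_parm,&kernel_parm,
                           NULL,model,NULL);
  return(model);                      /* caller owns model and docs */
}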
33 void svm_learn_classification(DOC **docs, double *class, long int
34 totdoc, long int totwords,
35 LEARN_PARM *learn_parm,
36 KERNEL_PARM *kernel_parm,
37 KERNEL_CACHE *kernel_cache,
38 MODEL *model,
39 double *alpha)
40 /* docs: Training vectors (x-part) */
41 /* class: Training labels (y-part, zero if test example for
42 transduction) */
43 /* totdoc: Number of examples in docs/label */
44 /* totwords: Number of features (i.e. highest feature index) */
45 /* learn_parm: Learning parameters */
46 /* kernel_parm: Kernel parameters */
47 /* kernel_cache:Initialized Cache of size totdoc, if using a kernel.
48 NULL if linear.*/
49 /* model: Returns learning result (assumed empty before called) */
50 /* alpha: Start values for the alpha variables or NULL
51 pointer. The new alpha values are returned after
52 optimization if not NULL. Array must be of size totdoc. */
53 {
54 long *inconsistent,i,*label;
55 long inconsistentnum;
56 long misclassified,upsupvecnum;
57 double loss,model_length,example_length;
58 double maxdiff,*lin,*a,*c;
59 long runtime_start,runtime_end;
60 long iterations;
61 long *unlabeled,transduction;
62 long heldout;
63 long loo_count=0,loo_count_pos=0,loo_count_neg=0,trainpos=0,trainneg=0;
64 long loocomputed=0,runtime_start_loo=0,runtime_start_xa=0;
65 double heldout_c=0,r_delta_sq=0,r_delta,r_delta_avg;
66 long *index,*index2dnum;
67 double *weights;
68 CFLOAT *aicache; /* buffer to keep one row of hessian */
69
70 double *xi_fullset; /* buffer for storing xi on full sample in loo */
71 double *a_fullset; /* buffer for storing alpha on full sample in loo */
72 TIMING timing_profile;
73 SHRINK_STATE shrink_state;
74
75 runtime_start=get_runtime();
76 timing_profile.time_kernel=0;
77 timing_profile.time_opti=0;
78 timing_profile.time_shrink=0;
79 timing_profile.time_update=0;
80 timing_profile.time_model=0;
81 timing_profile.time_check=0;
82 timing_profile.time_select=0;
83 kernel_cache_statistic=0;
84
85 learn_parm->totwords=totwords;
86
87 /* make sure -n value is reasonable */
88 if((learn_parm->svm_newvarsinqp < 2)
89 || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
90 learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
91 }
92
93 init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);
94
95 label = (long *)my_malloc(sizeof(long)*totdoc);
96 inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
97 unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
98 c = (double *)my_malloc(sizeof(double)*totdoc);
99 a = (double *)my_malloc(sizeof(double)*totdoc);
100 a_fullset = (double *)my_malloc(sizeof(double)*totdoc);
101 xi_fullset = (double *)my_malloc(sizeof(double)*totdoc);
102 lin = (double *)my_malloc(sizeof(double)*totdoc);
103 learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
104 model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
105 model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
106 model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
107
108 model->at_upper_bound=0;
109 model->b=0;
110 model->supvec[0]=0; /* element 0 reserved and empty for now */
111 model->alpha[0]=0;
112 model->lin_weights=NULL;
113 model->totwords=totwords;
114 model->totdoc=totdoc;
115 model->kernel_parm=(*kernel_parm);
116 model->sv_num=1;
117 model->loo_error=-1;
118 model->loo_recall=-1;
119 model->loo_precision=-1;
120 model->xa_error=-1;
121 model->xa_recall=-1;
122 model->xa_precision=-1;
123 inconsistentnum=0;
124 transduction=0;
125
126 r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
127 r_delta_sq=r_delta*r_delta;
128
129 r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
130 if(learn_parm->svm_c == 0.0) { /* default value for C */
131 learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
132 if(verbosity>=1)
133 printf("Setting default regularization parameter C=%.4f\n",
134 learn_parm->svm_c);
135 }
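/* Editor's note (hedged): the default above sets C = 1/R_avg^2, where
   R_avg is the average example radius estimated from the data, so the
   regularization constant is on the same scale as 1/||x||^2 and the
   initial problem is reasonably conditioned. */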
136
137 learn_parm->eps=-1.0; /* equivalent regression epsilon for
138 classification */
139
140 for(i=0;i<totdoc;i++) { /* various inits */
141 docs[i]->docnum=i;
142 inconsistent[i]=0;
143 a[i]=0;
144 lin[i]=0;
145 c[i]=0.0;
146 unlabeled[i]=0;
147 if(class[i] == 0) {
148 unlabeled[i]=1;
149 label[i]=0;
150 transduction=1;
151 }
152 if(class[i] > 0) {
153 learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
154 docs[i]->costfactor;
155 label[i]=1;
156 trainpos++;
157 }
158 else if(class[i] < 0) {
159 learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor;
160 label[i]=-1;
161 trainneg++;
162 }
163 else {
164 learn_parm->svm_cost[i]=0;
165 }
166 }
167 if(verbosity>=2) {
168 printf("%ld positive, %ld negative, and %ld unlabeled examples.\n",trainpos,trainneg,totdoc-trainpos-trainneg); fflush(stdout);
169 }
170
171 /* caching makes no sense for linear kernel */
172 if(kernel_parm->kernel_type == LINEAR) {
173 kernel_cache = NULL;
174 }
175
176 /* compute starting state for initial alpha values */
177 if(alpha) {
178 if(verbosity>=1) {
179 printf("Computing starting state..."); fflush(stdout);
180 }
181 index = (long *)my_malloc(sizeof(long)*totdoc);
182 index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
183 weights=(double *)my_malloc(sizeof(double)*(totwords+1));
184 aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
185 for(i=0;i<totdoc;i++) { /* create full index and clip alphas */
186 index[i]=1;
187 alpha[i]=fabs(alpha[i]);
188 if(alpha[i]<0) alpha[i]=0;
189 if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
190 }
191 if(kernel_parm->kernel_type != LINEAR) {
192 for(i=0;i<totdoc;i++) /* fill kernel cache with unbounded SV */
193 if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
194 && (kernel_cache_space_available(kernel_cache)))
195 cache_kernel_row(kernel_cache,docs,i,kernel_parm);
196 for(i=0;i<totdoc;i++) /* fill rest of kernel cache with bounded SV */
197 if((alpha[i]==learn_parm->svm_cost[i])
198 && (kernel_cache_space_available(kernel_cache)))
199 cache_kernel_row(kernel_cache,docs,i,kernel_parm);
200 }
201 (void)compute_index(index,totdoc,index2dnum);
202 update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
203 totwords,kernel_parm,kernel_cache,lin,aicache,
204 weights);
205 (void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c,
206 learn_parm,index2dnum,index2dnum,model);
207 for(i=0;i<totdoc;i++) { /* copy initial alphas */
208 a[i]=alpha[i];
209 }
210 free(index);
211 free(index2dnum);
212 free(weights);
213 free(aicache);
214 if(verbosity>=1) {
215 printf("done.\n"); fflush(stdout);
216 }
217 }
218
219 if(transduction) {
220 learn_parm->svm_iter_to_shrink=99999999;
221 if(verbosity >= 1)
222 printf("\nDeactivating Shrinking due to an incompatibility with the transductive \nlearner in the current version.\n\n");
223 }
224
225 if(transduction && learn_parm->compute_loo) {
226 learn_parm->compute_loo=0;
227 if(verbosity >= 1)
228 printf("\nCannot compute leave-one-out estimates for transductive learner.\n\n");
229 }
230
231 if(learn_parm->remove_inconsistent && learn_parm->compute_loo) {
232 learn_parm->compute_loo=0;
233 printf("\nCannot compute leave-one-out estimates when removing inconsistent examples.\n\n");
234 }
235
236 if(learn_parm->compute_loo && ((trainpos == 1) || (trainneg == 1))) {
237 learn_parm->compute_loo=0;
238 printf("\nCannot compute leave-one-out with only one example in one class.\n\n");
239 }
240
241
242 if(verbosity==1) {
243 printf("Optimizing"); fflush(stdout);
244 }
245
246 /* train the svm */
247 iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
248 kernel_parm,kernel_cache,&shrink_state,model,
249 inconsistent,unlabeled,a,lin,
250 c,&timing_profile,
251 &maxdiff,(long)-1,
252 (long)1);
253
254 if(verbosity>=1) {
255 if(verbosity==1) printf("done. (%ld iterations)\n",iterations);
256
257 misclassified=0;
258 for(i=0;(i<totdoc);i++) { /* get final statistic */
259 if((lin[i]-model->b)*(double)label[i] <= 0.0)
260 misclassified++;
261 }
262
263 printf("Optimization finished (%ld misclassified, maxdiff=%.5f).\n",
264 misclassified,maxdiff);
265
266 runtime_end=get_runtime();
267 if(verbosity>=2) {
268 printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
269 ((float)runtime_end-(float)runtime_start)/100.0,
270 (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),
271 (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),
272 (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),
273 (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),
274 (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),
275 (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),
276 (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));
277 }
278 else {
279 printf("Runtime in cpu-seconds: %.2f\n",
280 (runtime_end-runtime_start)/100.0);
281 }
282
283 if(learn_parm->remove_inconsistent) {
284 inconsistentnum=0;
285 for(i=0;i<totdoc;i++)
286 if(inconsistent[i])
287 inconsistentnum++;
288 printf("Number of SV: %ld (plus %ld inconsistent examples)\n",
289 model->sv_num-1,inconsistentnum);
290 }
291 else {
292 upsupvecnum=0;
293 for(i=1;i<model->sv_num;i++) {
294 if(fabs(model->alpha[i]) >=
295 (learn_parm->svm_cost[(model->supvec[i])->docnum]-
296 learn_parm->epsilon_a))
297 upsupvecnum++;
298 }
299 printf("Number of SV: %ld (including %ld at upper bound)\n",
300 model->sv_num-1,upsupvecnum);
301 }
302
303 if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
304 loss=0;
305 model_length=0;
306 for(i=0;i<totdoc;i++) {
307 if((lin[i]-model->b)*(double)label[i] < 1.0-learn_parm->epsilon_crit)
308 loss+=1.0-(lin[i]-model->b)*(double)label[i];
309 model_length+=a[i]*label[i]*lin[i];
310 }
311 model_length=sqrt(model_length);
312 fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
313 fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
314 example_length=estimate_sphere(model,kernel_parm);
315 fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
316 length_of_longest_document_vector(docs,totdoc,kernel_parm));
317 fprintf(stdout,"Estimated VCdim of classifier: VCdim<=%.5f\n",
318 estimate_margin_vcdim(model,model_length,example_length,
319 kernel_parm));
320 if((!learn_parm->remove_inconsistent) && (!transduction)) {
321 runtime_start_xa=get_runtime();
322 if(verbosity>=1) {
323 printf("Computing XiAlpha-estimates..."); fflush(stdout);
324 }
325 compute_xa_estimates(model,label,unlabeled,totdoc,docs,lin,a,
326 kernel_parm,learn_parm,&(model->xa_error),
327 &(model->xa_recall),&(model->xa_precision));
328 if(verbosity>=1) {
329 printf("done\n");
330 }
331 printf("Runtime for XiAlpha-estimates in cpu-seconds: %.2f\n",
332 (get_runtime()-runtime_start_xa)/100.0);
333
334 fprintf(stdout,"XiAlpha-estimate of the error: error<=%.2f%% (rho=%.2f,depth=%ld)\n",
335 model->xa_error,learn_parm->rho,learn_parm->xa_depth);
336 fprintf(stdout,"XiAlpha-estimate of the recall: recall=>%.2f%% (rho=%.2f,depth=%ld)\n",
337 model->xa_recall,learn_parm->rho,learn_parm->xa_depth);
338 fprintf(stdout,"XiAlpha-estimate of the precision: precision=>%.2f%% (rho=%.2f,depth=%ld)\n",
339 model->xa_precision,learn_parm->rho,learn_parm->xa_depth);
340 }
341 else if(!learn_parm->remove_inconsistent) {
342 estimate_transduction_quality(model,label,unlabeled,totdoc,docs,lin);
343 }
344 }
345 if(verbosity>=1) {
346 printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
347 }
348 }
349
350
351 /* leave-one-out testing starts now */
352 if(learn_parm->compute_loo) {
353 /* save results of training on full dataset for leave-one-out */
354 runtime_start_loo=get_runtime();
355 for(i=0;i<totdoc;i++) {
356 xi_fullset[i]=1.0-((lin[i]-model->b)*(double)label[i]);
357 if(xi_fullset[i]<0) xi_fullset[i]=0;
358 a_fullset[i]=a[i];
359 }
360 if(verbosity>=1) {
361 printf("Computing leave-one-out");
362 }
363
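/* Editor's note (hedged): the loop below applies Joachims' xi/alpha
   leave-one-out bounds. If rho*alpha_i*R^2 + xi_i < 1, leaving example
   i out provably cannot make it misclassified, so it is counted as
   correct without retraining; if xi_i > 1 it is provably a
   leave-one-out error; only the remaining cases require an actual
   retrain with the held-out example's upper bound set to zero. */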
364 /* repeat this loop for every held-out example */
365 for(heldout=0;(heldout<totdoc);heldout++) {
366 if(learn_parm->rho*a_fullset[heldout]*r_delta_sq+xi_fullset[heldout]
367 < 1.0) {
368 /* guaranteed to not produce a leave-one-out error */
369 if(verbosity==1) {
370 printf("+"); fflush(stdout);
371 }
372 }
373 else if(xi_fullset[heldout] > 1.0) {
374 /* guaranteed to produce a leave-one-out error */
375 loo_count++;
376 if(label[heldout] > 0) loo_count_pos++; else loo_count_neg++;
377 if(verbosity==1) {
378 printf("-"); fflush(stdout);
379 }
380 }
381 else {
382 loocomputed++;
383 heldout_c=learn_parm->svm_cost[heldout]; /* set upper bound to zero */
384 learn_parm->svm_cost[heldout]=0;
385 /* make sure heldout example is not currently */
386 /* shrunk away. Assumes that lin is up to date! */
387 shrink_state.active[heldout]=1;
388 if(verbosity>=2)
389 printf("\nLeave-One-Out test on example %ld\n",heldout);
390 if(verbosity>=1) {
391 printf("(?[%ld]",heldout); fflush(stdout);
392 }
393
394 optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
395 kernel_parm,
396 kernel_cache,&shrink_state,model,inconsistent,unlabeled,
397 a,lin,c,&timing_profile,
398 &maxdiff,heldout,(long)2);
399
400 /* printf("%.20f\n",(lin[heldout]-model->b)*(double)label[heldout]); */
401
402 if(((lin[heldout]-model->b)*(double)label[heldout]) <= 0.0) {
403 loo_count++; /* there was a loo-error */
404 if(label[heldout] > 0) loo_count_pos++; else loo_count_neg++;
405 if(verbosity>=1) {
406 printf("-)"); fflush(stdout);
407 }
408 }
409 else {
410 if(verbosity>=1) {
411 printf("+)"); fflush(stdout);
412 }
413 }
414 /* now we need to restore the original data set*/
415 learn_parm->svm_cost[heldout]=heldout_c; /* restore upper bound */
416 }
417 } /* end of leave-one-out loop */
418
419
420 if(verbosity>=1) {
421 printf("\nRetrain on full problem"); fflush(stdout);
422 }
423 optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
424 kernel_parm,
425 kernel_cache,&shrink_state,model,inconsistent,unlabeled,
426 a,lin,c,&timing_profile,
427 &maxdiff,(long)-1,(long)1);
428 if(verbosity >= 1)
429 printf("done.\n");
430
431
432 /* after all leave-one-out computed */
433 model->loo_error=100.0*loo_count/(double)totdoc;
434 model->loo_recall=(1.0-(double)loo_count_pos/(double)trainpos)*100.0;
435 model->loo_precision=(trainpos-loo_count_pos)/
436 (double)(trainpos-loo_count_pos+loo_count_neg)*100.0;
437 if(verbosity >= 1) {
438 fprintf(stdout,"Leave-one-out estimate of the error: error=%.2f%%\n",
439 model->loo_error);
440 fprintf(stdout,"Leave-one-out estimate of the recall: recall=%.2f%%\n",
441 model->loo_recall);
442 fprintf(stdout,"Leave-one-out estimate of the precision: precision=%.2f%%\n",
443 model->loo_precision);
444 fprintf(stdout,"Actual leave-one-outs computed: %ld (rho=%.2f)\n",
445 loocomputed,learn_parm->rho);
446 printf("Runtime for leave-one-out in cpu-seconds: %.2f\n",
447 (double)(get_runtime()-runtime_start_loo)/100.0);
448 }
449 }
450
451 if(learn_parm->alphafile[0])
452 write_alphas(learn_parm->alphafile,a,label,totdoc);
453
454 shrink_state_cleanup(&shrink_state);
455 free(label);
456 free(inconsistent);
457 free(unlabeled);
458 free(c);
459 free(a);
460 free(a_fullset);
461 free(xi_fullset);
462 free(lin);
463 free(learn_parm->svm_cost);
464 }
465
466
467 /* Learns an SVM regression model based on the training data in
468 docs/label. The resulting model is returned in the structure
469 model. */
470
471 void svm_learn_regression(DOC **docs, double *value, long int totdoc,
472 long int totwords, LEARN_PARM *learn_parm,
473 KERNEL_PARM *kernel_parm,
474 KERNEL_CACHE **kernel_cache, MODEL *model)
475 /* docs: Training vectors (x-part) */
476 /* value: Training values (y-part) */
477 /* totdoc: Number of examples in docs/label */
478 /* totwords: Number of features (i.e. highest feature index) */
479 /* learn_parm: Learning parameters */
480 /* kernel_parm: Kernel parameters */
481 /* kernel_cache:Initialized Cache, if using a kernel. NULL if
482 linear. Note that it will be freed and reassigned */
483 /* model: Returns learning result (assumed empty before called) */
484 {
485 long *inconsistent,i,j;
486 long inconsistentnum;
487 long upsupvecnum;
488 double loss,model_length,example_length;
489 double maxdiff,*lin,*a,*c;
490 long runtime_start,runtime_end;
491 long iterations,kernel_cache_size;
492 long *unlabeled;
493 double r_delta_sq=0,r_delta,r_delta_avg;
494 double *xi_fullset; /* buffer for storing xi on full sample in loo */
495 double *a_fullset; /* buffer for storing alpha on full sample in loo */
496 TIMING timing_profile;
497 SHRINK_STATE shrink_state;
498 DOC **docs_org;
499 long *label;
500
501 /* set up regression problem in standard form */
502 docs_org=docs;
503 docs = (DOC **)my_malloc(sizeof(DOC *)*2*totdoc);
504 label = (long *)my_malloc(sizeof(long)*2*totdoc);
505 c = (double *)my_malloc(sizeof(double)*2*totdoc);
506 for(i=0;i<totdoc;i++) {
507 j=2*totdoc-1-i;
508 docs[i]=create_example(i,0,0,docs_org[i]->costfactor,docs_org[i]->fvec);
509 label[i]=+1;
510 c[i]=value[i];
511 docs[j]=create_example(j,0,0,docs_org[i]->costfactor,docs_org[i]->fvec);
512 label[j]=-1;
513 c[j]=value[i];
514 }
515 totdoc*=2;
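/* Editor's note (hedged): the doubling above is the standard reduction
   of epsilon-insensitive regression to a classification-style problem.
   Each training pair (x_i, y_i) yields two constraints sharing x_i: a
   +1-labeled copy enforcing (w*x_i - b) >= y_i - eps and a -1-labeled
   copy enforcing (w*x_i - b) <= y_i + eps, i.e. together
   |(w*x_i - b) - y_i| <= eps up to the slack variables. The target
   value y_i travels in c[] as the constraint's right-hand side. */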
516
517 /* need to get a bigger kernel cache */
518 if(*kernel_cache) {
519 kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024);
520 kernel_cache_cleanup(*kernel_cache);
521 (*kernel_cache)=kernel_cache_init(totdoc,kernel_cache_size);
522 }
523
524 runtime_start=get_runtime();
525 timing_profile.time_kernel=0;
526 timing_profile.time_opti=0;
527 timing_profile.time_shrink=0;
528 timing_profile.time_update=0;
529 timing_profile.time_model=0;
530 timing_profile.time_check=0;
531 timing_profile.time_select=0;
532 kernel_cache_statistic=0;
533
534 learn_parm->totwords=totwords;
535
536 /* make sure -n value is reasonable */
537 if((learn_parm->svm_newvarsinqp < 2)
538 || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
539 learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
540 }
541
542 init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);
543
544 inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
545 unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
546 a = (double *)my_malloc(sizeof(double)*totdoc);
547 a_fullset = (double *)my_malloc(sizeof(double)*totdoc);
548 xi_fullset = (double *)my_malloc(sizeof(double)*totdoc);
549 lin = (double *)my_malloc(sizeof(double)*totdoc);
550 learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
551 model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
552 model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
553 model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
554
555 model->at_upper_bound=0;
556 model->b=0;
557 model->supvec[0]=0; /* element 0 reserved and empty for now */
558 model->alpha[0]=0;
559 model->lin_weights=NULL;
560 model->totwords=totwords;
561 model->totdoc=totdoc;
562 model->kernel_parm=(*kernel_parm);
563 model->sv_num=1;
564 model->loo_error=-1;
565 model->loo_recall=-1;
566 model->loo_precision=-1;
567 model->xa_error=-1;
568 model->xa_recall=-1;
569 model->xa_precision=-1;
570 inconsistentnum=0;
571
572 r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
573 r_delta_sq=r_delta*r_delta;
574
575 r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
576 if(learn_parm->svm_c == 0.0) { /* default value for C */
577 learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
578 if(verbosity>=1)
579 printf("Setting default regularization parameter C=%.4f\n",
580 learn_parm->svm_c);
581 }
582
583 for(i=0;i<totdoc;i++) { /* various inits */
584 inconsistent[i]=0;
585 a[i]=0;
586 lin[i]=0;
587 unlabeled[i]=0;
588 if(label[i] > 0) {
589 learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
590 docs[i]->costfactor;
591 }
592 else if(label[i] < 0) {
593 learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor;
594 }
595 }
596
597 /* caching makes no sense for linear kernel */
598 if((kernel_parm->kernel_type == LINEAR) && (*kernel_cache)) {
599 printf("WARNING: Using a kernel cache for linear case will slow optimization down!\n");
600 }
601
602 if(verbosity==1) {
603 printf("Optimizing"); fflush(stdout);
604 }
605
606 /* train the svm */
607 iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
608 kernel_parm,*kernel_cache,&shrink_state,
609 model,inconsistent,unlabeled,a,lin,c,
610 &timing_profile,&maxdiff,(long)-1,
611 (long)1);
612
613 if(verbosity>=1) {
614 if(verbosity==1) printf("done. (%ld iterations)\n",iterations);
615
616 printf("Optimization finished (maxdiff=%.5f).\n",maxdiff);
617
618 runtime_end=get_runtime();
619 if(verbosity>=2) {
620 printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
621 ((float)runtime_end-(float)runtime_start)/100.0,
622 (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),
623 (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),
624 (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),
625 (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),
626 (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),
627 (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),
628 (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));
629 }
630 else {
631 printf("Runtime in cpu-seconds: %.2f\n",
632 (runtime_end-runtime_start)/100.0);
633 }
634
635 if(learn_parm->remove_inconsistent) {
636 inconsistentnum=0;
637 for(i=0;i<totdoc;i++)
638 if(inconsistent[i])
639 inconsistentnum++;
640 printf("Number of SV: %ld (plus %ld inconsistent examples)\n",
641 model->sv_num-1,inconsistentnum);
642 }
643 else {
644 upsupvecnum=0;
645 for(i=1;i<model->sv_num;i++) {
646 if(fabs(model->alpha[i]) >=
647 (learn_parm->svm_cost[(model->supvec[i])->docnum]-
648 learn_parm->epsilon_a))
649 upsupvecnum++;
650 }
651 printf("Number of SV: %ld (including %ld at upper bound)\n",
652 model->sv_num-1,upsupvecnum);
653 }
654
655 if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
656 loss=0;
657 model_length=0;
658 for(i=0;i<totdoc;i++) {
659 if((lin[i]-model->b)*(double)label[i] < (-learn_parm->eps+(double)label[i]*c[i])-learn_parm->epsilon_crit)
660 loss+=-learn_parm->eps+(double)label[i]*c[i]-(lin[i]-model->b)*(double)label[i];
661 model_length+=a[i]*label[i]*lin[i];
662 }
663 model_length=sqrt(model_length);
664 fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
665 fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
666 example_length=estimate_sphere(model,kernel_parm);
667 fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
668 length_of_longest_document_vector(docs,totdoc,kernel_parm));
669 }
670 if(verbosity>=1) {
671 printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
672 }
673 }
674
675 if(learn_parm->alphafile[0])
676 write_alphas(learn_parm->alphafile,a,label,totdoc);
677
678 /* this makes sure the model we return does not contain pointers to the
679 temporary documents */
680 for(i=1;i<model->sv_num;i++) {
681 j=model->supvec[i]->docnum;
682 if(j >= (totdoc/2)) {
683 j=totdoc-j-1;
684 }
685 model->supvec[i]=docs_org[j];
686 }
687
688 shrink_state_cleanup(&shrink_state);
689 for(i=0;i<totdoc;i++)
690 free_example(docs[i],0);
691 free(docs);
692 free(label);
693 free(inconsistent);
694 free(unlabeled);
695 free(c);
696 free(a);
697 free(a_fullset);
698 free(xi_fullset);
699 free(lin);
700 free(learn_parm->svm_cost);
701 }
702
703 void svm_learn_ranking(DOC **docs, double *rankvalue, long int totdoc,
704 long int totwords, LEARN_PARM *learn_parm,
705 KERNEL_PARM *kernel_parm, KERNEL_CACHE **kernel_cache,
706 MODEL *model)
707 /* docs: Training vectors (x-part) */
708 /* rankvalue: Training target values that determine the ranking */
709 /* totdoc: Number of examples in docs/label */
710 /* totwords: Number of features (i.e. highest feature index) */
711 /* learn_parm: Learning parameters */
712 /* kernel_parm: Kernel parameters */
713 /* kernel_cache:Initialized pointer to Cache of size 1*totdoc, if
714 using a kernel. NULL if linear. NOTE: Cache is
715 getting reinitialized in this function */
716 /* model: Returns learning result (assumed empty before called) */
717 {
718 DOC **docdiff;
719 long i,j,k,totpair,kernel_cache_size;
720 double *target,*alpha,cost;
721 long *greater,*lesser;
722 MODEL *pairmodel;
723 SVECTOR *flow,*fhigh;
724
725 totpair=0;
726 for(i=0;i<totdoc;i++) {
727 for(j=i+1;j<totdoc;j++) {
728 if((docs[i]->queryid==docs[j]->queryid) && (rankvalue[i] != rankvalue[j])) {
729 totpair++;
730 }
731 }
732 }
733
734 printf("Constructing %ld rank constraints...",totpair); fflush(stdout);
735 docdiff=(DOC **)my_malloc(sizeof(DOC *)*totpair);
736 target=(double *)my_malloc(sizeof(double)*totpair);
737 greater=(long *)my_malloc(sizeof(long)*totpair);
738 lesser=(long *)my_malloc(sizeof(long)*totpair);
739
740 k=0;
741 for(i=0;i<totdoc;i++) {
742 for(j=i+1;j<totdoc;j++) {
743 if(docs[i]->queryid == docs[j]->queryid) {
744 cost=(docs[i]->costfactor+docs[j]->costfactor)/2.0;
745 if(rankvalue[i] > rankvalue[j]) {
746 if(kernel_parm->kernel_type == LINEAR)
747 docdiff[k]=create_example(k,0,0,cost,
748 sub_ss(docs[i]->fvec,docs[j]->fvec));
749 else {
750 flow=copy_svector(docs[j]->fvec);
751 flow->factor=-1.0;
752 flow->next=NULL;
753 fhigh=copy_svector(docs[i]->fvec);
754 fhigh->factor=1.0;
755 fhigh->next=flow;
756 docdiff[k]=create_example(k,0,0,cost,fhigh);
757 }
758 target[k]=1;
759 greater[k]=i;
760 lesser[k]=j;
761 k++;
762 }
763 else if(rankvalue[i] < rankvalue[j]) {
764 if(kernel_parm->kernel_type == LINEAR)
765 docdiff[k]=create_example(k,0,0,cost,
766 sub_ss(docs[i]->fvec,docs[j]->fvec));
767 else {
768 flow=copy_svector(docs[j]->fvec);
769 flow->factor=-1.0;
770 flow->next=NULL;
771 fhigh=copy_svector(docs[i]->fvec);
772 fhigh->factor=1.0;
773 fhigh->next=flow;
774 docdiff[k]=create_example(k,0,0,cost,fhigh);
775 }
776 target[k]=-1;
777 greater[k]=i;
778 lesser[k]=j;
779 k++;
780 }
781 }
782 }
783 }
784 printf("done.\n"); fflush(stdout);
785
786 /* need to get a bigger kernel cache */
787 if(*kernel_cache) {
788 kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024);
789 kernel_cache_cleanup(*kernel_cache);
790 (*kernel_cache)=kernel_cache_init(totpair,kernel_cache_size);
791 }
792
793 /* must use unbiased hyperplane on difference vectors */
794 learn_parm->biased_hyperplane=0;
795 pairmodel=(MODEL *)my_malloc(sizeof(MODEL));
796 svm_learn_classification(docdiff,target,totpair,totwords,learn_parm,
797 kernel_parm,(*kernel_cache),pairmodel,NULL);
798
799 /* Transfer the result into a more compact model. If you would like
800 to output the original model on pairs of documents, see below. */
801 alpha=(double *)my_malloc(sizeof(double)*totdoc);
802 for(i=0;i<totdoc;i++) {
803 alpha[i]=0;
804 }
805 for(i=1;i<pairmodel->sv_num;i++) {
806 alpha[lesser[(pairmodel->supvec[i])->docnum]]-=pairmodel->alpha[i];
807 alpha[greater[(pairmodel->supvec[i])->docnum]]+=pairmodel->alpha[i];
808 }
809 model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
810 model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
811 model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
812 model->supvec[0]=0; /* element 0 reserved and empty for now */
813 model->alpha[0]=0;
814 model->sv_num=1;
815 for(i=0;i<totdoc;i++) {
816 if(alpha[i]) {
817 model->supvec[model->sv_num]=docs[i];
818 model->alpha[model->sv_num]=alpha[i];
819 model->index[i]=model->sv_num;
820 model->sv_num++;
821 }
822 else {
823 model->index[i]=-1;
824 }
825 }
826 model->at_upper_bound=0;
827 model->b=0;
828 model->lin_weights=NULL;
829 model->totwords=totwords;
830 model->totdoc=totdoc;
831 model->kernel_parm=(*kernel_parm);
832 model->loo_error=-1;
833 model->loo_recall=-1;
834 model->loo_precision=-1;
835 model->xa_error=-1;
836 model->xa_recall=-1;
837 model->xa_precision=-1;
838
839 free(alpha);
840 free(greater);
841 free(lesser);
842 free(target);
843
844 /* If you would like to output the original model on pairs of
845 documents, replace the following lines with '(*model)=(*pairmodel);' */
846 for(i=0;i<totpair;i++)
847 free_example(docdiff[i],1);
848 free(docdiff);
849 free_model(pairmodel,0);
850 }
851
852
853 /* The following solves a freely defined and given set of
854 inequalities. The optimization problem is of the following form:
855
856 min 0.5 w*w + C sum_i C_i \xi_i
857 s.t. x_i * w > rhs_i - \xi_i
858
859 This corresponds to the -z o option. */
860
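/* Editor's note (hedged): ordinary classification is a special case of
   this template. Setting x_i := y_i * x_i and rhs_i := 1 turns the
   inequality x_i * w > rhs_i - xi_i into y_i * (w * x_i) >= 1 - xi_i,
   the usual soft-margin constraint (with the bias b fixed to zero,
   since this mode learns an unbiased hyperplane, see below). */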
861 void svm_learn_optimization(DOC **docs, double *rhs, long int
862 totdoc, long int totwords,
863 LEARN_PARM *learn_parm,
864 KERNEL_PARM *kernel_parm,
865 KERNEL_CACHE *kernel_cache, MODEL *model,
866 double *alpha)
867 /* docs: Left-hand side of inequalities (x-part) */
868 /* rhs: Right-hand side of inequalities */
869 /* totdoc: Number of examples in docs/label */
870 /* totwords: Number of features (i.e. highest feature index) */
871 /* learn_parm: Learning parameters */
872 /* kernel_parm: Kernel parameters */
873 /* kernel_cache:Initialized Cache of size 1*totdoc, if using a kernel.
874 NULL if linear.*/
875 /* model: Returns solution as SV expansion (assumed empty before called) */
876 /* alpha: Start values for the alpha variables or NULL
877 pointer. The new alpha values are returned after
878 optimization if not NULL. Array must be of size totdoc. */
879 {
880 long i,*label;
881 long misclassified,upsupvecnum;
882 double loss,model_length,example_length;
883 double maxdiff,*lin,*a,*c;
884 long runtime_start,runtime_end;
885 long iterations,maxslackid,svsetnum;
886 long *unlabeled,*inconsistent;
887 double r_delta_sq=0,r_delta,r_delta_avg;
888 long *index,*index2dnum;
889 double *weights,*slack,*alphaslack;
890 CFLOAT *aicache; /* buffer to keep one row of hessian */
891
892 TIMING timing_profile;
893 SHRINK_STATE shrink_state;
894
895 runtime_start=get_runtime();
896 timing_profile.time_kernel=0;
897 timing_profile.time_opti=0;
898 timing_profile.time_shrink=0;
899 timing_profile.time_update=0;
900 timing_profile.time_model=0;
901 timing_profile.time_check=0;
902 timing_profile.time_select=0;
903 kernel_cache_statistic=0;
904
905 learn_parm->totwords=totwords;
906
907 /* make sure -n value is reasonable */
908 if((learn_parm->svm_newvarsinqp < 2)
909 || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
910 learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
911 }
912
913 init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);
914
915 label = (long *)my_malloc(sizeof(long)*totdoc);
916 unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
917 inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
918 c = (double *)my_malloc(sizeof(double)*totdoc);
919 a = (double *)my_malloc(sizeof(double)*totdoc);
920 lin = (double *)my_malloc(sizeof(double)*totdoc);
921 learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
922 model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
923 model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
924 model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
925
926 model->at_upper_bound=0;
927 model->b=0;
928 model->supvec[0]=0; /* element 0 reserved and empty for now */
929 model->alpha[0]=0;
930 model->lin_weights=NULL;
931 model->totwords=totwords;
932 model->totdoc=totdoc;
933 model->kernel_parm=(*kernel_parm);
934 model->sv_num=1;
935 model->loo_error=-1;
936 model->loo_recall=-1;
937 model->loo_precision=-1;
938 model->xa_error=-1;
939 model->xa_recall=-1;
940 model->xa_precision=-1;
941
942 r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
943 r_delta_sq=r_delta*r_delta;
944
945 r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
946 if(learn_parm->svm_c == 0.0) { /* default value for C */
947 learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
948 if(verbosity>=1)
949 printf("Setting default regularization parameter C=%.4f\n",
950 learn_parm->svm_c);
951 }
952
953 learn_parm->biased_hyperplane=0; /* learn an unbiased hyperplane */
954
955 learn_parm->eps=0.0; /* No margin, unless explicitly handcoded
956 in the right-hand side in the training
957 set. */
958
959 for(i=0;i<totdoc;i++) { /* various inits */
960 docs[i]->docnum=i;
961 a[i]=0;
962 lin[i]=0;
963 c[i]=rhs[i]; /* set right-hand side */
964 unlabeled[i]=0;
965 inconsistent[i]=0;
966 learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
967 docs[i]->costfactor;
968 label[i]=1;
969 }
970 if(learn_parm->sharedslack) /* if shared slacks are used, they must */
971 for(i=0;i<totdoc;i++) /* be used on every constraint */
972 if(!docs[i]->slackid) {
973 fprintf(stderr,"Error: Missing shared slack definitions in some of the examples.\n");
974 exit(1);
975 }
976
977 /* compute starting state for initial alpha values */
978 if(alpha) {
979 if(verbosity>=1) {
980 printf("Computing starting state..."); fflush(stdout);
981 }
982 index = (long *)my_malloc(sizeof(long)*totdoc);
983 index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
984 weights=(double *)my_malloc(sizeof(double)*(totwords+1));
985 aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
986 for(i=0;i<totdoc;i++) { /* create full index and clip alphas */
987 index[i]=1;
988 alpha[i]=fabs(alpha[i]);
989 if(alpha[i]<0) alpha[i]=0;
990 if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
991 }
992 if(kernel_parm->kernel_type != LINEAR) {
993 for(i=0;i<totdoc;i++) /* fill kernel cache with unbounded SV */
994 if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
995 && (kernel_cache_space_available(kernel_cache)))
996 cache_kernel_row(kernel_cache,docs,i,kernel_parm);
997 for(i=0;i<totdoc;i++) /* fill rest of kernel cache with bounded SV */
998 if((alpha[i]==learn_parm->svm_cost[i])
999 && (kernel_cache_space_available(kernel_cache)))
1000 cache_kernel_row(kernel_cache,docs,i,kernel_parm);
1001 }
1002 (void)compute_index(index,totdoc,index2dnum);
1003 update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
1004 totwords,kernel_parm,kernel_cache,lin,aicache,
1005 weights);
1006 (void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c,
1007 learn_parm,index2dnum,index2dnum,model);
1008 for(i=0;i<totdoc;i++) { /* copy initial alphas */
1009 a[i]=alpha[i];
1010 }
1011 free(index);
1012 free(index2dnum);
1013 free(weights);
1014 free(aicache);
1015 if(verbosity>=1) {
1016 printf("done.\n"); fflush(stdout);
1017 }
1018 }
1019
1020 /* removing inconsistent does not work for general optimization problem */
1021 if(learn_parm->remove_inconsistent) {
1022 learn_parm->remove_inconsistent = 0;
1023 printf("'remove inconsistent' not available in this mode. Switching option off!\n"); fflush(stdout);
1024 }
1025
1026 /* caching makes no sense for linear kernel */
1027 if(kernel_parm->kernel_type == LINEAR) {
1028 kernel_cache = NULL;
1029 }
1030
1031 if(verbosity==1) {
1032 printf("Optimizing"); fflush(stdout);
1033 }
1034
1035 /* train the svm */
1036 if(learn_parm->sharedslack)
1037 iterations=optimize_to_convergence_sharedslack(docs,label,totdoc,
1038 totwords,learn_parm,kernel_parm,
1039 kernel_cache,&shrink_state,model,
1040 a,lin,c,&timing_profile,
1041 &maxdiff);
1042 else
1043 iterations=optimize_to_convergence(docs,label,totdoc,
1044 totwords,learn_parm,kernel_parm,
1045 kernel_cache,&shrink_state,model,
1046 inconsistent,unlabeled,
1047 a,lin,c,&timing_profile,
1048 &maxdiff,(long)-1,(long)1);
1049
1050 if(verbosity>=1) {
1051 if(verbosity==1) printf("done. (%ld iterations)\n",iterations);
1052
1053 misclassified=0;
1054 for(i=0;(i<totdoc);i++) { /* get final statistic */
1055 if((lin[i]-model->b)*(double)label[i] <= 0.0)
1056 misclassified++;
1057 }
1058
1059 printf("Optimization finished (maxdiff=%.5f).\n",maxdiff);
1060
1061 runtime_end=get_runtime();
1062 if(verbosity>=2) {
1063 printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
1064 ((float)runtime_end-(float)runtime_start)/100.0,
1065 (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),
1066 (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),
1067 (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),
1068 (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),
1069 (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),
1070 (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),
1071 (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));
1072 }
1073 else {
1074 printf("Runtime in cpu-seconds: %.2f\n",
1075 (runtime_end-runtime_start)/100.0);
1076 }
1077 }
1078 if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
1079 loss=0;
1080 model_length=0;
1081 for(i=0;i<totdoc;i++) {
1082 if((lin[i]-model->b)*(double)label[i] < c[i]-learn_parm->epsilon_crit)
1083 loss+=c[i]-(lin[i]-model->b)*(double)label[i];
1084 model_length+=a[i]*label[i]*lin[i];
1085 }
1086 model_length=sqrt(model_length);
1087 fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
1088 }
1089
1090 if(learn_parm->sharedslack) {
1091 index = (long *)my_malloc(sizeof(long)*totdoc);
1092 index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
1093 maxslackid=0;
1094 for(i=0;i<totdoc;i++) { /* create full index */
1095 index[i]=1;
1096 if(maxslackid<docs[i]->slackid)
1097 maxslackid=docs[i]->slackid;
1098 }
1099 (void)compute_index(index,totdoc,index2dnum);
1100 slack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
1101 alphaslack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
1102 for(i=0;i<=maxslackid;i++) { /* init shared slacks */
1103 slack[i]=0;
1104 alphaslack[i]=0;
1105 }
1106 compute_shared_slacks(docs,label,a,lin,c,index2dnum,learn_parm,
1107 slack,alphaslack);
1108 loss=0;
1109 model->at_upper_bound=0;
1110 svsetnum=0;
1111 for(i=0;i<=maxslackid;i++) { /* create full index */
1112 loss+=slack[i];
1113 if(alphaslack[i] > (learn_parm->svm_c - learn_parm->epsilon_a))
1114 model->at_upper_bound++;
1115 if(alphaslack[i] > learn_parm->epsilon_a)
1116 svsetnum++;
1117 }
1118 free(index);
1119 free(index2dnum);
1120 free(slack);
1121 free(alphaslack);
1122 }
1123
1124 if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
1125 if(learn_parm->sharedslack) {
1126 printf("Number of SV: %ld\n",
1127 model->sv_num-1);
1128 printf("Number of non-zero slack variables: %ld (out of %ld)\n",
1129 model->at_upper_bound,svsetnum);
1130 fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
1131 }
1132 else {
1133 upsupvecnum=0;
1134 for(i=1;i<model->sv_num;i++) {
1135 if(fabs(model->alpha[i]) >=
1136 (learn_parm->svm_cost[(model->supvec[i])->docnum]-
1137 learn_parm->epsilon_a))
1138 upsupvecnum++;
1139 }
1140 printf("Number of SV: %ld (including %ld at upper bound)\n",
1141 model->sv_num-1,upsupvecnum);
1142 fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
1143 }
1144 example_length=estimate_sphere(model,kernel_parm);
1145 fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
1146 length_of_longest_document_vector(docs,totdoc,kernel_parm));
1147 }
1148 if(verbosity>=1) {
1149 printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
1150 }
1151
1152 if(alpha) {
1153 for(i=0;i<totdoc;i++) { /* copy final alphas */
1154 alpha[i]=a[i];
1155 }
1156 }
1157
1158 if(learn_parm->alphafile[0])
1159 write_alphas(learn_parm->alphafile,a,label,totdoc);
1160
1161 shrink_state_cleanup(&shrink_state);
1162 free(label);
1163 free(unlabeled);
1164 free(inconsistent);
1165 free(c);
1166 free(a);
1167 free(lin);
1168 free(learn_parm->svm_cost);
1169 }
1170
1171
1172 long optimize_to_convergence(DOC **docs, long int *label, long int totdoc,
1173 long int totwords, LEARN_PARM *learn_parm,
1174 KERNEL_PARM *kernel_parm,
1175 KERNEL_CACHE *kernel_cache,
1176 SHRINK_STATE *shrink_state, MODEL *model,
1177 long int *inconsistent, long int *unlabeled,
1178 double *a, double *lin, double *c,
1179 TIMING *timing_profile, double *maxdiff,
1180 long int heldout, long int retrain)
1181 /* docs: Training vectors (x-part) */
1182 /* label: Training labels/value (y-part, zero if test example for
1183 transduction) */
1184 /* totdoc: Number of examples in docs/label */
1185 /* totwords: Number of features (i.e. highest feature index) */
1186 /* learn_parm: Learning parameters */
1187 /* kernel_parm: Kernel parameters */
1188 /* kernel_cache: Initialized/partly filled Cache, if using a kernel.
1189 NULL if linear. */
1190 /* shrink_state: State of active variables */
1191 /* model: Returns learning result */
1192 /* inconsistent: examples thrown out as inconsistent */
1193 /* unlabeled: test examples for transduction */
1194 /* a: alphas */
1195 /* lin: linear component of gradient */
1196 /* c: right hand side of inequalities (margin) */
1197 /* maxdiff: returns maximum violation of KT-conditions */
1198 /* heldout: marks held-out example for leave-one-out (or -1) */
1199 /* retrain: selects training mode (1=regular / 2=holdout) */
1200 {
1201 long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink;
1202 long inconsistentnum,choosenum,already_chosen=0,iteration;
1203 long misclassified,supvecnum=0,*active2dnum,inactivenum;
1204 long *working2dnum,*selexam;
1205 long activenum;
1206 double criterion,eq;
1207 double *a_old;
1208 long t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */
1209 long transductcycle;
1210 long transduction;
1211 double epsilon_crit_org;
1212 double bestmaxdiff;
1213 long bestmaxdiffiter,terminate;
1214
1215 double *selcrit; /* buffer for sorting */
1216 CFLOAT *aicache; /* buffer to keep one row of hessian */
1217 double *weights; /* buffer for weight vector in linear case */
1218 QP qp; /* buffer for one quadratic program */
1219
1220 epsilon_crit_org=learn_parm->epsilon_crit; /* save org */
1221 if(kernel_parm->kernel_type == LINEAR) {
1222 learn_parm->epsilon_crit=2.0;
1223 kernel_cache=NULL; /* caching makes no sense for linear kernel */
1224 }
1225 learn_parm->epsilon_shrink=2;
1226 (*maxdiff)=1;
1227
1228 learn_parm->totwords=totwords;
1229
1230 chosen = (long *)my_malloc(sizeof(long)*totdoc);
1231 last_suboptimal_at = (long *)my_malloc(sizeof(long)*totdoc);
1232 key = (long *)my_malloc(sizeof(long)*(totdoc+11));
1233 selcrit = (double *)my_malloc(sizeof(double)*totdoc);
1234 selexam = (long *)my_malloc(sizeof(long)*totdoc);
1235 a_old = (double *)my_malloc(sizeof(double)*totdoc);
1236 aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
1237 working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
1238 active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
1239 qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
1240 qp.opt_ce0 = (double *)my_malloc(sizeof(double));
1241 qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize
1242 *learn_parm->svm_maxqpsize);
1243 qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
1244 qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
1245 qp.opt_low=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
1246 qp.opt_up=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
1247 weights=(double *)my_malloc(sizeof(double)*(totwords+1));
1248
1249 choosenum=0;
1250 inconsistentnum=0;
1251 transductcycle=0;
1252 transduction=0;
1253 if(!retrain) retrain=1;
1254 iteration=1;
1255 bestmaxdiffiter=1;
1256 bestmaxdiff=999999999;
1257 terminate=0;
1258
1259 if(kernel_cache) {
1260 kernel_cache->time=iteration; /* for lru cache */
1261 kernel_cache_reset_lru(kernel_cache);
1262 }
1263
1264 for(i=0;i<totdoc;i++) { /* various inits */
1265 chosen[i]=0;
1266 a_old[i]=a[i];
1267 last_suboptimal_at[i]=1;
1268 if(inconsistent[i])
1269 inconsistentnum++;
1270 if(unlabeled[i]) {
1271 transduction=1;
1272 }
1273 }
1274 activenum=compute_index(shrink_state->active,totdoc,active2dnum);
1275 inactivenum=totdoc-activenum;
1276 clear_index(working2dnum);
1277
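/* Editor's overview (hedged), summarizing the loop below: each
   iteration (1) selects a working set of at most svm_maxqpsize
   variables by steepest gradient (or somewhat randomly every 101st
   iteration), (2) caches the needed kernel rows, (3) solves the QP
   subproblem over the working set, (4) updates the gradient's linear
   component lin[], (5) recomputes the model and bias, (6) checks the
   KT optimality conditions, and (7) periodically shrinks variables
   that have long been suboptimal at a bound. */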
1278 /* repeat this loop until we have convergence */
1279 for(;retrain && (!terminate);iteration++) {
1280
1281 if(kernel_cache)
1282 kernel_cache->time=iteration; /* for lru cache */
1283 if(verbosity>=2) {
1284 printf(
1285 "Iteration %ld: ",iteration); fflush(stdout);
1286 }
1287 else if(verbosity==1) {
1288 printf("."); fflush(stdout);
1289 }
1290
1291 if(verbosity>=2) t0=get_runtime();
1292 if(verbosity>=3) {
1293 printf("\nSelecting working set... "); fflush(stdout);
1294 }
1295
1296 if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize)
1297 learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
1298
1299 i=0;
1300 for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */
1301 if((chosen[j]>=(learn_parm->svm_maxqpsize/
1302 minl(learn_parm->svm_maxqpsize,
1303 learn_parm->svm_newvarsinqp)))
1304 || (inconsistent[j])
1305 || (j == heldout)) {
1306 chosen[j]=0;
1307 choosenum--;
1308 }
1309 else {
1310 chosen[j]++;
1311 working2dnum[i++]=j;
1312 }
1313 }
1314 working2dnum[i]=-1;
1315
1316 if(retrain == 2) {
1317 choosenum=0;
1318 for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* fully clear working set */
1319 chosen[j]=0;
1320 }
1321 clear_index(working2dnum);
1322 for(i=0;i<totdoc;i++) { /* set inconsistent examples to zero (-i 1) */
1323 if((inconsistent[i] || (heldout==i)) && (a[i] != 0.0)) {
1324 chosen[i]=99999;
1325 choosenum++;
1326 a[i]=0;
1327 }
1328 }
1329 if(learn_parm->biased_hyperplane) {
1330 eq=0;
1331 for(i=0;i<totdoc;i++) { /* make sure we fulfill equality constraint */
1332 eq+=a[i]*label[i];
1333 }
1334 for(i=0;(i<totdoc) && (fabs(eq) > learn_parm->epsilon_a);i++) {
1335 if((eq*label[i] > 0) && (a[i] > 0)) {
1336 chosen[i]=88888;
1337 choosenum++;
1338 if((eq*label[i]) > a[i]) {
1339 eq-=(a[i]*label[i]);
1340 a[i]=0;
1341 }
1342 else {
1343 a[i]-=(eq*label[i]);
1344 eq=0;
1345 }
1346 }
1347 }
1348 }
1349 compute_index(chosen,totdoc,working2dnum);
1350 }
1351 else { /* select working set according to steepest gradient */
1352 if(iteration % 101) {
1353 already_chosen=0;
1354 if((minl(learn_parm->svm_newvarsinqp,
1355 learn_parm->svm_maxqpsize-choosenum)>=4)
1356 && (kernel_parm->kernel_type != LINEAR)) {
1357 /* select part of the working set from cache */
1358 already_chosen=select_next_qp_subproblem_grad(
1359 label,unlabeled,a,lin,c,totdoc,
1360 (long)(minl(learn_parm->svm_maxqpsize-choosenum,
1361 learn_parm->svm_newvarsinqp)
1362 /2),
1363 learn_parm,inconsistent,active2dnum,
1364 working2dnum,selcrit,selexam,kernel_cache,1,
1365 key,chosen);
1366 choosenum+=already_chosen;
1367 }
1368 choosenum+=select_next_qp_subproblem_grad(
1369 label,unlabeled,a,lin,c,totdoc,
1370 minl(learn_parm->svm_maxqpsize-choosenum,
1371 learn_parm->svm_newvarsinqp-already_chosen),
1372 learn_parm,inconsistent,active2dnum,
1373 working2dnum,selcrit,selexam,kernel_cache,0,key,
1374 chosen);
1375 }
1376 else { /* once in a while, select a somewhat random working set
1377 to break out of infinite loops caused by numerical
1378 inaccuracies in the core qp-solver */
1379 choosenum+=select_next_qp_subproblem_rand(
1380 label,unlabeled,a,lin,c,totdoc,
1381 minl(learn_parm->svm_maxqpsize-choosenum,
1382 learn_parm->svm_newvarsinqp),
1383 learn_parm,inconsistent,active2dnum,
1384 working2dnum,selcrit,selexam,kernel_cache,key,
1385 chosen,iteration);
1386 }
1387 }
1388
1389 if(verbosity>=2) {
1390 printf(" %ld vectors chosen\n",choosenum); fflush(stdout);
1391 }
1392
1393 if(verbosity>=2) t1=get_runtime();
1394
1395 if(kernel_cache)
1396 cache_multiple_kernel_rows(kernel_cache,docs,working2dnum,
1397 choosenum,kernel_parm);
1398
1399 if(verbosity>=2) t2=get_runtime();
1400 if(retrain != 2) {
1401 optimize_svm(docs,label,unlabeled,inconsistent,0.0,chosen,active2dnum,
1402 model,totdoc,working2dnum,choosenum,a,lin,c,learn_parm,
1403 aicache,kernel_parm,&qp,&epsilon_crit_org);
1404 }
1405
1406 if(verbosity>=2) t3=get_runtime();
1407 update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc,
1408 totwords,kernel_parm,kernel_cache,lin,aicache,
1409 weights);
1410
1411 if(verbosity>=2) t4=get_runtime();
1412 supvecnum=calculate_svm_model(docs,label,unlabeled,lin,a,a_old,c,
1413 learn_parm,working2dnum,active2dnum,model);
1414
1415 if(verbosity>=2) t5=get_runtime();
1416
1417 /* The following computation of the objective function works only */
1418 /* relative to the active variables */
1419 if(verbosity>=3) {
1420 criterion=compute_objective_function(a,lin,c,learn_parm->eps,label,
1421 active2dnum);
1422 printf("Objective function (over active variables): %.16f\n",criterion);
1423 fflush(stdout);
1424 }
1425
1426 for(jj=0;(i=working2dnum[jj])>=0;jj++) {
1427 a_old[i]=a[i];
1428 }
1429
1430 if(retrain == 2) { /* reset inconsistent unlabeled examples */
1431 for(i=0;(i<totdoc);i++) {
1432 if(inconsistent[i] && unlabeled[i]) {
1433 inconsistent[i]=0;
1434 label[i]=0;
1435 }
1436 }
1437 }
1438
1439 retrain=check_optimality(model,label,unlabeled,a,lin,c,totdoc,learn_parm,
1440 maxdiff,epsilon_crit_org,&misclassified,
1441 inconsistent,active2dnum,last_suboptimal_at,
1442 iteration,kernel_parm);
1443
1444 if(verbosity>=2) {
1445 t6=get_runtime();
1446 timing_profile->time_select+=t1-t0;
1447 timing_profile->time_kernel+=t2-t1;
1448 timing_profile->time_opti+=t3-t2;
1449 timing_profile->time_update+=t4-t3;
1450 timing_profile->time_model+=t5-t4;
1451 timing_profile->time_check+=t6-t5;
1452 }
1453
1454 /* checking whether optimizer got stuck */
1455 if((*maxdiff) < bestmaxdiff) {
1456 bestmaxdiff=(*maxdiff);
1457 bestmaxdiffiter=iteration;
1458 }
1459 if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) {
1460 /* long time no progress? */
1461 terminate=1;
1462 retrain=0;
1463 if(verbosity>=1)
1464 printf("\nWARNING: Relaxing KT-Conditions due to slow progress! Terminating!\n");
1465 }
1466
1467 noshrink=0;
1468 if((!retrain) && (inactivenum>0)
1469 && ((!learn_parm->skip_final_opt_check)
1470 || (kernel_parm->kernel_type == LINEAR))) {
1471 if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
1472 || (verbosity>=2)) {
1473 if(verbosity==1) {
1474 printf("\n");
1475 }
1476 printf(" Checking optimality of inactive variables...");
1477 fflush(stdout);
1478 }
1479 t1=get_runtime();
1480 reactivate_inactive_examples(label,unlabeled,a,shrink_state,lin,c,totdoc,
1481 totwords,iteration,learn_parm,inconsistent,
1482 docs,kernel_parm,kernel_cache,model,aicache,
1483 weights,maxdiff);
1484 /* Update to new active variables. */
1485 activenum=compute_index(shrink_state->active,totdoc,active2dnum);
1486 inactivenum=totdoc-activenum;
1487 /* reset watchdog */
1488 bestmaxdiff=(*maxdiff);
1489 bestmaxdiffiter=iteration;
1490 /* termination criterion */
1491 noshrink=1;
1492 retrain=0;
1493 if((*maxdiff) > learn_parm->epsilon_crit)
1494 retrain=1;
1495 timing_profile->time_shrink+=get_runtime()-t1;
1496 if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
1497 || (verbosity>=2)) {
1498 printf("done.\n"); fflush(stdout);
1499 printf(" Number of inactive variables = %ld\n",inactivenum);
1500 }
1501 }
1502
1503 if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff)))
1504 learn_parm->epsilon_crit=(*maxdiff);
1505 if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) {
1506 learn_parm->epsilon_crit/=2.0;
1507 retrain=1;
1508 noshrink=1;
1509 }
1510 if(learn_parm->epsilon_crit<epsilon_crit_org)
1511 learn_parm->epsilon_crit=epsilon_crit_org;
1512
1513 if(verbosity>=2) {
1514 printf(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n",
1515 supvecnum,model->at_upper_bound,(*maxdiff));
1516 fflush(stdout);
1517 }
1518 if(verbosity>=3) {
1519 printf("\n");
1520 }
1521
1522 if((!retrain) && (transduction)) {
1523 for(i=0;(i<totdoc);i++) {
1524 shrink_state->active[i]=1;
1525 }
1526 activenum=compute_index(shrink_state->active,totdoc,active2dnum);
1527 inactivenum=0;
1528 if(verbosity==1) printf("done\n");
1529 retrain=incorporate_unlabeled_examples(model,label,inconsistent,
1530 unlabeled,a,lin,totdoc,
1531 selcrit,selexam,key,
1532 transductcycle,kernel_parm,
1533 learn_parm);
1534 epsilon_crit_org=learn_parm->epsilon_crit;
1535 if(kernel_parm->kernel_type == LINEAR)
1536 learn_parm->epsilon_crit=1;
1537 transductcycle++;
1538 /* reset watchdog */
1539 bestmaxdiff=(*maxdiff);
1540 bestmaxdiffiter=iteration;
1541 }
1542 else if(((iteration % 10) == 0) && (!noshrink)) {
1543 activenum=shrink_problem(docs,learn_parm,shrink_state,kernel_parm,
1544 active2dnum,last_suboptimal_at,iteration,totdoc,
1545 maxl((long)(activenum/10),
1546 maxl((long)(totdoc/500),100)),
1547 a,inconsistent);
1548 inactivenum=totdoc-activenum;
1549 if((kernel_cache)
1550 && (supvecnum>kernel_cache->max_elems)
1551 && ((kernel_cache->activenum-activenum)>maxl((long)(activenum/10),500))) {
1552 kernel_cache_shrink(kernel_cache,totdoc,
1553 minl((kernel_cache->activenum-activenum),
1554 (kernel_cache->activenum-supvecnum)),
1555 shrink_state->active);
1556 }
1557 }
1558
1559 if((!retrain) && learn_parm->remove_inconsistent) {
1560 if(verbosity>=1) {
1561 printf(" Moving training errors to inconsistent examples...");
1562 fflush(stdout);
1563 }
1564 if(learn_parm->remove_inconsistent == 1) {
1565 retrain=identify_inconsistent(a,label,unlabeled,totdoc,learn_parm,
1566 &inconsistentnum,inconsistent);
1567 }
1568 else if(learn_parm->remove_inconsistent == 2) {
1569 retrain=identify_misclassified(lin,label,unlabeled,totdoc,
1570 model,&inconsistentnum,inconsistent);
1571 }
1572 else if(learn_parm->remove_inconsistent == 3) {
1573 retrain=identify_one_misclassified(lin,label,unlabeled,totdoc,
1574 model,&inconsistentnum,inconsistent);
1575 }
1576 if(retrain) {
1577 if(kernel_parm->kernel_type == LINEAR) { /* reinit shrinking */
1578 learn_parm->epsilon_crit=2.0;
1579 }
1580 }
1581 if(verbosity>=1) {
1582 printf("done.\n");
1583 if(retrain) {
1584 printf(" Now %ld inconsistent examples.\n",inconsistentnum);
1585 }
1586 }
1587 }
1588 } /* end of loop */
1589
1590 free(chosen);
1591 free(last_suboptimal_at);
1592 free(key);
1593 free(selcrit);
1594 free(selexam);
1595 free(a_old);
1596 free(aicache);
1597 free(working2dnum);
1598 free(active2dnum);
1599 free(qp.opt_ce);
1600 free(qp.opt_ce0);
1601 free(qp.opt_g);
1602 free(qp.opt_g0);
1603 free(qp.opt_xinit);
1604 free(qp.opt_low);
1605 free(qp.opt_up);
1606 free(weights);
1607
1608 learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */
1609 model->maxdiff=(*maxdiff);
1610
1611 return(iteration);
1612 }
1613
1614 long optimize_to_convergence_sharedslack(DOC **docs, long int *label,
1615 long int totdoc,
1616 long int totwords, LEARN_PARM *learn_parm,
1617 KERNEL_PARM *kernel_parm,
1618 KERNEL_CACHE *kernel_cache,
1619 SHRINK_STATE *shrink_state, MODEL *model,
1620 double *a, double *lin, double *c,
1621 TIMING *timing_profile, double *maxdiff)
1622 /* docs: Training vectors (x-part) */
1623 /* label: Training labels/value (y-part, zero if test example for
1624 transduction) */
1625 /* totdoc: Number of examples in docs/label */
1626 /* totwords: Number of features (i.e. highest feature index) */
1627 /* learn_parm: Learning parameters */
1628 /* kernel_parm: Kernel parameters */
1629 /* kernel_cache: Initialized/partly filled Cache, if using a kernel.
1630 NULL if linear. */
1631 /* shrink_state: State of active variables */
1632 /* model: Returns learning result */
1633 /* a: alphas */
1634 /* lin: linear component of gradient */
1635 /* c: right hand side of inequalities (margin) */
1636 /* maxdiff: returns maximum violation of KT-conditions */
1637 {
1638 long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink,*unlabeled;
1639 long *inconsistent,choosenum,already_chosen=0,iteration;
1640 long misclassified,supvecnum=0,*active2dnum,inactivenum;
1641 long *working2dnum,*selexam,*ignore;
1642 long activenum,retrain,maxslackid,slackset,jointstep;
1643 double criterion,eq_target;
1644 double *a_old,*alphaslack;
1645 long t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */
1646 double epsilon_crit_org,maxsharedviol;
1647 double bestmaxdiff;
1648 long bestmaxdiffiter,terminate;
1649
1650 double *selcrit; /* buffer for sorting */
1651 CFLOAT *aicache; /* buffer to keep one row of hessian */
1652 double *weights; /* buffer for weight vector in linear case */
1653 QP qp; /* buffer for one quadratic program */
1654 double *slack; /* vector of slack variables for optimization with
1655 shared slacks */
1656
1657 epsilon_crit_org=learn_parm->epsilon_crit; /* save org */
1658 if(kernel_parm->kernel_type == LINEAR) {
1659 learn_parm->epsilon_crit=2.0;
1660 kernel_cache=NULL; /* caching makes no sense for linear kernel */
1661 }
1662 learn_parm->epsilon_shrink=2;
1663 (*maxdiff)=1;
1664
1665 learn_parm->totwords=totwords;
1666
1667 chosen = (long *)my_malloc(sizeof(long)*totdoc);
1668 unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
1669 inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
1670 ignore = (long *)my_malloc(sizeof(long)*totdoc);
1671 key = (long *)my_malloc(sizeof(long)*(totdoc+11));
1672 selcrit = (double *)my_malloc(sizeof(double)*totdoc);
1673 selexam = (long *)my_malloc(sizeof(long)*totdoc);
1674 a_old = (double *)my_malloc(sizeof(double)*totdoc);
1675 aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
1676 working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
1677 active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
1678 qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
1679 qp.opt_ce0 = (double *)my_malloc(sizeof(double));
1680 qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize
1681 *learn_parm->svm_maxqpsize);
1682 qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
1683 qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
1684 qp.opt_low=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
1685 qp.opt_up=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
1686 weights=(double *)my_malloc(sizeof(double)*(totwords+1));
1687 maxslackid=0;
1688 for(i=0;i<totdoc;i++) { /* determine size of slack array */
1689 if(maxslackid<docs[i]->slackid)
1690 maxslackid=docs[i]->slackid;
1691 }
1692 slack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
1693 alphaslack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
1694 last_suboptimal_at = (long *)my_malloc(sizeof(long)*(maxslackid+1));
1695 for(i=0;i<=maxslackid;i++) { /* init shared slacks */
1696 slack[i]=0;
1697 alphaslack[i]=0;
1698 last_suboptimal_at[i]=1;
1699 }
1700
1701 choosenum=0;
1702 retrain=1;
1703 iteration=1;
1704 bestmaxdiffiter=1;
1705 bestmaxdiff=999999999;
1706 terminate=0;
1707
1708 if(kernel_cache) {
1709 kernel_cache->time=iteration; /* for lru cache */
1710 kernel_cache_reset_lru(kernel_cache);
1711 }
1712
1713 for(i=0;i<totdoc;i++) { /* various inits */
1714 chosen[i]=0;
1715 unlabeled[i]=0;
1716 inconsistent[i]=0;
1717 ignore[i]=0;
1718 a_old[i]=a[i];
1719 }
1720 activenum=compute_index(shrink_state->active,totdoc,active2dnum);
1721 inactivenum=totdoc-activenum;
1722 clear_index(working2dnum);
1723
1724 /* call to init slack and alphaslack */
1725 compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm,
1726 slack,alphaslack);
1727
1728 /* repeat this loop until we have convergence */
1729 for(;retrain && (!terminate);iteration++) {
1730
1731 if(kernel_cache)
1732 kernel_cache->time=iteration; /* for lru cache */
1733 if(verbosity>=2) {
1734 printf(
1735 "Iteration %ld: ",iteration); fflush(stdout);
1736 }
1737 else if(verbosity==1) {
1738 printf("."); fflush(stdout);
1739 }
1740
1741 if(verbosity>=2) t0=get_runtime();
1742 if(verbosity>=3) {
1743 printf("\nSelecting working set... "); fflush(stdout);
1744 }
1745
1746 if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize)
1747 learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
1748
1749 /* select working set according to steepest gradient */
1750 jointstep=0;
1751 eq_target=0;
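/* Selection schedule (sketch of the logic below): on all iterations
   except every 101st, the working set is chosen by steepest gradient.
   Odd iterations (or iterations on which no slack set violates
   epsilon_crit) take an "i-step" that mixes examples from different
   slack sets; the remaining iterations take a "j-step" restricted to
   the single most violated slack set. Every 101st iteration falls
   through to the randomized selection further below. */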
1752 if(iteration % 101) {
1753 slackset=select_next_qp_slackset(docs,label,a,lin,slack,alphaslack,c,
1754 learn_parm,active2dnum,&maxsharedviol);
1755 if((iteration % 2)
1756 || (!slackset) || (maxsharedviol<learn_parm->epsilon_crit)){
1757 /* do a step with examples from different slack sets */
1758 if(verbosity >= 2) {
1759 printf("(i-step)"); fflush(stdout);
1760 }
1761 i=0;
1762 for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear old part of working set */
1763 if((chosen[j]>=(learn_parm->svm_maxqpsize/
1764 minl(learn_parm->svm_maxqpsize,
1765 learn_parm->svm_newvarsinqp)))) {
1766 chosen[j]=0;
1767 choosenum--;
1768 }
1769 else {
1770 chosen[j]++;
1771 working2dnum[i++]=j;
1772 }
1773 }
1774 working2dnum[i]=-1;
1775
1776 already_chosen=0;
1777 if((minl(learn_parm->svm_newvarsinqp,
1778 learn_parm->svm_maxqpsize-choosenum)>=4)
1779 && (kernel_parm->kernel_type != LINEAR)) {
1780 /* select part of the working set from cache */
1781 already_chosen=select_next_qp_subproblem_grad(
1782 label,unlabeled,a,lin,c,totdoc,
1783 (long)(minl(learn_parm->svm_maxqpsize-choosenum,
1784 learn_parm->svm_newvarsinqp)
1785 /2),
1786 learn_parm,inconsistent,active2dnum,
1787 working2dnum,selcrit,selexam,kernel_cache,
1788 (long)1,key,chosen);
1789 choosenum+=already_chosen;
1790 }
1791 choosenum+=select_next_qp_subproblem_grad(
1792 label,unlabeled,a,lin,c,totdoc,
1793 minl(learn_parm->svm_maxqpsize-choosenum,
1794 learn_parm->svm_newvarsinqp-already_chosen),
1795 learn_parm,inconsistent,active2dnum,
1796 working2dnum,selcrit,selexam,kernel_cache,
1797 (long)0,key,chosen);
1798 }
1799 else { /* do a step with all examples from same slack set */
1800 if(verbosity >= 2) {
1801 printf("(j-step on %ld)",slackset); fflush(stdout);
1802 }
1803 jointstep=1;
1804 for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */
1805 chosen[j]=0;
1806 }
1807 working2dnum[0]=-1;
1808 eq_target=alphaslack[slackset];
1809 for(j=0;j<totdoc;j++) { /* mask all but slackset */
1810 /* for(jj=0;(j=active2dnum[jj])>=0;jj++) { */
1811 if(docs[j]->slackid != slackset)
1812 ignore[j]=1;
1813 else {
1814 ignore[j]=0;
1815 learn_parm->svm_cost[j]=learn_parm->svm_c;
1816 /* printf("Inslackset(%ld,%ld)",j,shrink_state->active[j]); */
1817 }
1818 }
1819 learn_parm->biased_hyperplane=1;
1820 choosenum=select_next_qp_subproblem_grad(
1821 label,unlabeled,a,lin,c,totdoc,
1822 learn_parm->svm_maxqpsize,
1823 learn_parm,ignore,active2dnum,
1824 working2dnum,selcrit,selexam,kernel_cache,
1825 (long)0,key,chosen);
1826 learn_parm->biased_hyperplane=0;
1827 }
1828 }
1829 else { /* once in a while, select a somewhat random working set
1830 to break out of infinite loops caused by numerical
1831 inaccuracies in the core qp-solver */
1832 choosenum+=select_next_qp_subproblem_rand(
1833 label,unlabeled,a,lin,c,totdoc,
1834 minl(learn_parm->svm_maxqpsize-choosenum,
1835 learn_parm->svm_newvarsinqp),
1836 learn_parm,inconsistent,active2dnum,
1837 working2dnum,selcrit,selexam,kernel_cache,key,
1838 chosen,iteration);
1839 }
1840
1841 if(verbosity>=2) {
1842 printf(" %ld vectors chosen\n",choosenum); fflush(stdout);
1843 }
1844
1845 if(verbosity>=2) t1=get_runtime();
1846
1847 if(kernel_cache)
1848 cache_multiple_kernel_rows(kernel_cache,docs,working2dnum,
1849 choosenum,kernel_parm);
1850
1851 if(verbosity>=2) t2=get_runtime();
1852 if(jointstep) learn_parm->biased_hyperplane=1;
1853 optimize_svm(docs,label,unlabeled,ignore,eq_target,chosen,active2dnum,
1854 model,totdoc,working2dnum,choosenum,a,lin,c,learn_parm,
1855 aicache,kernel_parm,&qp,&epsilon_crit_org);
1856 learn_parm->biased_hyperplane=0;
1857
1858 for(jj=0;(i=working2dnum[jj])>=0;jj++) /* recompute sums of alphas */
1859 alphaslack[docs[i]->slackid]+=(a[i]-a_old[i]);
1860 for(jj=0;(i=working2dnum[jj])>=0;jj++) { /* reduce alpha to fulfill
1861 constraints */
1862 if(alphaslack[docs[i]->slackid] > learn_parm->svm_c) {
1863 if(a[i] < (alphaslack[docs[i]->slackid]-learn_parm->svm_c)) {
1864 alphaslack[docs[i]->slackid]-=a[i];
1865 a[i]=0;
1866 }
1867 else {
1868 a[i]-=(alphaslack[docs[i]->slackid]-learn_parm->svm_c);
1869 alphaslack[docs[i]->slackid]=learn_parm->svm_c;
1870 }
1871 }
1872 }
1873 for(jj=0;(i=active2dnum[jj])>=0;jj++)
1874 learn_parm->svm_cost[i]=a[i]+(learn_parm->svm_c
1875 -alphaslack[docs[i]->slackid]);
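/* In other words: the QP step above may overshoot the joint constraint
   sum_{i in set s} a_i <= C for a slack set s. The loop above projects
   back by reducing the just-changed alphas, and the svm_cost update
   then gives each variable the upper bound a_i + (C - alphaslack[s]),
   i.e. its current value plus the unused budget of its slack set. */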
1876
1877 if(verbosity>=2) t3=get_runtime();
1878 update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc,
1879 totwords,kernel_parm,kernel_cache,lin,aicache,
1880 weights);
1881 compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm,
1882 slack,alphaslack);
1883
1884 if(verbosity>=2) t4=get_runtime();
1885 supvecnum=calculate_svm_model(docs,label,unlabeled,lin,a,a_old,c,
1886 learn_parm,working2dnum,active2dnum,model);
1887
1888 if(verbosity>=2) t5=get_runtime();
1889
1890 /* The following computation of the objective function works only */
1891 /* relative to the active variables */
1892 if(verbosity>=3) {
1893 criterion=compute_objective_function(a,lin,c,learn_parm->eps,label,
1894 active2dnum);
1895 printf("Objective function (over active variables): %.16f\n",criterion);
1896 fflush(stdout);
1897 }
1898
1899 for(jj=0;(i=working2dnum[jj])>=0;jj++) {
1900 a_old[i]=a[i];
1901 }
1902
1903 retrain=check_optimality_sharedslack(docs,model,label,a,lin,c,
1904 slack,alphaslack,totdoc,learn_parm,
1905 maxdiff,epsilon_crit_org,&misclassified,
1906 active2dnum,last_suboptimal_at,
1907 iteration,kernel_parm);
1908
1909 if(verbosity>=2) {
1910 t6=get_runtime();
1911 timing_profile->time_select+=t1-t0;
1912 timing_profile->time_kernel+=t2-t1;
1913 timing_profile->time_opti+=t3-t2;
1914 timing_profile->time_update+=t4-t3;
1915 timing_profile->time_model+=t5-t4;
1916 timing_profile->time_check+=t6-t5;
1917 }
1918
1919 /* checking whether optimizer got stuck */
1920 if((*maxdiff) < bestmaxdiff) {
1921 bestmaxdiff=(*maxdiff);
1922 bestmaxdiffiter=iteration;
1923 }
1924 if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) {
1925 /* long time no progress? */
1926 terminate=1;
1927 retrain=0;
1928 if(verbosity>=1)
1929 printf("\nWARNING: Relaxing KT-Conditions due to slow progress! Terminating!\n");
1930 }
1931
1932 noshrink=0;
1933
1934 if((!retrain) && (inactivenum>0)
1935 && ((!learn_parm->skip_final_opt_check)
1936 || (kernel_parm->kernel_type == LINEAR))) {
1937 if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
1938 || (verbosity>=2)) {
1939 if(verbosity==1) {
1940 printf("\n");
1941 }
1942 printf(" Checking optimality of inactive variables...");
1943 fflush(stdout);
1944 }
1945 t1=get_runtime();
1946 reactivate_inactive_examples(label,unlabeled,a,shrink_state,lin,c,totdoc,
1947 totwords,iteration,learn_parm,inconsistent,
1948 docs,kernel_parm,kernel_cache,model,aicache,
1949 weights,maxdiff);
1950 /* Update to new active variables. */
1951 activenum=compute_index(shrink_state->active,totdoc,active2dnum);
1952 inactivenum=totdoc-activenum;
1953 /* check optimality, since check in reactivate does not work for
1954 sharedslacks */
1955 retrain=check_optimality_sharedslack(docs,model,label,a,lin,c,
1956 slack,alphaslack,totdoc,learn_parm,
1957 maxdiff,epsilon_crit_org,&misclassified,
1958 active2dnum,last_suboptimal_at,
1959 iteration,kernel_parm);
1960
1961 /* reset watchdog */
1962 bestmaxdiff=(*maxdiff);
1963 bestmaxdiffiter=iteration;
1964 /* termination criterion */
1965 noshrink=1;
1966 retrain=0;
1967 if((*maxdiff) > learn_parm->epsilon_crit)
1968 retrain=1;
1969 timing_profile->time_shrink+=get_runtime()-t1;
1970 if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
1971 || (verbosity>=2)) {
1972 printf("done.\n"); fflush(stdout);
1973 printf(" Number of inactive variables = %ld\n",inactivenum);
1974 }
1975 }
1976
1977 if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff)))
1978 learn_parm->epsilon_crit=(*maxdiff);
1979 if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) {
1980 learn_parm->epsilon_crit/=2.0;
1981 retrain=1;
1982 noshrink=1;
1983 }
1984 if(learn_parm->epsilon_crit<epsilon_crit_org)
1985 learn_parm->epsilon_crit=epsilon_crit_org;
1986
1987 if(verbosity>=2) {
1988 printf(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n",
1989 supvecnum,model->at_upper_bound,(*maxdiff));
1990 fflush(stdout);
1991 }
1992 if(verbosity>=3) {
1993 printf("\n");
1994 }
1995
1996 if(((iteration % 10) == 0) && (!noshrink)) {
1997 activenum=shrink_problem(docs,learn_parm,shrink_state,
1998 kernel_parm,active2dnum,
1999 last_suboptimal_at,iteration,totdoc,
2000 maxl((long)(activenum/10),
2001 maxl((long)(totdoc/500),100)),
2002 a,inconsistent);
2003 inactivenum=totdoc-activenum;
2004 if((kernel_cache)
2005 && (supvecnum>kernel_cache->max_elems)
2006 && ((kernel_cache->activenum-activenum)>maxl((long)(activenum/10),500))) {
2007 kernel_cache_shrink(kernel_cache,totdoc,
2008 minl((kernel_cache->activenum-activenum),
2009 (kernel_cache->activenum-supvecnum)),
2010 shrink_state->active);
2011 }
2012 }
2013
2014 } /* end of loop */
2015
2016
2017 free(alphaslack);
2018 free(slack);
2019 free(chosen);
2020 free(unlabeled);
2021 free(inconsistent);
2022 free(ignore);
2023 free(last_suboptimal_at);
2024 free(key);
2025 free(selcrit);
2026 free(selexam);
2027 free(a_old);
2028 free(aicache);
2029 free(working2dnum);
2030 free(active2dnum);
2031 free(qp.opt_ce);
2032 free(qp.opt_ce0);
2033 free(qp.opt_g);
2034 free(qp.opt_g0);
2035 free(qp.opt_xinit);
2036 free(qp.opt_low);
2037 free(qp.opt_up);
2038 free(weights);
2039
2040 learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */
2041 model->maxdiff=(*maxdiff);
2042
2043 return(iteration);
2044 }
2045
2046
2047 double compute_objective_function(double *a, double *lin, double *c,
2048 double eps, long int *label,
2049 long int *active2dnum)
2050 /* Return value of objective function. */
2051 /* Works only relative to the active variables! */
2052 {
2053 long i,ii;
2054 double criterion;
2055 /* calculate value of objective function */
2056 criterion=0;
2057 for(ii=0;active2dnum[ii]>=0;ii++) {
2058 i=active2dnum[ii];
2059 criterion=criterion+(eps-(double)label[i]*c[i])*a[i]+0.5*a[i]*label[i]*lin[i];
2060 }
2061 return(criterion);
2062 }
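/* Sketch of the quantity computed above, assuming lin[i] holds
   sum_j y_j a_j K(x_i,x_j) (the linear component of the gradient):

   W(a) = sum_i (eps - y_i c_i) a_i + 1/2 sum_i a_i y_i lin[i]

   restricted to the active variables, which (up to sign conventions
   and terms constant over the active set) is the dual objective. */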
2063
2064 void clear_index(long int *index)
2065 /* initializes and empties index */
2066 {
2067 index[0]=-1;
2068 }
2069
2070 void add_to_index(long int *index, long int elem)
2071 /* appends elem at the end of index */
2072 {
2073 register long i;
2074 for(i=0;index[i] != -1;i++);
2075 index[i]=elem;
2076 index[i+1]=-1;
2077 }
2078
2079 long compute_index(long int *binfeature, long int range, long int *index)
2080 /* create an inverted index of binfeature */
2081 {
2082 register long i,ii;
2083
2084 ii=0;
2085 for(i=0;i<range;i++) {
2086 if(binfeature[i]) {
2087 index[ii]=i;
2088 ii++;
2089 }
2090 }
2091 for(i=0;i<4;i++) {
2092 index[ii+i]=-1;
2093 }
2094 return(ii);
2095 }
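/* Example (sketch): for binfeature = {1,0,1,1,0} and range = 5, the
   call fills index = {0,2,3,-1,...} and returns 3. The index list is
   -1 terminated (padded with several -1 entries for safe lookahead). */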
2096
2097
2098 void optimize_svm(DOC **docs, long int *label, long int *unlabeled,
2099 long int *exclude_from_eq_const, double eq_target,
2100 long int *chosen, long int *active2dnum, MODEL *model,
2101 long int totdoc, long int *working2dnum, long int varnum,
2102 double *a, double *lin, double *c, LEARN_PARM *learn_parm,
2103 CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp,
2104 double *epsilon_crit_target)
2105 /* Do optimization on the working set. */
2106 {
2107 long i;
2108 double *a_v;
2109
2110 compute_matrices_for_optimization(docs,label,unlabeled,
2111 exclude_from_eq_const,eq_target,chosen,
2112 active2dnum,working2dnum,model,a,lin,c,
2113 varnum,totdoc,learn_parm,aicache,
2114 kernel_parm,qp);
2115
2116 if(verbosity>=3) {
2117 printf("Running optimizer..."); fflush(stdout);
2118 }
2119 /* call the qp-subsolver */
2120 a_v=optimize_qp(qp,epsilon_crit_target,
2121 learn_parm->svm_maxqpsize,
2122 &(model->b), /* in case the optimizer gives us */
2123 /* the threshold for free. Otherwise, */
2124 /* b is calculated in calculate_svm_model. */
2125 learn_parm);
2126 if(verbosity>=3) {
2127 printf("done\n");
2128 }
2129
2130 for(i=0;i<varnum;i++) {
2131 a[working2dnum[i]]=a_v[i];
2132 /*
2133 if(a_v[i]<=(0+learn_parm->epsilon_a)) {
2134 a[working2dnum[i]]=0;
2135 }
2136 else if(a_v[i]>=(learn_parm->svm_cost[working2dnum[i]]-learn_parm->epsilon_a)) {
2137 a[working2dnum[i]]=learn_parm->svm_cost[working2dnum[i]];
2138 }
2139 */
2140 }
2141 }
2142
2143 void compute_matrices_for_optimization(DOC **docs, long int *label,
2144 long int *unlabeled, long *exclude_from_eq_const, double eq_target,
2145 long int *chosen, long int *active2dnum,
2146 long int *key, MODEL *model, double *a, double *lin, double *c,
2147 long int varnum, long int totdoc, LEARN_PARM *learn_parm,
2148 CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp)
2149 {
2150 register long ki,kj,i,j;
2151 register double kernel_temp;
2152
2153 if(verbosity>=3) {
2154 fprintf(stdout,"Computing qp-matrices (type %ld kernel [degree %ld, rbf_gamma %f, coef_lin %f, coef_const %f])...",kernel_parm->kernel_type,kernel_parm->poly_degree,kernel_parm->rbf_gamma,kernel_parm->coef_lin,kernel_parm->coef_const);
2155 fflush(stdout);
2156 }
2157
2158 qp->opt_n=varnum;
2159 qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */
2160 for(j=1;j<model->sv_num;j++) { /* start at 1 */
2161 if((!chosen[(model->supvec[j])->docnum])
2162 && (!exclude_from_eq_const[(model->supvec[j])->docnum])) {
2163 qp->opt_ce0[0]+=model->alpha[j];
2164 }
2165 }
2166 if(learn_parm->biased_hyperplane)
2167 qp->opt_m=1;
2168 else
2169 qp->opt_m=0; /* eq-constraint will be ignored */
2170
2171 /* init linear part of objective function */
2172 for(i=0;i<varnum;i++) {
2173 qp->opt_g0[i]=lin[key[i]];
2174 }
2175
2176 for(i=0;i<varnum;i++) {
2177 ki=key[i];
2178
2179 /* Compute the matrix for equality constraints */
2180 qp->opt_ce[i]=label[ki];
2181 qp->opt_low[i]=0;
2182 qp->opt_up[i]=learn_parm->svm_cost[ki];
2183
2184 kernel_temp=(double)kernel(kernel_parm,docs[ki],docs[ki]);
2185 /* compute linear part of objective function */
2186 qp->opt_g0[i]-=(kernel_temp*a[ki]*(double)label[ki]);
2187 /* compute quadratic part of objective function */
2188 qp->opt_g[varnum*i+i]=kernel_temp;
2189 for(j=i+1;j<varnum;j++) {
2190 kj=key[j];
2191 kernel_temp=(double)kernel(kernel_parm,docs[ki],docs[kj]);
2192 /* compute linear part of objective function */
2193 qp->opt_g0[i]-=(kernel_temp*a[kj]*(double)label[kj]);
2194 qp->opt_g0[j]-=(kernel_temp*a[ki]*(double)label[ki]);
2195 /* compute quadratic part of objective function */
2196 qp->opt_g[varnum*i+j]=(double)label[ki]*(double)label[kj]*kernel_temp;
2197 qp->opt_g[varnum*j+i]=(double)label[ki]*(double)label[kj]*kernel_temp;
2198 }
2199
2200 if(verbosity>=3) {
2201 if(i % 20 == 0) {
2202 fprintf(stdout,"%ld..",i); fflush(stdout);
2203 }
2204 }
2205 }
2206
2207 for(i=0;i<varnum;i++) {
2208 /* assure starting at feasible point */
2209 qp->opt_xinit[i]=a[key[i]];
2210 /* set linear part of objective function */
2211 qp->opt_g0[i]=(learn_parm->eps-(double)label[key[i]]*c[key[i]])+qp->opt_g0[i]*(double)label[key[i]];
2212 }
2213
2214 if(verbosity>=3) {
2215 fprintf(stdout,"done\n");
2216 }
2217 }
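/* Layout of the subproblem assembled above, assuming optimize_qp uses
   the usual convention for these fields: minimize over x (the chosen
   alphas)

   opt_g0 . x + 1/2 x^T opt_g x
   subject to opt_ce . x + opt_ce0 = 0 (only if opt_m == 1)
   opt_low <= x <= opt_up

   with opt_xinit (the current alphas) as a feasible starting point.
   opt_ce0 collects the equality-constraint contribution of all support
   vectors that are not in the working set. */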
2218
2219 long calculate_svm_model(DOC **docs, long int *label, long int *unlabeled,
2220 double *lin, double *a, double *a_old, double *c,
2221 LEARN_PARM *learn_parm, long int *working2dnum,
2222 long int *active2dnum, MODEL *model)
2223 /* Compute decision function based on current values */
2224 /* of alpha. */
2225 {
2226 long i,ii,pos,b_calculated=0,first_low,first_high;
2227 double ex_c,b_temp,b_low,b_high;
2228
2229 if(verbosity>=3) {
2230 printf("Calculating model..."); fflush(stdout);
2231 }
2232
2233 if(!learn_parm->biased_hyperplane) {
2234 model->b=0;
2235 b_calculated=1;
2236 }
2237
2238 for(ii=0;(i=working2dnum[ii])>=0;ii++) {
2239 if((a_old[i]>0) && (a[i]==0)) { /* remove from model */
2240 pos=model->index[i];
2241 model->index[i]=-1;
2242 (model->sv_num)--;
2243 model->supvec[pos]=model->supvec[model->sv_num];
2244 model->alpha[pos]=model->alpha[model->sv_num];
2245 model->index[(model->supvec[pos])->docnum]=pos;
2246 }
2247 else if((a_old[i]==0) && (a[i]>0)) { /* add to model */
2248 model->supvec[model->sv_num]=docs[i];
2249 model->alpha[model->sv_num]=a[i]*(double)label[i];
2250 model->index[i]=model->sv_num;
2251 (model->sv_num)++;
2252 }
2253 else if(a_old[i]==a[i]) { /* nothing to do */
2254 }
2255 else { /* just update alpha */
2256 model->alpha[model->index[i]]=a[i]*(double)label[i];
2257 }
2258
2259 ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
2260 if((a_old[i]>=ex_c) && (a[i]<ex_c)) {
2261 (model->at_upper_bound)--;
2262 }
2263 else if((a_old[i]<ex_c) && (a[i]>=ex_c)) {
2264 (model->at_upper_bound)++;
2265 }
2266
2267 if((!b_calculated)
2268 && (a[i]>learn_parm->epsilon_a) && (a[i]<ex_c)) { /* calculate b */
2269 model->b=((double)label[i]*learn_parm->eps-c[i]+lin[i]);
2270 /* model->b=(-(double)label[i]+lin[i]); */
2271 b_calculated=1;
2272 }
2273 }
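/* Derivation of the b computed in the loop above: for a free SV
   (epsilon_a < a_i < C_i) the KT conditions hold with equality,
   (lin_i - b) * y_i = -(eps - y_i c_i), which rearranges to
   b = y_i * eps - c_i + lin_i. */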
2274
2275 /* If no alpha in the working set is free (i.e. all are at a bound),
2276 b could not be calculated in the usual way above. The following
2277 handles this special case. */
2278 if(learn_parm->biased_hyperplane
2279 && (!b_calculated)
2280 && (model->sv_num-1 == model->at_upper_bound)) {
2281 first_low=1;
2282 first_high=1;
2283 b_low=0;
2284 b_high=0;
2285 for(ii=0;(i=active2dnum[ii])>=0;ii++) {
2286 ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
2287 if(a[i]<ex_c) {
2288 if(label[i]>0) {
2289 b_temp=-(learn_parm->eps-c[i]+lin[i]);
2290 if((b_temp>b_low) || (first_low)) {
2291 b_low=b_temp;
2292 first_low=0;
2293 }
2294 }
2295 else {
2296 b_temp=-(-learn_parm->eps-c[i]+lin[i]);
2297 if((b_temp<b_high) || (first_high)) {
2298 b_high=b_temp;
2299 first_high=0;
2300 }
2301 }
2302 }
2303 else {
2304 if(label[i]<0) {
2305 b_temp=-(-learn_parm->eps-c[i]+lin[i]);
2306 if((b_temp>b_low) || (first_low)) {
2307 b_low=b_temp;
2308 first_low=0;
2309 }
2310 }
2311 else {
2312 b_temp=-(learn_parm->eps-c[i]+lin[i]);
2313 if((b_temp<b_high) || (first_high)) {
2314 b_high=b_temp;
2315 first_high=0;
2316 }
2317 }
2318 }
2319 }
2320 if(first_high) {
2321 model->b=-b_low;
2322 }
2323 else if(first_low) {
2324 model->b=-b_high;
2325 }
2326 else {
2327 model->b=-(b_high+b_low)/2.0; /* select b as the middle of range */
2328 /* printf("\nb_low=%f, b_high=%f,b=%f\n",b_low,b_high,model->b); */
2329 }
2330 }
2331
2332 if(verbosity>=3) {
2333 printf("done\n"); fflush(stdout);
2334 }
2335
2336 return(model->sv_num-1); /* have to subtract one, since element 0 is empty */
2337 }
2338
2339 long check_optimality(MODEL *model, long int *label, long int *unlabeled,
2340 double *a, double *lin, double *c, long int totdoc,
2341 LEARN_PARM *learn_parm, double *maxdiff,
2342 double epsilon_crit_org, long int *misclassified,
2343 long int *inconsistent, long int *active2dnum,
2344 long int *last_suboptimal_at,
2345 long int iteration, KERNEL_PARM *kernel_parm)
2346 /* Check KT-conditions */
2347 {
2348 long i,ii,retrain;
2349 double dist,ex_c,target;
2350
2351 if(kernel_parm->kernel_type == LINEAR) { /* be optimistic */
2352 learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org;
2353 }
2354 else { /* be conservative */
2355 learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3;
2356 }
2357 retrain=0;
2358 (*maxdiff)=0;
2359 (*misclassified)=0;
2360 for(ii=0;(i=active2dnum[ii])>=0;ii++) {
2361 if((!inconsistent[i]) && label[i]) {
2362 dist=(lin[i]-model->b)*(double)label[i];/* 'distance' from
2363 hyperplane*/
2364 target=-(learn_parm->eps-(double)label[i]*c[i]);
2365 ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
2366 if(dist <= 0) {
2367 (*misclassified)++; /* does not work due to deactivation of var */
2368 }
2369 if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
2370 if((dist-target)>(*maxdiff)) /* largest violation */
2371 (*maxdiff)=dist-target;
2372 }
2373 else if((a[i]<ex_c) && (dist < target)) {
2374 if((target-dist)>(*maxdiff)) /* largest violation */
2375 (*maxdiff)=target-dist;
2376 }
2377 /* Count how long a variable was at the lower/upper bound (and optimal). */
2378 /* Variables which were at the bound and optimal for a long */
2379 /* time are unlikely to become support vectors. In case the */
2380 /* cache is filled up, those variables are excluded to save */
2381 /* kernel evaluations. (See chapter 'Shrinking'.) */
2382 if((a[i]>(learn_parm->epsilon_a))
2383 && (a[i]<ex_c)) {
2384 last_suboptimal_at[i]=iteration; /* not at bound */
2385 }
2386 else if((a[i]<=(learn_parm->epsilon_a))
2387 && (dist < (target+learn_parm->epsilon_shrink))) {
2388 last_suboptimal_at[i]=iteration; /* not likely optimal */
2389 }
2390 else if((a[i]>=ex_c)
2391 && (dist > (target-learn_parm->epsilon_shrink))) {
2392 last_suboptimal_at[i]=iteration; /* not likely optimal */
2393 }
2394 }
2395 }
2396 /* termination criterion */
2397 if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) {
2398 retrain=1;
2399 }
2400 return(retrain);
2401 }
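/* Summary of the conditions checked above, in the code's notation
   (dist = (lin_i - b) * y_i, target = -(eps - y_i c_i)):
   a_i ~ 0 => dist >= target
   epsilon_a < a_i < C_i => dist ~ target
   a_i ~ C_i => dist <= target
   maxdiff records the largest violation; training continues while it
   exceeds epsilon_crit. */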
2402
2403 long check_optimality_sharedslack(DOC **docs, MODEL *model, long int *label,
2404 double *a, double *lin, double *c, double *slack,
2405 double *alphaslack,
2406 long int totdoc,
2407 LEARN_PARM *learn_parm, double *maxdiff,
2408 double epsilon_crit_org, long int *misclassified,
2409 long int *active2dnum,
2410 long int *last_suboptimal_at,
2411 long int iteration, KERNEL_PARM *kernel_parm)
2412 /* Check KT-conditions */
2413 {
2414 long i,ii,retrain;
2415 double dist,ex_c=0,target;
2416
2417 if(kernel_parm->kernel_type == LINEAR) { /* be optimistic */
2418 learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org;
2419 }
2420 else { /* be conservative */
2421 learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3;
2422 }
2423
2424 retrain=0;
2425 (*maxdiff)=0;
2426 (*misclassified)=0;
2427 for(ii=0;(i=active2dnum[ii])>=0;ii++) {
2428 /* 'distance' from hyperplane*/
2429 dist=(lin[i]-model->b)*(double)label[i]+slack[docs[i]->slackid];
2430 target=-(learn_parm->eps-(double)label[i]*c[i]);
2431 ex_c=learn_parm->svm_c-learn_parm->epsilon_a;
2432 if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
2433 if((dist-target)>(*maxdiff)) { /* largest violation */
2434 (*maxdiff)=dist-target;
2435 if(verbosity>=5) printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, alphaslack=%f\n",docs[i]->slackid,dist,target,slack[docs[i]->slackid],a[i],alphaslack[docs[i]->slackid]);
2436 if(verbosity>=5) printf(" (single %f)\n",(*maxdiff));
2437 }
2438 }
2439 if((alphaslack[docs[i]->slackid]<ex_c) && (slack[docs[i]->slackid]>0)) {
2440 if((slack[docs[i]->slackid])>(*maxdiff)) { /* largest violation */
2441 (*maxdiff)=slack[docs[i]->slackid];
2442 if(verbosity>=5) printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, alphaslack=%f\n",docs[i]->slackid,dist,target,slack[docs[i]->slackid],a[i],alphaslack[docs[i]->slackid]);
2443 if(verbosity>=5) printf(" (joint %f)\n",(*maxdiff));
2444 }
2445 }
2446 /* Count how long a variable was at the lower/upper bound (and optimal). */
2447 /* Variables which were at the bound and optimal for a long */
2448 /* time are unlikely to become support vectors. In case the */
2449 /* cache is filled up, those variables are excluded to save */
2450 /* kernel evaluations. (See chapter 'Shrinking'.) */
2451 if((a[i]>(learn_parm->epsilon_a))
2452 && (a[i]<ex_c)) {
2453 last_suboptimal_at[docs[i]->slackid]=iteration; /* not at bound */
2454 }
2455 else if((a[i]<=(learn_parm->epsilon_a))
2456 && (dist < (target+learn_parm->epsilon_shrink))) {
2457 last_suboptimal_at[docs[i]->slackid]=iteration; /* not likely optimal */
2458 }
2459 else if((a[i]>=ex_c)
2460 && (slack[docs[i]->slackid] < learn_parm->epsilon_shrink)) {
2461 last_suboptimal_at[docs[i]->slackid]=iteration; /* not likely optimal */
2462 }
2463 }
2464 /* termination criterion */
2465 if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) {
2466 retrain=1;
2467 }
2468 return(retrain);
2469 }
2470
2471 void compute_shared_slacks(DOC **docs, long int *label,
2472 double *a, double *lin,
2473 double *c, long int *active2dnum,
2474 LEARN_PARM *learn_parm,
2475 double *slack, double *alphaslack)
2476 /* compute the value of shared slacks and the joint alphas */
2477 {
2478 long jj,i;
2479 double dist,target;
2480
2481 for(jj=0;(i=active2dnum[jj])>=0;jj++) { /* clear slack variables */
2482 slack[docs[i]->slackid]=0.0;
2483 alphaslack[docs[i]->slackid]=0.0;
2484 }
2485 for(jj=0;(i=active2dnum[jj])>=0;jj++) { /* recompute slack variables */
2486 dist=(lin[i])*(double)label[i];
2487 target=-(learn_parm->eps-(double)label[i]*c[i]);
2488 if((target-dist) > slack[docs[i]->slackid])
2489 slack[docs[i]->slackid]=target-dist;
2490 alphaslack[docs[i]->slackid]+=a[i];
2491 }
2492 }
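/* In formulas, for each slack set s this computes (note that lin is
   used without the threshold b here):
   slack[s] = max(0, max_{i: slackid(i)=s} (target_i - y_i lin_i))
   with target_i = -(eps - y_i c_i), and
   alphaslack[s] = sum_{i: slackid(i)=s} a_i. */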
2493
2494
2495 long identify_inconsistent(double *a, long int *label,
2496 long int *unlabeled, long int totdoc,
2497 LEARN_PARM *learn_parm,
2498 long int *inconsistentnum, long int *inconsistent)
2499 {
2500 long i,retrain;
2501
2502 /* Throw out examples with multipliers at the upper bound. This */
2503 /* corresponds to the -i 1 option. */
2504 /* ATTENTION: this is just a heuristic for finding a */
2505 /* close-to-minimum number of examples to exclude in order */
2506 /* to make the problem separable with the desired margin. */
2507 retrain=0;
2508 for(i=0;i<totdoc;i++) {
2509 if((!inconsistent[i]) && (!unlabeled[i])
2510 && (a[i]>=(learn_parm->svm_cost[i]-learn_parm->epsilon_a))) {
2511 (*inconsistentnum)++;
2512 inconsistent[i]=1; /* never choose again */
2513 retrain=2; /* start over */
2514 if(verbosity>=3) {
2515 printf("inconsistent(%ld)..",i); fflush(stdout);
2516 }
2517 }
2518 }
2519 return(retrain);
2520 }
2521
2522 long identify_misclassified(double *lin, long int *label,
2523 long int *unlabeled, long int totdoc,
2524 MODEL *model, long int *inconsistentnum,
2525 long int *inconsistent)
2526 {
2527 long i,retrain;
2528 double dist;
2529
2530 /* Throw out misclassified examples. This */
2531 /* corresponds to the -i 2 option. */
2532 /* ATTENTION: this is just a heuristic for finding a */
2533 /* close-to-minimum number of examples to exclude in order */
2534 /* to make the problem separable with the desired margin. */
2535 retrain=0;
2536 for(i=0;i<totdoc;i++) {
2537 dist=(lin[i]-model->b)*(double)label[i]; /* 'distance' from hyperplane*/
2538 if((!inconsistent[i]) && (!unlabeled[i]) && (dist <= 0)) {
2539 (*inconsistentnum)++;
2540 inconsistent[i]=1; /* never choose again */
2541 retrain=2; /* start over */
2542 if(verbosity>=3) {
2543 printf("inconsistent(%ld)..",i); fflush(stdout);
2544 }
2545 }
2546 }
2547 return(retrain);
2548 }
2549
2550 long identify_one_misclassified(double *lin, long int *label,
2551 long int *unlabeled,
2552 long int totdoc, MODEL *model,
2553 long int *inconsistentnum,
2554 long int *inconsistent)
2555 {
2556 long i,retrain,maxex=-1;
2557 double dist,maxdist=0;
2558
2559 /* Throw out the 'most misclassified' example. This */
2560 /* corresponds to the -i 3 option. */
2561 /* ATTENTION: this is just a heuristic for finding a */
2562 /* close-to-minimum number of examples to exclude in order */
2563 /* to make the problem separable with the desired margin. */
2564 retrain=0;
2565 for(i=0;i<totdoc;i++) {
2566 if((!inconsistent[i]) && (!unlabeled[i])) {
2567 dist=(lin[i]-model->b)*(double)label[i];/* 'distance' from hyperplane*/
2568 if(dist<maxdist) {
2569 maxdist=dist;
2570 maxex=i;
2571 }
2572 }
2573 }
2574 if(maxex>=0) {
2575 (*inconsistentnum)++;
2576 inconsistent[maxex]=1; /* never choose again */
2577 retrain=2; /* start over */
2578 if(verbosity>=3) {
2579 printf("inconsistent(%ld)..",i); fflush(stdout);
2580 }
2581 }
2582 return(retrain);
2583 }
2584
2585 void update_linear_component(DOC **docs, long int *label,
2586 long int *active2dnum, double *a,
2587 double *a_old, long int *working2dnum,
2588 long int totdoc, long int totwords,
2589 KERNEL_PARM *kernel_parm,
2590 KERNEL_CACHE *kernel_cache,
2591 double *lin, CFLOAT *aicache, double *weights)
2592 /* keep track of the linear component */
2593 /* lin of the gradient etc. by updating */
2594 /* based on the change of the variables */
2595 /* in the current working set */
2596 {
2597 register long i,ii,j,jj;
2598 register double tec;
2599 SVECTOR *f;
2600
2601 if(kernel_parm->kernel_type==0) { /* special linear case */
2602 clear_vector_n(weights,totwords);
2603 for(ii=0;(i=working2dnum[ii])>=0;ii++) {
2604 if(a[i] != a_old[i]) {
2605 for(f=docs[i]->fvec;f;f=f->next)
2606 add_vector_ns(weights,f,
2607 f->factor*((a[i]-a_old[i])*(double)label[i]));
2608 }
2609 }
2610 for(jj=0;(j=active2dnum[jj])>=0;jj++) {
2611 for(f=docs[j]->fvec;f;f=f->next)
2612 lin[j]+=f->factor*sprod_ns(weights,f);
2613 }
2614 }
2615 else { /* general case */
2616 for(jj=0;(i=working2dnum[jj])>=0;jj++) {
2617 if(a[i] != a_old[i]) {
2618 get_kernel_row(kernel_cache,docs,i,totdoc,active2dnum,aicache,
2619 kernel_parm);
2620 for(ii=0;(j=active2dnum[ii])>=0;ii++) {
2621 tec=aicache[j];
2622 lin[j]+=(((a[i]*tec)-(a_old[i]*tec))*(double)label[i]);
2623 }
2624 }
2625 }
2626 }
2627 }
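/* Sketch of the update above: in the linear case, weights temporarily
   holds the *delta* weight vector built only from the changed alphas,
   w_delta = sum_{i in working set} (a_i - a_old_i) y_i x_i,
   and each active lin[j] is updated incrementally by w_delta . x_j.
   In the general case the same delta update is done one cached kernel
   row at a time: lin[j] += (a_i - a_old_i) y_i K(x_i, x_j). */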
2628
2629
2630 long incorporate_unlabeled_examples(MODEL *model, long int *label,
2631 long int *inconsistent,
2632 long int *unlabeled,
2633 double *a, double *lin,
2634 long int totdoc, double *selcrit,
2635 long int *select, long int *key,
2636 long int transductcycle,
2637 KERNEL_PARM *kernel_parm,
2638 LEARN_PARM *learn_parm)
2639 {
2640 long i,j,k,j1,j2,j3,j4,unsupaddnum1=0,unsupaddnum2=0;
2641 long pos,neg,upos,uneg,orgpos,orgneg,nolabel,newpos,newneg,allunlab;
2642 double dist,model_length,posratio,negratio;
2643 long check_every=2;
2644 double loss;
2645 static double switchsens=0.0,switchsensorg=0.0;
2646 double umin,umax,sumalpha;
2647 long imin=0,imax=0;
2648 static long switchnum=0;
2649
2650 switchsens/=1.2;
2651
2652 /* assumes that lin[] is up to date -> no inactive vars */
2653
2654 orgpos=0;
2655 orgneg=0;
2656 newpos=0;
2657 newneg=0;
2658 nolabel=0;
2659 allunlab=0;
2660 for(i=0;i<totdoc;i++) {
2661 if(!unlabeled[i]) {
2662 if(label[i] > 0) {
2663 orgpos++;
2664 }
2665 else {
2666 orgneg++;
2667 }
2668 }
2669 else {
2670 allunlab++;
2671 if(unlabeled[i]) {
2672 if(label[i] > 0) {
2673 newpos++;
2674 }
2675 else if(label[i] < 0) {
2676 newneg++;
2677 }
2678 }
2679 }
2680 if(label[i]==0) {
2681 nolabel++;
2682 }
2683 }
2684
2685 if(learn_parm->transduction_posratio >= 0) {
2686 posratio=learn_parm->transduction_posratio;
2687 }
2688 else {
2689 posratio=(double)orgpos/(double)(orgpos+orgneg); /* use ratio of pos/neg */
2690 } /* in training data */
2691 negratio=1.0-posratio;
2692
2693 learn_parm->svm_costratio=1.0; /* global */
2694 if(posratio>0) {
2695 learn_parm->svm_costratio_unlab=negratio/posratio;
2696 }
2697 else {
2698 learn_parm->svm_costratio_unlab=1.0;
2699 }
2700
2701 pos=0;
2702 neg=0;
2703 upos=0;
2704 uneg=0;
2705 for(i=0;i<totdoc;i++) {
2706 dist=(lin[i]-model->b); /* 'distance' from hyperplane*/
2707 if(dist>0) {
2708 pos++;
2709 }
2710 else {
2711 neg++;
2712 }
2713 if(unlabeled[i]) {
2714 if(dist>0) {
2715 upos++;
2716 }
2717 else {
2718 uneg++;
2719 }
2720 }
2721 if((!unlabeled[i]) && (a[i]>(learn_parm->svm_cost[i]-learn_parm->epsilon_a))) {
2722 /* printf("Ubounded %ld (class %ld, unlabeled %ld)\n",i,label[i],unlabeled[i]); */
2723 }
2724 }
2725 if(verbosity>=2) {
2726 printf("POS=%ld, ORGPOS=%ld, ORGNEG=%ld\n",pos,orgpos,orgneg);
2727 printf("POS=%ld, NEWPOS=%ld, NEWNEG=%ld\n",pos,newpos,newneg);
2728 printf("pos ratio = %f (%f).\n",(double)(upos)/(double)(allunlab),posratio);
2729 fflush(stdout);
2730 }
2731
2732 if(transductcycle == 0) {
2733 j1=0;
2734 j2=0;
2735 j4=0;
2736 for(i=0;i<totdoc;i++) {
2737 dist=(lin[i]-model->b); /* 'distance' from hyperplane*/
2738 if((label[i]==0) && (unlabeled[i])) {
2739 selcrit[j4]=dist;
2740 key[j4]=i;
2741 j4++;
2742 }
2743 }
2744 unsupaddnum1=0;
2745 unsupaddnum2=0;
2746 select_top_n(selcrit,j4,select,(long)(allunlab*posratio+0.5));
2747 for(k=0;(k<(long)(allunlab*posratio+0.5));k++) {
2748 i=key[select[k]];
2749 label[i]=1;
2750 unsupaddnum1++;
2751 j1++;
2752 }
2753 for(i=0;i<totdoc;i++) {
2754 if((label[i]==0) && (unlabeled[i])) {
2755 label[i]=-1;
2756 j2++;
2757 unsupaddnum2++;
2758 }
2759 }
2760 for(i=0;i<totdoc;i++) { /* set upper bounds on vars */
2761 if(unlabeled[i]) {
2762 if(label[i] == 1) {
2763 learn_parm->svm_cost[i]=learn_parm->svm_c*
2764 learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound;
2765 }
2766 else if(label[i] == -1) {
2767 learn_parm->svm_cost[i]=learn_parm->svm_c*
2768 learn_parm->svm_unlabbound;
2769 }
2770 }
2771 }
2772 if(verbosity>=1) {
2773 /* printf("costratio %f, costratio_unlab %f, unlabbound %f\n",
2774 learn_parm->svm_costratio,learn_parm->svm_costratio_unlab,
2775 learn_parm->svm_unlabbound); */
2776 printf("Classifying unlabeled data as %ld POS / %ld NEG.\n",
2777 unsupaddnum1,unsupaddnum2);
2778 fflush(stdout);
2779 }
2780 if(verbosity >= 1)
2781 printf("Retraining.");
2782 if(verbosity >= 2) printf("\n");
2783 return((long)3);
2784 }
2785 if((transductcycle % check_every) == 0) {
2786 if(verbosity >= 1)
2787 printf("Retraining.");
2788 if(verbosity >= 2) printf("\n");
2789 j1=0;
2790 j2=0;
2791 unsupaddnum1=0;
2792 unsupaddnum2=0;
2793 for(i=0;i<totdoc;i++) {
2794 if((unlabeled[i] == 2)) {
2795 unlabeled[i]=1;
2796 label[i]=1;
2797 j1++;
2798 unsupaddnum1++;
2799 }
2800 else if((unlabeled[i] == 3)) {
2801 unlabeled[i]=1;
2802 label[i]=-1;
2803 j2++;
2804 unsupaddnum2++;
2805 }
2806 }
2807 for(i=0;i<totdoc;i++) { /* set upper bounds on vars */
2808 if(unlabeled[i]) {
2809 if(label[i] == 1) {
2810 learn_parm->svm_cost[i]=learn_parm->svm_c*
2811 learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound;
2812 }
2813 else if(label[i] == -1) {
2814 learn_parm->svm_cost[i]=learn_parm->svm_c*
2815 learn_parm->svm_unlabbound;
2816 }
2817 }
2818 }
2819
2820 if(verbosity>=2) {
2821 /* printf("costratio %f, costratio_unlab %f, unlabbound %f\n",
2822 learn_parm->svm_costratio,learn_parm->svm_costratio_unlab,
2823 learn_parm->svm_unlabbound); */
2824 printf("%ld positive -> Added %ld POS / %ld NEG unlabeled examples.\n",
2825 upos,unsupaddnum1,unsupaddnum2);
2826 fflush(stdout);
2827 }
2828
2829 if(learn_parm->svm_unlabbound == 1) {
2830 learn_parm->epsilon_crit=0.001; /* do the last run right */
2831 }
2832 else {
2833 learn_parm->epsilon_crit=0.01; /* otherwise, no need to be so picky */
2834 }
2835
2836 return((long)3);
2837 }
2838 else if(((transductcycle % check_every) < check_every)) { /* always true: default branch */
2839 model_length=0;
2840 sumalpha=0;
2841 loss=0;
2842 for(i=0;i<totdoc;i++) {
2843 model_length+=a[i]*label[i]*lin[i];
2844 sumalpha+=a[i];
2845 dist=(lin[i]-model->b); /* 'distance' from hyperplane*/
2846 if((label[i]*dist)<(1.0-learn_parm->epsilon_crit)) {
2847 loss+=(1.0-(label[i]*dist))*learn_parm->svm_cost[i];
2848 }
2849 }
2850 model_length=sqrt(model_length);
2851 if(verbosity>=2) {
2852 printf("Model-length = %f (%f), loss = %f, objective = %f\n",
2853 model_length,sumalpha,loss,loss+0.5*model_length*model_length);
2854 fflush(stdout);
2855 }
2856 j1=0;
2857 j2=0;
2858 j3=0;
2859 j4=0;
2860 unsupaddnum1=0;
2861 unsupaddnum2=0;
2862 umin=99999;
2863 umax=-99999;
2864 j4=1;
2865 while(j4) {
2866 umin=99999;
2867 umax=-99999;
2868 for(i=0;(i<totdoc);i++) {
2869 dist=(lin[i]-model->b);
2870 if((label[i]>0) && (unlabeled[i]) && (!inconsistent[i])
2871 && (dist<umin)) {
2872 umin=dist;
2873 imin=i;
2874 }
2875 if((label[i]<0) && (unlabeled[i]) && (!inconsistent[i])
2876 && (dist>umax)) {
2877 umax=dist;
2878 imax=i;
2879 }
2880 }
2881 if((umin < (umax+switchsens-1E-4))) {
2882 j1++;
2883 j2++;
2884 unsupaddnum1++;
2885 unlabeled[imin]=3;
2886 inconsistent[imin]=1;
2887 unsupaddnum2++;
2888 unlabeled[imax]=2;
2889 inconsistent[imax]=1;
2890 }
2891 else
2892 j4=0;
2893 j4=0; /* unconditional: the while(j4) loop above runs exactly once */
2894 }
2895 for(j=0;(j<totdoc);j++) {
2896 if(unlabeled[j] && (!inconsistent[j])) {
2897 if(label[j]>0) {
2898 unlabeled[j]=2;
2899 }
2900 else if(label[j]<0) {
2901 unlabeled[j]=3;
2902 }
2903 /* inconsistent[j]=1; */
2904 j3++;
2905 }
2906 }
2907 switchnum+=unsupaddnum1+unsupaddnum2;
2908
2909 /* stop and print out current margin
2910 printf("switchnum %ld %ld\n",switchnum,kernel_parm->poly_degree);
2911 if(switchnum == 2*kernel_parm->poly_degree) {
2912 learn_parm->svm_unlabbound=1;
2913 }
2914 */
2915
2916 if((!unsupaddnum1) && (!unsupaddnum2)) {
2917 if((learn_parm->svm_unlabbound>=1) && ((newpos+newneg) == allunlab)) {
2918 for(j=0;(j<totdoc);j++) {
2919 inconsistent[j]=0;
2920 if(unlabeled[j]) unlabeled[j]=1;
2921 }
2922 write_prediction(learn_parm->predfile,model,lin,a,unlabeled,label,
2923 totdoc,learn_parm);
2924 if(verbosity>=1)
2925 printf("Number of switches: %ld\n",switchnum);
2926 return((long)0);
2927 }
2928 switchsens=switchsensorg;
2929 learn_parm->svm_unlabbound*=1.5;
2930 if(learn_parm->svm_unlabbound>1) {
2931 learn_parm->svm_unlabbound=1;
2932 }
2933 model->at_upper_bound=0; /* since upper bound increased */
2934 if(verbosity>=1)
2935 printf("Increasing influence of unlabeled examples to %f%% .",
2936 learn_parm->svm_unlabbound*100.0);
2937 }
2938 else if(verbosity>=1) {
2939 printf("%ld positive -> Switching labels of %ld POS / %ld NEG unlabeled examples.",
2940 upos,unsupaddnum1,unsupaddnum2);
2941 fflush(stdout);
2942 }
2943
2944 if(verbosity >= 2) printf("\n");
2945
2946 learn_parm->epsilon_crit=0.5; /* don't need to be so picky */
2947
2948 for(i=0;i<totdoc;i++) { /* set upper bounds on vars */
2949 if(unlabeled[i]) {
2950 if(label[i] == 1) {
2951 learn_parm->svm_cost[i]=learn_parm->svm_c*
2952 learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound;
2953 }
2954 else if(label[i] == -1) {
2955 learn_parm->svm_cost[i]=learn_parm->svm_c*
2956 learn_parm->svm_unlabbound;
2957 }
2958 }
2959 }
2960
2961 return((long)2);
2962 }
2963
2964 return((long)0);
2965 }
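/* Return codes of incorporate_unlabeled_examples, as used above:
   3 - retrain after (re)assigning labels to the unlabeled examples,
   2 - retrain after switching labels (or raising the influence of the
       unlabeled examples),
   0 - transduction finished; predictions have been written. */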
2966
2967 /*************************** Working set selection ***************************/
2968
2969 long select_next_qp_subproblem_grad(long int *label,
2970 long int *unlabeled,
2971 double *a, double *lin,
2972 double *c, long int totdoc,
2973 long int qp_size,
2974 LEARN_PARM *learn_parm,
2975 long int *inconsistent,
2976 long int *active2dnum,
2977 long int *working2dnum,
2978 double *selcrit,
2979 long int *select,
2980 KERNEL_CACHE *kernel_cache,
2981 long int cache_only,
2982 long int *key, long int *chosen)
2983 /* Use the feasible direction approach to select the next
2984 qp-subproblem (see chapter 'Selecting a good working set'). If
2985 'cache_only' is true, then the variables are selected only among
2986 those for which the kernel evaluations are cached. */
2987 {
2988 long choosenum,i,j,k,activedoc,inum,valid;
2989 double s;
2990
2991 for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
2992 choosenum=0;
2993 activedoc=0;
2994 for(i=0;(j=active2dnum[i])>=0;i++) {
2995 s=-label[j];
2996 if(kernel_cache && cache_only)
2997 valid=(kernel_cache->index[j]>=0);
2998 else
2999 valid=1;
3000 if(valid
3001 && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
3002 && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
3003 && (s>0)))
3004 && (!chosen[j])
3005 && (label[j])
3006 && (!inconsistent[j]))
3007 {
3008 selcrit[activedoc]=(double)label[j]*(learn_parm->eps-(double)label[j]*c[j]+(double)label[j]*lin[j]);
3009 /* selcrit[activedoc]=(double)label[j]*(-1.0+(double)label[j]*lin[j]); */
3010 key[activedoc]=j;
3011 activedoc++;
3012 }
3013 }
3014 select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
3015 for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
3016 /* if(learn_parm->biased_hyperplane || (selcrit[select[k]] > 0)) { */
3017 i=key[select[k]];
3018 chosen[i]=1;
3019 working2dnum[inum+choosenum]=i;
3020 choosenum+=1;
3021 if(kernel_cache)
3022 kernel_cache_touch(kernel_cache,i); /* make sure it does not get
3023 kicked out of cache */
3024 /* } */
3025 }
3026
3027 activedoc=0;
3028 for(i=0;(j=active2dnum[i])>=0;i++) {
3029 s=label[j];
3030 if(kernel_cache && cache_only)
3031 valid=(kernel_cache->index[j]>=0);
3032 else
3033 valid=1;
3034 if(valid
3035 && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
3036 && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
3037 && (s>0)))
3038 && (!chosen[j])
3039 && (label[j])
3040 && (!inconsistent[j]))
3041 {
3042 selcrit[activedoc]=-(double)label[j]*(learn_parm->eps-(double)label[j]*c[j]+(double)label[j]*lin[j]);
3043 /* selcrit[activedoc]=-(double)(label[j]*(-1.0+(double)label[j]*lin[j])); */
3044 key[activedoc]=j;
3045 activedoc++;
3046 }
3047 }
3048 select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
3049 for(k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
3050 /* if(learn_parm->biased_hyperplane || (selcrit[select[k]] > 0)) { */
3051 i=key[select[k]];
3052 chosen[i]=1;
3053 working2dnum[inum+choosenum]=i;
3054 choosenum+=1;
3055 if(kernel_cache)
3056 kernel_cache_touch(kernel_cache,i); /* make sure it does not get
3057 kicked out of cache */
3058 /* } */
3059 }
3060 working2dnum[inum+choosenum]=-1; /* complete index */
3061 return(choosenum);
3062 }
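/* Sketch of the selection above: two symmetric passes over the active
   examples. The first pass ranks candidates for the direction s = -y
   by selcrit = y_j * (eps - y_j c_j + y_j lin_j), roughly the gradient
   component in that direction, and takes up to qp_size/2 of the top
   ones; the second pass does the same for s = +y. Variables already at
   the bound that the direction points to are skipped, so the combined
   set is a feasible direction of at most qp_size variables. */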
3063
3064 long select_next_qp_subproblem_rand(long int *label,
3065 long int *unlabeled,
3066 double *a, double *lin,
3067 double *c, long int totdoc,
3068 long int qp_size,
3069 LEARN_PARM *learn_parm,
3070 long int *inconsistent,
3071 long int *active2dnum,
3072 long int *working2dnum,
3073 double *selcrit,
3074 long int *select,
3075 KERNEL_CACHE *kernel_cache,
3076 long int *key,
3077 long int *chosen,
3078 long int iteration)
3079 /* Use the feasible direction approach to select the next
3080 qp-subproblem (see section 'Selecting a good working set'). Chooses
3081 a feasible direction at (pseudo) random to help jump over numerical
3082 problems. */
3083 {
3084 long choosenum,i,j,k,activedoc,inum;
3085 double s;
3086
3087 for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
3088 choosenum=0;
3089 activedoc=0;
3090 for(i=0;(j=active2dnum[i])>=0;i++) {
3091 s=-label[j];
3092 if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
3093 && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
3094 && (s>0)))
3095 && (!inconsistent[j])
3096 && (label[j])
3097 && (!chosen[j])) {
3098 selcrit[activedoc]=(j+iteration) % totdoc;
3099 key[activedoc]=j;
3100 activedoc++;
3101 }
3102 }
3103 select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
3104 for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
3105 i=key[select[k]];
3106 chosen[i]=1;
3107 working2dnum[inum+choosenum]=i;
3108 choosenum+=1;
3109 kernel_cache_touch(kernel_cache,i); /* make sure it does not get kicked */
3110 /* out of cache */
3111 }
3112
3113 activedoc=0;
3114 for(i=0;(j=active2dnum[i])>=0;i++) {
3115 s=label[j];
3116 if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
3117 && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
3118 && (s>0)))
3119 && (!inconsistent[j])
3120 && (label[j])
3121 && (!chosen[j])) {
3122 selcrit[activedoc]=(j+iteration) % totdoc;
3123 key[activedoc]=j;
3124 activedoc++;
3125 }
3126 }
3127 select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
3128 for(k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
3129 i=key[select[k]];
3130 chosen[i]=1;
3131 working2dnum[inum+choosenum]=i;
3132 choosenum+=1;
3133 kernel_cache_touch(kernel_cache,i); /* make sure it does not get kicked */
3134 /* out of cache */
3135 }
3136 working2dnum[inum+choosenum]=-1; /* complete index */
3137 return(choosenum);
3138 }
3139
3140 long select_next_qp_slackset(DOC **docs, long int *label,
3141 double *a, double *lin,
3142 double *slack, double *alphaslack,
3143 double *c,
3144 LEARN_PARM *learn_parm,
3145 long int *active2dnum, double *maxviol)
3146 /* returns the slackset with the largest internal violation */
3147 {
3148 long i,ii,maxdiffid;
3149 double dist,target,maxdiff,ex_c;
3150
3151 maxdiff=0;
3152 maxdiffid=0;
3153 for(ii=0;(i=active2dnum[ii])>=0;ii++) {
3154 ex_c=learn_parm->svm_c-learn_parm->epsilon_a;
3155 if(alphaslack[docs[i]->slackid] >= ex_c) {
3156 dist=(lin[i])*(double)label[i]+slack[docs[i]->slackid]; /* distance */
3157 target=-(learn_parm->eps-(double)label[i]*c[i]); /* rhs of constraint */
3158 if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
3159 if((dist-target)>maxdiff) { /* largest violation */
3160 maxdiff=dist-target;
3161 maxdiffid=docs[i]->slackid;
3162 }
3163 }
3164 }
3165 }
3166 (*maxviol)=maxdiff;
3167 return(maxdiffid);
3168 }
3169
3170
3171 void select_top_n(double *selcrit, long int range, long int *select,
3172 long int n)
3173 {
3174 register long i,j;
3175
3176 for(i=0;(i<n) && (i<range);i++) { /* Initialize with the first n elements */
3177 for(j=i;j>=0;j--) {
3178 if((j>0) && (selcrit[select[j-1]]<selcrit[i])){
3179 select[j]=select[j-1];
3180 }
3181 else {
3182 select[j]=i;
3183 j=-1;
3184 }
3185 }
3186 }
3187 if(n>0) {
3188 for(i=n;i<range;i++) {
3189 if(selcrit[i]>selcrit[select[n-1]]) {
3190 for(j=n-1;j>=0;j--) {
3191 if((j>0) && (selcrit[select[j-1]]<selcrit[i])) {
3192 select[j]=select[j-1];
3193 }
3194 else {
3195 select[j]=i;
3196 j=-1;
3197 }
3198 }
3199 }
3200 }
3201 }
3202 }
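/* select_top_n is a partial insertion sort: it leaves in
   select[0..n-1] the indices of the n largest selcrit values, in
   descending order, in O(range * n) time. For example (sketch), with
   selcrit = {0.1, 0.9, 0.5} and n = 2 it yields select = {1, 2}. */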
3203
3204
3205 /******************************** Shrinking *********************************/
3206
3207 void init_shrink_state(SHRINK_STATE *shrink_state, long int totdoc,
3208 long int maxhistory)
3209 {
3210 long i;
3211
3212 shrink_state->deactnum=0;
3213 shrink_state->active = (long *)my_malloc(sizeof(long)*totdoc);
3214 shrink_state->inactive_since = (long *)my_malloc(sizeof(long)*totdoc);
3215 shrink_state->a_history = (double **)my_malloc(sizeof(double *)*maxhistory);
3216 shrink_state->maxhistory=maxhistory;
3217 shrink_state->last_lin = (double *)my_malloc(sizeof(double)*totdoc);
3218 shrink_state->last_a = (double *)my_malloc(sizeof(double)*totdoc);
3219
3220 for(i=0;i<totdoc;i++) {
3221 shrink_state->active[i]=1;
3222 shrink_state->inactive_since[i]=0;
3223 shrink_state->last_a[i]=0;
3224 shrink_state->last_lin[i]=0;
3225 }
3226 }
3227
3228 void shrink_state_cleanup(SHRINK_STATE *shrink_state)
3229 {
3230 free(shrink_state->active);
3231 free(shrink_state->inactive_since);
3232 if(shrink_state->deactnum > 0)
3233 free(shrink_state->a_history[shrink_state->deactnum-1]);
3234 free(shrink_state->a_history);
3235 free(shrink_state->last_a);
3236 free(shrink_state->last_lin);
3237 }
3238
3239 long shrink_problem(DOC **docs,
3240 LEARN_PARM *learn_parm,
3241 SHRINK_STATE *shrink_state,
3242 KERNEL_PARM *kernel_parm,
3243 long int *active2dnum,
3244 long int *last_suboptimal_at,
3245 long int iteration,
3246 long int totdoc,
3247 long int minshrink,
3248 double *a,
3249 long int *inconsistent)
3250 /* Shrink some variables away. Do the shrinking only if at least
3251 minshrink variables can be removed. */
3252 {
3253 long i,ii,change,activenum,lastiter;
3254 double *a_old;
3255
3256 activenum=0;
3257 change=0;
3258 for(ii=0;active2dnum[ii]>=0;ii++) {
3259 i=active2dnum[ii];
3260 activenum++;
3261 if(learn_parm->sharedslack)
3262 lastiter=last_suboptimal_at[docs[i]->slackid];
3263 else
3264 lastiter=last_suboptimal_at[i];
3265 if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
3266 || (inconsistent[i])) {
3267 change++;
3268 }
3269 }
3270 if((change>=minshrink) /* shrink only if sufficiently many candidates */
3271 && (shrink_state->deactnum<shrink_state->maxhistory)) { /* and enough memory */
3272 /* Shrink problem by removing those variables which are */
3273 /* optimal at a bound for a minimum number of iterations */
3274 if(verbosity>=2) {
3275 printf(" Shrinking..."); fflush(stdout);
3276 }
3277 if(kernel_parm->kernel_type != LINEAR) { /* non-linear case: save alphas */
3278 a_old=(double *)my_malloc(sizeof(double)*totdoc);
3279 shrink_state->a_history[shrink_state->deactnum]=a_old;
3280 for(i=0;i<totdoc;i++) {
3281 a_old[i]=a[i];
3282 }
3283 }
3284 for(ii=0;active2dnum[ii]>=0;ii++) {
3285 i=active2dnum[ii];
3286 if(learn_parm->sharedslack)
3287 lastiter=last_suboptimal_at[docs[i]->slackid];
3288 else
3289 lastiter=last_suboptimal_at[i];
3290 if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
3291 || (inconsistent[i])) {
3292 shrink_state->active[i]=0;
3293 shrink_state->inactive_since[i]=shrink_state->deactnum;
3294 }
3295 }
3296 activenum=compute_index(shrink_state->active,totdoc,active2dnum);
3297 shrink_state->deactnum++;
3298 if(kernel_parm->kernel_type == LINEAR) {
3299 shrink_state->deactnum=0;
3300 }
3301 if(verbosity>=2) {
3302 printf("done.\n"); fflush(stdout);
3303 printf(" Number of inactive variables = %ld\n",totdoc-activenum);
3304 }
3305 }
3306 return(activenum);
3307 }
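/* Shrinking criterion used above: a variable is removed from the
   active set once it (or, with shared slacks, its slack set) has been
   at a bound and seemingly optimal for more than svm_iter_to_shrink
   iterations, or once it is marked inconsistent. In the non-linear
   case the current alphas are snapshotted into a_history so that
   reactivate_inactive_examples can later reconstruct lin[] for the
   shrunk variables from the accumulated changes. */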
3308
3309
3310 void reactivate_inactive_examples(long int *label,
3311 long int *unlabeled,
3312 double *a,
3313 SHRINK_STATE *shrink_state,
3314 double *lin,
3315 double *c,
3316 long int totdoc,
3317 long int totwords,
3318 long int iteration,
3319 LEARN_PARM *learn_parm,
3320 long int *inconsistent,
3321 DOC **docs,
3322 KERNEL_PARM *kernel_parm,
3323 KERNEL_CACHE *kernel_cache,
3324 MODEL *model,
3325 CFLOAT *aicache,
3326 double *weights,
3327 double *maxdiff)
3328 /* Make all variables active again which had been removed by
3329 shrinking. */
3330 /* Computes lin for those variables from scratch. */
3331 {
3332 register long i,j,ii,jj,t,*changed2dnum,*inactive2dnum;
3333 long *changed,*inactive;
3334 register double kernel_val,*a_old,dist;
3335 double ex_c,target;
3336 SVECTOR *f;
3337
3338 if(kernel_parm->kernel_type == LINEAR) { /* special linear case */
3339 a_old=shrink_state->last_a;
3340 clear_vector_n(weights,totwords);
3341 for(i=0;i<totdoc;i++) {
3342 if(a[i] != a_old[i]) {
3343 for(f=docs[i]->fvec;f;f=f->next)
3344 add_vector_ns(weights,f,
3345 f->factor*((a[i]-a_old[i])*(double)label[i]));
3346 a_old[i]=a[i];
3347 }
3348 }
3349 for(i=0;i<totdoc;i++) {
3350 if(!shrink_state->active[i]) {
3351 for(f=docs[i]->fvec;f;f=f->next)
3352 lin[i]=shrink_state->last_lin[i]+f->factor*sprod_ns(weights,f);
3353 }
3354 shrink_state->last_lin[i]=lin[i];
3355 }
3356 }
3357 else {
3358 changed=(long *)my_malloc(sizeof(long)*totdoc);
3359 changed2dnum=(long *)my_malloc(sizeof(long)*(totdoc+11));
3360 inactive=(long *)my_malloc(sizeof(long)*totdoc);
3361 inactive2dnum=(long *)my_malloc(sizeof(long)*(totdoc+11));
3362 for(t=shrink_state->deactnum-1;(t>=0) && shrink_state->a_history[t];t--) {
3363 if(verbosity>=2) {
3364 printf("%ld..",t); fflush(stdout);
3365 }
3366 a_old=shrink_state->a_history[t];
3367 for(i=0;i<totdoc;i++) {
3368 inactive[i]=((!shrink_state->active[i])
3369 && (shrink_state->inactive_since[i] == t));
3370 changed[i]= (a[i] != a_old[i]);
3371 }
3372 compute_index(inactive,totdoc,inactive2dnum);
3373 compute_index(changed,totdoc,changed2dnum);
3374
3375 for(ii=0;(i=changed2dnum[ii])>=0;ii++) {
3376 get_kernel_row(kernel_cache,docs,i,totdoc,inactive2dnum,aicache,
3377 kernel_parm);
3378 for(jj=0;(j=inactive2dnum[jj])>=0;jj++) {
3379 kernel_val=aicache[j];
3380 lin[j]+=(((a[i]*kernel_val)-(a_old[i]*kernel_val))*(double)label[i]);
3381 }
3382 }
3383 }
3384 free(changed);
3385 free(changed2dnum);
3386 free(inactive);
3387 free(inactive2dnum);
3388 }
3389 (*maxdiff)=0;
3390 for(i=0;i<totdoc;i++) {
3391 shrink_state->inactive_since[i]=shrink_state->deactnum-1;
3392 if(!inconsistent[i]) {
3393 dist=(lin[i]-model->b)*(double)label[i];
3394 target=-(learn_parm->eps-(double)label[i]*c[i]);
3395 ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
3396 if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
3397 if((dist-target)>(*maxdiff)) /* largest violation */
3398 (*maxdiff)=dist-target;
3399 }
3400 else if((a[i]<ex_c) && (dist < target)) {
3401 if((target-dist)>(*maxdiff)) /* largest violation */
3402 (*maxdiff)=target-dist;
3403 }
3404 if((a[i]>(0+learn_parm->epsilon_a))
3405 && (a[i]<ex_c)) {
3406 shrink_state->active[i]=1; /* not at bound */
3407 }
3408 else if((a[i]<=(0+learn_parm->epsilon_a)) && (dist < (target+learn_parm->epsilon_shrink))) {
3409 shrink_state->active[i]=1;
3410 }
3411 else if((a[i]>=ex_c)
3412 && (dist > (target-learn_parm->epsilon_shrink))) {
3413 shrink_state->active[i]=1;
3414 }
3415 else if(learn_parm->sharedslack) { /* make all active when sharedslack */
3416 shrink_state->active[i]=1;
3417 }
3418 }
3419 }
3420 if(kernel_parm->kernel_type != LINEAR) { /* update history for non-linear */
3421 for(i=0;i<totdoc;i++) {
3422 (shrink_state->a_history[shrink_state->deactnum-1])[i]=a[i];
3423 }
3424 for(t=shrink_state->deactnum-2;(t>=0) && shrink_state->a_history[t];t--) {
3425 free(shrink_state->a_history[t]);
3426 shrink_state->a_history[t]=0;
3427 }
3428 }
3429 }
3430
3431 /****************************** Cache handling *******************************/
3432
3433 void get_kernel_row(KERNEL_CACHE *kernel_cache, DOC **docs,
3434 long int docnum, long int totdoc,
3435 long int *active2dnum, CFLOAT *buffer,
3436 KERNEL_PARM *kernel_parm)
3437 /* Gets a row of the matrix of kernel values. This matrix has the
3438 same form as the Hessian, except that the elements are not
3439 multiplied by */
3440 /* y_i * y_j * a_i * a_j */
3441 /* Takes the values from the cache if available. */
3442 {
3443 register long i,j,start;
3444 DOC *ex;
3445
3446 ex=docs[docnum];
3447
3448 if(kernel_cache->index[docnum] != -1) { /* row is cached? */
3449 kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */
3450 start=kernel_cache->activenum*kernel_cache->index[docnum];
3451 for(i=0;(j=active2dnum[i])>=0;i++) {
3452 if(kernel_cache->totdoc2active[j] >= 0) { /* column is cached? */
3453 buffer[j]=kernel_cache->buffer[start+kernel_cache->totdoc2active[j]];
3454 }
3455 else {
3456 buffer[j]=(CFLOAT)kernel(kernel_parm,ex,docs[j]);
3457 }
3458 }
3459 }
3460 else {
3461 for(i=0;(j=active2dnum[i])>=0;i++) {
3462 buffer[j]=(CFLOAT)kernel(kernel_parm,ex,docs[j]);
3463 }
3464 }
3465 }
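
/* Usage sketch (illustrative; "row" and "sum" are made-up names, while
   i, a, label and active2dnum follow the conventions of this file).
   This mirrors how the optimizer consumes a kernel row:

       double sum=0;
       CFLOAT *row=(CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
       get_kernel_row(kernel_cache,docs,i,totdoc,active2dnum,row,
                      kernel_parm);
       for(jj=0;(j=active2dnum[jj])>=0;jj++)
         sum+=a[j]*(double)label[j]*row[j];

   Only the entries indexed by active2dnum are written; all other
   positions in the buffer keep whatever they previously contained. */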
3466
3467
3468 void cache_kernel_row(KERNEL_CACHE *kernel_cache, DOC **docs,
3469 long int m, KERNEL_PARM *kernel_parm)
3470 /* Fills cache for the row m */
3471 {
3472 register DOC *ex;
3473 register long j,k,l;
3474 register CFLOAT *cache;
3475
3476 if(!kernel_cache_check(kernel_cache,m)) { /* not cached yet */
3477 cache = kernel_cache_clean_and_malloc(kernel_cache,m);
3478 if(cache) {
3479 l=kernel_cache->totdoc2active[m];
3480 ex=docs[m];
3481 for(j=0;j<kernel_cache->activenum;j++) { /* fill cache */
3482 k=kernel_cache->active2totdoc[j];
3483 if((kernel_cache->index[k] != -1) && (l != -1) && (k != m)) {
3484 cache[j]=kernel_cache->buffer[kernel_cache->activenum
3485 *kernel_cache->index[k]+l];
3486 }
3487 else {
3488 cache[j]=kernel(kernel_parm,ex,docs[k]);
3489 }
3490 }
3491 }
3492 else {
3493 fprintf(stderr,"Error: Kernel cache full! => increase cache size\n");
3494 }
3495 }
3496 }
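
/* Note on the copy branch above: if row k is itself already cached
   and m has an active column index l, the value K(m,k) is taken from
   row k via the symmetry K(m,k)=K(k,m), saving one kernel evaluation
   per already-cached row. */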
3497
3498
3499 void cache_multiple_kernel_rows(KERNEL_CACHE *kernel_cache, DOC **docs,
3500 long int *key, long int varnum,
3501 KERNEL_PARM *kernel_parm)
3502 /* Fills cache for the rows in key */
3503 {
3504 register long i;
3505
3506 for(i=0;i<varnum;i++) { /* fill up kernel cache */
3507 cache_kernel_row(kernel_cache,docs,key[i],kernel_parm);
3508 }
3509 }
3510
3511
3512 void kernel_cache_shrink(KERNEL_CACHE *kernel_cache, long int totdoc,
3513 long int numshrink, long int *after)
3514 /* Remove numshrink columns in the cache which correspond to
3515 examples marked 0 in after. */
3516 {
3517 register long i,j,jj,from=0,to=0,scount;
3518 long *keep;
3519
3520 if(verbosity>=2) {
3521 printf(" Reorganizing cache..."); fflush(stdout);
3522 }
3523
3524 keep=(long *)my_malloc(sizeof(long)*totdoc);
3525 for(j=0;j<totdoc;j++) {
3526 keep[j]=1;
3527 }
3528 scount=0;
3529 for(jj=0;(jj<kernel_cache->activenum) && (scount<numshrink);jj++) {
3530 j=kernel_cache->active2totdoc[jj];
3531 if(!after[j]) {
3532 scount++;
3533 keep[j]=0;
3534 }
3535 }
3536
3537 for(i=0;i<kernel_cache->max_elems;i++) {
3538 for(jj=0;jj<kernel_cache->activenum;jj++) {
3539 j=kernel_cache->active2totdoc[jj];
3540 if(!keep[j]) {
3541 from++;
3542 }
3543 else {
3544 kernel_cache->buffer[to]=kernel_cache->buffer[from];
3545 to++;
3546 from++;
3547 }
3548 }
3549 }
3550
3551 kernel_cache->activenum=0;
3552 for(j=0;j<totdoc;j++) {
3553 if((keep[j]) && (kernel_cache->totdoc2active[j] != -1)) {
3554 kernel_cache->active2totdoc[kernel_cache->activenum]=j;
3555 kernel_cache->totdoc2active[j]=kernel_cache->activenum;
3556 kernel_cache->activenum++;
3557 }
3558 else {
3559 kernel_cache->totdoc2active[j]=-1;
3560 }
3561 }
3562
3563 kernel_cache->max_elems=(long)(kernel_cache->buffsize/kernel_cache->activenum);
3564 if(kernel_cache->max_elems>totdoc) {
3565 kernel_cache->max_elems=totdoc;
3566 }
3567
3568 free(keep);
3569
3570 if(verbosity>=2) {
3571 printf("done.\n"); fflush(stdout);
3572 printf(" Cache-size in rows = %ld\n",kernel_cache->max_elems);
3573 }
3574 }
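
/* A small worked example of the compaction above (illustrative
   numbers): with activenum=4, active2totdoc={5,8,9,12} and example 9
   marked for shrinking, every cached row

       [ K(.,5) K(.,8) K(.,9) K(.,12) ]  becomes  [ K(.,5) K(.,8) K(.,12) ]

   through the from/to copy loop; afterwards activenum=3, the index
   maps are rebuilt, and totdoc2active[9]=-1. */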
3575
3576 KERNEL_CACHE *kernel_cache_init(long int totdoc, long int buffsize)
3577 {
3578 long i;
3579 KERNEL_CACHE *kernel_cache;
3580
3581 kernel_cache=(KERNEL_CACHE *)my_malloc(sizeof(KERNEL_CACHE));
3582 kernel_cache->index = (long *)my_malloc(sizeof(long)*totdoc);
3583 kernel_cache->occu = (long *)my_malloc(sizeof(long)*totdoc);
3584 kernel_cache->lru = (long *)my_malloc(sizeof(long)*totdoc);
3585 kernel_cache->invindex = (long *)my_malloc(sizeof(long)*totdoc);
3586 kernel_cache->active2totdoc = (long *)my_malloc(sizeof(long)*totdoc);
3587 kernel_cache->totdoc2active = (long *)my_malloc(sizeof(long)*totdoc);
3588 kernel_cache->buffer = (CFLOAT *)my_malloc((size_t)(buffsize)*1024*1024);
3589
3590 kernel_cache->buffsize=(long)(buffsize/sizeof(CFLOAT)*1024*1024);
3591
3592 kernel_cache->max_elems=(long)(kernel_cache->buffsize/totdoc);
3593 if(kernel_cache->max_elems>totdoc) {
3594 kernel_cache->max_elems=totdoc;
3595 }
3596
3597 if(verbosity>=2) {
3598 printf(" Cache-size in rows = %ld\n",kernel_cache->max_elems);
3599 printf(" Kernel evals so far: %ld\n",kernel_cache_statistic);
3600 }
3601
3602 kernel_cache->elems=0; /* initialize cache */
3603 for(i=0;i<totdoc;i++) {
3604 kernel_cache->index[i]=-1;
3605 kernel_cache->lru[i]=0;
3606 }
3607 for(i=0;i<totdoc;i++) {
3608 kernel_cache->occu[i]=0;
3609 kernel_cache->invindex[i]=-1;
3610 }
3611
3612 kernel_cache->activenum=totdoc;
3613 for(i=0;i<totdoc;i++) {
3614 kernel_cache->active2totdoc[i]=i;
3615 kernel_cache->totdoc2active[i]=i;
3616 }
3617
3618 kernel_cache->time=0;
3619
3620 return(kernel_cache);
3621 }
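
/* Sizing sketch (illustrative numbers): buffsize is given in MB, so
   with buffsize=40 and sizeof(CFLOAT)=4 the buffer holds
   40/4*1024*1024 = 10485760 kernel values. For totdoc=100000 examples
   that gives max_elems = 10485760/100000 = 104 cacheable rows; for
   totdoc=1000 it would be capped at totdoc, i.e. 1000 rows. */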
3622
3623 void kernel_cache_reset_lru(KERNEL_CACHE *kernel_cache)
3624 {
3625 long maxlru=0,k;
3626
3627 for(k=0;k<kernel_cache->max_elems;k++) {
3628 if(maxlru < kernel_cache->lru[k])
3629 maxlru=kernel_cache->lru[k];
3630 }
3631 for(k=0;k<kernel_cache->max_elems;k++) {
3632 kernel_cache->lru[k]-=maxlru;
3633 }
3634 }
3635
3636 void kernel_cache_cleanup(KERNEL_CACHE *kernel_cache)
3637 {
3638 free(kernel_cache->index);
3639 free(kernel_cache->occu);
3640 free(kernel_cache->lru);
3641 free(kernel_cache->invindex);
3642 free(kernel_cache->active2totdoc);
3643 free(kernel_cache->totdoc2active);
3644 free(kernel_cache->buffer);
3645 free(kernel_cache);
3646 }
3647
3648 long kernel_cache_malloc(KERNEL_CACHE *kernel_cache)
3649 {
3650 long i;
3651
3652 if(kernel_cache_space_available(kernel_cache)) {
3653 for(i=0;i<kernel_cache->max_elems;i++) {
3654 if(!kernel_cache->occu[i]) {
3655 kernel_cache->occu[i]=1;
3656 kernel_cache->elems++;
3657 return(i);
3658 }
3659 }
3660 }
3661 return(-1);
3662 }
3663
3664 void kernel_cache_free(KERNEL_CACHE *kernel_cache, long int i)
3665 {
3666 kernel_cache->occu[i]=0;
3667 kernel_cache->elems--;
3668 }
3669
3670 long kernel_cache_free_lru(KERNEL_CACHE *kernel_cache)
3671 /* remove least recently used cache element */
3672 {
3673 register long k,least_elem=-1,least_time;
3674
3675 least_time=kernel_cache->time+1;
3676 for(k=0;k<kernel_cache->max_elems;k++) {
3677 if(kernel_cache->invindex[k] != -1) {
3678 if(kernel_cache->lru[k]<least_time) {
3679 least_time=kernel_cache->lru[k];
3680 least_elem=k;
3681 }
3682 }
3683 }
3684 if(least_elem != -1) {
3685 kernel_cache_free(kernel_cache,least_elem);
3686 kernel_cache->index[kernel_cache->invindex[least_elem]]=-1;
3687 kernel_cache->invindex[least_elem]=-1;
3688 return(1);
3689 }
3690 return(0);
3691 }
3692
3693
3694 CFLOAT *kernel_cache_clean_and_malloc(KERNEL_CACHE *kernel_cache,
3695 long int docnum)
3696 /* Get a free cache entry. If the cache is full, the least
3697 recently used element is evicted first. */
3698 {
3699 long result;
3700 if((result = kernel_cache_malloc(kernel_cache)) == -1) {
3701 if(kernel_cache_free_lru(kernel_cache)) {
3702 result = kernel_cache_malloc(kernel_cache);
3703 }
3704 }
3705 kernel_cache->index[docnum]=result;
3706 if(result == -1) {
3707 return(0);
3708 }
3709 kernel_cache->invindex[result]=docnum;
3710 kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */
3711 return(kernel_cache->buffer
3712 +(kernel_cache->activenum*
3713 kernel_cache->index[docnum]));
3714 }
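
/* Allocation protocol sketch (mirrors what cache_kernel_row() does):

       if(!kernel_cache_check(kernel_cache,m)) {
         CFLOAT *row=kernel_cache_clean_and_malloc(kernel_cache,m);
         if(row) {
           ... fill the row with kernel values ...
         }
       }

   A free slot is used if one exists; otherwise the least recently
   used row is evicted first. A NULL result means even eviction
   failed, i.e. the row cannot be cached at all. */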
3715
3716 long kernel_cache_touch(KERNEL_CACHE *kernel_cache, long int docnum)
3717 /* Update lru time to avoid removal from cache. */
3718 {
3719 if(kernel_cache && kernel_cache->index[docnum] != -1) {
3720 kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */
3721 return(1);
3722 }
3723 return(0);
3724 }
3725
3726 long kernel_cache_check(KERNEL_CACHE *kernel_cache, long int docnum)
3727 /* Is that row cached? */
3728 {
3729 return(kernel_cache->index[docnum] != -1);
3730 }
3731
3732 long kernel_cache_space_available(KERNEL_CACHE *kernel_cache)
3733 /* Is there room for one more row? */
3734 {
3735 return(kernel_cache->elems < kernel_cache->max_elems);
3736 }
3737
3738 /************************** Compute estimates ******************************/
3739
3740 void compute_xa_estimates(MODEL *model, long int *label,
3741 long int *unlabeled, long int totdoc,
3742 DOC **docs, double *lin, double *a,
3743 KERNEL_PARM *kernel_parm,
3744 LEARN_PARM *learn_parm, double *error,
3745 double *recall, double *precision)
3746 /* Computes xa-estimate of error rate, recall, and precision. See
3747 T. Joachims, Estimating the Generalization Performance of an SVM
3748 Efficiently, ICML, 2000. */
3749 {
3750 long i,looerror,looposerror,loonegerror;
3751 long totex,totposex;
3752 double xi,r_delta,r_delta_sq,sim=0;
3753 long *sv2dnum=NULL,*sv=NULL,svnum;
3754
3755 r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
3756 r_delta_sq=r_delta*r_delta;
3757
3758 looerror=0;
3759 looposerror=0;
3760 loonegerror=0;
3761 totex=0;
3762 totposex=0;
3763 svnum=0;
3764
3765 if(learn_parm->xa_depth > 0) {
3766 sv = (long *)my_malloc(sizeof(long)*(totdoc+11));
3767 for(i=0;i<totdoc;i++)
3768 sv[i]=0;
3769 for(i=1;i<model->sv_num;i++)
3770 if(a[model->supvec[i]->docnum]
3771 < (learn_parm->svm_cost[model->supvec[i]->docnum]
3772 -learn_parm->epsilon_a)) {
3773 sv[model->supvec[i]->docnum]=1;
3774 svnum++;
3775 }
3776 sv2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
3777 clear_index(sv2dnum);
3778 compute_index(sv,totdoc,sv2dnum);
3779 }
3780
3781 for(i=0;i<totdoc;i++) {
3782 if(unlabeled[i]) {
3783 /* ignore it */
3784 }
3785 else {
3786 xi=1.0-((lin[i]-model->b)*(double)label[i]);
3787 if(xi<0) xi=0;
3788 if(label[i]>0) {
3789 totposex++;
3790 }
3791 if((learn_parm->rho*a[i]*r_delta_sq+xi) >= 1.0) {
3792 if(learn_parm->xa_depth > 0) { /* makes assumptions */
3793 sim=distribute_alpha_t_greedily(sv2dnum,svnum,docs,a,i,label,
3794 kernel_parm,learn_parm,
3795 (double)((1.0-xi-a[i]*r_delta_sq)/(2.0*a[i])));
3796 }
3797 if((learn_parm->xa_depth == 0) ||
3798 ((a[i]*kernel(kernel_parm,docs[i],docs[i])+a[i]*2.0*sim+xi) >= 1.0)) {
3799 looerror++;
3800 if(label[i]>0) {
3801 looposerror++;
3802 }
3803 else {
3804 loonegerror++;
3805 }
3806 }
3807 }
3808 totex++;
3809 }
3810 }
3811
3812 (*error)=((double)looerror/(double)totex)*100.0;
3813 (*recall)=(1.0-(double)looposerror/(double)totposex)*100.0;
3814 (*precision)=(((double)totposex-(double)looposerror)
3815 /((double)totposex-(double)looposerror+(double)loonegerror))*100.0;
3816
3817 free(sv);
3818 free(sv2dnum);
3819 }
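
/* The counting rule above, written out (all quantities as computed in
   this function): example i is counted as a leave-one-out error iff

       rho * a[i] * r_delta_sq + xi >= 1,  xi = max(0, 1 - y_i*(lin[i]-b)),

   and, when xa_depth > 0, only if the tightened check
   a[i]*K(i,i) + 2*a[i]*sim + xi >= 1 also holds. The reported
   percentages are

       error     = looerror/totex * 100
       recall    = (1 - looposerror/totposex) * 100
       precision = (totposex-looposerror)
                     / (totposex-looposerror+loonegerror) * 100       */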
3820
3821
3822 double distribute_alpha_t_greedily(long int *sv2dnum, long int svnum,
3823 DOC **docs, double *a,
3824 long int docnum,
3825 long int *label,
3826 KERNEL_PARM *kernel_parm,
3827 LEARN_PARM *learn_parm, double thresh)
3828 /* Experimental Code improving plain XiAlpha Estimates by
3829 computing a better bound using a greedy optimization strategy. */
3830 {
3831 long best_depth=0;
3832 long i,j,k,d,skip,allskip;
3833 double best,best_val[101],val,init_val_sq,init_val_lin;
3834 long best_ex[101];
3835 CFLOAT *cache,*trow;
3836
3837 cache=(CFLOAT *)my_malloc(sizeof(CFLOAT)*learn_parm->xa_depth*svnum);
3838 trow = (CFLOAT *)my_malloc(sizeof(CFLOAT)*svnum);
3839
3840 for(k=0;k<svnum;k++) {
3841 trow[k]=kernel(kernel_parm,docs[docnum],docs[sv2dnum[k]]);
3842 }
3843
3844 init_val_sq=0;
3845 init_val_lin=0;
3846 best=0;
3847
3848 for(d=0;d<learn_parm->xa_depth;d++) {
3849 allskip=1;
3850 if(d>=1) {
3851 init_val_sq+=cache[best_ex[d-1]+svnum*(d-1)];
3852 for(k=0;k<d-1;k++) {
3853 init_val_sq+=2.0*cache[best_ex[k]+svnum*(d-1)];
3854 }
3855 init_val_lin+=trow[best_ex[d-1]];
3856 }
3857 for(i=0;i<svnum;i++) {
3858 skip=0;
3859 if(sv2dnum[i] == docnum) skip=1;
3860 for(j=0;j<d;j++) {
3861 if(i == best_ex[j]) skip=1;
3862 }
3863
3864 if(!skip) {
3865 val=init_val_sq;
3866 if(kernel_parm->kernel_type == LINEAR)
3867 val+=docs[sv2dnum[i]]->fvec->twonorm_sq;
3868 else
3869 val+=kernel(kernel_parm,docs[sv2dnum[i]],docs[sv2dnum[i]]);
3870 for(j=0;j<d;j++) {
3871 val+=2.0*cache[i+j*svnum];
3872 }
3873 val*=(1.0/(2.0*(d+1.0)*(d+1.0)));
3874 val-=((init_val_lin+trow[i])/(d+1.0));
3875
3876 if(allskip || (val < best_val[d])) {
3877 best_val[d]=val;
3878 best_ex[d]=i;
3879 }
3880 allskip=0;
3881 if(val < thresh) {
3882 i=svnum;
3883 /* printf("EARLY"); */
3884 }
3885 }
3886 }
3887 if(!allskip) {
3888 for(k=0;k<svnum;k++) {
3889 cache[d*svnum+k]=kernel(kernel_parm,
3890 docs[sv2dnum[best_ex[d]]],
3891 docs[sv2dnum[k]]);
3892 }
3893 }
3894 if((!allskip) && ((best_val[d] < best) || (d == 0))) {
3895 best=best_val[d];
3896 best_depth=d;
3897 }
3898 if(allskip || (best < thresh)) {
3899 d=learn_parm->xa_depth;
3900 }
3901 }
3902
3903 free(cache);
3904 free(trow);
3905
3906 /* printf("Distribute[%ld](%ld)=%f, ",docnum,best_depth,best); */
3907 return(best);
3908 }
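
/* A reading of the score minimized above (reconstructed from the
   code, so treat it as a sketch): with t the left-out example docnum,
   s_1..s_d the support vectors picked so far (best_ex[]) and i the
   current candidate,

       val = ||phi(s_1)+...+phi(s_d)+phi(i)||^2 / (2*(d+1)*(d+1))
             - ( K(t,s_1)+...+K(t,s_d)+K(t,i) ) / (d+1)

   where phi() is the feature map of the kernel. The inner loop keeps
   the candidate with the smallest val, and both loops exit early once
   the running best drops below thresh. */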
3909
3910
3911 void estimate_transduction_quality(MODEL *model, long int *label,
3912 long int *unlabeled,
3913 long int totdoc, DOC **docs, double *lin)
3914 /* Loo-bound based on the observation that loo-errors must be
3915 distributed equally over both training and test examples, given
3916 that the test examples are classified correctly. Compare the
3917 chapter "Constraints on the Transductive Hyperplane" in my
3918 dissertation. */
3919 {
3920 long i,j,l=0,ulab=0,lab=0,labpos=0,labneg=0,ulabpos=0,ulabneg=0,totulab=0;
3921 double totlab=0,totlabpos=0,totlabneg=0,labsum=0,ulabsum=0;
3922 double r_delta,r_delta_sq,xi,xisum=0,asum=0;
3923
3924 r_delta=estimate_r_delta(docs,totdoc,&(model->kernel_parm));
3925 r_delta_sq=r_delta*r_delta;
3926
3927 for(j=0;j<totdoc;j++) {
3928 if(unlabeled[j]) {
3929 totulab++;
3930 }
3931 else {
3932 totlab++;
3933 if(label[j] > 0)
3934 totlabpos++;
3935 else
3936 totlabneg++;
3937 }
3938 }
3939 for(j=1;j<model->sv_num;j++) {
3940 i=model->supvec[j]->docnum;
3941 xi=1.0-((lin[i]-model->b)*(double)label[i]);
3942 if(xi<0) xi=0;
3943
3944 xisum+=xi;
3945 asum+=fabs(model->alpha[j]);
3946 if(unlabeled[i]) {
3947 ulabsum+=(fabs(model->alpha[j])*r_delta_sq+xi);
3948 }
3949 else {
3950 labsum+=(fabs(model->alpha[j])*r_delta_sq+xi);
3951 }
3952 if((fabs(model->alpha[j])*r_delta_sq+xi) >= 1) {
3953 l++;
3954 if(unlabeled[model->supvec[j]->docnum]) {
3955 ulab++;
3956 if(model->alpha[j] > 0)
3957 ulabpos++;
3958 else
3959 ulabneg++;
3960 }
3961 else {
3962 lab++;
3963 if(model->alpha[j] > 0)
3964 labpos++;
3965 else
3966 labneg++;
3967 }
3968 }
3969 }
3970 printf("xacrit>=1: labeledpos=%.5f labeledneg=%.5f default=%.5f\n",(double)labpos/(double)totlab*100.0,(double)labneg/(double)totlab*100.0,(double)totlabpos/(double)(totlab)*100.0);
3971 printf("xacrit>=1: unlabelpos=%.5f unlabelneg=%.5f\n",(double)ulabpos/(double)totulab*100.0,(double)ulabneg/(double)totulab*100.0);
3972 printf("xacrit>=1: labeled=%.5f unlabeled=%.5f all=%.5f\n",(double)lab/(double)totlab*100.0,(double)ulab/(double)totulab*100.0,(double)l/(double)(totdoc)*100.0);
3973 printf("xacritsum: labeled=%.5f unlabeled=%.5f all=%.5f\n",(double)labsum/(double)totlab*100.0,(double)ulabsum/(double)totulab*100.0,(double)(labsum+ulabsum)/(double)(totdoc)*100.0);
3974 printf("r_delta_sq=%.5f xisum=%.5f asum=%.5f\n",r_delta_sq,xisum,asum);
3975 }
3976
3977 double estimate_margin_vcdim(MODEL *model, double w, double R,
3978 KERNEL_PARM *kernel_parm)
3979 /* w: length of model vector in feature space; computed from model if negative */
3980 /* R: radius of ball containing the data; estimated via estimate_sphere() if negative */
3981 {
3982 double h;
3983
3984 /* follows chapter 5.6.4 in [Vapnik/95] */
3985
3986 if(w<0) {
3987 w=model_length_s(model,kernel_parm);
3988 }
3989 if(R<0) {
3990 R=estimate_sphere(model,kernel_parm);
3991 }
3992 h = w*w * R*R +1;
3993 return(h);
3994 }
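
/* Numeric sketch (illustrative values): with w=10 and R=1 the bound
   gives h = 10*10 * 1*1 + 1 = 101, i.e. a margin-based capacity of
   101 independent of the input dimensionality. */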
3995
3996 double estimate_sphere(MODEL *model, KERNEL_PARM *kernel_parm)
3997 /* Approximates the radius of the ball containing */
3998 /* the support vectors by bounding it with the */
3999 { /* length of the longest support vector. This is */
4000 register long j; /* pretty good for text categorization, since all */
4001 double xlen,maxxlen=0; /* documents have feature vectors of length 1. It */
4002 DOC *nulldoc; /* assumes that the center of the ball is at the */
4003 WORD nullword; /* origin of the space. */
4004
4005 nullword.wnum=0;
4006 nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
4007
4008 for(j=1;j<model->sv_num;j++) {
4009 xlen=sqrt(kernel(kernel_parm,model->supvec[j],model->supvec[j])
4010 -2*kernel(kernel_parm,model->supvec[j],nulldoc)
4011 +kernel(kernel_parm,nulldoc,nulldoc));
4012 if(xlen>maxxlen) {
4013 maxxlen=xlen;
4014 }
4015 }
4016
4017 free_example(nulldoc,1);
4018 return(maxxlen);
4019 }
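
/* The null-document trick used above (and in estimate_r_delta() and
   estimate_r_delta_average() below): for the feature map phi()
   induced by the kernel,

       ||phi(x)-phi(0)||^2 = K(x,x) - 2*K(x,0) + K(0,0),

   so each sqrt(...) term is the distance of an example from the image
   of the empty document, which stands in for the origin. */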
4020
4021 double estimate_r_delta(DOC **docs, long int totdoc, KERNEL_PARM *kernel_parm)
4022 {
4023 long i;
4024 double maxxlen,xlen;
4025 DOC *nulldoc; /* assumes that the center of the ball is at the */
4026 WORD nullword; /* origin of the space. */
4027
4028 nullword.wnum=0;
4029 nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
4030
4031 maxxlen=0;
4032 for(i=0;i<totdoc;i++) {
4033 xlen=sqrt(kernel(kernel_parm,docs[i],docs[i])
4034 -2*kernel(kernel_parm,docs[i],nulldoc)
4035 +kernel(kernel_parm,nulldoc,nulldoc));
4036 if(xlen>maxxlen) {
4037 maxxlen=xlen;
4038 }
4039 }
4040
4041 free_example(nulldoc,1);
4042 return(maxxlen);
4043 }
4044
4045 double estimate_r_delta_average(DOC **docs, long int totdoc,
4046 KERNEL_PARM *kernel_parm)
4047 {
4048 long i;
4049 double avgxlen;
4050 DOC *nulldoc; /* assumes that the center of the ball is at the */
4051 WORD nullword; /* origin of the space. */
4052
4053 nullword.wnum=0;
4054 nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
4055
4056 avgxlen=0;
4057 for(i=0;i<totdoc;i++) {
4058 avgxlen+=sqrt(kernel(kernel_parm,docs[i],docs[i])
4059 -2*kernel(kernel_parm,docs[i],nulldoc)
4060 +kernel(kernel_parm,nulldoc,nulldoc));
4061 }
4062
4063 free_example(nulldoc,1);
4064 return(avgxlen/totdoc);
4065 }
4066
4067 double length_of_longest_document_vector(DOC **docs, long int totdoc,
4068 KERNEL_PARM *kernel_parm)
4069 {
4070 long i;
4071 double maxxlen,xlen;
4072
4073 maxxlen=0;
4074 for(i=0;i<totdoc;i++) {
4075 xlen=sqrt(kernel(kernel_parm,docs[i],docs[i]));
4076 if(xlen>maxxlen) {
4077 maxxlen=xlen;
4078 }
4079 }
4080
4081 return(maxxlen);
4082 }
4083
4084 /****************************** IO-handling **********************************/
4085
4086 void write_prediction(char *predfile, MODEL *model, double *lin,
4087 double *a, long int *unlabeled,
4088 long int *label, long int totdoc,
4089 LEARN_PARM *learn_parm)
4090 {
4091 FILE *predfl;
4092 long i;
4093 double dist,a_max;
4094
4095 if(verbosity>=1) {
4096 printf("Writing prediction file..."); fflush(stdout);
4097 }
4098 if ((predfl = fopen (predfile, "w")) == NULL)
4099 { perror (predfile); exit (1); }
4100 a_max=learn_parm->epsilon_a;
4101 for(i=0;i<totdoc;i++) {
4102 if((unlabeled[i]) && (a[i]>a_max)) {
4103 a_max=a[i];
4104 }
4105 }
4106 for(i=0;i<totdoc;i++) {
4107 if(unlabeled[i]) {
4108 if((a[i]>(learn_parm->epsilon_a))) {
4109 dist=(double)label[i]*(1.0-learn_parm->epsilon_crit-a[i]/(a_max*2.0));
4110 }
4111 else {
4112 dist=(lin[i]-model->b);
4113 }
4114 if(dist>0) {
4115 fprintf(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist);
4116 }
4117 else {
4118 fprintf(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist);
4119 }
4120 }
4121 }
4122 fclose(predfl);
4123 if(verbosity>=1) {
4124 printf("done\n");
4125 }
4126 }
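
/* Output format sketch: every unlabeled example produces one line
   with both class options and their strengths, e.g. (made-up value)

       1.73205081:+1 -1.73205081:-1

   where the option with positive strength (+1 here) is the predicted
   class. */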
4127
4128 void write_alphas(char *alphafile, double *a,
4129 long int *label, long int totdoc)
4130 {
4131 FILE *alphafl;
4132 long i;
4133
4134 if(verbosity>=1) {
4135 printf("Writing alpha file..."); fflush(stdout);
4136 }
4137 if ((alphafl = fopen (alphafile, "w")) == NULL)
4138 { perror (alphafile); exit (1); }
4139 for(i=0;i<totdoc;i++) {
4140 fprintf(alphafl,"%.18g\n",a[i]*(double)label[i]);
4141 }
4142 fclose(alphafl);
4143 if(verbosity>=1) {
4144 printf("done\n");
4145 }
4146 }
4147