annotate toolboxes/SVM-light/src/svm_learn.c @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
rev   line source
wolffd@0 1 /***********************************************************************/
wolffd@0 2 /* */
wolffd@0 3 /* svm_learn.c */
wolffd@0 4 /* */
wolffd@0 5 /* Learning module of Support Vector Machine. */
wolffd@0 6 /* */
wolffd@0 7 /* Author: Thorsten Joachims */
wolffd@0 8 /* Date: 02.07.02 */
wolffd@0 9 /* */
wolffd@0 10 /* Copyright (c) 2002 Thorsten Joachims - All rights reserved */
wolffd@0 11 /* */
wolffd@0 12 /* This software is available for non-commercial use only. It must */
wolffd@0 13 /* not be modified and distributed without prior permission of the */
wolffd@0 14 /* author. The author is not responsible for implications from the */
wolffd@0 15 /* use of this software. */
wolffd@0 16 /* */
wolffd@0 17 /***********************************************************************/
wolffd@0 18
wolffd@0 19
wolffd@0 20 # include "svm_common.h"
wolffd@0 21 # include "svm_learn.h"
wolffd@0 22
wolffd@0 23
wolffd@0 24 /* interface to QP-solver */
wolffd@0 25 double *optimize_qp(QP *, double *, long, double *, LEARN_PARM *);
wolffd@0 26
wolffd@0 27 /*---------------------------------------------------------------------------*/
wolffd@0 28
/* Learns an SVM classification model based on the training data in
   docs/class. The resulting model is returned in the structure
   model.

   High-level flow:
     1. allocate working arrays and initialize the (empty) model,
     2. derive a default C from the average example radius if none given,
     3. optionally warm-start from a caller-supplied alpha vector,
     4. run optimize_to_convergence() to train,
     5. print statistics / XiAlpha estimates,
     6. optionally run leave-one-out estimation (retraining once per
        undecided held-out example, then retraining on the full set),
     7. write alphas to file if requested and free all working storage.

   NOTE(review): allocations use my_malloc() with no NULL checks here --
   presumably my_malloc aborts on failure; confirm in svm_common.c. */

void svm_learn_classification(DOC **docs, double *class, long int
			      totdoc, long int totwords,
			      LEARN_PARM *learn_parm,
			      KERNEL_PARM *kernel_parm,
			      KERNEL_CACHE *kernel_cache,
			      MODEL *model,
			      double *alpha)
     /* docs:        Training vectors (x-part) */
     /* class:       Training labels (y-part, zero if test example for
                     transduction) */
     /* totdoc:      Number of examples in docs/label */
     /* totwords:    Number of features (i.e. highest feature index) */
     /* learn_parm:  Learning parameters */
     /* kernel_parm: Kernel parameters */
     /* kernel_cache:Initialized Cache of size totdoc, if using a kernel.
                     NULL if linear.*/
     /* model:       Returns learning result (assumed empty before called) */
     /* alpha:       Start values for the alpha variables or NULL
	             pointer. The new alpha values are returned after
		     optimization if not NULL. Array must be of size totdoc. */
{
  long *inconsistent,i,*label;
  long inconsistentnum;
  long misclassified,upsupvecnum;
  double loss,model_length,example_length;
  double maxdiff,*lin,*a,*c;
  long runtime_start,runtime_end;
  long iterations;
  long *unlabeled,transduction;
  long heldout;
  long loo_count=0,loo_count_pos=0,loo_count_neg=0,trainpos=0,trainneg=0;
  long loocomputed=0,runtime_start_loo=0,runtime_start_xa=0;
  double heldout_c=0,r_delta_sq=0,r_delta,r_delta_avg;
  long *index,*index2dnum;
  double *weights;
  CFLOAT *aicache;  /* buffer to keep one row of hessian */

  double *xi_fullset; /* buffer for storing xi on full sample in loo */
  double *a_fullset;  /* buffer for storing alpha on full sample in loo */
  TIMING timing_profile;
  SHRINK_STATE shrink_state;

  /* reset all timing counters; they are filled in by the optimizer */
  runtime_start=get_runtime();
  timing_profile.time_kernel=0;
  timing_profile.time_opti=0;
  timing_profile.time_shrink=0;
  timing_profile.time_update=0;
  timing_profile.time_model=0;
  timing_profile.time_check=0;
  timing_profile.time_select=0;
  kernel_cache_statistic=0;

  learn_parm->totwords=totwords;

  /* make sure -n value is reasonable */
  if((learn_parm->svm_newvarsinqp < 2)
     || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
    learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
  }

  init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);

  /* working arrays, one slot per training example */
  label = (long *)my_malloc(sizeof(long)*totdoc);
  inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
  unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
  c = (double *)my_malloc(sizeof(double)*totdoc);
  a = (double *)my_malloc(sizeof(double)*totdoc);
  a_fullset = (double *)my_malloc(sizeof(double)*totdoc);
  xi_fullset = (double *)my_malloc(sizeof(double)*totdoc);
  lin = (double *)my_malloc(sizeof(double)*totdoc);
  learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
  /* +2: element 0 is reserved (see below) and sv_num counts from 1 */
  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
  model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
  model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));

  /* initialize the model to the empty state; -1 marks "not computed"
     for the error/recall/precision estimates */
  model->at_upper_bound=0;
  model->b=0;
  model->supvec[0]=0;  /* element 0 reserved and empty for now */
  model->alpha[0]=0;
  model->lin_weights=NULL;
  model->totwords=totwords;
  model->totdoc=totdoc;
  model->kernel_parm=(*kernel_parm);
  model->sv_num=1;
  model->loo_error=-1;
  model->loo_recall=-1;
  model->loo_precision=-1;
  model->xa_error=-1;
  model->xa_recall=-1;
  model->xa_precision=-1;
  inconsistentnum=0;
  transduction=0;

  r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
  r_delta_sq=r_delta*r_delta;

  r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
  if(learn_parm->svm_c == 0.0) {  /* default value for C */
    learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
    if(verbosity>=1)
      printf("Setting default regularization parameter C=%.4f\n",
	     learn_parm->svm_c);
  }

  learn_parm->eps=-1.0;   /* equivalent regression epsilon for
			     classification */

  for(i=0;i<totdoc;i++) {    /* various inits */
    docs[i]->docnum=i;
    inconsistent[i]=0;
    a[i]=0;
    lin[i]=0;
    c[i]=0.0;
    unlabeled[i]=0;
    /* class[i]==0 marks an unlabeled (transduction) example; positive
       and negative labels get their per-example upper bound C, scaled
       by the cost ratio for positives and the per-document costfactor */
    if(class[i] == 0) {
      unlabeled[i]=1;
      label[i]=0;
      transduction=1;
    }
    if(class[i] > 0) {
      learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
	docs[i]->costfactor;
      label[i]=1;
      trainpos++;
    }
    else if(class[i] < 0) {
      learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor;
      label[i]=-1;
      trainneg++;
    }
    else {
      learn_parm->svm_cost[i]=0;
    }
  }
  if(verbosity>=2) {
    printf("%ld positive, %ld negative, and %ld unlabeled examples.\n",trainpos,trainneg,totdoc-trainpos-trainneg); fflush(stdout);
  }

  /* caching makes no sense for linear kernel */
  if(kernel_parm->kernel_type == LINEAR) {
    kernel_cache = NULL;
  }

  /* compute starting state for initial alpha values (warm start) */
  if(alpha) {
    if(verbosity>=1) {
      printf("Computing starting state..."); fflush(stdout);
    }
    index = (long *)my_malloc(sizeof(long)*totdoc);
    index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
    weights=(double *)my_malloc(sizeof(double)*(totwords+1));
    aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
    for(i=0;i<totdoc;i++) {    /* create full index and clip alphas */
      index[i]=1;
      alpha[i]=fabs(alpha[i]);
      /* NOTE(review): fabs() above already makes alpha[i] >= 0, so
	 this <0 check can never trigger (dead code). */
      if(alpha[i]<0) alpha[i]=0;
      if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
    }
    if(kernel_parm->kernel_type != LINEAR) {
      /* unbounded SVs are the ones most often touched by the
	 optimizer, so cache their rows first */
      for(i=0;i<totdoc;i++)     /* fill kernel cache with unbounded SV */
	if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
	   && (kernel_cache_space_available(kernel_cache)))
	  cache_kernel_row(kernel_cache,docs,i,kernel_parm);
      for(i=0;i<totdoc;i++)     /* fill rest of kernel cache with bounded SV */
	if((alpha[i]==learn_parm->svm_cost[i])
	   && (kernel_cache_space_available(kernel_cache)))
	  cache_kernel_row(kernel_cache,docs,i,kernel_parm);
    }
    (void)compute_index(index,totdoc,index2dnum);
    update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
			    totwords,kernel_parm,kernel_cache,lin,aicache,
			    weights);
    (void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c,
			      learn_parm,index2dnum,index2dnum,model);
    for(i=0;i<totdoc;i++) {    /* copy initial alphas */
      a[i]=alpha[i];
    }
    free(index);
    free(index2dnum);
    free(weights);
    free(aicache);
    if(verbosity>=1) {
      printf("done.\n"); fflush(stdout);
    }
  }

  /* shrinking is incompatible with the transductive learner: disable
     it by pushing the shrink threshold out of reach */
  if(transduction) {
    learn_parm->svm_iter_to_shrink=99999999;
    if(verbosity >= 1)
      printf("\nDeactivating Shrinking due to an incompatibility with the transductive \nlearner in the current version.\n\n");
  }

  if(transduction && learn_parm->compute_loo) {
    learn_parm->compute_loo=0;
    if(verbosity >= 1)
      printf("\nCannot compute leave-one-out estimates for transductive learner.\n\n");
  }

  if(learn_parm->remove_inconsistent && learn_parm->compute_loo) {
    learn_parm->compute_loo=0;
    printf("\nCannot compute leave-one-out estimates when removing inconsistent examples.\n\n");
  }

  if(learn_parm->compute_loo && ((trainpos == 1) || (trainneg == 1))) {
    learn_parm->compute_loo=0;
    printf("\nCannot compute leave-one-out with only one example in one class.\n\n");
  }


  if(verbosity==1) {
    printf("Optimizing"); fflush(stdout);
  }

  /* train the svm */
  iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
				     kernel_parm,kernel_cache,&shrink_state,model,
				     inconsistent,unlabeled,a,lin,
				     c,&timing_profile,
				     &maxdiff,(long)-1,
				     (long)1);

  if(verbosity>=1) {
    if(verbosity==1) printf("done. (%ld iterations)\n",iterations);

    misclassified=0;
    for(i=0;(i<totdoc);i++) { /* get final statistic */
      /* an example is misclassified if it lies on the wrong side of
	 (or exactly on) the hyperplane lin[i]-b */
      if((lin[i]-model->b)*(double)label[i] <= 0.0)
	misclassified++;
    }

    printf("Optimization finished (%ld misclassified, maxdiff=%.5f).\n",
	   misclassified,maxdiff);

    /* get_runtime() apparently returns hundredths of cpu-seconds,
       hence the /100.0 below -- confirm in svm_common.c */
    runtime_end=get_runtime();
    if(verbosity>=2) {
      printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
	     ((float)runtime_end-(float)runtime_start)/100.0,
	     (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),
	     (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),
	     (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),
	     (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),
	     (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),
	     (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),
	     (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));
    }
    else {
      printf("Runtime in cpu-seconds: %.2f\n",
	     (runtime_end-runtime_start)/100.0);
    }

    if(learn_parm->remove_inconsistent) {
      inconsistentnum=0;
      for(i=0;i<totdoc;i++)
	if(inconsistent[i])
	  inconsistentnum++;
      printf("Number of SV: %ld (plus %ld inconsistent examples)\n",
	     model->sv_num-1,inconsistentnum);
    }
    else {
      /* count support vectors whose alpha sits (within epsilon_a) at
	 the upper bound C of their example */
      upsupvecnum=0;
      for(i=1;i<model->sv_num;i++) {
	if(fabs(model->alpha[i]) >=
	   (learn_parm->svm_cost[(model->supvec[i])->docnum]-
	    learn_parm->epsilon_a))
	  upsupvecnum++;
      }
      printf("Number of SV: %ld (including %ld at upper bound)\n",
	     model->sv_num-1,upsupvecnum);
    }

    if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
      loss=0;
      model_length=0;
      for(i=0;i<totdoc;i++) {
	/* hinge loss: accumulate 1 - y*(w*x - b) for margin violators */
	if((lin[i]-model->b)*(double)label[i] < 1.0-learn_parm->epsilon_crit)
	  loss+=1.0-(lin[i]-model->b)*(double)label[i];
	model_length+=a[i]*label[i]*lin[i];
      }
      model_length=sqrt(model_length);
      fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
      fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
      example_length=estimate_sphere(model,kernel_parm);
      fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
	      length_of_longest_document_vector(docs,totdoc,kernel_parm));
      fprintf(stdout,"Estimated VCdim of classifier: VCdim<=%.5f\n",
	      estimate_margin_vcdim(model,model_length,example_length,
				    kernel_parm));
      if((!learn_parm->remove_inconsistent) && (!transduction)) {
	runtime_start_xa=get_runtime();
	if(verbosity>=1) {
	  printf("Computing XiAlpha-estimates..."); fflush(stdout);
	}
	compute_xa_estimates(model,label,unlabeled,totdoc,docs,lin,a,
			     kernel_parm,learn_parm,&(model->xa_error),
			     &(model->xa_recall),&(model->xa_precision));
	if(verbosity>=1) {
	  printf("done\n");
	}
	printf("Runtime for XiAlpha-estimates in cpu-seconds: %.2f\n",
	       (get_runtime()-runtime_start_xa)/100.0);

	fprintf(stdout,"XiAlpha-estimate of the error: error<=%.2f%% (rho=%.2f,depth=%ld)\n",
		model->xa_error,learn_parm->rho,learn_parm->xa_depth);
	fprintf(stdout,"XiAlpha-estimate of the recall: recall=>%.2f%% (rho=%.2f,depth=%ld)\n",
		model->xa_recall,learn_parm->rho,learn_parm->xa_depth);
	fprintf(stdout,"XiAlpha-estimate of the precision: precision=>%.2f%% (rho=%.2f,depth=%ld)\n",
		model->xa_precision,learn_parm->rho,learn_parm->xa_depth);
      }
      else if(!learn_parm->remove_inconsistent) {
	estimate_transduction_quality(model,label,unlabeled,totdoc,docs,lin);
      }
    }
    if(verbosity>=1) {
      printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
    }
  }


  /* leave-one-out testing starts now */
  if(learn_parm->compute_loo) {
    /* save results of training on full dataset for leave-one-out */
    runtime_start_loo=get_runtime();
    for(i=0;i<totdoc;i++) {
      /* slack variable xi_i = max(0, 1 - y_i*(w*x_i - b)) */
      xi_fullset[i]=1.0-((lin[i]-model->b)*(double)label[i]);
      if(xi_fullset[i]<0) xi_fullset[i]=0;
      a_fullset[i]=a[i];
    }
    if(verbosity>=1) {
      printf("Computing leave-one-out");
    }

    /* repeat this loop for every held-out example */
    for(heldout=0;(heldout<totdoc);heldout++) {
      /* three cases per example: provably correct ('+'), provably an
	 error ('-'), or undecided -- only the undecided ones need an
	 actual retraining run */
      if(learn_parm->rho*a_fullset[heldout]*r_delta_sq+xi_fullset[heldout]
	 < 1.0) {
	/* guaranteed to not produce a leave-one-out error */
	if(verbosity==1) {
	  printf("+"); fflush(stdout);
	}
      }
      else if(xi_fullset[heldout] > 1.0) {
	/* guaranteed to produce a leave-one-out error */
	loo_count++;
	if(label[heldout] > 0)  loo_count_pos++; else loo_count_neg++;
	if(verbosity==1) {
	  printf("-"); fflush(stdout);
	}
      }
      else {
	loocomputed++;
	heldout_c=learn_parm->svm_cost[heldout]; /* set upper bound to zero */
	learn_parm->svm_cost[heldout]=0;
	/* make sure heldout example is not currently  */
	/* shrunk away. Assumes that lin is up to date! */
	shrink_state.active[heldout]=1;
	if(verbosity>=2)
	  printf("\nLeave-One-Out test on example %ld\n",heldout);
	if(verbosity>=1) {
	  printf("(?[%ld]",heldout); fflush(stdout);
	}

	optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
				kernel_parm,
				kernel_cache,&shrink_state,model,inconsistent,unlabeled,
				a,lin,c,&timing_profile,
				&maxdiff,heldout,(long)2);

	/* printf("%.20f\n",(lin[heldout]-model->b)*(double)label[heldout]); */

	if(((lin[heldout]-model->b)*(double)label[heldout]) <= 0.0) {
	  loo_count++;                            /* there was a loo-error */
	  if(label[heldout] > 0)  loo_count_pos++; else loo_count_neg++;
	  if(verbosity>=1) {
	    printf("-)"); fflush(stdout);
	  }
	}
	else {
	  if(verbosity>=1) {
	    printf("+)"); fflush(stdout);
	  }
	}
	/* now we need to restore the original data set*/
	learn_parm->svm_cost[heldout]=heldout_c; /* restore upper bound */
      }
    } /* end of leave-one-out loop */


    if(verbosity>=1) {
      printf("\nRetrain on full problem"); fflush(stdout);
    }
    optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
			    kernel_parm,
			    kernel_cache,&shrink_state,model,inconsistent,unlabeled,
			    a,lin,c,&timing_profile,
			    &maxdiff,(long)-1,(long)1);
    if(verbosity >= 1)
      printf("done.\n");


    /* after all leave-one-out computed */
    model->loo_error=100.0*loo_count/(double)totdoc;
    model->loo_recall=(1.0-(double)loo_count_pos/(double)trainpos)*100.0;
    model->loo_precision=(trainpos-loo_count_pos)/
      (double)(trainpos-loo_count_pos+loo_count_neg)*100.0;
    if(verbosity >= 1) {
      fprintf(stdout,"Leave-one-out estimate of the error: error=%.2f%%\n",
	      model->loo_error);
      fprintf(stdout,"Leave-one-out estimate of the recall: recall=%.2f%%\n",
	      model->loo_recall);
      fprintf(stdout,"Leave-one-out estimate of the precision: precision=%.2f%%\n",
	      model->loo_precision);
      fprintf(stdout,"Actual leave-one-outs computed:  %ld (rho=%.2f)\n",
	      loocomputed,learn_parm->rho);
      printf("Runtime for leave-one-out in cpu-seconds: %.2f\n",
	     (double)(get_runtime()-runtime_start_loo)/100.0);
    }
  }

  if(learn_parm->alphafile[0])
    write_alphas(learn_parm->alphafile,a,label,totdoc);

  shrink_state_cleanup(&shrink_state);
  free(label);
  free(inconsistent);
  free(unlabeled);
  free(c);
  free(a);
  free(a_fullset);
  free(xi_fullset);
  free(lin);
  free(learn_parm->svm_cost);
}
wolffd@0 465
wolffd@0 466
wolffd@0 467 /* Learns an SVM regression model based on the training data in
wolffd@0 468 docs/label. The resulting model is returned in the structure
wolffd@0 469 model. */
wolffd@0 470
wolffd@0 471 void svm_learn_regression(DOC **docs, double *value, long int totdoc,
wolffd@0 472 long int totwords, LEARN_PARM *learn_parm,
wolffd@0 473 KERNEL_PARM *kernel_parm,
wolffd@0 474 KERNEL_CACHE **kernel_cache, MODEL *model)
wolffd@0 475 /* docs: Training vectors (x-part) */
wolffd@0 476 /* class: Training value (y-part) */
wolffd@0 477 /* totdoc: Number of examples in docs/label */
wolffd@0 478 /* totwords: Number of features (i.e. highest feature index) */
wolffd@0 479 /* learn_parm: Learning paramenters */
wolffd@0 480 /* kernel_parm: Kernel paramenters */
wolffd@0 481 /* kernel_cache:Initialized Cache, if using a kernel. NULL if
wolffd@0 482 linear. Note that it will be free'd and reassigned */
wolffd@0 483 /* model: Returns learning result (assumed empty before called) */
wolffd@0 484 {
wolffd@0 485 long *inconsistent,i,j;
wolffd@0 486 long inconsistentnum;
wolffd@0 487 long upsupvecnum;
wolffd@0 488 double loss,model_length,example_length;
wolffd@0 489 double maxdiff,*lin,*a,*c;
wolffd@0 490 long runtime_start,runtime_end;
wolffd@0 491 long iterations,kernel_cache_size;
wolffd@0 492 long *unlabeled;
wolffd@0 493 double r_delta_sq=0,r_delta,r_delta_avg;
wolffd@0 494 double *xi_fullset; /* buffer for storing xi on full sample in loo */
wolffd@0 495 double *a_fullset; /* buffer for storing alpha on full sample in loo */
wolffd@0 496 TIMING timing_profile;
wolffd@0 497 SHRINK_STATE shrink_state;
wolffd@0 498 DOC **docs_org;
wolffd@0 499 long *label;
wolffd@0 500
wolffd@0 501 /* set up regression problem in standard form */
wolffd@0 502 docs_org=docs;
wolffd@0 503 docs = (DOC **)my_malloc(sizeof(DOC)*2*totdoc);
wolffd@0 504 label = (long *)my_malloc(sizeof(long)*2*totdoc);
wolffd@0 505 c = (double *)my_malloc(sizeof(double)*2*totdoc);
wolffd@0 506 for(i=0;i<totdoc;i++) {
wolffd@0 507 j=2*totdoc-1-i;
wolffd@0 508 docs[i]=create_example(i,0,0,docs_org[i]->costfactor,docs_org[i]->fvec);
wolffd@0 509 label[i]=+1;
wolffd@0 510 c[i]=value[i];
wolffd@0 511 docs[j]=create_example(j,0,0,docs_org[i]->costfactor,docs_org[i]->fvec);
wolffd@0 512 label[j]=-1;
wolffd@0 513 c[j]=value[i];
wolffd@0 514 }
wolffd@0 515 totdoc*=2;
wolffd@0 516
wolffd@0 517 /* need to get a bigger kernel cache */
wolffd@0 518 if(*kernel_cache) {
wolffd@0 519 kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024);
wolffd@0 520 kernel_cache_cleanup(*kernel_cache);
wolffd@0 521 (*kernel_cache)=kernel_cache_init(totdoc,kernel_cache_size);
wolffd@0 522 }
wolffd@0 523
wolffd@0 524 runtime_start=get_runtime();
wolffd@0 525 timing_profile.time_kernel=0;
wolffd@0 526 timing_profile.time_opti=0;
wolffd@0 527 timing_profile.time_shrink=0;
wolffd@0 528 timing_profile.time_update=0;
wolffd@0 529 timing_profile.time_model=0;
wolffd@0 530 timing_profile.time_check=0;
wolffd@0 531 timing_profile.time_select=0;
wolffd@0 532 kernel_cache_statistic=0;
wolffd@0 533
wolffd@0 534 learn_parm->totwords=totwords;
wolffd@0 535
wolffd@0 536 /* make sure -n value is reasonable */
wolffd@0 537 if((learn_parm->svm_newvarsinqp < 2)
wolffd@0 538 || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
wolffd@0 539 learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
wolffd@0 540 }
wolffd@0 541
wolffd@0 542 init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);
wolffd@0 543
wolffd@0 544 inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
wolffd@0 545 unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
wolffd@0 546 a = (double *)my_malloc(sizeof(double)*totdoc);
wolffd@0 547 a_fullset = (double *)my_malloc(sizeof(double)*totdoc);
wolffd@0 548 xi_fullset = (double *)my_malloc(sizeof(double)*totdoc);
wolffd@0 549 lin = (double *)my_malloc(sizeof(double)*totdoc);
wolffd@0 550 learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
wolffd@0 551 model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
wolffd@0 552 model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
wolffd@0 553 model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
wolffd@0 554
wolffd@0 555 model->at_upper_bound=0;
wolffd@0 556 model->b=0;
wolffd@0 557 model->supvec[0]=0; /* element 0 reserved and empty for now */
wolffd@0 558 model->alpha[0]=0;
wolffd@0 559 model->lin_weights=NULL;
wolffd@0 560 model->totwords=totwords;
wolffd@0 561 model->totdoc=totdoc;
wolffd@0 562 model->kernel_parm=(*kernel_parm);
wolffd@0 563 model->sv_num=1;
wolffd@0 564 model->loo_error=-1;
wolffd@0 565 model->loo_recall=-1;
wolffd@0 566 model->loo_precision=-1;
wolffd@0 567 model->xa_error=-1;
wolffd@0 568 model->xa_recall=-1;
wolffd@0 569 model->xa_precision=-1;
wolffd@0 570 inconsistentnum=0;
wolffd@0 571
wolffd@0 572 r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
wolffd@0 573 r_delta_sq=r_delta*r_delta;
wolffd@0 574
wolffd@0 575 r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
wolffd@0 576 if(learn_parm->svm_c == 0.0) { /* default value for C */
wolffd@0 577 learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
wolffd@0 578 if(verbosity>=1)
wolffd@0 579 printf("Setting default regularization parameter C=%.4f\n",
wolffd@0 580 learn_parm->svm_c);
wolffd@0 581 }
wolffd@0 582
wolffd@0 583 for(i=0;i<totdoc;i++) { /* various inits */
wolffd@0 584 inconsistent[i]=0;
wolffd@0 585 a[i]=0;
wolffd@0 586 lin[i]=0;
wolffd@0 587 unlabeled[i]=0;
wolffd@0 588 if(label[i] > 0) {
wolffd@0 589 learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
wolffd@0 590 docs[i]->costfactor;
wolffd@0 591 }
wolffd@0 592 else if(label[i] < 0) {
wolffd@0 593 learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor;
wolffd@0 594 }
wolffd@0 595 }
wolffd@0 596
wolffd@0 597 /* caching makes no sense for linear kernel */
wolffd@0 598 if((kernel_parm->kernel_type == LINEAR) && (*kernel_cache)) {
wolffd@0 599 printf("WARNING: Using a kernel cache for linear case will slow optimization down!\n");
wolffd@0 600 }
wolffd@0 601
wolffd@0 602 if(verbosity==1) {
wolffd@0 603 printf("Optimizing"); fflush(stdout);
wolffd@0 604 }
wolffd@0 605
wolffd@0 606 /* train the svm */
wolffd@0 607 iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
wolffd@0 608 kernel_parm,*kernel_cache,&shrink_state,
wolffd@0 609 model,inconsistent,unlabeled,a,lin,c,
wolffd@0 610 &timing_profile,&maxdiff,(long)-1,
wolffd@0 611 (long)1);
wolffd@0 612
wolffd@0 613 if(verbosity>=1) {
wolffd@0 614 if(verbosity==1) printf("done. (%ld iterations)\n",iterations);
wolffd@0 615
wolffd@0 616 printf("Optimization finished (maxdiff=%.5f).\n",maxdiff);
wolffd@0 617
wolffd@0 618 runtime_end=get_runtime();
wolffd@0 619 if(verbosity>=2) {
wolffd@0 620 printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
wolffd@0 621 ((float)runtime_end-(float)runtime_start)/100.0,
wolffd@0 622 (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),
wolffd@0 623 (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),
wolffd@0 624 (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),
wolffd@0 625 (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),
wolffd@0 626 (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),
wolffd@0 627 (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),
wolffd@0 628 (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));
wolffd@0 629 }
wolffd@0 630 else {
wolffd@0 631 printf("Runtime in cpu-seconds: %.2f\n",
wolffd@0 632 (runtime_end-runtime_start)/100.0);
wolffd@0 633 }
wolffd@0 634
wolffd@0 635 if(learn_parm->remove_inconsistent) {
wolffd@0 636 inconsistentnum=0;
wolffd@0 637 for(i=0;i<totdoc;i++)
wolffd@0 638 if(inconsistent[i])
wolffd@0 639 inconsistentnum++;
wolffd@0 640 printf("Number of SV: %ld (plus %ld inconsistent examples)\n",
wolffd@0 641 model->sv_num-1,inconsistentnum);
wolffd@0 642 }
wolffd@0 643 else {
wolffd@0 644 upsupvecnum=0;
wolffd@0 645 for(i=1;i<model->sv_num;i++) {
wolffd@0 646 if(fabs(model->alpha[i]) >=
wolffd@0 647 (learn_parm->svm_cost[(model->supvec[i])->docnum]-
wolffd@0 648 learn_parm->epsilon_a))
wolffd@0 649 upsupvecnum++;
wolffd@0 650 }
wolffd@0 651 printf("Number of SV: %ld (including %ld at upper bound)\n",
wolffd@0 652 model->sv_num-1,upsupvecnum);
wolffd@0 653 }
wolffd@0 654
wolffd@0 655 if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
wolffd@0 656 loss=0;
wolffd@0 657 model_length=0;
wolffd@0 658 for(i=0;i<totdoc;i++) {
wolffd@0 659 if((lin[i]-model->b)*(double)label[i] < (-learn_parm->eps+(double)label[i]*c[i])-learn_parm->epsilon_crit)
wolffd@0 660 loss+=-learn_parm->eps+(double)label[i]*c[i]-(lin[i]-model->b)*(double)label[i];
wolffd@0 661 model_length+=a[i]*label[i]*lin[i];
wolffd@0 662 }
wolffd@0 663 model_length=sqrt(model_length);
wolffd@0 664 fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
wolffd@0 665 fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
wolffd@0 666 example_length=estimate_sphere(model,kernel_parm);
wolffd@0 667 fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
wolffd@0 668 length_of_longest_document_vector(docs,totdoc,kernel_parm));
wolffd@0 669 }
wolffd@0 670 if(verbosity>=1) {
wolffd@0 671 printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
wolffd@0 672 }
wolffd@0 673 }
wolffd@0 674
wolffd@0 675 if(learn_parm->alphafile[0])
wolffd@0 676 write_alphas(learn_parm->alphafile,a,label,totdoc);
wolffd@0 677
wolffd@0 678 /* this makes sure the model we return does not contain pointers to the
wolffd@0 679 temporary documents */
wolffd@0 680 for(i=1;i<model->sv_num;i++) {
wolffd@0 681 j=model->supvec[i]->docnum;
wolffd@0 682 if(j >= (totdoc/2)) {
wolffd@0 683 j=totdoc-j-1;
wolffd@0 684 }
wolffd@0 685 model->supvec[i]=docs_org[j];
wolffd@0 686 }
wolffd@0 687
wolffd@0 688 shrink_state_cleanup(&shrink_state);
wolffd@0 689 for(i=0;i<totdoc;i++)
wolffd@0 690 free_example(docs[i],0);
wolffd@0 691 free(docs);
wolffd@0 692 free(label);
wolffd@0 693 free(inconsistent);
wolffd@0 694 free(unlabeled);
wolffd@0 695 free(c);
wolffd@0 696 free(a);
wolffd@0 697 free(a_fullset);
wolffd@0 698 free(xi_fullset);
wolffd@0 699 free(lin);
wolffd@0 700 free(learn_parm->svm_cost);
wolffd@0 701 }
wolffd@0 702
wolffd@0 703 void svm_learn_ranking(DOC **docs, double *rankvalue, long int totdoc,
wolffd@0 704 long int totwords, LEARN_PARM *learn_parm,
wolffd@0 705 KERNEL_PARM *kernel_parm, KERNEL_CACHE **kernel_cache,
wolffd@0 706 MODEL *model)
wolffd@0 707 /* docs: Training vectors (x-part) */
wolffd@0 708 /* rankvalue: Training target values that determine the ranking */
wolffd@0 709 /* totdoc: Number of examples in docs/label */
wolffd@0 710 /* totwords: Number of features (i.e. highest feature index) */
wolffd@0 711 /* learn_parm: Learning paramenters */
wolffd@0 712 /* kernel_parm: Kernel paramenters */
wolffd@0 713 /* kernel_cache:Initialized pointer to Cache of size 1*totdoc, if
wolffd@0 714 using a kernel. NULL if linear. NOTE: Cache is
wolffd@0 715 getting reinitialized in this function */
wolffd@0 716 /* model: Returns learning result (assumed empty before called) */
wolffd@0 717 {
wolffd@0 718 DOC **docdiff;
wolffd@0 719 long i,j,k,totpair,kernel_cache_size;
wolffd@0 720 double *target,*alpha,cost;
wolffd@0 721 long *greater,*lesser;
wolffd@0 722 MODEL *pairmodel;
wolffd@0 723 SVECTOR *flow,*fhigh;
wolffd@0 724
wolffd@0 725 totpair=0;
wolffd@0 726 for(i=0;i<totdoc;i++) {
wolffd@0 727 for(j=i+1;j<totdoc;j++) {
wolffd@0 728 if((docs[i]->queryid==docs[j]->queryid) && (rankvalue[i] != rankvalue[j])) {
wolffd@0 729 totpair++;
wolffd@0 730 }
wolffd@0 731 }
wolffd@0 732 }
wolffd@0 733
wolffd@0 734 printf("Constructing %ld rank constraints...",totpair); fflush(stdout);
wolffd@0 735 docdiff=(DOC **)my_malloc(sizeof(DOC)*totpair);
wolffd@0 736 target=(double *)my_malloc(sizeof(double)*totpair);
wolffd@0 737 greater=(long *)my_malloc(sizeof(long)*totpair);
wolffd@0 738 lesser=(long *)my_malloc(sizeof(long)*totpair);
wolffd@0 739
wolffd@0 740 k=0;
wolffd@0 741 for(i=0;i<totdoc;i++) {
wolffd@0 742 for(j=i+1;j<totdoc;j++) {
wolffd@0 743 if(docs[i]->queryid == docs[j]->queryid) {
wolffd@0 744 cost=(docs[i]->costfactor+docs[j]->costfactor)/2.0;
wolffd@0 745 if(rankvalue[i] > rankvalue[j]) {
wolffd@0 746 if(kernel_parm->kernel_type == LINEAR)
wolffd@0 747 docdiff[k]=create_example(k,0,0,cost,
wolffd@0 748 sub_ss(docs[i]->fvec,docs[j]->fvec));
wolffd@0 749 else {
wolffd@0 750 flow=copy_svector(docs[j]->fvec);
wolffd@0 751 flow->factor=-1.0;
wolffd@0 752 flow->next=NULL;
wolffd@0 753 fhigh=copy_svector(docs[i]->fvec);
wolffd@0 754 fhigh->factor=1.0;
wolffd@0 755 fhigh->next=flow;
wolffd@0 756 docdiff[k]=create_example(k,0,0,cost,fhigh);
wolffd@0 757 }
wolffd@0 758 target[k]=1;
wolffd@0 759 greater[k]=i;
wolffd@0 760 lesser[k]=j;
wolffd@0 761 k++;
wolffd@0 762 }
wolffd@0 763 else if(rankvalue[i] < rankvalue[j]) {
wolffd@0 764 if(kernel_parm->kernel_type == LINEAR)
wolffd@0 765 docdiff[k]=create_example(k,0,0,cost,
wolffd@0 766 sub_ss(docs[i]->fvec,docs[j]->fvec));
wolffd@0 767 else {
wolffd@0 768 flow=copy_svector(docs[j]->fvec);
wolffd@0 769 flow->factor=-1.0;
wolffd@0 770 flow->next=NULL;
wolffd@0 771 fhigh=copy_svector(docs[i]->fvec);
wolffd@0 772 fhigh->factor=1.0;
wolffd@0 773 fhigh->next=flow;
wolffd@0 774 docdiff[k]=create_example(k,0,0,cost,fhigh);
wolffd@0 775 }
wolffd@0 776 target[k]=-1;
wolffd@0 777 greater[k]=i;
wolffd@0 778 lesser[k]=j;
wolffd@0 779 k++;
wolffd@0 780 }
wolffd@0 781 }
wolffd@0 782 }
wolffd@0 783 }
wolffd@0 784 printf("done.\n"); fflush(stdout);
wolffd@0 785
wolffd@0 786 /* need to get a bigger kernel cache */
wolffd@0 787 if(*kernel_cache) {
wolffd@0 788 kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024);
wolffd@0 789 kernel_cache_cleanup(*kernel_cache);
wolffd@0 790 (*kernel_cache)=kernel_cache_init(totpair,kernel_cache_size);
wolffd@0 791 }
wolffd@0 792
wolffd@0 793 /* must use unbiased hyperplane on difference vectors */
wolffd@0 794 learn_parm->biased_hyperplane=0;
wolffd@0 795 pairmodel=(MODEL *)my_malloc(sizeof(MODEL));
wolffd@0 796 svm_learn_classification(docdiff,target,totpair,totwords,learn_parm,
wolffd@0 797 kernel_parm,(*kernel_cache),pairmodel,NULL);
wolffd@0 798
wolffd@0 799 /* Transfer the result into a more compact model. If you would like
wolffd@0 800 to output the original model on pairs of documents, see below. */
wolffd@0 801 alpha=(double *)my_malloc(sizeof(double)*totdoc);
wolffd@0 802 for(i=0;i<totdoc;i++) {
wolffd@0 803 alpha[i]=0;
wolffd@0 804 }
wolffd@0 805 for(i=1;i<pairmodel->sv_num;i++) {
wolffd@0 806 alpha[lesser[(pairmodel->supvec[i])->docnum]]-=pairmodel->alpha[i];
wolffd@0 807 alpha[greater[(pairmodel->supvec[i])->docnum]]+=pairmodel->alpha[i];
wolffd@0 808 }
wolffd@0 809 model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
wolffd@0 810 model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
wolffd@0 811 model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
wolffd@0 812 model->supvec[0]=0; /* element 0 reserved and empty for now */
wolffd@0 813 model->alpha[0]=0;
wolffd@0 814 model->sv_num=1;
wolffd@0 815 for(i=0;i<totdoc;i++) {
wolffd@0 816 if(alpha[i]) {
wolffd@0 817 model->supvec[model->sv_num]=docs[i];
wolffd@0 818 model->alpha[model->sv_num]=alpha[i];
wolffd@0 819 model->index[i]=model->sv_num;
wolffd@0 820 model->sv_num++;
wolffd@0 821 }
wolffd@0 822 else {
wolffd@0 823 model->index[i]=-1;
wolffd@0 824 }
wolffd@0 825 }
wolffd@0 826 model->at_upper_bound=0;
wolffd@0 827 model->b=0;
wolffd@0 828 model->lin_weights=NULL;
wolffd@0 829 model->totwords=totwords;
wolffd@0 830 model->totdoc=totdoc;
wolffd@0 831 model->kernel_parm=(*kernel_parm);
wolffd@0 832 model->loo_error=-1;
wolffd@0 833 model->loo_recall=-1;
wolffd@0 834 model->loo_precision=-1;
wolffd@0 835 model->xa_error=-1;
wolffd@0 836 model->xa_recall=-1;
wolffd@0 837 model->xa_precision=-1;
wolffd@0 838
wolffd@0 839 free(alpha);
wolffd@0 840 free(greater);
wolffd@0 841 free(lesser);
wolffd@0 842 free(target);
wolffd@0 843
wolffd@0 844 /* If you would like to output the original model on pairs of
wolffd@0 845 document, replace the following lines with '(*model)=(*pairmodel);' */
wolffd@0 846 for(i=0;i<totpair;i++)
wolffd@0 847 free_example(docdiff[i],1);
wolffd@0 848 free(docdiff);
wolffd@0 849 free_model(pairmodel,0);
wolffd@0 850 }
wolffd@0 851
wolffd@0 852
wolffd@0 853 /* The following solves a freely defined and given set of
wolffd@0 854 inequalities. The optimization problem is of the following form:
wolffd@0 855
wolffd@0 856 min 0.5 w*w + C sum_i C_i \xi_i
wolffd@0 857 s.t. x_i * w > rhs_i - \xi_i
wolffd@0 858
wolffd@0 859 This corresponds to the -z o option. */
wolffd@0 860
wolffd@0 861 void svm_learn_optimization(DOC **docs, double *rhs, long int
wolffd@0 862 totdoc, long int totwords,
wolffd@0 863 LEARN_PARM *learn_parm,
wolffd@0 864 KERNEL_PARM *kernel_parm,
wolffd@0 865 KERNEL_CACHE *kernel_cache, MODEL *model,
wolffd@0 866 double *alpha)
wolffd@0 867 /* docs: Left-hand side of inequalities (x-part) */
wolffd@0 868 /* rhs: Right-hand side of inequalities */
wolffd@0 869 /* totdoc: Number of examples in docs/label */
wolffd@0 870 /* totwords: Number of features (i.e. highest feature index) */
wolffd@0 871 /* learn_parm: Learning paramenters */
wolffd@0 872 /* kernel_parm: Kernel paramenters */
wolffd@0 873 /* kernel_cache:Initialized Cache of size 1*totdoc, if using a kernel.
wolffd@0 874 NULL if linear.*/
wolffd@0 875 /* model: Returns solution as SV expansion (assumed empty before called) */
wolffd@0 876 /* alpha: Start values for the alpha variables or NULL
wolffd@0 877 pointer. The new alpha values are returned after
wolffd@0 878 optimization if not NULL. Array must be of size totdoc. */
wolffd@0 879 {
wolffd@0 880 long i,*label;
wolffd@0 881 long misclassified,upsupvecnum;
wolffd@0 882 double loss,model_length,example_length;
wolffd@0 883 double maxdiff,*lin,*a,*c;
wolffd@0 884 long runtime_start,runtime_end;
wolffd@0 885 long iterations,maxslackid,svsetnum;
wolffd@0 886 long *unlabeled,*inconsistent;
wolffd@0 887 double r_delta_sq=0,r_delta,r_delta_avg;
wolffd@0 888 long *index,*index2dnum;
wolffd@0 889 double *weights,*slack,*alphaslack;
wolffd@0 890 CFLOAT *aicache; /* buffer to keep one row of hessian */
wolffd@0 891
wolffd@0 892 TIMING timing_profile;
wolffd@0 893 SHRINK_STATE shrink_state;
wolffd@0 894
wolffd@0 895 runtime_start=get_runtime();
wolffd@0 896 timing_profile.time_kernel=0;
wolffd@0 897 timing_profile.time_opti=0;
wolffd@0 898 timing_profile.time_shrink=0;
wolffd@0 899 timing_profile.time_update=0;
wolffd@0 900 timing_profile.time_model=0;
wolffd@0 901 timing_profile.time_check=0;
wolffd@0 902 timing_profile.time_select=0;
wolffd@0 903 kernel_cache_statistic=0;
wolffd@0 904
wolffd@0 905 learn_parm->totwords=totwords;
wolffd@0 906
wolffd@0 907 /* make sure -n value is reasonable */
wolffd@0 908 if((learn_parm->svm_newvarsinqp < 2)
wolffd@0 909 || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
wolffd@0 910 learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
wolffd@0 911 }
wolffd@0 912
wolffd@0 913 init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);
wolffd@0 914
wolffd@0 915 label = (long *)my_malloc(sizeof(long)*totdoc);
wolffd@0 916 unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
wolffd@0 917 inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
wolffd@0 918 c = (double *)my_malloc(sizeof(double)*totdoc);
wolffd@0 919 a = (double *)my_malloc(sizeof(double)*totdoc);
wolffd@0 920 lin = (double *)my_malloc(sizeof(double)*totdoc);
wolffd@0 921 learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
wolffd@0 922 model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
wolffd@0 923 model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
wolffd@0 924 model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
wolffd@0 925
wolffd@0 926 model->at_upper_bound=0;
wolffd@0 927 model->b=0;
wolffd@0 928 model->supvec[0]=0; /* element 0 reserved and empty for now */
wolffd@0 929 model->alpha[0]=0;
wolffd@0 930 model->lin_weights=NULL;
wolffd@0 931 model->totwords=totwords;
wolffd@0 932 model->totdoc=totdoc;
wolffd@0 933 model->kernel_parm=(*kernel_parm);
wolffd@0 934 model->sv_num=1;
wolffd@0 935 model->loo_error=-1;
wolffd@0 936 model->loo_recall=-1;
wolffd@0 937 model->loo_precision=-1;
wolffd@0 938 model->xa_error=-1;
wolffd@0 939 model->xa_recall=-1;
wolffd@0 940 model->xa_precision=-1;
wolffd@0 941
wolffd@0 942 r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
wolffd@0 943 r_delta_sq=r_delta*r_delta;
wolffd@0 944
wolffd@0 945 r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
wolffd@0 946 if(learn_parm->svm_c == 0.0) { /* default value for C */
wolffd@0 947 learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
wolffd@0 948 if(verbosity>=1)
wolffd@0 949 printf("Setting default regularization parameter C=%.4f\n",
wolffd@0 950 learn_parm->svm_c);
wolffd@0 951 }
wolffd@0 952
wolffd@0 953 learn_parm->biased_hyperplane=0; /* learn an unbiased hyperplane */
wolffd@0 954
wolffd@0 955 learn_parm->eps=0.0; /* No margin, unless explicitly handcoded
wolffd@0 956 in the right-hand side in the training
wolffd@0 957 set. */
wolffd@0 958
wolffd@0 959 for(i=0;i<totdoc;i++) { /* various inits */
wolffd@0 960 docs[i]->docnum=i;
wolffd@0 961 a[i]=0;
wolffd@0 962 lin[i]=0;
wolffd@0 963 c[i]=rhs[i]; /* set right-hand side */
wolffd@0 964 unlabeled[i]=0;
wolffd@0 965 inconsistent[i]=0;
wolffd@0 966 learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
wolffd@0 967 docs[i]->costfactor;
wolffd@0 968 label[i]=1;
wolffd@0 969 }
wolffd@0 970 if(learn_parm->sharedslack) /* if shared slacks are used, they must */
wolffd@0 971 for(i=0;i<totdoc;i++) /* be used on every constraint */
wolffd@0 972 if(!docs[i]->slackid) {
wolffd@0 973 perror("Error: Missing shared slacks definitions in some of the examples.");
wolffd@0 974 exit(0);
wolffd@0 975 }
wolffd@0 976
wolffd@0 977 /* compute starting state for initial alpha values */
wolffd@0 978 if(alpha) {
wolffd@0 979 if(verbosity>=1) {
wolffd@0 980 printf("Computing starting state..."); fflush(stdout);
wolffd@0 981 }
wolffd@0 982 index = (long *)my_malloc(sizeof(long)*totdoc);
wolffd@0 983 index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
wolffd@0 984 weights=(double *)my_malloc(sizeof(double)*(totwords+1));
wolffd@0 985 aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
wolffd@0 986 for(i=0;i<totdoc;i++) { /* create full index and clip alphas */
wolffd@0 987 index[i]=1;
wolffd@0 988 alpha[i]=fabs(alpha[i]);
wolffd@0 989 if(alpha[i]<0) alpha[i]=0;
wolffd@0 990 if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
wolffd@0 991 }
wolffd@0 992 if(kernel_parm->kernel_type != LINEAR) {
wolffd@0 993 for(i=0;i<totdoc;i++) /* fill kernel cache with unbounded SV */
wolffd@0 994 if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
wolffd@0 995 && (kernel_cache_space_available(kernel_cache)))
wolffd@0 996 cache_kernel_row(kernel_cache,docs,i,kernel_parm);
wolffd@0 997 for(i=0;i<totdoc;i++) /* fill rest of kernel cache with bounded SV */
wolffd@0 998 if((alpha[i]==learn_parm->svm_cost[i])
wolffd@0 999 && (kernel_cache_space_available(kernel_cache)))
wolffd@0 1000 cache_kernel_row(kernel_cache,docs,i,kernel_parm);
wolffd@0 1001 }
wolffd@0 1002 (void)compute_index(index,totdoc,index2dnum);
wolffd@0 1003 update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
wolffd@0 1004 totwords,kernel_parm,kernel_cache,lin,aicache,
wolffd@0 1005 weights);
wolffd@0 1006 (void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c,
wolffd@0 1007 learn_parm,index2dnum,index2dnum,model);
wolffd@0 1008 for(i=0;i<totdoc;i++) { /* copy initial alphas */
wolffd@0 1009 a[i]=alpha[i];
wolffd@0 1010 }
wolffd@0 1011 free(index);
wolffd@0 1012 free(index2dnum);
wolffd@0 1013 free(weights);
wolffd@0 1014 free(aicache);
wolffd@0 1015 if(verbosity>=1) {
wolffd@0 1016 printf("done.\n"); fflush(stdout);
wolffd@0 1017 }
wolffd@0 1018 }
wolffd@0 1019
wolffd@0 1020 /* removing inconsistent does not work for general optimization problem */
wolffd@0 1021 if(learn_parm->remove_inconsistent) {
wolffd@0 1022 learn_parm->remove_inconsistent = 0;
wolffd@0 1023 printf("'remove inconsistent' not available in this mode. Switching option off!"); fflush(stdout);
wolffd@0 1024 }
wolffd@0 1025
wolffd@0 1026 /* caching makes no sense for linear kernel */
wolffd@0 1027 if(kernel_parm->kernel_type == LINEAR) {
wolffd@0 1028 kernel_cache = NULL;
wolffd@0 1029 }
wolffd@0 1030
wolffd@0 1031 if(verbosity==1) {
wolffd@0 1032 printf("Optimizing"); fflush(stdout);
wolffd@0 1033 }
wolffd@0 1034
wolffd@0 1035 /* train the svm */
wolffd@0 1036 if(learn_parm->sharedslack)
wolffd@0 1037 iterations=optimize_to_convergence_sharedslack(docs,label,totdoc,
wolffd@0 1038 totwords,learn_parm,kernel_parm,
wolffd@0 1039 kernel_cache,&shrink_state,model,
wolffd@0 1040 a,lin,c,&timing_profile,
wolffd@0 1041 &maxdiff);
wolffd@0 1042 else
wolffd@0 1043 iterations=optimize_to_convergence(docs,label,totdoc,
wolffd@0 1044 totwords,learn_parm,kernel_parm,
wolffd@0 1045 kernel_cache,&shrink_state,model,
wolffd@0 1046 inconsistent,unlabeled,
wolffd@0 1047 a,lin,c,&timing_profile,
wolffd@0 1048 &maxdiff,(long)-1,(long)1);
wolffd@0 1049
wolffd@0 1050 if(verbosity>=1) {
wolffd@0 1051 if(verbosity==1) printf("done. (%ld iterations)\n",iterations);
wolffd@0 1052
wolffd@0 1053 misclassified=0;
wolffd@0 1054 for(i=0;(i<totdoc);i++) { /* get final statistic */
wolffd@0 1055 if((lin[i]-model->b)*(double)label[i] <= 0.0)
wolffd@0 1056 misclassified++;
wolffd@0 1057 }
wolffd@0 1058
wolffd@0 1059 printf("Optimization finished (maxdiff=%.5f).\n",maxdiff);
wolffd@0 1060
wolffd@0 1061 runtime_end=get_runtime();
wolffd@0 1062 if(verbosity>=2) {
wolffd@0 1063 printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
wolffd@0 1064 ((float)runtime_end-(float)runtime_start)/100.0,
wolffd@0 1065 (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),
wolffd@0 1066 (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),
wolffd@0 1067 (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),
wolffd@0 1068 (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),
wolffd@0 1069 (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),
wolffd@0 1070 (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),
wolffd@0 1071 (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));
wolffd@0 1072 }
wolffd@0 1073 else {
wolffd@0 1074 printf("Runtime in cpu-seconds: %.2f\n",
wolffd@0 1075 (runtime_end-runtime_start)/100.0);
wolffd@0 1076 }
wolffd@0 1077 }
wolffd@0 1078 if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
wolffd@0 1079 loss=0;
wolffd@0 1080 model_length=0;
wolffd@0 1081 for(i=0;i<totdoc;i++) {
wolffd@0 1082 if((lin[i]-model->b)*(double)label[i] < c[i]-learn_parm->epsilon_crit)
wolffd@0 1083 loss+=c[i]-(lin[i]-model->b)*(double)label[i];
wolffd@0 1084 model_length+=a[i]*label[i]*lin[i];
wolffd@0 1085 }
wolffd@0 1086 model_length=sqrt(model_length);
wolffd@0 1087 fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
wolffd@0 1088 }
wolffd@0 1089
wolffd@0 1090 if(learn_parm->sharedslack) {
wolffd@0 1091 index = (long *)my_malloc(sizeof(long)*totdoc);
wolffd@0 1092 index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
wolffd@0 1093 maxslackid=0;
wolffd@0 1094 for(i=0;i<totdoc;i++) { /* create full index */
wolffd@0 1095 index[i]=1;
wolffd@0 1096 if(maxslackid<docs[i]->slackid)
wolffd@0 1097 maxslackid=docs[i]->slackid;
wolffd@0 1098 }
wolffd@0 1099 (void)compute_index(index,totdoc,index2dnum);
wolffd@0 1100 slack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
wolffd@0 1101 alphaslack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
wolffd@0 1102 for(i=0;i<=maxslackid;i++) { /* init shared slacks */
wolffd@0 1103 slack[i]=0;
wolffd@0 1104 alphaslack[i]=0;
wolffd@0 1105 }
wolffd@0 1106 compute_shared_slacks(docs,label,a,lin,c,index2dnum,learn_parm,
wolffd@0 1107 slack,alphaslack);
wolffd@0 1108 loss=0;
wolffd@0 1109 model->at_upper_bound=0;
wolffd@0 1110 svsetnum=0;
wolffd@0 1111 for(i=0;i<=maxslackid;i++) { /* create full index */
wolffd@0 1112 loss+=slack[i];
wolffd@0 1113 if(alphaslack[i] > (learn_parm->svm_c - learn_parm->epsilon_a))
wolffd@0 1114 model->at_upper_bound++;
wolffd@0 1115 if(alphaslack[i] > learn_parm->epsilon_a)
wolffd@0 1116 svsetnum++;
wolffd@0 1117 }
wolffd@0 1118 free(index);
wolffd@0 1119 free(index2dnum);
wolffd@0 1120 free(slack);
wolffd@0 1121 free(alphaslack);
wolffd@0 1122 }
wolffd@0 1123
wolffd@0 1124 if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
wolffd@0 1125 if(learn_parm->sharedslack) {
wolffd@0 1126 printf("Number of SV: %ld\n",
wolffd@0 1127 model->sv_num-1);
wolffd@0 1128 printf("Number of non-zero slack variables: %ld (out of %ld)\n",
wolffd@0 1129 model->at_upper_bound,svsetnum);
wolffd@0 1130 fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
wolffd@0 1131 }
wolffd@0 1132 else {
wolffd@0 1133 upsupvecnum=0;
wolffd@0 1134 for(i=1;i<model->sv_num;i++) {
wolffd@0 1135 if(fabs(model->alpha[i]) >=
wolffd@0 1136 (learn_parm->svm_cost[(model->supvec[i])->docnum]-
wolffd@0 1137 learn_parm->epsilon_a))
wolffd@0 1138 upsupvecnum++;
wolffd@0 1139 }
wolffd@0 1140 printf("Number of SV: %ld (including %ld at upper bound)\n",
wolffd@0 1141 model->sv_num-1,upsupvecnum);
wolffd@0 1142 fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
wolffd@0 1143 }
wolffd@0 1144 example_length=estimate_sphere(model,kernel_parm);
wolffd@0 1145 fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
wolffd@0 1146 length_of_longest_document_vector(docs,totdoc,kernel_parm));
wolffd@0 1147 }
wolffd@0 1148 if(verbosity>=1) {
wolffd@0 1149 printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
wolffd@0 1150 }
wolffd@0 1151
wolffd@0 1152 if(alpha) {
wolffd@0 1153 for(i=0;i<totdoc;i++) { /* copy final alphas */
wolffd@0 1154 alpha[i]=a[i];
wolffd@0 1155 }
wolffd@0 1156 }
wolffd@0 1157
wolffd@0 1158 if(learn_parm->alphafile[0])
wolffd@0 1159 write_alphas(learn_parm->alphafile,a,label,totdoc);
wolffd@0 1160
wolffd@0 1161 shrink_state_cleanup(&shrink_state);
wolffd@0 1162 free(label);
wolffd@0 1163 free(unlabeled);
wolffd@0 1164 free(inconsistent);
wolffd@0 1165 free(c);
wolffd@0 1166 free(a);
wolffd@0 1167 free(lin);
wolffd@0 1168 free(learn_parm->svm_cost);
wolffd@0 1169 }
wolffd@0 1170
wolffd@0 1171
wolffd@0 1172 long optimize_to_convergence(DOC **docs, long int *label, long int totdoc,
wolffd@0 1173 long int totwords, LEARN_PARM *learn_parm,
wolffd@0 1174 KERNEL_PARM *kernel_parm,
wolffd@0 1175 KERNEL_CACHE *kernel_cache,
wolffd@0 1176 SHRINK_STATE *shrink_state, MODEL *model,
wolffd@0 1177 long int *inconsistent, long int *unlabeled,
wolffd@0 1178 double *a, double *lin, double *c,
wolffd@0 1179 TIMING *timing_profile, double *maxdiff,
wolffd@0 1180 long int heldout, long int retrain)
wolffd@0 1181 /* docs: Training vectors (x-part) */
wolffd@0 1182 /* label: Training labels/value (y-part, zero if test example for
wolffd@0 1183 transduction) */
wolffd@0 1184 /* totdoc: Number of examples in docs/label */
wolffd@0 1185 /* totwords: Number of features (i.e. highest feature index) */
wolffd@0 1186 /* laern_parm: Learning paramenters */
wolffd@0 1187 /* kernel_parm: Kernel paramenters */
wolffd@0 1188 /* kernel_cache: Initialized/partly filled Cache, if using a kernel.
wolffd@0 1189 NULL if linear. */
wolffd@0 1190 /* shrink_state: State of active variables */
wolffd@0 1191 /* model: Returns learning result */
wolffd@0 1192 /* inconsistent: examples thrown out as inconstistent */
wolffd@0 1193 /* unlabeled: test examples for transduction */
wolffd@0 1194 /* a: alphas */
wolffd@0 1195 /* lin: linear component of gradient */
wolffd@0 1196 /* c: right hand side of inequalities (margin) */
wolffd@0 1197 /* maxdiff: returns maximum violation of KT-conditions */
wolffd@0 1198 /* heldout: marks held-out example for leave-one-out (or -1) */
wolffd@0 1199 /* retrain: selects training mode (1=regular / 2=holdout) */
wolffd@0 1200 {
wolffd@0 1201 long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink;
wolffd@0 1202 long inconsistentnum,choosenum,already_chosen=0,iteration;
wolffd@0 1203 long misclassified,supvecnum=0,*active2dnum,inactivenum;
wolffd@0 1204 long *working2dnum,*selexam;
wolffd@0 1205 long activenum;
wolffd@0 1206 double criterion,eq;
wolffd@0 1207 double *a_old;
wolffd@0 1208 long t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */
wolffd@0 1209 long transductcycle;
wolffd@0 1210 long transduction;
wolffd@0 1211 double epsilon_crit_org;
wolffd@0 1212 double bestmaxdiff;
wolffd@0 1213 long bestmaxdiffiter,terminate;
wolffd@0 1214
wolffd@0 1215 double *selcrit; /* buffer for sorting */
wolffd@0 1216 CFLOAT *aicache; /* buffer to keep one row of hessian */
wolffd@0 1217 double *weights; /* buffer for weight vector in linear case */
wolffd@0 1218 QP qp; /* buffer for one quadratic program */
wolffd@0 1219
wolffd@0 1220 epsilon_crit_org=learn_parm->epsilon_crit; /* save org */
wolffd@0 1221 if(kernel_parm->kernel_type == LINEAR) {
wolffd@0 1222 learn_parm->epsilon_crit=2.0;
wolffd@0 1223 kernel_cache=NULL; /* caching makes no sense for linear kernel */
wolffd@0 1224 }
wolffd@0 1225 learn_parm->epsilon_shrink=2;
wolffd@0 1226 (*maxdiff)=1;
wolffd@0 1227
wolffd@0 1228 learn_parm->totwords=totwords;
wolffd@0 1229
wolffd@0 1230 chosen = (long *)my_malloc(sizeof(long)*totdoc);
wolffd@0 1231 last_suboptimal_at = (long *)my_malloc(sizeof(long)*totdoc);
wolffd@0 1232 key = (long *)my_malloc(sizeof(long)*(totdoc+11));
wolffd@0 1233 selcrit = (double *)my_malloc(sizeof(double)*totdoc);
wolffd@0 1234 selexam = (long *)my_malloc(sizeof(long)*totdoc);
wolffd@0 1235 a_old = (double *)my_malloc(sizeof(double)*totdoc);
wolffd@0 1236 aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
wolffd@0 1237 working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
wolffd@0 1238 active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
wolffd@0 1239 qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
wolffd@0 1240 qp.opt_ce0 = (double *)my_malloc(sizeof(double));
wolffd@0 1241 qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize
wolffd@0 1242 *learn_parm->svm_maxqpsize);
wolffd@0 1243 qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
wolffd@0 1244 qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
wolffd@0 1245 qp.opt_low=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
wolffd@0 1246 qp.opt_up=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
wolffd@0 1247 weights=(double *)my_malloc(sizeof(double)*(totwords+1));
wolffd@0 1248
wolffd@0 1249 choosenum=0;
wolffd@0 1250 inconsistentnum=0;
wolffd@0 1251 transductcycle=0;
wolffd@0 1252 transduction=0;
wolffd@0 1253 if(!retrain) retrain=1;
wolffd@0 1254 iteration=1;
wolffd@0 1255 bestmaxdiffiter=1;
wolffd@0 1256 bestmaxdiff=999999999;
wolffd@0 1257 terminate=0;
wolffd@0 1258
wolffd@0 1259 if(kernel_cache) {
wolffd@0 1260 kernel_cache->time=iteration; /* for lru cache */
wolffd@0 1261 kernel_cache_reset_lru(kernel_cache);
wolffd@0 1262 }
wolffd@0 1263
wolffd@0 1264 for(i=0;i<totdoc;i++) { /* various inits */
wolffd@0 1265 chosen[i]=0;
wolffd@0 1266 a_old[i]=a[i];
wolffd@0 1267 last_suboptimal_at[i]=1;
wolffd@0 1268 if(inconsistent[i])
wolffd@0 1269 inconsistentnum++;
wolffd@0 1270 if(unlabeled[i]) {
wolffd@0 1271 transduction=1;
wolffd@0 1272 }
wolffd@0 1273 }
wolffd@0 1274 activenum=compute_index(shrink_state->active,totdoc,active2dnum);
wolffd@0 1275 inactivenum=totdoc-activenum;
wolffd@0 1276 clear_index(working2dnum);
wolffd@0 1277
wolffd@0 1278 /* repeat this loop until we have convergence */
wolffd@0 1279 for(;retrain && (!terminate);iteration++) {
wolffd@0 1280
wolffd@0 1281 if(kernel_cache)
wolffd@0 1282 kernel_cache->time=iteration; /* for lru cache */
wolffd@0 1283 if(verbosity>=2) {
wolffd@0 1284 printf(
wolffd@0 1285 "Iteration %ld: ",iteration); fflush(stdout);
wolffd@0 1286 }
wolffd@0 1287 else if(verbosity==1) {
wolffd@0 1288 printf("."); fflush(stdout);
wolffd@0 1289 }
wolffd@0 1290
wolffd@0 1291 if(verbosity>=2) t0=get_runtime();
wolffd@0 1292 if(verbosity>=3) {
wolffd@0 1293 printf("\nSelecting working set... "); fflush(stdout);
wolffd@0 1294 }
wolffd@0 1295
wolffd@0 1296 if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize)
wolffd@0 1297 learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
wolffd@0 1298
wolffd@0 1299 i=0;
wolffd@0 1300 for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */
wolffd@0 1301 if((chosen[j]>=(learn_parm->svm_maxqpsize/
wolffd@0 1302 minl(learn_parm->svm_maxqpsize,
wolffd@0 1303 learn_parm->svm_newvarsinqp)))
wolffd@0 1304 || (inconsistent[j])
wolffd@0 1305 || (j == heldout)) {
wolffd@0 1306 chosen[j]=0;
wolffd@0 1307 choosenum--;
wolffd@0 1308 }
wolffd@0 1309 else {
wolffd@0 1310 chosen[j]++;
wolffd@0 1311 working2dnum[i++]=j;
wolffd@0 1312 }
wolffd@0 1313 }
wolffd@0 1314 working2dnum[i]=-1;
wolffd@0 1315
wolffd@0 1316 if(retrain == 2) {
wolffd@0 1317 choosenum=0;
wolffd@0 1318 for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* fully clear working set */
wolffd@0 1319 chosen[j]=0;
wolffd@0 1320 }
wolffd@0 1321 clear_index(working2dnum);
wolffd@0 1322 for(i=0;i<totdoc;i++) { /* set inconsistent examples to zero (-i 1) */
wolffd@0 1323 if((inconsistent[i] || (heldout==i)) && (a[i] != 0.0)) {
wolffd@0 1324 chosen[i]=99999;
wolffd@0 1325 choosenum++;
wolffd@0 1326 a[i]=0;
wolffd@0 1327 }
wolffd@0 1328 }
wolffd@0 1329 if(learn_parm->biased_hyperplane) {
wolffd@0 1330 eq=0;
wolffd@0 1331 for(i=0;i<totdoc;i++) { /* make sure we fulfill equality constraint */
wolffd@0 1332 eq+=a[i]*label[i];
wolffd@0 1333 }
wolffd@0 1334 for(i=0;(i<totdoc) && (fabs(eq) > learn_parm->epsilon_a);i++) {
wolffd@0 1335 if((eq*label[i] > 0) && (a[i] > 0)) {
wolffd@0 1336 chosen[i]=88888;
wolffd@0 1337 choosenum++;
wolffd@0 1338 if((eq*label[i]) > a[i]) {
wolffd@0 1339 eq-=(a[i]*label[i]);
wolffd@0 1340 a[i]=0;
wolffd@0 1341 }
wolffd@0 1342 else {
wolffd@0 1343 a[i]-=(eq*label[i]);
wolffd@0 1344 eq=0;
wolffd@0 1345 }
wolffd@0 1346 }
wolffd@0 1347 }
wolffd@0 1348 }
wolffd@0 1349 compute_index(chosen,totdoc,working2dnum);
wolffd@0 1350 }
wolffd@0 1351 else { /* select working set according to steepest gradient */
wolffd@0 1352 if(iteration % 101) {
wolffd@0 1353 already_chosen=0;
wolffd@0 1354 if((minl(learn_parm->svm_newvarsinqp,
wolffd@0 1355 learn_parm->svm_maxqpsize-choosenum)>=4)
wolffd@0 1356 && (kernel_parm->kernel_type != LINEAR)) {
wolffd@0 1357 /* select part of the working set from cache */
wolffd@0 1358 already_chosen=select_next_qp_subproblem_grad(
wolffd@0 1359 label,unlabeled,a,lin,c,totdoc,
wolffd@0 1360 (long)(minl(learn_parm->svm_maxqpsize-choosenum,
wolffd@0 1361 learn_parm->svm_newvarsinqp)
wolffd@0 1362 /2),
wolffd@0 1363 learn_parm,inconsistent,active2dnum,
wolffd@0 1364 working2dnum,selcrit,selexam,kernel_cache,1,
wolffd@0 1365 key,chosen);
wolffd@0 1366 choosenum+=already_chosen;
wolffd@0 1367 }
wolffd@0 1368 choosenum+=select_next_qp_subproblem_grad(
wolffd@0 1369 label,unlabeled,a,lin,c,totdoc,
wolffd@0 1370 minl(learn_parm->svm_maxqpsize-choosenum,
wolffd@0 1371 learn_parm->svm_newvarsinqp-already_chosen),
wolffd@0 1372 learn_parm,inconsistent,active2dnum,
wolffd@0 1373 working2dnum,selcrit,selexam,kernel_cache,0,key,
wolffd@0 1374 chosen);
wolffd@0 1375 }
wolffd@0 1376 else { /* once in a while, select a somewhat random working set
wolffd@0 1377 to get unlocked of infinite loops due to numerical
wolffd@0 1378 inaccuracies in the core qp-solver */
wolffd@0 1379 choosenum+=select_next_qp_subproblem_rand(
wolffd@0 1380 label,unlabeled,a,lin,c,totdoc,
wolffd@0 1381 minl(learn_parm->svm_maxqpsize-choosenum,
wolffd@0 1382 learn_parm->svm_newvarsinqp),
wolffd@0 1383 learn_parm,inconsistent,active2dnum,
wolffd@0 1384 working2dnum,selcrit,selexam,kernel_cache,key,
wolffd@0 1385 chosen,iteration);
wolffd@0 1386 }
wolffd@0 1387 }
wolffd@0 1388
wolffd@0 1389 if(verbosity>=2) {
wolffd@0 1390 printf(" %ld vectors chosen\n",choosenum); fflush(stdout);
wolffd@0 1391 }
wolffd@0 1392
wolffd@0 1393 if(verbosity>=2) t1=get_runtime();
wolffd@0 1394
wolffd@0 1395 if(kernel_cache)
wolffd@0 1396 cache_multiple_kernel_rows(kernel_cache,docs,working2dnum,
wolffd@0 1397 choosenum,kernel_parm);
wolffd@0 1398
wolffd@0 1399 if(verbosity>=2) t2=get_runtime();
wolffd@0 1400 if(retrain != 2) {
wolffd@0 1401 optimize_svm(docs,label,unlabeled,inconsistent,0.0,chosen,active2dnum,
wolffd@0 1402 model,totdoc,working2dnum,choosenum,a,lin,c,learn_parm,
wolffd@0 1403 aicache,kernel_parm,&qp,&epsilon_crit_org);
wolffd@0 1404 }
wolffd@0 1405
wolffd@0 1406 if(verbosity>=2) t3=get_runtime();
wolffd@0 1407 update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc,
wolffd@0 1408 totwords,kernel_parm,kernel_cache,lin,aicache,
wolffd@0 1409 weights);
wolffd@0 1410
wolffd@0 1411 if(verbosity>=2) t4=get_runtime();
wolffd@0 1412 supvecnum=calculate_svm_model(docs,label,unlabeled,lin,a,a_old,c,
wolffd@0 1413 learn_parm,working2dnum,active2dnum,model);
wolffd@0 1414
wolffd@0 1415 if(verbosity>=2) t5=get_runtime();
wolffd@0 1416
wolffd@0 1417 /* The following computation of the objective function works only */
wolffd@0 1418 /* relative to the active variables */
wolffd@0 1419 if(verbosity>=3) {
wolffd@0 1420 criterion=compute_objective_function(a,lin,c,learn_parm->eps,label,
wolffd@0 1421 active2dnum);
wolffd@0 1422 printf("Objective function (over active variables): %.16f\n",criterion);
wolffd@0 1423 fflush(stdout);
wolffd@0 1424 }
wolffd@0 1425
wolffd@0 1426 for(jj=0;(i=working2dnum[jj])>=0;jj++) {
wolffd@0 1427 a_old[i]=a[i];
wolffd@0 1428 }
wolffd@0 1429
wolffd@0 1430 if(retrain == 2) { /* reset inconsistent unlabeled examples */
wolffd@0 1431 for(i=0;(i<totdoc);i++) {
wolffd@0 1432 if(inconsistent[i] && unlabeled[i]) {
wolffd@0 1433 inconsistent[i]=0;
wolffd@0 1434 label[i]=0;
wolffd@0 1435 }
wolffd@0 1436 }
wolffd@0 1437 }
wolffd@0 1438
wolffd@0 1439 retrain=check_optimality(model,label,unlabeled,a,lin,c,totdoc,learn_parm,
wolffd@0 1440 maxdiff,epsilon_crit_org,&misclassified,
wolffd@0 1441 inconsistent,active2dnum,last_suboptimal_at,
wolffd@0 1442 iteration,kernel_parm);
wolffd@0 1443
wolffd@0 1444 if(verbosity>=2) {
wolffd@0 1445 t6=get_runtime();
wolffd@0 1446 timing_profile->time_select+=t1-t0;
wolffd@0 1447 timing_profile->time_kernel+=t2-t1;
wolffd@0 1448 timing_profile->time_opti+=t3-t2;
wolffd@0 1449 timing_profile->time_update+=t4-t3;
wolffd@0 1450 timing_profile->time_model+=t5-t4;
wolffd@0 1451 timing_profile->time_check+=t6-t5;
wolffd@0 1452 }
wolffd@0 1453
wolffd@0 1454 /* checking whether optimizer got stuck */
wolffd@0 1455 if((*maxdiff) < bestmaxdiff) {
wolffd@0 1456 bestmaxdiff=(*maxdiff);
wolffd@0 1457 bestmaxdiffiter=iteration;
wolffd@0 1458 }
wolffd@0 1459 if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) {
wolffd@0 1460 /* long time no progress? */
wolffd@0 1461 terminate=1;
wolffd@0 1462 retrain=0;
wolffd@0 1463 if(verbosity>=1)
wolffd@0 1464 printf("\nWARNING: Relaxing KT-Conditions due to slow progress! Terminating!\n");
wolffd@0 1465 }
wolffd@0 1466
wolffd@0 1467 noshrink=0;
wolffd@0 1468 if((!retrain) && (inactivenum>0)
wolffd@0 1469 && ((!learn_parm->skip_final_opt_check)
wolffd@0 1470 || (kernel_parm->kernel_type == LINEAR))) {
wolffd@0 1471 if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
wolffd@0 1472 || (verbosity>=2)) {
wolffd@0 1473 if(verbosity==1) {
wolffd@0 1474 printf("\n");
wolffd@0 1475 }
wolffd@0 1476 printf(" Checking optimality of inactive variables...");
wolffd@0 1477 fflush(stdout);
wolffd@0 1478 }
wolffd@0 1479 t1=get_runtime();
wolffd@0 1480 reactivate_inactive_examples(label,unlabeled,a,shrink_state,lin,c,totdoc,
wolffd@0 1481 totwords,iteration,learn_parm,inconsistent,
wolffd@0 1482 docs,kernel_parm,kernel_cache,model,aicache,
wolffd@0 1483 weights,maxdiff);
wolffd@0 1484 /* Update to new active variables. */
wolffd@0 1485 activenum=compute_index(shrink_state->active,totdoc,active2dnum);
wolffd@0 1486 inactivenum=totdoc-activenum;
wolffd@0 1487 /* reset watchdog */
wolffd@0 1488 bestmaxdiff=(*maxdiff);
wolffd@0 1489 bestmaxdiffiter=iteration;
wolffd@0 1490 /* termination criterion */
wolffd@0 1491 noshrink=1;
wolffd@0 1492 retrain=0;
wolffd@0 1493 if((*maxdiff) > learn_parm->epsilon_crit)
wolffd@0 1494 retrain=1;
wolffd@0 1495 timing_profile->time_shrink+=get_runtime()-t1;
wolffd@0 1496 if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
wolffd@0 1497 || (verbosity>=2)) {
wolffd@0 1498 printf("done.\n"); fflush(stdout);
wolffd@0 1499 printf(" Number of inactive variables = %ld\n",inactivenum);
wolffd@0 1500 }
wolffd@0 1501 }
wolffd@0 1502
wolffd@0 1503 if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff)))
wolffd@0 1504 learn_parm->epsilon_crit=(*maxdiff);
wolffd@0 1505 if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) {
wolffd@0 1506 learn_parm->epsilon_crit/=2.0;
wolffd@0 1507 retrain=1;
wolffd@0 1508 noshrink=1;
wolffd@0 1509 }
wolffd@0 1510 if(learn_parm->epsilon_crit<epsilon_crit_org)
wolffd@0 1511 learn_parm->epsilon_crit=epsilon_crit_org;
wolffd@0 1512
wolffd@0 1513 if(verbosity>=2) {
wolffd@0 1514 printf(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n",
wolffd@0 1515 supvecnum,model->at_upper_bound,(*maxdiff));
wolffd@0 1516 fflush(stdout);
wolffd@0 1517 }
wolffd@0 1518 if(verbosity>=3) {
wolffd@0 1519 printf("\n");
wolffd@0 1520 }
wolffd@0 1521
wolffd@0 1522 if((!retrain) && (transduction)) {
wolffd@0 1523 for(i=0;(i<totdoc);i++) {
wolffd@0 1524 shrink_state->active[i]=1;
wolffd@0 1525 }
wolffd@0 1526 activenum=compute_index(shrink_state->active,totdoc,active2dnum);
wolffd@0 1527 inactivenum=0;
wolffd@0 1528 if(verbosity==1) printf("done\n");
wolffd@0 1529 retrain=incorporate_unlabeled_examples(model,label,inconsistent,
wolffd@0 1530 unlabeled,a,lin,totdoc,
wolffd@0 1531 selcrit,selexam,key,
wolffd@0 1532 transductcycle,kernel_parm,
wolffd@0 1533 learn_parm);
wolffd@0 1534 epsilon_crit_org=learn_parm->epsilon_crit;
wolffd@0 1535 if(kernel_parm->kernel_type == LINEAR)
wolffd@0 1536 learn_parm->epsilon_crit=1;
wolffd@0 1537 transductcycle++;
wolffd@0 1538 /* reset watchdog */
wolffd@0 1539 bestmaxdiff=(*maxdiff);
wolffd@0 1540 bestmaxdiffiter=iteration;
wolffd@0 1541 }
wolffd@0 1542 else if(((iteration % 10) == 0) && (!noshrink)) {
wolffd@0 1543 activenum=shrink_problem(docs,learn_parm,shrink_state,kernel_parm,
wolffd@0 1544 active2dnum,last_suboptimal_at,iteration,totdoc,
wolffd@0 1545 maxl((long)(activenum/10),
wolffd@0 1546 maxl((long)(totdoc/500),100)),
wolffd@0 1547 a,inconsistent);
wolffd@0 1548 inactivenum=totdoc-activenum;
wolffd@0 1549 if((kernel_cache)
wolffd@0 1550 && (supvecnum>kernel_cache->max_elems)
wolffd@0 1551 && ((kernel_cache->activenum-activenum)>maxl((long)(activenum/10),500))) {
wolffd@0 1552 kernel_cache_shrink(kernel_cache,totdoc,
wolffd@0 1553 minl((kernel_cache->activenum-activenum),
wolffd@0 1554 (kernel_cache->activenum-supvecnum)),
wolffd@0 1555 shrink_state->active);
wolffd@0 1556 }
wolffd@0 1557 }
wolffd@0 1558
wolffd@0 1559 if((!retrain) && learn_parm->remove_inconsistent) {
wolffd@0 1560 if(verbosity>=1) {
wolffd@0 1561 printf(" Moving training errors to inconsistent examples...");
wolffd@0 1562 fflush(stdout);
wolffd@0 1563 }
wolffd@0 1564 if(learn_parm->remove_inconsistent == 1) {
wolffd@0 1565 retrain=identify_inconsistent(a,label,unlabeled,totdoc,learn_parm,
wolffd@0 1566 &inconsistentnum,inconsistent);
wolffd@0 1567 }
wolffd@0 1568 else if(learn_parm->remove_inconsistent == 2) {
wolffd@0 1569 retrain=identify_misclassified(lin,label,unlabeled,totdoc,
wolffd@0 1570 model,&inconsistentnum,inconsistent);
wolffd@0 1571 }
wolffd@0 1572 else if(learn_parm->remove_inconsistent == 3) {
wolffd@0 1573 retrain=identify_one_misclassified(lin,label,unlabeled,totdoc,
wolffd@0 1574 model,&inconsistentnum,inconsistent);
wolffd@0 1575 }
wolffd@0 1576 if(retrain) {
wolffd@0 1577 if(kernel_parm->kernel_type == LINEAR) { /* reinit shrinking */
wolffd@0 1578 learn_parm->epsilon_crit=2.0;
wolffd@0 1579 }
wolffd@0 1580 }
wolffd@0 1581 if(verbosity>=1) {
wolffd@0 1582 printf("done.\n");
wolffd@0 1583 if(retrain) {
wolffd@0 1584 printf(" Now %ld inconsistent examples.\n",inconsistentnum);
wolffd@0 1585 }
wolffd@0 1586 }
wolffd@0 1587 }
wolffd@0 1588 } /* end of loop */
wolffd@0 1589
wolffd@0 1590 free(chosen);
wolffd@0 1591 free(last_suboptimal_at);
wolffd@0 1592 free(key);
wolffd@0 1593 free(selcrit);
wolffd@0 1594 free(selexam);
wolffd@0 1595 free(a_old);
wolffd@0 1596 free(aicache);
wolffd@0 1597 free(working2dnum);
wolffd@0 1598 free(active2dnum);
wolffd@0 1599 free(qp.opt_ce);
wolffd@0 1600 free(qp.opt_ce0);
wolffd@0 1601 free(qp.opt_g);
wolffd@0 1602 free(qp.opt_g0);
wolffd@0 1603 free(qp.opt_xinit);
wolffd@0 1604 free(qp.opt_low);
wolffd@0 1605 free(qp.opt_up);
wolffd@0 1606 free(weights);
wolffd@0 1607
wolffd@0 1608 learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */
wolffd@0 1609 model->maxdiff=(*maxdiff);
wolffd@0 1610
wolffd@0 1611 return(iteration);
wolffd@0 1612 }
wolffd@0 1613
long optimize_to_convergence_sharedslack(DOC **docs, long int *label,
			     long int totdoc,
			     long int totwords, LEARN_PARM *learn_parm,
			     KERNEL_PARM *kernel_parm,
			     KERNEL_CACHE *kernel_cache,
			     SHRINK_STATE *shrink_state, MODEL *model,
			     double *a, double *lin, double *c,
			     TIMING *timing_profile, double *maxdiff)
     /* docs: Training vectors (x-part) */
     /* label: Training labels/value (y-part, zero if test example for
                              transduction) */
     /* totdoc: Number of examples in docs/label */
     /* totwords: Number of features (i.e. highest feature index) */
     /* learn_parm: Learning paramenters */
     /* kernel_parm: Kernel paramenters */
     /* kernel_cache: Initialized/partly filled Cache, if using a kernel.
                      NULL if linear. */
     /* shrink_state: State of active variables */
     /* model: Returns learning result */
     /* a: alphas */
     /* lin: linear component of gradient */
     /* c: right hand side of inequalities (margin) */
     /* maxdiff: returns maximum violation of KT-conditions */
     /* Returns the number of iterations executed.  Main working-set
        optimization loop for training problems in which several
        examples share one slack variable; the shared slack of example
        i is identified by docs[i]->slackid. */
{
  long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink,*unlabeled;
  long *inconsistent,choosenum,already_chosen=0,iteration;
  long misclassified,supvecnum=0,*active2dnum,inactivenum;
  long *working2dnum,*selexam,*ignore;
  long activenum,retrain,maxslackid,slackset,jointstep;
  double criterion,eq_target;
  double *a_old,*alphaslack;
  long t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */
  double epsilon_crit_org,maxsharedviol;
  double bestmaxdiff;
  long bestmaxdiffiter,terminate;

  double *selcrit;  /* buffer for sorting */
  CFLOAT *aicache;  /* buffer to keep one row of hessian */
  double *weights;  /* buffer for weight vector in linear case */
  QP qp;            /* buffer for one quadratic program */
  double *slack;    /* vector of slack variables for optimization with
		       shared slacks */

  epsilon_crit_org=learn_parm->epsilon_crit; /* save org */
  if(kernel_parm->kernel_type == LINEAR) {
    /* start with a loose stopping tolerance; it is tightened towards
       epsilon_crit_org near the bottom of the main loop */
    learn_parm->epsilon_crit=2.0;
    kernel_cache=NULL;   /* caching makes no sense for linear kernel */
  }
  learn_parm->epsilon_shrink=2;
  (*maxdiff)=1;

  learn_parm->totwords=totwords;

  chosen = (long *)my_malloc(sizeof(long)*totdoc);
  unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
  inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
  ignore = (long *)my_malloc(sizeof(long)*totdoc);
  key = (long *)my_malloc(sizeof(long)*(totdoc+11));
  selcrit = (double *)my_malloc(sizeof(double)*totdoc);
  selexam = (long *)my_malloc(sizeof(long)*totdoc);
  a_old = (double *)my_malloc(sizeof(double)*totdoc);
  aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
  working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
  active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
  qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
  qp.opt_ce0 = (double *)my_malloc(sizeof(double));
  qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize
				 *learn_parm->svm_maxqpsize);
  qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
  qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
  qp.opt_low=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
  qp.opt_up=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
  weights=(double *)my_malloc(sizeof(double)*(totwords+1));
  maxslackid=0;
  for(i=0;i<totdoc;i++) {    /* determine size of slack array */
    if(maxslackid<docs[i]->slackid)
      maxslackid=docs[i]->slackid;
  }
  /* slack/alphaslack/last_suboptimal_at are indexed by slackid, not by
     example number, hence size maxslackid+1 */
  slack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
  alphaslack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
  last_suboptimal_at = (long *)my_malloc(sizeof(long)*(maxslackid+1));
  for(i=0;i<=maxslackid;i++) {    /* init shared slacks */
    slack[i]=0;
    alphaslack[i]=0;
    last_suboptimal_at[i]=1;
  }

  choosenum=0;
  retrain=1;
  iteration=1;
  bestmaxdiffiter=1;
  bestmaxdiff=999999999;
  terminate=0;

  if(kernel_cache) {
    kernel_cache->time=iteration;  /* for lru cache */
    kernel_cache_reset_lru(kernel_cache);
  }

  for(i=0;i<totdoc;i++) {    /* various inits */
    chosen[i]=0;
    unlabeled[i]=0;
    inconsistent[i]=0;
    ignore[i]=0;
    a_old[i]=a[i];
  }
  activenum=compute_index(shrink_state->active,totdoc,active2dnum);
  inactivenum=totdoc-activenum;
  clear_index(working2dnum);

  /* call to init slack and alphaslack */
  compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm,
			slack,alphaslack);

  /* repeat this loop until we have convergence */
  for(;retrain && (!terminate);iteration++) {

    if(kernel_cache)
      kernel_cache->time=iteration;  /* for lru cache */
    if(verbosity>=2) {
      printf(
	"Iteration %ld: ",iteration); fflush(stdout);
    }
    else if(verbosity==1) {
      printf("."); fflush(stdout);
    }

    if(verbosity>=2) t0=get_runtime();
    if(verbosity>=3) {
      printf("\nSelecting working set... "); fflush(stdout);
    }

    if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize)
      learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;

    /* select working set according to steepest gradient */
    jointstep=0;
    eq_target=0;
    if(iteration % 101) {
      /* find the slack set with the largest shared-constraint violation */
      slackset=select_next_qp_slackset(docs,label,a,lin,slack,alphaslack,c,
				       learn_parm,active2dnum,&maxsharedviol);
      if((iteration % 2)
	 || (!slackset) || (maxsharedviol<learn_parm->epsilon_crit)){
	/* do a step with examples from different slack sets */
	if(verbosity >= 2) {
	  printf("(i-step)"); fflush(stdout);
	}
	i=0;
	for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear old part of working set */
	  if((chosen[j]>=(learn_parm->svm_maxqpsize/
			  minl(learn_parm->svm_maxqpsize,
			       learn_parm->svm_newvarsinqp)))) {
	    chosen[j]=0;
	    choosenum--;
	  }
	  else {
	    chosen[j]++;
	    working2dnum[i++]=j;
	  }
	}
	working2dnum[i]=-1;

	already_chosen=0;
	if((minl(learn_parm->svm_newvarsinqp,
		 learn_parm->svm_maxqpsize-choosenum)>=4)
	   && (kernel_parm->kernel_type != LINEAR)) {
	  /* select part of the working set from cache */
	  already_chosen=select_next_qp_subproblem_grad(
			      label,unlabeled,a,lin,c,totdoc,
			      (long)(minl(learn_parm->svm_maxqpsize-choosenum,
					  learn_parm->svm_newvarsinqp)
				     /2),
			      learn_parm,inconsistent,active2dnum,
			      working2dnum,selcrit,selexam,kernel_cache,
			      (long)1,key,chosen);
	  choosenum+=already_chosen;
	}
	choosenum+=select_next_qp_subproblem_grad(
                              label,unlabeled,a,lin,c,totdoc,
                              minl(learn_parm->svm_maxqpsize-choosenum,
				   learn_parm->svm_newvarsinqp-already_chosen),
                              learn_parm,inconsistent,active2dnum,
			      working2dnum,selcrit,selexam,kernel_cache,
			      (long)0,key,chosen);
      }
      else {            /* do a step with all examples from same slack set */
	if(verbosity >= 2) {
	  printf("(j-step on %ld)",slackset); fflush(stdout);
	}
	jointstep=1;
	for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */
	  chosen[j]=0;
	}
	working2dnum[0]=-1;
	eq_target=alphaslack[slackset];
	for(j=0;j<totdoc;j++) {     /* mask all but slackset */
	  /* for(jj=0;(j=active2dnum[jj])>=0;jj++) { */
	  if(docs[j]->slackid != slackset)
	    ignore[j]=1;
	  else {
	    ignore[j]=0;
	    learn_parm->svm_cost[j]=learn_parm->svm_c;
	    /* printf("Inslackset(%ld,%ld)",j,shrink_state->active[j]); */
	  }
	}
	/* the j-step optimizes with the equality constraint active */
	learn_parm->biased_hyperplane=1;
	choosenum=select_next_qp_subproblem_grad(
                              label,unlabeled,a,lin,c,totdoc,
                              learn_parm->svm_maxqpsize,
                              learn_parm,ignore,active2dnum,
			      working2dnum,selcrit,selexam,kernel_cache,
			      (long)0,key,chosen);
	learn_parm->biased_hyperplane=0;
      }
    }
    else { /* once in a while, select a somewhat random working set
	      to get unlocked of infinite loops due to numerical
	      inaccuracies in the core qp-solver */
      choosenum+=select_next_qp_subproblem_rand(
                              label,unlabeled,a,lin,c,totdoc,
                              minl(learn_parm->svm_maxqpsize-choosenum,
				   learn_parm->svm_newvarsinqp),
                              learn_parm,inconsistent,active2dnum,
			      working2dnum,selcrit,selexam,kernel_cache,key,
			      chosen,iteration);
    }

    if(verbosity>=2) {
      printf(" %ld vectors chosen\n",choosenum); fflush(stdout);
    }

    if(verbosity>=2) t1=get_runtime();

    if(kernel_cache)
      cache_multiple_kernel_rows(kernel_cache,docs,working2dnum,
				 choosenum,kernel_parm);

    if(verbosity>=2) t2=get_runtime();
    if(jointstep) learn_parm->biased_hyperplane=1;
    optimize_svm(docs,label,unlabeled,ignore,eq_target,chosen,active2dnum,
		 model,totdoc,working2dnum,choosenum,a,lin,c,learn_parm,
		 aicache,kernel_parm,&qp,&epsilon_crit_org);
    learn_parm->biased_hyperplane=0;

    for(jj=0;(i=working2dnum[jj])>=0;jj++) /* recompute sums of alphas */
      alphaslack[docs[i]->slackid]+=(a[i]-a_old[i]);
    for(jj=0;(i=working2dnum[jj])>=0;jj++) { /* reduce alpha to fulfill
						constraints */
      if(alphaslack[docs[i]->slackid] > learn_parm->svm_c) {
	if(a[i] < (alphaslack[docs[i]->slackid]-learn_parm->svm_c)) {
	  alphaslack[docs[i]->slackid]-=a[i];
	  a[i]=0;
	}
	else {
	  a[i]-=(alphaslack[docs[i]->slackid]-learn_parm->svm_c);
	  alphaslack[docs[i]->slackid]=learn_parm->svm_c;
	}
      }
    }
    /* per-example upper bound = own alpha plus the unused part of the
       shared budget svm_c of its slack set */
    for(jj=0;(i=active2dnum[jj])>=0;jj++)
      learn_parm->svm_cost[i]=a[i]+(learn_parm->svm_c
				    -alphaslack[docs[i]->slackid]);

    if(verbosity>=2) t3=get_runtime();
    update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc,
			    totwords,kernel_parm,kernel_cache,lin,aicache,
			    weights);
    compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm,
			  slack,alphaslack);

    if(verbosity>=2) t4=get_runtime();
    supvecnum=calculate_svm_model(docs,label,unlabeled,lin,a,a_old,c,
				  learn_parm,working2dnum,active2dnum,model);

    if(verbosity>=2) t5=get_runtime();

    /* The following computation of the objective function works only */
    /* relative to the active variables */
    if(verbosity>=3) {
      criterion=compute_objective_function(a,lin,c,learn_parm->eps,label,
					   active2dnum);
      printf("Objective function (over active variables): %.16f\n",criterion);
      fflush(stdout);
    }

    for(jj=0;(i=working2dnum[jj])>=0;jj++) {
      a_old[i]=a[i];
    }

    retrain=check_optimality_sharedslack(docs,model,label,a,lin,c,
			  slack,alphaslack,totdoc,learn_parm,
			  maxdiff,epsilon_crit_org,&misclassified,
			  active2dnum,last_suboptimal_at,
			  iteration,kernel_parm);

    if(verbosity>=2) {
      t6=get_runtime();
      timing_profile->time_select+=t1-t0;
      timing_profile->time_kernel+=t2-t1;
      timing_profile->time_opti+=t3-t2;
      timing_profile->time_update+=t4-t3;
      timing_profile->time_model+=t5-t4;
      timing_profile->time_check+=t6-t5;
    }

    /* checking whether optimizer got stuck */
    if((*maxdiff) < bestmaxdiff) {
      bestmaxdiff=(*maxdiff);
      bestmaxdiffiter=iteration;
    }
    if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) {
      /* long time no progress? */
      terminate=1;
      retrain=0;
      if(verbosity>=1)
	printf("\nWARNING: Relaxing KT-Conditions due to slow progress! Terminating!\n");
    }

    noshrink=0;

    /* when the active subproblem converged, verify optimality on the
       shrunken (inactive) variables before accepting convergence */
    if((!retrain) && (inactivenum>0)
       && ((!learn_parm->skip_final_opt_check)
	   || (kernel_parm->kernel_type == LINEAR))) {
      if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
	 || (verbosity>=2)) {
	if(verbosity==1) {
	  printf("\n");
	}
	printf(" Checking optimality of inactive variables...");
	fflush(stdout);
      }
      t1=get_runtime();
      reactivate_inactive_examples(label,unlabeled,a,shrink_state,lin,c,totdoc,
				   totwords,iteration,learn_parm,inconsistent,
				   docs,kernel_parm,kernel_cache,model,aicache,
				   weights,maxdiff);
      /* Update to new active variables. */
      activenum=compute_index(shrink_state->active,totdoc,active2dnum);
      inactivenum=totdoc-activenum;
      /* check optimality, since check in reactivate does not work for
	 sharedslacks */
      /* NOTE(review): the return value of this call is discarded below
	 (retrain is reset to 0 a few lines down); only the updated
	 *maxdiff is used to decide whether to retrain */
      retrain=check_optimality_sharedslack(docs,model,label,a,lin,c,
			  slack,alphaslack,totdoc,learn_parm,
			  maxdiff,epsilon_crit_org,&misclassified,
			  active2dnum,last_suboptimal_at,
			  iteration,kernel_parm);

      /* reset watchdog */
      bestmaxdiff=(*maxdiff);
      bestmaxdiffiter=iteration;
      /* termination criterion */
      noshrink=1;
      retrain=0;
      if((*maxdiff) > learn_parm->epsilon_crit)
	retrain=1;
      timing_profile->time_shrink+=get_runtime()-t1;
      if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
	 || (verbosity>=2)) {
	printf("done.\n"); fflush(stdout);
	printf(" Number of inactive variables = %ld\n",inactivenum);
      }
    }

    /* tighten the stopping tolerance stepwise towards the original
       epsilon_crit (it may have been relaxed above for linear kernels) */
    if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff)))
      learn_parm->epsilon_crit=(*maxdiff);
    if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) {
      learn_parm->epsilon_crit/=2.0;
      retrain=1;
      noshrink=1;
    }
    if(learn_parm->epsilon_crit<epsilon_crit_org)
      learn_parm->epsilon_crit=epsilon_crit_org;

    if(verbosity>=2) {
      printf(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n",
	     supvecnum,model->at_upper_bound,(*maxdiff));
      fflush(stdout);
    }
    if(verbosity>=3) {
      printf("\n");
    }

    /* every 10th iteration, shrink away variables that have been
       suboptimal for a long time */
    if(((iteration % 10) == 0) && (!noshrink)) {
      activenum=shrink_problem(docs,learn_parm,shrink_state,
			       kernel_parm,active2dnum,
			       last_suboptimal_at,iteration,totdoc,
			       maxl((long)(activenum/10),
				    maxl((long)(totdoc/500),100)),
			       a,inconsistent);
      inactivenum=totdoc-activenum;
      if((kernel_cache)
	 && (supvecnum>kernel_cache->max_elems)
	 && ((kernel_cache->activenum-activenum)>maxl((long)(activenum/10),500))) {
	kernel_cache_shrink(kernel_cache,totdoc,
			    minl((kernel_cache->activenum-activenum),
				 (kernel_cache->activenum-supvecnum)),
			    shrink_state->active);
      }
    }

  } /* end of loop */


  free(alphaslack);
  free(slack);
  free(chosen);
  free(unlabeled);
  free(inconsistent);
  free(ignore);
  free(last_suboptimal_at);
  free(key);
  free(selcrit);
  free(selexam);
  free(a_old);
  free(aicache);
  free(working2dnum);
  free(active2dnum);
  free(qp.opt_ce);
  free(qp.opt_ce0);
  free(qp.opt_g);
  free(qp.opt_g0);
  free(qp.opt_xinit);
  free(qp.opt_low);
  free(qp.opt_up);
  free(weights);

  learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */
  model->maxdiff=(*maxdiff);

  return(iteration);
}
wolffd@0 2045
wolffd@0 2046
double compute_objective_function(double *a, double *lin, double *c,
				  double eps, long int *label,
				  long int *active2dnum)
     /* Returns the current value of the objective function. */
     /* Computed over the active variables only, so the value is */
     /* meaningful only relative to the active set. */
{
  long pos,doc;
  double objective;

  objective=0;
  /* accumulate each active example's contribution */
  for(pos=0;(doc=active2dnum[pos])>=0;pos++) {
    objective+=(eps-(double)label[doc]*c[doc])*a[doc]
              +0.5*a[doc]*label[doc]*lin[doc];
  }
  return(objective);
}
wolffd@0 2063
void clear_index(long int *index)
     /* Resets the index list to the empty state. */
{
  /* a -1 in slot 0 marks the end of the (empty) list */
  *index=-1;
}
wolffd@0 2069
void add_to_index(long int *index, long int elem)
     /* Appends elem to the end of the -1 terminated index list. */
     /* The caller must ensure the array has room for one more entry. */
{
  register long pos=0;
  /* scan forward to the current terminator */
  while(index[pos] != -1)
    pos++;
  index[pos]=elem;
  index[pos+1]=-1;   /* re-terminate the list */
}
wolffd@0 2078
long compute_index(long int *binfeature, long int range, long int *index)
     /* Builds an inverted index of binfeature: index receives the */
     /* positions i < range where binfeature[i] is nonzero, followed */
     /* by four -1 terminators. Returns the number of positions found. */
     /* The index array must have room for range+4 entries. */
{
  register long pos,count;

  count=0;
  for(pos=0;pos<range;pos++) {
    if(binfeature[pos]) {
      index[count++]=pos;
    }
  }
  /* pad with several terminators, as callers expect */
  for(pos=0;pos<4;pos++) {
    index[count+pos]=-1;
  }
  return(count);
}
wolffd@0 2096
wolffd@0 2097
wolffd@0 2098 void optimize_svm(DOC **docs, long int *label, long int *unlabeled,
wolffd@0 2099 long int *exclude_from_eq_const, double eq_target,
wolffd@0 2100 long int *chosen, long int *active2dnum, MODEL *model,
wolffd@0 2101 long int totdoc, long int *working2dnum, long int varnum,
wolffd@0 2102 double *a, double *lin, double *c, LEARN_PARM *learn_parm,
wolffd@0 2103 CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp,
wolffd@0 2104 double *epsilon_crit_target)
wolffd@0 2105 /* Do optimization on the working set. */
wolffd@0 2106 {
wolffd@0 2107 long i;
wolffd@0 2108 double *a_v;
wolffd@0 2109
wolffd@0 2110 compute_matrices_for_optimization(docs,label,unlabeled,
wolffd@0 2111 exclude_from_eq_const,eq_target,chosen,
wolffd@0 2112 active2dnum,working2dnum,model,a,lin,c,
wolffd@0 2113 varnum,totdoc,learn_parm,aicache,
wolffd@0 2114 kernel_parm,qp);
wolffd@0 2115
wolffd@0 2116 if(verbosity>=3) {
wolffd@0 2117 printf("Running optimizer..."); fflush(stdout);
wolffd@0 2118 }
wolffd@0 2119 /* call the qp-subsolver */
wolffd@0 2120 a_v=optimize_qp(qp,epsilon_crit_target,
wolffd@0 2121 learn_parm->svm_maxqpsize,
wolffd@0 2122 &(model->b), /* in case the optimizer gives us */
wolffd@0 2123 /* the threshold for free. otherwise */
wolffd@0 2124 /* b is calculated in calculate_model. */
wolffd@0 2125 learn_parm);
wolffd@0 2126 if(verbosity>=3) {
wolffd@0 2127 printf("done\n");
wolffd@0 2128 }
wolffd@0 2129
wolffd@0 2130 for(i=0;i<varnum;i++) {
wolffd@0 2131 a[working2dnum[i]]=a_v[i];
wolffd@0 2132 /*
wolffd@0 2133 if(a_v[i]<=(0+learn_parm->epsilon_a)) {
wolffd@0 2134 a[working2dnum[i]]=0;
wolffd@0 2135 }
wolffd@0 2136 else if(a_v[i]>=(learn_parm->svm_cost[working2dnum[i]]-learn_parm->epsilon_a)) {
wolffd@0 2137 a[working2dnum[i]]=learn_parm->svm_cost[working2dnum[i]];
wolffd@0 2138 }
wolffd@0 2139 */
wolffd@0 2140 }
wolffd@0 2141 }
wolffd@0 2142
void compute_matrices_for_optimization(DOC **docs, long int *label,
	  long int *unlabeled, long *exclude_from_eq_const, double eq_target,
	  long int *chosen, long int *active2dnum,
	  long int *key, MODEL *model, double *a, double *lin, double *c,
	  long int varnum, long int totdoc, LEARN_PARM *learn_parm,
	  CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp)
     /* Fills qp with the quadratic subproblem restricted to the varnum */
     /* variables listed in key: linear part opt_g0, Hessian opt_g, */
     /* equality-constraint row opt_ce/opt_ce0, box bounds opt_low/opt_up, */
     /* and the feasible starting point opt_xinit. */
     /* NOTE(review): unlabeled, active2dnum and totdoc are not referenced */
     /* in this function body; they appear kept for signature uniformity */
     /* with related routines -- confirm against callers. */
{
  register long ki,kj,i,j;
  register double kernel_temp;

  if(verbosity>=3) {
    fprintf(stdout,"Computing qp-matrices (type %ld kernel [degree %ld, rbf_gamma %f, coef_lin %f, coef_const %f])...",kernel_parm->kernel_type,kernel_parm->poly_degree,kernel_parm->rbf_gamma,kernel_parm->coef_lin,kernel_parm->coef_const);
    fflush(stdout);
  }

  qp->opt_n=varnum;
  qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */
  for(j=1;j<model->sv_num;j++) { /* start at 1 */
    /* sum the alphas of support vectors that stay fixed (outside the
       working set and not excluded from the equality constraint) */
    if((!chosen[(model->supvec[j])->docnum])
       && (!exclude_from_eq_const[(model->supvec[j])->docnum])) {
      qp->opt_ce0[0]+=model->alpha[j];
    }
  }
  if(learn_parm->biased_hyperplane)
    qp->opt_m=1;
  else
    qp->opt_m=0;  /* eq-constraint will be ignored */

  /* init linear part of objective function */
  for(i=0;i<varnum;i++) {
    qp->opt_g0[i]=lin[key[i]];
  }

  for(i=0;i<varnum;i++) {
    ki=key[i];

    /* Compute the matrix for equality constraints */
    qp->opt_ce[i]=label[ki];
    qp->opt_low[i]=0;
    qp->opt_up[i]=learn_parm->svm_cost[ki];

    kernel_temp=(double)kernel(kernel_parm,docs[ki],docs[ki]);
    /* compute linear part of objective function */
    /* (subtract the working set's own contribution from lin, which
       was accumulated over all variables) */
    qp->opt_g0[i]-=(kernel_temp*a[ki]*(double)label[ki]);
    /* compute quadratic part of objective function */
    qp->opt_g[varnum*i+i]=kernel_temp;
    for(j=i+1;j<varnum;j++) {
      kj=key[j];
      kernel_temp=(double)kernel(kernel_parm,docs[ki],docs[kj]);
      /* compute linear part of objective function */
      qp->opt_g0[i]-=(kernel_temp*a[kj]*(double)label[kj]);
      qp->opt_g0[j]-=(kernel_temp*a[ki]*(double)label[ki]);
      /* compute quadratic part of objective function */
      /* (Hessian is symmetric; each kernel value is computed once and
	 stored in both halves) */
      qp->opt_g[varnum*i+j]=(double)label[ki]*(double)label[kj]*kernel_temp;
      qp->opt_g[varnum*j+i]=(double)label[ki]*(double)label[kj]*kernel_temp;
    }

    if(verbosity>=3) {
      if(i % 20 == 0) {
	fprintf(stdout,"%ld..",i); fflush(stdout);
      }
    }
  }

  for(i=0;i<varnum;i++) {
    /* assure starting at feasible point */
    qp->opt_xinit[i]=a[key[i]];
    /* set linear part of objective function */
    qp->opt_g0[i]=(learn_parm->eps-(double)label[key[i]]*c[key[i]])+qp->opt_g0[i]*(double)label[key[i]];
  }

  if(verbosity>=3) {
    fprintf(stdout,"done\n");
  }
}
wolffd@0 2218
wolffd@0 2219 long calculate_svm_model(DOC **docs, long int *label, long int *unlabeled,
wolffd@0 2220 double *lin, double *a, double *a_old, double *c,
wolffd@0 2221 LEARN_PARM *learn_parm, long int *working2dnum,
wolffd@0 2222 long int *active2dnum, MODEL *model)
wolffd@0 2223 /* Compute decision function based on current values */
wolffd@0 2224 /* of alpha. */
wolffd@0 2225 {
wolffd@0 2226 long i,ii,pos,b_calculated=0,first_low,first_high;
wolffd@0 2227 double ex_c,b_temp,b_low,b_high;
wolffd@0 2228
wolffd@0 2229 if(verbosity>=3) {
wolffd@0 2230 printf("Calculating model..."); fflush(stdout);
wolffd@0 2231 }
wolffd@0 2232
wolffd@0 2233 if(!learn_parm->biased_hyperplane) {
wolffd@0 2234 model->b=0;
wolffd@0 2235 b_calculated=1;
wolffd@0 2236 }
wolffd@0 2237
wolffd@0 2238 for(ii=0;(i=working2dnum[ii])>=0;ii++) {
wolffd@0 2239 if((a_old[i]>0) && (a[i]==0)) { /* remove from model */
wolffd@0 2240 pos=model->index[i];
wolffd@0 2241 model->index[i]=-1;
wolffd@0 2242 (model->sv_num)--;
wolffd@0 2243 model->supvec[pos]=model->supvec[model->sv_num];
wolffd@0 2244 model->alpha[pos]=model->alpha[model->sv_num];
wolffd@0 2245 model->index[(model->supvec[pos])->docnum]=pos;
wolffd@0 2246 }
wolffd@0 2247 else if((a_old[i]==0) && (a[i]>0)) { /* add to model */
wolffd@0 2248 model->supvec[model->sv_num]=docs[i];
wolffd@0 2249 model->alpha[model->sv_num]=a[i]*(double)label[i];
wolffd@0 2250 model->index[i]=model->sv_num;
wolffd@0 2251 (model->sv_num)++;
wolffd@0 2252 }
wolffd@0 2253 else if(a_old[i]==a[i]) { /* nothing to do */
wolffd@0 2254 }
wolffd@0 2255 else { /* just update alpha */
wolffd@0 2256 model->alpha[model->index[i]]=a[i]*(double)label[i];
wolffd@0 2257 }
wolffd@0 2258
wolffd@0 2259 ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
wolffd@0 2260 if((a_old[i]>=ex_c) && (a[i]<ex_c)) {
wolffd@0 2261 (model->at_upper_bound)--;
wolffd@0 2262 }
wolffd@0 2263 else if((a_old[i]<ex_c) && (a[i]>=ex_c)) {
wolffd@0 2264 (model->at_upper_bound)++;
wolffd@0 2265 }
wolffd@0 2266
wolffd@0 2267 if((!b_calculated)
wolffd@0 2268 && (a[i]>learn_parm->epsilon_a) && (a[i]<ex_c)) { /* calculate b */
wolffd@0 2269 model->b=((double)label[i]*learn_parm->eps-c[i]+lin[i]);
wolffd@0 2270 /* model->b=(-(double)label[i]+lin[i]); */
wolffd@0 2271 b_calculated=1;
wolffd@0 2272 }
wolffd@0 2273 }
wolffd@0 2274
wolffd@0 2275 /* No alpha in the working set not at bounds, so b was not
wolffd@0 2276 calculated in the usual way. The following handles this special
wolffd@0 2277 case. */
wolffd@0 2278 if(learn_parm->biased_hyperplane
wolffd@0 2279 && (!b_calculated)
wolffd@0 2280 && (model->sv_num-1 == model->at_upper_bound)) {
wolffd@0 2281 first_low=1;
wolffd@0 2282 first_high=1;
wolffd@0 2283 b_low=0;
wolffd@0 2284 b_high=0;
wolffd@0 2285 for(ii=0;(i=active2dnum[ii])>=0;ii++) {
wolffd@0 2286 ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
wolffd@0 2287 if(a[i]<ex_c) {
wolffd@0 2288 if(label[i]>0) {
wolffd@0 2289 b_temp=-(learn_parm->eps-c[i]+lin[i]);
wolffd@0 2290 if((b_temp>b_low) || (first_low)) {
wolffd@0 2291 b_low=b_temp;
wolffd@0 2292 first_low=0;
wolffd@0 2293 }
wolffd@0 2294 }
wolffd@0 2295 else {
wolffd@0 2296 b_temp=-(-learn_parm->eps-c[i]+lin[i]);
wolffd@0 2297 if((b_temp<b_high) || (first_high)) {
wolffd@0 2298 b_high=b_temp;
wolffd@0 2299 first_high=0;
wolffd@0 2300 }
wolffd@0 2301 }
wolffd@0 2302 }
wolffd@0 2303 else {
wolffd@0 2304 if(label[i]<0) {
wolffd@0 2305 b_temp=-(-learn_parm->eps-c[i]+lin[i]);
wolffd@0 2306 if((b_temp>b_low) || (first_low)) {
wolffd@0 2307 b_low=b_temp;
wolffd@0 2308 first_low=0;
wolffd@0 2309 }
wolffd@0 2310 }
wolffd@0 2311 else {
wolffd@0 2312 b_temp=-(learn_parm->eps-c[i]+lin[i]);
wolffd@0 2313 if((b_temp<b_high) || (first_high)) {
wolffd@0 2314 b_high=b_temp;
wolffd@0 2315 first_high=0;
wolffd@0 2316 }
wolffd@0 2317 }
wolffd@0 2318 }
wolffd@0 2319 }
wolffd@0 2320 if(first_high) {
wolffd@0 2321 model->b=-b_low;
wolffd@0 2322 }
wolffd@0 2323 else if(first_low) {
wolffd@0 2324 model->b=-b_high;
wolffd@0 2325 }
wolffd@0 2326 else {
wolffd@0 2327 model->b=-(b_high+b_low)/2.0; /* select b as the middle of range */
wolffd@0 2328 /* printf("\nb_low=%f, b_high=%f,b=%f\n",b_low,b_high,model->b); */
wolffd@0 2329 }
wolffd@0 2330 }
wolffd@0 2331
wolffd@0 2332 if(verbosity>=3) {
wolffd@0 2333 printf("done\n"); fflush(stdout);
wolffd@0 2334 }
wolffd@0 2335
wolffd@0 2336 return(model->sv_num-1); /* have to substract one, since element 0 is empty*/
wolffd@0 2337 }
wolffd@0 2338
wolffd@0 2339 long check_optimality(MODEL *model, long int *label, long int *unlabeled,
wolffd@0 2340 double *a, double *lin, double *c, long int totdoc,
wolffd@0 2341 LEARN_PARM *learn_parm, double *maxdiff,
wolffd@0 2342 double epsilon_crit_org, long int *misclassified,
wolffd@0 2343 long int *inconsistent, long int *active2dnum,
wolffd@0 2344 long int *last_suboptimal_at,
wolffd@0 2345 long int iteration, KERNEL_PARM *kernel_parm)
wolffd@0 2346 /* Check KT-conditions */
wolffd@0 2347 {
wolffd@0 2348 long i,ii,retrain;
wolffd@0 2349 double dist,ex_c,target;
wolffd@0 2350
wolffd@0 2351 if(kernel_parm->kernel_type == LINEAR) { /* be optimistic */
wolffd@0 2352 learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org;
wolffd@0 2353 }
wolffd@0 2354 else { /* be conservative */
wolffd@0 2355 learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3;
wolffd@0 2356 }
wolffd@0 2357 retrain=0;
wolffd@0 2358 (*maxdiff)=0;
wolffd@0 2359 (*misclassified)=0;
wolffd@0 2360 for(ii=0;(i=active2dnum[ii])>=0;ii++) {
wolffd@0 2361 if((!inconsistent[i]) && label[i]) {
wolffd@0 2362 dist=(lin[i]-model->b)*(double)label[i];/* 'distance' from
wolffd@0 2363 hyperplane*/
wolffd@0 2364 target=-(learn_parm->eps-(double)label[i]*c[i]);
wolffd@0 2365 ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
wolffd@0 2366 if(dist <= 0) {
wolffd@0 2367 (*misclassified)++; /* does not work due to deactivation of var */
wolffd@0 2368 }
wolffd@0 2369 if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
wolffd@0 2370 if((dist-target)>(*maxdiff)) /* largest violation */
wolffd@0 2371 (*maxdiff)=dist-target;
wolffd@0 2372 }
wolffd@0 2373 else if((a[i]<ex_c) && (dist < target)) {
wolffd@0 2374 if((target-dist)>(*maxdiff)) /* largest violation */
wolffd@0 2375 (*maxdiff)=target-dist;
wolffd@0 2376 }
wolffd@0 2377 /* Count how long a variable was at lower/upper bound (and optimal).*/
wolffd@0 2378 /* Variables, which were at the bound and optimal for a long */
wolffd@0 2379 /* time are unlikely to become support vectors. In case our */
wolffd@0 2380 /* cache is filled up, those variables are excluded to save */
wolffd@0 2381 /* kernel evaluations. (See chapter 'Shrinking').*/
wolffd@0 2382 if((a[i]>(learn_parm->epsilon_a))
wolffd@0 2383 && (a[i]<ex_c)) {
wolffd@0 2384 last_suboptimal_at[i]=iteration; /* not at bound */
wolffd@0 2385 }
wolffd@0 2386 else if((a[i]<=(learn_parm->epsilon_a))
wolffd@0 2387 && (dist < (target+learn_parm->epsilon_shrink))) {
wolffd@0 2388 last_suboptimal_at[i]=iteration; /* not likely optimal */
wolffd@0 2389 }
wolffd@0 2390 else if((a[i]>=ex_c)
wolffd@0 2391 && (dist > (target-learn_parm->epsilon_shrink))) {
wolffd@0 2392 last_suboptimal_at[i]=iteration; /* not likely optimal */
wolffd@0 2393 }
wolffd@0 2394 }
wolffd@0 2395 }
wolffd@0 2396 /* termination criterion */
wolffd@0 2397 if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) {
wolffd@0 2398 retrain=1;
wolffd@0 2399 }
wolffd@0 2400 return(retrain);
wolffd@0 2401 }
wolffd@0 2402
wolffd@0 2403 long check_optimality_sharedslack(DOC **docs, MODEL *model, long int *label,
wolffd@0 2404 double *a, double *lin, double *c, double *slack,
wolffd@0 2405 double *alphaslack,
wolffd@0 2406 long int totdoc,
wolffd@0 2407 LEARN_PARM *learn_parm, double *maxdiff,
wolffd@0 2408 double epsilon_crit_org, long int *misclassified,
wolffd@0 2409 long int *active2dnum,
wolffd@0 2410 long int *last_suboptimal_at,
wolffd@0 2411 long int iteration, KERNEL_PARM *kernel_parm)
wolffd@0 2412 /* Check KT-conditions */
wolffd@0 2413 {
wolffd@0 2414 long i,ii,retrain;
wolffd@0 2415 double dist,ex_c=0,target;
wolffd@0 2416
wolffd@0 2417 if(kernel_parm->kernel_type == LINEAR) { /* be optimistic */
wolffd@0 2418 learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org;
wolffd@0 2419 }
wolffd@0 2420 else { /* be conservative */
wolffd@0 2421 learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3;
wolffd@0 2422 }
wolffd@0 2423
wolffd@0 2424 retrain=0;
wolffd@0 2425 (*maxdiff)=0;
wolffd@0 2426 (*misclassified)=0;
wolffd@0 2427 for(ii=0;(i=active2dnum[ii])>=0;ii++) {
wolffd@0 2428 /* 'distance' from hyperplane*/
wolffd@0 2429 dist=(lin[i]-model->b)*(double)label[i]+slack[docs[i]->slackid];
wolffd@0 2430 target=-(learn_parm->eps-(double)label[i]*c[i]);
wolffd@0 2431 ex_c=learn_parm->svm_c-learn_parm->epsilon_a;
wolffd@0 2432 if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
wolffd@0 2433 if((dist-target)>(*maxdiff)) { /* largest violation */
wolffd@0 2434 (*maxdiff)=dist-target;
wolffd@0 2435 if(verbosity>=5) printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, alphaslack=%f\n",docs[i]->slackid,dist,target,slack[docs[i]->slackid],a[i],alphaslack[docs[i]->slackid]);
wolffd@0 2436 if(verbosity>=5) printf(" (single %f)\n",(*maxdiff));
wolffd@0 2437 }
wolffd@0 2438 }
wolffd@0 2439 if((alphaslack[docs[i]->slackid]<ex_c) && (slack[docs[i]->slackid]>0)) {
wolffd@0 2440 if((slack[docs[i]->slackid])>(*maxdiff)) { /* largest violation */
wolffd@0 2441 (*maxdiff)=slack[docs[i]->slackid];
wolffd@0 2442 if(verbosity>=5) printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, alphaslack=%f\n",docs[i]->slackid,dist,target,slack[docs[i]->slackid],a[i],alphaslack[docs[i]->slackid]);
wolffd@0 2443 if(verbosity>=5) printf(" (joint %f)\n",(*maxdiff));
wolffd@0 2444 }
wolffd@0 2445 }
wolffd@0 2446 /* Count how long a variable was at lower/upper bound (and optimal).*/
wolffd@0 2447 /* Variables, which were at the bound and optimal for a long */
wolffd@0 2448 /* time are unlikely to become support vectors. In case our */
wolffd@0 2449 /* cache is filled up, those variables are excluded to save */
wolffd@0 2450 /* kernel evaluations. (See chapter 'Shrinking').*/
wolffd@0 2451 if((a[i]>(learn_parm->epsilon_a))
wolffd@0 2452 && (a[i]<ex_c)) {
wolffd@0 2453 last_suboptimal_at[docs[i]->slackid]=iteration; /* not at bound */
wolffd@0 2454 }
wolffd@0 2455 else if((a[i]<=(learn_parm->epsilon_a))
wolffd@0 2456 && (dist < (target+learn_parm->epsilon_shrink))) {
wolffd@0 2457 last_suboptimal_at[docs[i]->slackid]=iteration; /* not likely optimal */
wolffd@0 2458 }
wolffd@0 2459 else if((a[i]>=ex_c)
wolffd@0 2460 && (slack[docs[i]->slackid] < learn_parm->epsilon_shrink)) {
wolffd@0 2461 last_suboptimal_at[docs[i]->slackid]=iteration; /* not likely optimal */
wolffd@0 2462 }
wolffd@0 2463 }
wolffd@0 2464 /* termination criterion */
wolffd@0 2465 if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) {
wolffd@0 2466 retrain=1;
wolffd@0 2467 }
wolffd@0 2468 return(retrain);
wolffd@0 2469 }
wolffd@0 2470
wolffd@0 2471 void compute_shared_slacks(DOC **docs, long int *label,
wolffd@0 2472 double *a, double *lin,
wolffd@0 2473 double *c, long int *active2dnum,
wolffd@0 2474 LEARN_PARM *learn_parm,
wolffd@0 2475 double *slack, double *alphaslack)
wolffd@0 2476 /* compute the value of shared slacks and the joint alphas */
wolffd@0 2477 {
wolffd@0 2478 long jj,i;
wolffd@0 2479 double dist,target;
wolffd@0 2480
wolffd@0 2481 for(jj=0;(i=active2dnum[jj])>=0;jj++) { /* clear slack variables */
wolffd@0 2482 slack[docs[i]->slackid]=0.0;
wolffd@0 2483 alphaslack[docs[i]->slackid]=0.0;
wolffd@0 2484 }
wolffd@0 2485 for(jj=0;(i=active2dnum[jj])>=0;jj++) { /* recompute slack variables */
wolffd@0 2486 dist=(lin[i])*(double)label[i];
wolffd@0 2487 target=-(learn_parm->eps-(double)label[i]*c[i]);
wolffd@0 2488 if((target-dist) > slack[docs[i]->slackid])
wolffd@0 2489 slack[docs[i]->slackid]=target-dist;
wolffd@0 2490 alphaslack[docs[i]->slackid]+=a[i];
wolffd@0 2491 }
wolffd@0 2492 }
wolffd@0 2493
wolffd@0 2494
wolffd@0 2495 long identify_inconsistent(double *a, long int *label,
wolffd@0 2496 long int *unlabeled, long int totdoc,
wolffd@0 2497 LEARN_PARM *learn_parm,
wolffd@0 2498 long int *inconsistentnum, long int *inconsistent)
wolffd@0 2499 {
wolffd@0 2500 long i,retrain;
wolffd@0 2501
wolffd@0 2502 /* Throw out examples with multipliers at upper bound. This */
wolffd@0 2503 /* corresponds to the -i 1 option. */
wolffd@0 2504 /* ATTENTION: this is just a heuristic for finding a close */
wolffd@0 2505 /* to minimum number of examples to exclude to */
wolffd@0 2506 /* make the problem separable with desired margin */
wolffd@0 2507 retrain=0;
wolffd@0 2508 for(i=0;i<totdoc;i++) {
wolffd@0 2509 if((!inconsistent[i]) && (!unlabeled[i])
wolffd@0 2510 && (a[i]>=(learn_parm->svm_cost[i]-learn_parm->epsilon_a))) {
wolffd@0 2511 (*inconsistentnum)++;
wolffd@0 2512 inconsistent[i]=1; /* never choose again */
wolffd@0 2513 retrain=2; /* start over */
wolffd@0 2514 if(verbosity>=3) {
wolffd@0 2515 printf("inconsistent(%ld)..",i); fflush(stdout);
wolffd@0 2516 }
wolffd@0 2517 }
wolffd@0 2518 }
wolffd@0 2519 return(retrain);
wolffd@0 2520 }
wolffd@0 2521
wolffd@0 2522 long identify_misclassified(double *lin, long int *label,
wolffd@0 2523 long int *unlabeled, long int totdoc,
wolffd@0 2524 MODEL *model, long int *inconsistentnum,
wolffd@0 2525 long int *inconsistent)
wolffd@0 2526 {
wolffd@0 2527 long i,retrain;
wolffd@0 2528 double dist;
wolffd@0 2529
wolffd@0 2530 /* Throw out misclassified examples. This */
wolffd@0 2531 /* corresponds to the -i 2 option. */
wolffd@0 2532 /* ATTENTION: this is just a heuristic for finding a close */
wolffd@0 2533 /* to minimum number of examples to exclude to */
wolffd@0 2534 /* make the problem separable with desired margin */
wolffd@0 2535 retrain=0;
wolffd@0 2536 for(i=0;i<totdoc;i++) {
wolffd@0 2537 dist=(lin[i]-model->b)*(double)label[i]; /* 'distance' from hyperplane*/
wolffd@0 2538 if((!inconsistent[i]) && (!unlabeled[i]) && (dist <= 0)) {
wolffd@0 2539 (*inconsistentnum)++;
wolffd@0 2540 inconsistent[i]=1; /* never choose again */
wolffd@0 2541 retrain=2; /* start over */
wolffd@0 2542 if(verbosity>=3) {
wolffd@0 2543 printf("inconsistent(%ld)..",i); fflush(stdout);
wolffd@0 2544 }
wolffd@0 2545 }
wolffd@0 2546 }
wolffd@0 2547 return(retrain);
wolffd@0 2548 }
wolffd@0 2549
wolffd@0 2550 long identify_one_misclassified(double *lin, long int *label,
wolffd@0 2551 long int *unlabeled,
wolffd@0 2552 long int totdoc, MODEL *model,
wolffd@0 2553 long int *inconsistentnum,
wolffd@0 2554 long int *inconsistent)
wolffd@0 2555 {
wolffd@0 2556 long i,retrain,maxex=-1;
wolffd@0 2557 double dist,maxdist=0;
wolffd@0 2558
wolffd@0 2559 /* Throw out the 'most misclassified' example. This */
wolffd@0 2560 /* corresponds to the -i 3 option. */
wolffd@0 2561 /* ATTENTION: this is just a heuristic for finding a close */
wolffd@0 2562 /* to minimum number of examples to exclude to */
wolffd@0 2563 /* make the problem separable with desired margin */
wolffd@0 2564 retrain=0;
wolffd@0 2565 for(i=0;i<totdoc;i++) {
wolffd@0 2566 if((!inconsistent[i]) && (!unlabeled[i])) {
wolffd@0 2567 dist=(lin[i]-model->b)*(double)label[i];/* 'distance' from hyperplane*/
wolffd@0 2568 if(dist<maxdist) {
wolffd@0 2569 maxdist=dist;
wolffd@0 2570 maxex=i;
wolffd@0 2571 }
wolffd@0 2572 }
wolffd@0 2573 }
wolffd@0 2574 if(maxex>=0) {
wolffd@0 2575 (*inconsistentnum)++;
wolffd@0 2576 inconsistent[maxex]=1; /* never choose again */
wolffd@0 2577 retrain=2; /* start over */
wolffd@0 2578 if(verbosity>=3) {
wolffd@0 2579 printf("inconsistent(%ld)..",i); fflush(stdout);
wolffd@0 2580 }
wolffd@0 2581 }
wolffd@0 2582 return(retrain);
wolffd@0 2583 }
wolffd@0 2584
wolffd@0 2585 void update_linear_component(DOC **docs, long int *label,
wolffd@0 2586 long int *active2dnum, double *a,
wolffd@0 2587 double *a_old, long int *working2dnum,
wolffd@0 2588 long int totdoc, long int totwords,
wolffd@0 2589 KERNEL_PARM *kernel_parm,
wolffd@0 2590 KERNEL_CACHE *kernel_cache,
wolffd@0 2591 double *lin, CFLOAT *aicache, double *weights)
wolffd@0 2592 /* keep track of the linear component */
wolffd@0 2593 /* lin of the gradient etc. by updating */
wolffd@0 2594 /* based on the change of the variables */
wolffd@0 2595 /* in the current working set */
wolffd@0 2596 {
wolffd@0 2597 register long i,ii,j,jj;
wolffd@0 2598 register double tec;
wolffd@0 2599 SVECTOR *f;
wolffd@0 2600
wolffd@0 2601 if(kernel_parm->kernel_type==0) { /* special linear case */
wolffd@0 2602 clear_vector_n(weights,totwords);
wolffd@0 2603 for(ii=0;(i=working2dnum[ii])>=0;ii++) {
wolffd@0 2604 if(a[i] != a_old[i]) {
wolffd@0 2605 for(f=docs[i]->fvec;f;f=f->next)
wolffd@0 2606 add_vector_ns(weights,f,
wolffd@0 2607 f->factor*((a[i]-a_old[i])*(double)label[i]));
wolffd@0 2608 }
wolffd@0 2609 }
wolffd@0 2610 for(jj=0;(j=active2dnum[jj])>=0;jj++) {
wolffd@0 2611 for(f=docs[j]->fvec;f;f=f->next)
wolffd@0 2612 lin[j]+=f->factor*sprod_ns(weights,f);
wolffd@0 2613 }
wolffd@0 2614 }
wolffd@0 2615 else { /* general case */
wolffd@0 2616 for(jj=0;(i=working2dnum[jj])>=0;jj++) {
wolffd@0 2617 if(a[i] != a_old[i]) {
wolffd@0 2618 get_kernel_row(kernel_cache,docs,i,totdoc,active2dnum,aicache,
wolffd@0 2619 kernel_parm);
wolffd@0 2620 for(ii=0;(j=active2dnum[ii])>=0;ii++) {
wolffd@0 2621 tec=aicache[j];
wolffd@0 2622 lin[j]+=(((a[i]*tec)-(a_old[i]*tec))*(double)label[i]);
wolffd@0 2623 }
wolffd@0 2624 }
wolffd@0 2625 }
wolffd@0 2626 }
wolffd@0 2627 }
wolffd@0 2628
wolffd@0 2629
wolffd@0 2630 long incorporate_unlabeled_examples(MODEL *model, long int *label,
wolffd@0 2631 long int *inconsistent,
wolffd@0 2632 long int *unlabeled,
wolffd@0 2633 double *a, double *lin,
wolffd@0 2634 long int totdoc, double *selcrit,
wolffd@0 2635 long int *select, long int *key,
wolffd@0 2636 long int transductcycle,
wolffd@0 2637 KERNEL_PARM *kernel_parm,
wolffd@0 2638 LEARN_PARM *learn_parm)
wolffd@0 2639 {
wolffd@0 2640 long i,j,k,j1,j2,j3,j4,unsupaddnum1=0,unsupaddnum2=0;
wolffd@0 2641 long pos,neg,upos,uneg,orgpos,orgneg,nolabel,newpos,newneg,allunlab;
wolffd@0 2642 double dist,model_length,posratio,negratio;
wolffd@0 2643 long check_every=2;
wolffd@0 2644 double loss;
wolffd@0 2645 static double switchsens=0.0,switchsensorg=0.0;
wolffd@0 2646 double umin,umax,sumalpha;
wolffd@0 2647 long imin=0,imax=0;
wolffd@0 2648 static long switchnum=0;
wolffd@0 2649
wolffd@0 2650 switchsens/=1.2;
wolffd@0 2651
wolffd@0 2652 /* assumes that lin[] is up to date -> no inactive vars */
wolffd@0 2653
wolffd@0 2654 orgpos=0;
wolffd@0 2655 orgneg=0;
wolffd@0 2656 newpos=0;
wolffd@0 2657 newneg=0;
wolffd@0 2658 nolabel=0;
wolffd@0 2659 allunlab=0;
wolffd@0 2660 for(i=0;i<totdoc;i++) {
wolffd@0 2661 if(!unlabeled[i]) {
wolffd@0 2662 if(label[i] > 0) {
wolffd@0 2663 orgpos++;
wolffd@0 2664 }
wolffd@0 2665 else {
wolffd@0 2666 orgneg++;
wolffd@0 2667 }
wolffd@0 2668 }
wolffd@0 2669 else {
wolffd@0 2670 allunlab++;
wolffd@0 2671 if(unlabeled[i]) {
wolffd@0 2672 if(label[i] > 0) {
wolffd@0 2673 newpos++;
wolffd@0 2674 }
wolffd@0 2675 else if(label[i] < 0) {
wolffd@0 2676 newneg++;
wolffd@0 2677 }
wolffd@0 2678 }
wolffd@0 2679 }
wolffd@0 2680 if(label[i]==0) {
wolffd@0 2681 nolabel++;
wolffd@0 2682 }
wolffd@0 2683 }
wolffd@0 2684
wolffd@0 2685 if(learn_parm->transduction_posratio >= 0) {
wolffd@0 2686 posratio=learn_parm->transduction_posratio;
wolffd@0 2687 }
wolffd@0 2688 else {
wolffd@0 2689 posratio=(double)orgpos/(double)(orgpos+orgneg); /* use ratio of pos/neg */
wolffd@0 2690 } /* in training data */
wolffd@0 2691 negratio=1.0-posratio;
wolffd@0 2692
wolffd@0 2693 learn_parm->svm_costratio=1.0; /* global */
wolffd@0 2694 if(posratio>0) {
wolffd@0 2695 learn_parm->svm_costratio_unlab=negratio/posratio;
wolffd@0 2696 }
wolffd@0 2697 else {
wolffd@0 2698 learn_parm->svm_costratio_unlab=1.0;
wolffd@0 2699 }
wolffd@0 2700
wolffd@0 2701 pos=0;
wolffd@0 2702 neg=0;
wolffd@0 2703 upos=0;
wolffd@0 2704 uneg=0;
wolffd@0 2705 for(i=0;i<totdoc;i++) {
wolffd@0 2706 dist=(lin[i]-model->b); /* 'distance' from hyperplane*/
wolffd@0 2707 if(dist>0) {
wolffd@0 2708 pos++;
wolffd@0 2709 }
wolffd@0 2710 else {
wolffd@0 2711 neg++;
wolffd@0 2712 }
wolffd@0 2713 if(unlabeled[i]) {
wolffd@0 2714 if(dist>0) {
wolffd@0 2715 upos++;
wolffd@0 2716 }
wolffd@0 2717 else {
wolffd@0 2718 uneg++;
wolffd@0 2719 }
wolffd@0 2720 }
wolffd@0 2721 if((!unlabeled[i]) && (a[i]>(learn_parm->svm_cost[i]-learn_parm->epsilon_a))) {
wolffd@0 2722 /* printf("Ubounded %ld (class %ld, unlabeled %ld)\n",i,label[i],unlabeled[i]); */
wolffd@0 2723 }
wolffd@0 2724 }
wolffd@0 2725 if(verbosity>=2) {
wolffd@0 2726 printf("POS=%ld, ORGPOS=%ld, ORGNEG=%ld\n",pos,orgpos,orgneg);
wolffd@0 2727 printf("POS=%ld, NEWPOS=%ld, NEWNEG=%ld\n",pos,newpos,newneg);
wolffd@0 2728 printf("pos ratio = %f (%f).\n",(double)(upos)/(double)(allunlab),posratio);
wolffd@0 2729 fflush(stdout);
wolffd@0 2730 }
wolffd@0 2731
wolffd@0 2732 if(transductcycle == 0) {
wolffd@0 2733 j1=0;
wolffd@0 2734 j2=0;
wolffd@0 2735 j4=0;
wolffd@0 2736 for(i=0;i<totdoc;i++) {
wolffd@0 2737 dist=(lin[i]-model->b); /* 'distance' from hyperplane*/
wolffd@0 2738 if((label[i]==0) && (unlabeled[i])) {
wolffd@0 2739 selcrit[j4]=dist;
wolffd@0 2740 key[j4]=i;
wolffd@0 2741 j4++;
wolffd@0 2742 }
wolffd@0 2743 }
wolffd@0 2744 unsupaddnum1=0;
wolffd@0 2745 unsupaddnum2=0;
wolffd@0 2746 select_top_n(selcrit,j4,select,(long)(allunlab*posratio+0.5));
wolffd@0 2747 for(k=0;(k<(long)(allunlab*posratio+0.5));k++) {
wolffd@0 2748 i=key[select[k]];
wolffd@0 2749 label[i]=1;
wolffd@0 2750 unsupaddnum1++;
wolffd@0 2751 j1++;
wolffd@0 2752 }
wolffd@0 2753 for(i=0;i<totdoc;i++) {
wolffd@0 2754 if((label[i]==0) && (unlabeled[i])) {
wolffd@0 2755 label[i]=-1;
wolffd@0 2756 j2++;
wolffd@0 2757 unsupaddnum2++;
wolffd@0 2758 }
wolffd@0 2759 }
wolffd@0 2760 for(i=0;i<totdoc;i++) { /* set upper bounds on vars */
wolffd@0 2761 if(unlabeled[i]) {
wolffd@0 2762 if(label[i] == 1) {
wolffd@0 2763 learn_parm->svm_cost[i]=learn_parm->svm_c*
wolffd@0 2764 learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound;
wolffd@0 2765 }
wolffd@0 2766 else if(label[i] == -1) {
wolffd@0 2767 learn_parm->svm_cost[i]=learn_parm->svm_c*
wolffd@0 2768 learn_parm->svm_unlabbound;
wolffd@0 2769 }
wolffd@0 2770 }
wolffd@0 2771 }
wolffd@0 2772 if(verbosity>=1) {
wolffd@0 2773 /* printf("costratio %f, costratio_unlab %f, unlabbound %f\n",
wolffd@0 2774 learn_parm->svm_costratio,learn_parm->svm_costratio_unlab,
wolffd@0 2775 learn_parm->svm_unlabbound); */
wolffd@0 2776 printf("Classifying unlabeled data as %ld POS / %ld NEG.\n",
wolffd@0 2777 unsupaddnum1,unsupaddnum2);
wolffd@0 2778 fflush(stdout);
wolffd@0 2779 }
wolffd@0 2780 if(verbosity >= 1)
wolffd@0 2781 printf("Retraining.");
wolffd@0 2782 if(verbosity >= 2) printf("\n");
wolffd@0 2783 return((long)3);
wolffd@0 2784 }
wolffd@0 2785 if((transductcycle % check_every) == 0) {
wolffd@0 2786 if(verbosity >= 1)
wolffd@0 2787 printf("Retraining.");
wolffd@0 2788 if(verbosity >= 2) printf("\n");
wolffd@0 2789 j1=0;
wolffd@0 2790 j2=0;
wolffd@0 2791 unsupaddnum1=0;
wolffd@0 2792 unsupaddnum2=0;
wolffd@0 2793 for(i=0;i<totdoc;i++) {
wolffd@0 2794 if((unlabeled[i] == 2)) {
wolffd@0 2795 unlabeled[i]=1;
wolffd@0 2796 label[i]=1;
wolffd@0 2797 j1++;
wolffd@0 2798 unsupaddnum1++;
wolffd@0 2799 }
wolffd@0 2800 else if((unlabeled[i] == 3)) {
wolffd@0 2801 unlabeled[i]=1;
wolffd@0 2802 label[i]=-1;
wolffd@0 2803 j2++;
wolffd@0 2804 unsupaddnum2++;
wolffd@0 2805 }
wolffd@0 2806 }
wolffd@0 2807 for(i=0;i<totdoc;i++) { /* set upper bounds on vars */
wolffd@0 2808 if(unlabeled[i]) {
wolffd@0 2809 if(label[i] == 1) {
wolffd@0 2810 learn_parm->svm_cost[i]=learn_parm->svm_c*
wolffd@0 2811 learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound;
wolffd@0 2812 }
wolffd@0 2813 else if(label[i] == -1) {
wolffd@0 2814 learn_parm->svm_cost[i]=learn_parm->svm_c*
wolffd@0 2815 learn_parm->svm_unlabbound;
wolffd@0 2816 }
wolffd@0 2817 }
wolffd@0 2818 }
wolffd@0 2819
wolffd@0 2820 if(verbosity>=2) {
wolffd@0 2821 /* printf("costratio %f, costratio_unlab %f, unlabbound %f\n",
wolffd@0 2822 learn_parm->svm_costratio,learn_parm->svm_costratio_unlab,
wolffd@0 2823 learn_parm->svm_unlabbound); */
wolffd@0 2824 printf("%ld positive -> Added %ld POS / %ld NEG unlabeled examples.\n",
wolffd@0 2825 upos,unsupaddnum1,unsupaddnum2);
wolffd@0 2826 fflush(stdout);
wolffd@0 2827 }
wolffd@0 2828
wolffd@0 2829 if(learn_parm->svm_unlabbound == 1) {
wolffd@0 2830 learn_parm->epsilon_crit=0.001; /* do the last run right */
wolffd@0 2831 }
wolffd@0 2832 else {
wolffd@0 2833 learn_parm->epsilon_crit=0.01; /* otherwise, no need to be so picky */
wolffd@0 2834 }
wolffd@0 2835
wolffd@0 2836 return((long)3);
wolffd@0 2837 }
wolffd@0 2838 else if(((transductcycle % check_every) < check_every)) {
wolffd@0 2839 model_length=0;
wolffd@0 2840 sumalpha=0;
wolffd@0 2841 loss=0;
wolffd@0 2842 for(i=0;i<totdoc;i++) {
wolffd@0 2843 model_length+=a[i]*label[i]*lin[i];
wolffd@0 2844 sumalpha+=a[i];
wolffd@0 2845 dist=(lin[i]-model->b); /* 'distance' from hyperplane*/
wolffd@0 2846 if((label[i]*dist)<(1.0-learn_parm->epsilon_crit)) {
wolffd@0 2847 loss+=(1.0-(label[i]*dist))*learn_parm->svm_cost[i];
wolffd@0 2848 }
wolffd@0 2849 }
wolffd@0 2850 model_length=sqrt(model_length);
wolffd@0 2851 if(verbosity>=2) {
wolffd@0 2852 printf("Model-length = %f (%f), loss = %f, objective = %f\n",
wolffd@0 2853 model_length,sumalpha,loss,loss+0.5*model_length*model_length);
wolffd@0 2854 fflush(stdout);
wolffd@0 2855 }
wolffd@0 2856 j1=0;
wolffd@0 2857 j2=0;
wolffd@0 2858 j3=0;
wolffd@0 2859 j4=0;
wolffd@0 2860 unsupaddnum1=0;
wolffd@0 2861 unsupaddnum2=0;
wolffd@0 2862 umin=99999;
wolffd@0 2863 umax=-99999;
wolffd@0 2864 j4=1;
wolffd@0 2865 while(j4) {
wolffd@0 2866 umin=99999;
wolffd@0 2867 umax=-99999;
wolffd@0 2868 for(i=0;(i<totdoc);i++) {
wolffd@0 2869 dist=(lin[i]-model->b);
wolffd@0 2870 if((label[i]>0) && (unlabeled[i]) && (!inconsistent[i])
wolffd@0 2871 && (dist<umin)) {
wolffd@0 2872 umin=dist;
wolffd@0 2873 imin=i;
wolffd@0 2874 }
wolffd@0 2875 if((label[i]<0) && (unlabeled[i]) && (!inconsistent[i])
wolffd@0 2876 && (dist>umax)) {
wolffd@0 2877 umax=dist;
wolffd@0 2878 imax=i;
wolffd@0 2879 }
wolffd@0 2880 }
wolffd@0 2881 if((umin < (umax+switchsens-1E-4))) {
wolffd@0 2882 j1++;
wolffd@0 2883 j2++;
wolffd@0 2884 unsupaddnum1++;
wolffd@0 2885 unlabeled[imin]=3;
wolffd@0 2886 inconsistent[imin]=1;
wolffd@0 2887 unsupaddnum2++;
wolffd@0 2888 unlabeled[imax]=2;
wolffd@0 2889 inconsistent[imax]=1;
wolffd@0 2890 }
wolffd@0 2891 else
wolffd@0 2892 j4=0;
wolffd@0 2893 j4=0;
wolffd@0 2894 }
wolffd@0 2895 for(j=0;(j<totdoc);j++) {
wolffd@0 2896 if(unlabeled[j] && (!inconsistent[j])) {
wolffd@0 2897 if(label[j]>0) {
wolffd@0 2898 unlabeled[j]=2;
wolffd@0 2899 }
wolffd@0 2900 else if(label[j]<0) {
wolffd@0 2901 unlabeled[j]=3;
wolffd@0 2902 }
wolffd@0 2903 /* inconsistent[j]=1; */
wolffd@0 2904 j3++;
wolffd@0 2905 }
wolffd@0 2906 }
wolffd@0 2907 switchnum+=unsupaddnum1+unsupaddnum2;
wolffd@0 2908
wolffd@0 2909 /* stop and print out current margin
wolffd@0 2910 printf("switchnum %ld %ld\n",switchnum,kernel_parm->poly_degree);
wolffd@0 2911 if(switchnum == 2*kernel_parm->poly_degree) {
wolffd@0 2912 learn_parm->svm_unlabbound=1;
wolffd@0 2913 }
wolffd@0 2914 */
wolffd@0 2915
wolffd@0 2916 if((!unsupaddnum1) && (!unsupaddnum2)) {
wolffd@0 2917 if((learn_parm->svm_unlabbound>=1) && ((newpos+newneg) == allunlab)) {
wolffd@0 2918 for(j=0;(j<totdoc);j++) {
wolffd@0 2919 inconsistent[j]=0;
wolffd@0 2920 if(unlabeled[j]) unlabeled[j]=1;
wolffd@0 2921 }
wolffd@0 2922 write_prediction(learn_parm->predfile,model,lin,a,unlabeled,label,
wolffd@0 2923 totdoc,learn_parm);
wolffd@0 2924 if(verbosity>=1)
wolffd@0 2925 printf("Number of switches: %ld\n",switchnum);
wolffd@0 2926 return((long)0);
wolffd@0 2927 }
wolffd@0 2928 switchsens=switchsensorg;
wolffd@0 2929 learn_parm->svm_unlabbound*=1.5;
wolffd@0 2930 if(learn_parm->svm_unlabbound>1) {
wolffd@0 2931 learn_parm->svm_unlabbound=1;
wolffd@0 2932 }
wolffd@0 2933 model->at_upper_bound=0; /* since upper bound increased */
wolffd@0 2934 if(verbosity>=1)
wolffd@0 2935 printf("Increasing influence of unlabeled examples to %f%% .",
wolffd@0 2936 learn_parm->svm_unlabbound*100.0);
wolffd@0 2937 }
wolffd@0 2938 else if(verbosity>=1) {
wolffd@0 2939 printf("%ld positive -> Switching labels of %ld POS / %ld NEG unlabeled examples.",
wolffd@0 2940 upos,unsupaddnum1,unsupaddnum2);
wolffd@0 2941 fflush(stdout);
wolffd@0 2942 }
wolffd@0 2943
wolffd@0 2944 if(verbosity >= 2) printf("\n");
wolffd@0 2945
wolffd@0 2946 learn_parm->epsilon_crit=0.5; /* don't need to be so picky */
wolffd@0 2947
wolffd@0 2948 for(i=0;i<totdoc;i++) { /* set upper bounds on vars */
wolffd@0 2949 if(unlabeled[i]) {
wolffd@0 2950 if(label[i] == 1) {
wolffd@0 2951 learn_parm->svm_cost[i]=learn_parm->svm_c*
wolffd@0 2952 learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound;
wolffd@0 2953 }
wolffd@0 2954 else if(label[i] == -1) {
wolffd@0 2955 learn_parm->svm_cost[i]=learn_parm->svm_c*
wolffd@0 2956 learn_parm->svm_unlabbound;
wolffd@0 2957 }
wolffd@0 2958 }
wolffd@0 2959 }
wolffd@0 2960
wolffd@0 2961 return((long)2);
wolffd@0 2962 }
wolffd@0 2963
wolffd@0 2964 return((long)0);
wolffd@0 2965 }
wolffd@0 2966
wolffd@0 2967 /*************************** Working set selection ***************************/
wolffd@0 2968
wolffd@0 2969 long select_next_qp_subproblem_grad(long int *label,
wolffd@0 2970 long int *unlabeled,
wolffd@0 2971 double *a, double *lin,
wolffd@0 2972 double *c, long int totdoc,
wolffd@0 2973 long int qp_size,
wolffd@0 2974 LEARN_PARM *learn_parm,
wolffd@0 2975 long int *inconsistent,
wolffd@0 2976 long int *active2dnum,
wolffd@0 2977 long int *working2dnum,
wolffd@0 2978 double *selcrit,
wolffd@0 2979 long int *select,
wolffd@0 2980 KERNEL_CACHE *kernel_cache,
wolffd@0 2981 long int cache_only,
wolffd@0 2982 long int *key, long int *chosen)
wolffd@0 2983 /* Use the feasible direction approach to select the next
wolffd@0 2984 qp-subproblem (see chapter 'Selecting a good working set'). If
wolffd@0 2985 'cache_only' is true, then the variables are selected only among
wolffd@0 2986 those for which the kernel evaluations are cached. */
wolffd@0 2987 {
wolffd@0 2988 long choosenum,i,j,k,activedoc,inum,valid;
wolffd@0 2989 double s;
wolffd@0 2990
wolffd@0 2991 for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
wolffd@0 2992 choosenum=0;
wolffd@0 2993 activedoc=0;
wolffd@0 2994 for(i=0;(j=active2dnum[i])>=0;i++) {
wolffd@0 2995 s=-label[j];
wolffd@0 2996 if(kernel_cache && cache_only)
wolffd@0 2997 valid=(kernel_cache->index[j]>=0);
wolffd@0 2998 else
wolffd@0 2999 valid=1;
wolffd@0 3000 if(valid
wolffd@0 3001 && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
wolffd@0 3002 && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
wolffd@0 3003 && (s>0)))
wolffd@0 3004 && (!chosen[j])
wolffd@0 3005 && (label[j])
wolffd@0 3006 && (!inconsistent[j]))
wolffd@0 3007 {
wolffd@0 3008 selcrit[activedoc]=(double)label[j]*(learn_parm->eps-(double)label[j]*c[j]+(double)label[j]*lin[j]);
wolffd@0 3009 /* selcrit[activedoc]=(double)label[j]*(-1.0+(double)label[j]*lin[j]); */
wolffd@0 3010 key[activedoc]=j;
wolffd@0 3011 activedoc++;
wolffd@0 3012 }
wolffd@0 3013 }
wolffd@0 3014 select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
wolffd@0 3015 for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
wolffd@0 3016 /* if(learn_parm->biased_hyperplane || (selcrit[select[k]] > 0)) { */
wolffd@0 3017 i=key[select[k]];
wolffd@0 3018 chosen[i]=1;
wolffd@0 3019 working2dnum[inum+choosenum]=i;
wolffd@0 3020 choosenum+=1;
wolffd@0 3021 if(kernel_cache)
wolffd@0 3022 kernel_cache_touch(kernel_cache,i); /* make sure it does not get
wolffd@0 3023 kicked out of cache */
wolffd@0 3024 /* } */
wolffd@0 3025 }
wolffd@0 3026
wolffd@0 3027 activedoc=0;
wolffd@0 3028 for(i=0;(j=active2dnum[i])>=0;i++) {
wolffd@0 3029 s=label[j];
wolffd@0 3030 if(kernel_cache && cache_only)
wolffd@0 3031 valid=(kernel_cache->index[j]>=0);
wolffd@0 3032 else
wolffd@0 3033 valid=1;
wolffd@0 3034 if(valid
wolffd@0 3035 && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
wolffd@0 3036 && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
wolffd@0 3037 && (s>0)))
wolffd@0 3038 && (!chosen[j])
wolffd@0 3039 && (label[j])
wolffd@0 3040 && (!inconsistent[j]))
wolffd@0 3041 {
wolffd@0 3042 selcrit[activedoc]=-(double)label[j]*(learn_parm->eps-(double)label[j]*c[j]+(double)label[j]*lin[j]);
wolffd@0 3043 /* selcrit[activedoc]=-(double)(label[j]*(-1.0+(double)label[j]*lin[j])); */
wolffd@0 3044 key[activedoc]=j;
wolffd@0 3045 activedoc++;
wolffd@0 3046 }
wolffd@0 3047 }
wolffd@0 3048 select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
wolffd@0 3049 for(k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
wolffd@0 3050 /* if(learn_parm->biased_hyperplane || (selcrit[select[k]] > 0)) { */
wolffd@0 3051 i=key[select[k]];
wolffd@0 3052 chosen[i]=1;
wolffd@0 3053 working2dnum[inum+choosenum]=i;
wolffd@0 3054 choosenum+=1;
wolffd@0 3055 if(kernel_cache)
wolffd@0 3056 kernel_cache_touch(kernel_cache,i); /* make sure it does not get
wolffd@0 3057 kicked out of cache */
wolffd@0 3058 /* } */
wolffd@0 3059 }
wolffd@0 3060 working2dnum[inum+choosenum]=-1; /* complete index */
wolffd@0 3061 return(choosenum);
wolffd@0 3062 }
wolffd@0 3063
wolffd@0 3064 long select_next_qp_subproblem_rand(long int *label,
wolffd@0 3065 long int *unlabeled,
wolffd@0 3066 double *a, double *lin,
wolffd@0 3067 double *c, long int totdoc,
wolffd@0 3068 long int qp_size,
wolffd@0 3069 LEARN_PARM *learn_parm,
wolffd@0 3070 long int *inconsistent,
wolffd@0 3071 long int *active2dnum,
wolffd@0 3072 long int *working2dnum,
wolffd@0 3073 double *selcrit,
wolffd@0 3074 long int *select,
wolffd@0 3075 KERNEL_CACHE *kernel_cache,
wolffd@0 3076 long int *key,
wolffd@0 3077 long int *chosen,
wolffd@0 3078 long int iteration)
wolffd@0 3079 /* Use the feasible direction approach to select the next
wolffd@0 3080 qp-subproblem (see section 'Selecting a good working set'). Chooses
wolffd@0 3081 a feasible direction at (pseudo) random to help jump over numerical
wolffd@0 3082 problem. */
wolffd@0 3083 {
wolffd@0 3084 long choosenum,i,j,k,activedoc,inum;
wolffd@0 3085 double s;
wolffd@0 3086
wolffd@0 3087 for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
wolffd@0 3088 choosenum=0;
wolffd@0 3089 activedoc=0;
wolffd@0 3090 for(i=0;(j=active2dnum[i])>=0;i++) {
wolffd@0 3091 s=-label[j];
wolffd@0 3092 if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
wolffd@0 3093 && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
wolffd@0 3094 && (s>0)))
wolffd@0 3095 && (!inconsistent[j])
wolffd@0 3096 && (label[j])
wolffd@0 3097 && (!chosen[j])) {
wolffd@0 3098 selcrit[activedoc]=(j+iteration) % totdoc;
wolffd@0 3099 key[activedoc]=j;
wolffd@0 3100 activedoc++;
wolffd@0 3101 }
wolffd@0 3102 }
wolffd@0 3103 select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
wolffd@0 3104 for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
wolffd@0 3105 i=key[select[k]];
wolffd@0 3106 chosen[i]=1;
wolffd@0 3107 working2dnum[inum+choosenum]=i;
wolffd@0 3108 choosenum+=1;
wolffd@0 3109 kernel_cache_touch(kernel_cache,i); /* make sure it does not get kicked */
wolffd@0 3110 /* out of cache */
wolffd@0 3111 }
wolffd@0 3112
wolffd@0 3113 activedoc=0;
wolffd@0 3114 for(i=0;(j=active2dnum[i])>=0;i++) {
wolffd@0 3115 s=label[j];
wolffd@0 3116 if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
wolffd@0 3117 && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
wolffd@0 3118 && (s>0)))
wolffd@0 3119 && (!inconsistent[j])
wolffd@0 3120 && (label[j])
wolffd@0 3121 && (!chosen[j])) {
wolffd@0 3122 selcrit[activedoc]=(j+iteration) % totdoc;
wolffd@0 3123 key[activedoc]=j;
wolffd@0 3124 activedoc++;
wolffd@0 3125 }
wolffd@0 3126 }
wolffd@0 3127 select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
wolffd@0 3128 for(k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
wolffd@0 3129 i=key[select[k]];
wolffd@0 3130 chosen[i]=1;
wolffd@0 3131 working2dnum[inum+choosenum]=i;
wolffd@0 3132 choosenum+=1;
wolffd@0 3133 kernel_cache_touch(kernel_cache,i); /* make sure it does not get kicked */
wolffd@0 3134 /* out of cache */
wolffd@0 3135 }
wolffd@0 3136 working2dnum[inum+choosenum]=-1; /* complete index */
wolffd@0 3137 return(choosenum);
wolffd@0 3138 }
wolffd@0 3139
wolffd@0 3140 long select_next_qp_slackset(DOC **docs, long int *label,
wolffd@0 3141 double *a, double *lin,
wolffd@0 3142 double *slack, double *alphaslack,
wolffd@0 3143 double *c,
wolffd@0 3144 LEARN_PARM *learn_parm,
wolffd@0 3145 long int *active2dnum, double *maxviol)
wolffd@0 3146 /* returns the slackset with the largest internal violation */
wolffd@0 3147 {
wolffd@0 3148 long i,ii,maxdiffid;
wolffd@0 3149 double dist,target,maxdiff,ex_c;
wolffd@0 3150
wolffd@0 3151 maxdiff=0;
wolffd@0 3152 maxdiffid=0;
wolffd@0 3153 for(ii=0;(i=active2dnum[ii])>=0;ii++) {
wolffd@0 3154 ex_c=learn_parm->svm_c-learn_parm->epsilon_a;
wolffd@0 3155 if(alphaslack[docs[i]->slackid] >= ex_c) {
wolffd@0 3156 dist=(lin[i])*(double)label[i]+slack[docs[i]->slackid]; /* distance */
wolffd@0 3157 target=-(learn_parm->eps-(double)label[i]*c[i]); /* rhs of constraint */
wolffd@0 3158 if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
wolffd@0 3159 if((dist-target)>maxdiff) { /* largest violation */
wolffd@0 3160 maxdiff=dist-target;
wolffd@0 3161 maxdiffid=docs[i]->slackid;
wolffd@0 3162 }
wolffd@0 3163 }
wolffd@0 3164 }
wolffd@0 3165 }
wolffd@0 3166 (*maxviol)=maxdiff;
wolffd@0 3167 return(maxdiffid);
wolffd@0 3168 }
wolffd@0 3169
wolffd@0 3170
wolffd@0 3171 void select_top_n(double *selcrit, long int range, long int *select,
wolffd@0 3172 long int n)
wolffd@0 3173 {
wolffd@0 3174 register long i,j;
wolffd@0 3175
wolffd@0 3176 for(i=0;(i<n) && (i<range);i++) { /* Initialize with the first n elements */
wolffd@0 3177 for(j=i;j>=0;j--) {
wolffd@0 3178 if((j>0) && (selcrit[select[j-1]]<selcrit[i])){
wolffd@0 3179 select[j]=select[j-1];
wolffd@0 3180 }
wolffd@0 3181 else {
wolffd@0 3182 select[j]=i;
wolffd@0 3183 j=-1;
wolffd@0 3184 }
wolffd@0 3185 }
wolffd@0 3186 }
wolffd@0 3187 if(n>0) {
wolffd@0 3188 for(i=n;i<range;i++) {
wolffd@0 3189 if(selcrit[i]>selcrit[select[n-1]]) {
wolffd@0 3190 for(j=n-1;j>=0;j--) {
wolffd@0 3191 if((j>0) && (selcrit[select[j-1]]<selcrit[i])) {
wolffd@0 3192 select[j]=select[j-1];
wolffd@0 3193 }
wolffd@0 3194 else {
wolffd@0 3195 select[j]=i;
wolffd@0 3196 j=-1;
wolffd@0 3197 }
wolffd@0 3198 }
wolffd@0 3199 }
wolffd@0 3200 }
wolffd@0 3201 }
wolffd@0 3202 }
wolffd@0 3203
wolffd@0 3204
wolffd@0 3205 /******************************** Shrinking *********************************/
wolffd@0 3206
wolffd@0 3207 void init_shrink_state(SHRINK_STATE *shrink_state, long int totdoc,
wolffd@0 3208 long int maxhistory)
wolffd@0 3209 {
wolffd@0 3210 long i;
wolffd@0 3211
wolffd@0 3212 shrink_state->deactnum=0;
wolffd@0 3213 shrink_state->active = (long *)my_malloc(sizeof(long)*totdoc);
wolffd@0 3214 shrink_state->inactive_since = (long *)my_malloc(sizeof(long)*totdoc);
wolffd@0 3215 shrink_state->a_history = (double **)my_malloc(sizeof(double *)*maxhistory);
wolffd@0 3216 shrink_state->maxhistory=maxhistory;
wolffd@0 3217 shrink_state->last_lin = (double *)my_malloc(sizeof(double)*totdoc);
wolffd@0 3218 shrink_state->last_a = (double *)my_malloc(sizeof(double)*totdoc);
wolffd@0 3219
wolffd@0 3220 for(i=0;i<totdoc;i++) {
wolffd@0 3221 shrink_state->active[i]=1;
wolffd@0 3222 shrink_state->inactive_since[i]=0;
wolffd@0 3223 shrink_state->last_a[i]=0;
wolffd@0 3224 shrink_state->last_lin[i]=0;
wolffd@0 3225 }
wolffd@0 3226 }
wolffd@0 3227
wolffd@0 3228 void shrink_state_cleanup(SHRINK_STATE *shrink_state)
wolffd@0 3229 {
wolffd@0 3230 free(shrink_state->active);
wolffd@0 3231 free(shrink_state->inactive_since);
wolffd@0 3232 if(shrink_state->deactnum > 0)
wolffd@0 3233 free(shrink_state->a_history[shrink_state->deactnum-1]);
wolffd@0 3234 free(shrink_state->a_history);
wolffd@0 3235 free(shrink_state->last_a);
wolffd@0 3236 free(shrink_state->last_lin);
wolffd@0 3237 }
wolffd@0 3238
wolffd@0 3239 long shrink_problem(DOC **docs,
wolffd@0 3240 LEARN_PARM *learn_parm,
wolffd@0 3241 SHRINK_STATE *shrink_state,
wolffd@0 3242 KERNEL_PARM *kernel_parm,
wolffd@0 3243 long int *active2dnum,
wolffd@0 3244 long int *last_suboptimal_at,
wolffd@0 3245 long int iteration,
wolffd@0 3246 long int totdoc,
wolffd@0 3247 long int minshrink,
wolffd@0 3248 double *a,
wolffd@0 3249 long int *inconsistent)
wolffd@0 3250 /* Shrink some variables away. Do the shrinking only if at least
wolffd@0 3251 minshrink variables can be removed. */
wolffd@0 3252 {
wolffd@0 3253 long i,ii,change,activenum,lastiter;
wolffd@0 3254 double *a_old;
wolffd@0 3255
wolffd@0 3256 activenum=0;
wolffd@0 3257 change=0;
wolffd@0 3258 for(ii=0;active2dnum[ii]>=0;ii++) {
wolffd@0 3259 i=active2dnum[ii];
wolffd@0 3260 activenum++;
wolffd@0 3261 if(learn_parm->sharedslack)
wolffd@0 3262 lastiter=last_suboptimal_at[docs[i]->slackid];
wolffd@0 3263 else
wolffd@0 3264 lastiter=last_suboptimal_at[i];
wolffd@0 3265 if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
wolffd@0 3266 || (inconsistent[i])) {
wolffd@0 3267 change++;
wolffd@0 3268 }
wolffd@0 3269 }
wolffd@0 3270 if((change>=minshrink) /* shrink only if sufficiently many candidates */
wolffd@0 3271 && (shrink_state->deactnum<shrink_state->maxhistory)) { /* and enough memory */
wolffd@0 3272 /* Shrink problem by removing those variables which are */
wolffd@0 3273 /* optimal at a bound for a minimum number of iterations */
wolffd@0 3274 if(verbosity>=2) {
wolffd@0 3275 printf(" Shrinking..."); fflush(stdout);
wolffd@0 3276 }
wolffd@0 3277 if(kernel_parm->kernel_type != LINEAR) { /* non-linear case save alphas */
wolffd@0 3278 a_old=(double *)my_malloc(sizeof(double)*totdoc);
wolffd@0 3279 shrink_state->a_history[shrink_state->deactnum]=a_old;
wolffd@0 3280 for(i=0;i<totdoc;i++) {
wolffd@0 3281 a_old[i]=a[i];
wolffd@0 3282 }
wolffd@0 3283 }
wolffd@0 3284 for(ii=0;active2dnum[ii]>=0;ii++) {
wolffd@0 3285 i=active2dnum[ii];
wolffd@0 3286 if(learn_parm->sharedslack)
wolffd@0 3287 lastiter=last_suboptimal_at[docs[i]->slackid];
wolffd@0 3288 else
wolffd@0 3289 lastiter=last_suboptimal_at[i];
wolffd@0 3290 if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
wolffd@0 3291 || (inconsistent[i])) {
wolffd@0 3292 shrink_state->active[i]=0;
wolffd@0 3293 shrink_state->inactive_since[i]=shrink_state->deactnum;
wolffd@0 3294 }
wolffd@0 3295 }
wolffd@0 3296 activenum=compute_index(shrink_state->active,totdoc,active2dnum);
wolffd@0 3297 shrink_state->deactnum++;
wolffd@0 3298 if(kernel_parm->kernel_type == LINEAR) {
wolffd@0 3299 shrink_state->deactnum=0;
wolffd@0 3300 }
wolffd@0 3301 if(verbosity>=2) {
wolffd@0 3302 printf("done.\n"); fflush(stdout);
wolffd@0 3303 printf(" Number of inactive variables = %ld\n",totdoc-activenum);
wolffd@0 3304 }
wolffd@0 3305 }
wolffd@0 3306 return(activenum);
wolffd@0 3307 }
wolffd@0 3308
wolffd@0 3309
wolffd@0 3310 void reactivate_inactive_examples(long int *label,
wolffd@0 3311 long int *unlabeled,
wolffd@0 3312 double *a,
wolffd@0 3313 SHRINK_STATE *shrink_state,
wolffd@0 3314 double *lin,
wolffd@0 3315 double *c,
wolffd@0 3316 long int totdoc,
wolffd@0 3317 long int totwords,
wolffd@0 3318 long int iteration,
wolffd@0 3319 LEARN_PARM *learn_parm,
wolffd@0 3320 long int *inconsistent,
wolffd@0 3321 DOC **docs,
wolffd@0 3322 KERNEL_PARM *kernel_parm,
wolffd@0 3323 KERNEL_CACHE *kernel_cache,
wolffd@0 3324 MODEL *model,
wolffd@0 3325 CFLOAT *aicache,
wolffd@0 3326 double *weights,
wolffd@0 3327 double *maxdiff)
wolffd@0 3328 /* Make all variables active again which had been removed by
wolffd@0 3329 shrinking. */
wolffd@0 3330 /* Computes lin for those variables from scratch. */
wolffd@0 3331 {
wolffd@0 3332 register long i,j,ii,jj,t,*changed2dnum,*inactive2dnum;
wolffd@0 3333 long *changed,*inactive;
wolffd@0 3334 register double kernel_val,*a_old,dist;
wolffd@0 3335 double ex_c,target;
wolffd@0 3336 SVECTOR *f;
wolffd@0 3337
wolffd@0 3338 if(kernel_parm->kernel_type == LINEAR) { /* special linear case */
wolffd@0 3339 a_old=shrink_state->last_a;
wolffd@0 3340 clear_vector_n(weights,totwords);
wolffd@0 3341 for(i=0;i<totdoc;i++) {
wolffd@0 3342 if(a[i] != a_old[i]) {
wolffd@0 3343 for(f=docs[i]->fvec;f;f=f->next)
wolffd@0 3344 add_vector_ns(weights,f,
wolffd@0 3345 f->factor*((a[i]-a_old[i])*(double)label[i]));
wolffd@0 3346 a_old[i]=a[i];
wolffd@0 3347 }
wolffd@0 3348 }
wolffd@0 3349 for(i=0;i<totdoc;i++) {
wolffd@0 3350 if(!shrink_state->active[i]) {
wolffd@0 3351 for(f=docs[i]->fvec;f;f=f->next)
wolffd@0 3352 lin[i]=shrink_state->last_lin[i]+f->factor*sprod_ns(weights,f);
wolffd@0 3353 }
wolffd@0 3354 shrink_state->last_lin[i]=lin[i];
wolffd@0 3355 }
wolffd@0 3356 }
wolffd@0 3357 else {
wolffd@0 3358 changed=(long *)my_malloc(sizeof(long)*totdoc);
wolffd@0 3359 changed2dnum=(long *)my_malloc(sizeof(long)*(totdoc+11));
wolffd@0 3360 inactive=(long *)my_malloc(sizeof(long)*totdoc);
wolffd@0 3361 inactive2dnum=(long *)my_malloc(sizeof(long)*(totdoc+11));
wolffd@0 3362 for(t=shrink_state->deactnum-1;(t>=0) && shrink_state->a_history[t];t--) {
wolffd@0 3363 if(verbosity>=2) {
wolffd@0 3364 printf("%ld..",t); fflush(stdout);
wolffd@0 3365 }
wolffd@0 3366 a_old=shrink_state->a_history[t];
wolffd@0 3367 for(i=0;i<totdoc;i++) {
wolffd@0 3368 inactive[i]=((!shrink_state->active[i])
wolffd@0 3369 && (shrink_state->inactive_since[i] == t));
wolffd@0 3370 changed[i]= (a[i] != a_old[i]);
wolffd@0 3371 }
wolffd@0 3372 compute_index(inactive,totdoc,inactive2dnum);
wolffd@0 3373 compute_index(changed,totdoc,changed2dnum);
wolffd@0 3374
wolffd@0 3375 for(ii=0;(i=changed2dnum[ii])>=0;ii++) {
wolffd@0 3376 get_kernel_row(kernel_cache,docs,i,totdoc,inactive2dnum,aicache,
wolffd@0 3377 kernel_parm);
wolffd@0 3378 for(jj=0;(j=inactive2dnum[jj])>=0;jj++) {
wolffd@0 3379 kernel_val=aicache[j];
wolffd@0 3380 lin[j]+=(((a[i]*kernel_val)-(a_old[i]*kernel_val))*(double)label[i]);
wolffd@0 3381 }
wolffd@0 3382 }
wolffd@0 3383 }
wolffd@0 3384 free(changed);
wolffd@0 3385 free(changed2dnum);
wolffd@0 3386 free(inactive);
wolffd@0 3387 free(inactive2dnum);
wolffd@0 3388 }
wolffd@0 3389 (*maxdiff)=0;
wolffd@0 3390 for(i=0;i<totdoc;i++) {
wolffd@0 3391 shrink_state->inactive_since[i]=shrink_state->deactnum-1;
wolffd@0 3392 if(!inconsistent[i]) {
wolffd@0 3393 dist=(lin[i]-model->b)*(double)label[i];
wolffd@0 3394 target=-(learn_parm->eps-(double)label[i]*c[i]);
wolffd@0 3395 ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
wolffd@0 3396 if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
wolffd@0 3397 if((dist-target)>(*maxdiff)) /* largest violation */
wolffd@0 3398 (*maxdiff)=dist-target;
wolffd@0 3399 }
wolffd@0 3400 else if((a[i]<ex_c) && (dist < target)) {
wolffd@0 3401 if((target-dist)>(*maxdiff)) /* largest violation */
wolffd@0 3402 (*maxdiff)=target-dist;
wolffd@0 3403 }
wolffd@0 3404 if((a[i]>(0+learn_parm->epsilon_a))
wolffd@0 3405 && (a[i]<ex_c)) {
wolffd@0 3406 shrink_state->active[i]=1; /* not at bound */
wolffd@0 3407 }
wolffd@0 3408 else if((a[i]<=(0+learn_parm->epsilon_a)) && (dist < (target+learn_parm->epsilon_shrink))) {
wolffd@0 3409 shrink_state->active[i]=1;
wolffd@0 3410 }
wolffd@0 3411 else if((a[i]>=ex_c)
wolffd@0 3412 && (dist > (target-learn_parm->epsilon_shrink))) {
wolffd@0 3413 shrink_state->active[i]=1;
wolffd@0 3414 }
wolffd@0 3415 else if(learn_parm->sharedslack) { /* make all active when sharedslack */
wolffd@0 3416 shrink_state->active[i]=1;
wolffd@0 3417 }
wolffd@0 3418 }
wolffd@0 3419 }
wolffd@0 3420 if(kernel_parm->kernel_type != LINEAR) { /* update history for non-linear */
wolffd@0 3421 for(i=0;i<totdoc;i++) {
wolffd@0 3422 (shrink_state->a_history[shrink_state->deactnum-1])[i]=a[i];
wolffd@0 3423 }
wolffd@0 3424 for(t=shrink_state->deactnum-2;(t>=0) && shrink_state->a_history[t];t--) {
wolffd@0 3425 free(shrink_state->a_history[t]);
wolffd@0 3426 shrink_state->a_history[t]=0;
wolffd@0 3427 }
wolffd@0 3428 }
wolffd@0 3429 }
wolffd@0 3430
wolffd@0 3431 /****************************** Cache handling *******************************/
wolffd@0 3432
wolffd@0 3433 void get_kernel_row(KERNEL_CACHE *kernel_cache, DOC **docs,
wolffd@0 3434 long int docnum, long int totdoc,
wolffd@0 3435 long int *active2dnum, CFLOAT *buffer,
wolffd@0 3436 KERNEL_PARM *kernel_parm)
wolffd@0 3437 /* Get's a row of the matrix of kernel values This matrix has the
wolffd@0 3438 same form as the Hessian, just that the elements are not
wolffd@0 3439 multiplied by */
wolffd@0 3440 /* y_i * y_j * a_i * a_j */
wolffd@0 3441 /* Takes the values from the cache if available. */
wolffd@0 3442 {
wolffd@0 3443 register long i,j,start;
wolffd@0 3444 DOC *ex;
wolffd@0 3445
wolffd@0 3446 ex=docs[docnum];
wolffd@0 3447
wolffd@0 3448 if(kernel_cache->index[docnum] != -1) { /* row is cached? */
wolffd@0 3449 kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */
wolffd@0 3450 start=kernel_cache->activenum*kernel_cache->index[docnum];
wolffd@0 3451 for(i=0;(j=active2dnum[i])>=0;i++) {
wolffd@0 3452 if(kernel_cache->totdoc2active[j] >= 0) { /* column is cached? */
wolffd@0 3453 buffer[j]=kernel_cache->buffer[start+kernel_cache->totdoc2active[j]];
wolffd@0 3454 }
wolffd@0 3455 else {
wolffd@0 3456 buffer[j]=(CFLOAT)kernel(kernel_parm,ex,docs[j]);
wolffd@0 3457 }
wolffd@0 3458 }
wolffd@0 3459 }
wolffd@0 3460 else {
wolffd@0 3461 for(i=0;(j=active2dnum[i])>=0;i++) {
wolffd@0 3462 buffer[j]=(CFLOAT)kernel(kernel_parm,ex,docs[j]);
wolffd@0 3463 }
wolffd@0 3464 }
wolffd@0 3465 }
wolffd@0 3466
wolffd@0 3467
wolffd@0 3468 void cache_kernel_row(KERNEL_CACHE *kernel_cache, DOC **docs,
wolffd@0 3469 long int m, KERNEL_PARM *kernel_parm)
wolffd@0 3470 /* Fills cache for the row m */
wolffd@0 3471 {
wolffd@0 3472 register DOC *ex;
wolffd@0 3473 register long j,k,l;
wolffd@0 3474 register CFLOAT *cache;
wolffd@0 3475
wolffd@0 3476 if(!kernel_cache_check(kernel_cache,m)) { /* not cached yet*/
wolffd@0 3477 cache = kernel_cache_clean_and_malloc(kernel_cache,m);
wolffd@0 3478 if(cache) {
wolffd@0 3479 l=kernel_cache->totdoc2active[m];
wolffd@0 3480 ex=docs[m];
wolffd@0 3481 for(j=0;j<kernel_cache->activenum;j++) { /* fill cache */
wolffd@0 3482 k=kernel_cache->active2totdoc[j];
wolffd@0 3483 if((kernel_cache->index[k] != -1) && (l != -1) && (k != m)) {
wolffd@0 3484 cache[j]=kernel_cache->buffer[kernel_cache->activenum
wolffd@0 3485 *kernel_cache->index[k]+l];
wolffd@0 3486 }
wolffd@0 3487 else {
wolffd@0 3488 cache[j]=kernel(kernel_parm,ex,docs[k]);
wolffd@0 3489 }
wolffd@0 3490 }
wolffd@0 3491 }
wolffd@0 3492 else {
wolffd@0 3493 perror("Error: Kernel cache full! => increase cache size");
wolffd@0 3494 }
wolffd@0 3495 }
wolffd@0 3496 }
wolffd@0 3497
wolffd@0 3498
wolffd@0 3499 void cache_multiple_kernel_rows(KERNEL_CACHE *kernel_cache, DOC **docs,
wolffd@0 3500 long int *key, long int varnum,
wolffd@0 3501 KERNEL_PARM *kernel_parm)
wolffd@0 3502 /* Fills cache for the rows in key */
wolffd@0 3503 {
wolffd@0 3504 register long i;
wolffd@0 3505
wolffd@0 3506 for(i=0;i<varnum;i++) { /* fill up kernel cache */
wolffd@0 3507 cache_kernel_row(kernel_cache,docs,key[i],kernel_parm);
wolffd@0 3508 }
wolffd@0 3509 }
wolffd@0 3510
wolffd@0 3511
wolffd@0 3512 void kernel_cache_shrink(KERNEL_CACHE *kernel_cache, long int totdoc,
wolffd@0 3513 long int numshrink, long int *after)
wolffd@0 3514 /* Remove numshrink columns in the cache which correspond to
wolffd@0 3515 examples marked 0 in after. */
wolffd@0 3516 {
wolffd@0 3517 register long i,j,jj,from=0,to=0,scount;
wolffd@0 3518 long *keep;
wolffd@0 3519
wolffd@0 3520 if(verbosity>=2) {
wolffd@0 3521 printf(" Reorganizing cache..."); fflush(stdout);
wolffd@0 3522 }
wolffd@0 3523
wolffd@0 3524 keep=(long *)my_malloc(sizeof(long)*totdoc);
wolffd@0 3525 for(j=0;j<totdoc;j++) {
wolffd@0 3526 keep[j]=1;
wolffd@0 3527 }
wolffd@0 3528 scount=0;
wolffd@0 3529 for(jj=0;(jj<kernel_cache->activenum) && (scount<numshrink);jj++) {
wolffd@0 3530 j=kernel_cache->active2totdoc[jj];
wolffd@0 3531 if(!after[j]) {
wolffd@0 3532 scount++;
wolffd@0 3533 keep[j]=0;
wolffd@0 3534 }
wolffd@0 3535 }
wolffd@0 3536
wolffd@0 3537 for(i=0;i<kernel_cache->max_elems;i++) {
wolffd@0 3538 for(jj=0;jj<kernel_cache->activenum;jj++) {
wolffd@0 3539 j=kernel_cache->active2totdoc[jj];
wolffd@0 3540 if(!keep[j]) {
wolffd@0 3541 from++;
wolffd@0 3542 }
wolffd@0 3543 else {
wolffd@0 3544 kernel_cache->buffer[to]=kernel_cache->buffer[from];
wolffd@0 3545 to++;
wolffd@0 3546 from++;
wolffd@0 3547 }
wolffd@0 3548 }
wolffd@0 3549 }
wolffd@0 3550
wolffd@0 3551 kernel_cache->activenum=0;
wolffd@0 3552 for(j=0;j<totdoc;j++) {
wolffd@0 3553 if((keep[j]) && (kernel_cache->totdoc2active[j] != -1)) {
wolffd@0 3554 kernel_cache->active2totdoc[kernel_cache->activenum]=j;
wolffd@0 3555 kernel_cache->totdoc2active[j]=kernel_cache->activenum;
wolffd@0 3556 kernel_cache->activenum++;
wolffd@0 3557 }
wolffd@0 3558 else {
wolffd@0 3559 kernel_cache->totdoc2active[j]=-1;
wolffd@0 3560 }
wolffd@0 3561 }
wolffd@0 3562
wolffd@0 3563 kernel_cache->max_elems=(long)(kernel_cache->buffsize/kernel_cache->activenum);
wolffd@0 3564 if(kernel_cache->max_elems>totdoc) {
wolffd@0 3565 kernel_cache->max_elems=totdoc;
wolffd@0 3566 }
wolffd@0 3567
wolffd@0 3568 free(keep);
wolffd@0 3569
wolffd@0 3570 if(verbosity>=2) {
wolffd@0 3571 printf("done.\n"); fflush(stdout);
wolffd@0 3572 printf(" Cache-size in rows = %ld\n",kernel_cache->max_elems);
wolffd@0 3573 }
wolffd@0 3574 }
wolffd@0 3575
wolffd@0 3576 KERNEL_CACHE *kernel_cache_init(long int totdoc, long int buffsize)
wolffd@0 3577 {
wolffd@0 3578 long i;
wolffd@0 3579 KERNEL_CACHE *kernel_cache;
wolffd@0 3580
wolffd@0 3581 kernel_cache=(KERNEL_CACHE *)my_malloc(sizeof(KERNEL_CACHE));
wolffd@0 3582 kernel_cache->index = (long *)my_malloc(sizeof(long)*totdoc);
wolffd@0 3583 kernel_cache->occu = (long *)my_malloc(sizeof(long)*totdoc);
wolffd@0 3584 kernel_cache->lru = (long *)my_malloc(sizeof(long)*totdoc);
wolffd@0 3585 kernel_cache->invindex = (long *)my_malloc(sizeof(long)*totdoc);
wolffd@0 3586 kernel_cache->active2totdoc = (long *)my_malloc(sizeof(long)*totdoc);
wolffd@0 3587 kernel_cache->totdoc2active = (long *)my_malloc(sizeof(long)*totdoc);
wolffd@0 3588 kernel_cache->buffer = (CFLOAT *)my_malloc((size_t)(buffsize)*1024*1024);
wolffd@0 3589
wolffd@0 3590 kernel_cache->buffsize=(long)(buffsize/sizeof(CFLOAT)*1024*1024);
wolffd@0 3591
wolffd@0 3592 kernel_cache->max_elems=(long)(kernel_cache->buffsize/totdoc);
wolffd@0 3593 if(kernel_cache->max_elems>totdoc) {
wolffd@0 3594 kernel_cache->max_elems=totdoc;
wolffd@0 3595 }
wolffd@0 3596
wolffd@0 3597 if(verbosity>=2) {
wolffd@0 3598 printf(" Cache-size in rows = %ld\n",kernel_cache->max_elems);
wolffd@0 3599 printf(" Kernel evals so far: %ld\n",kernel_cache_statistic);
wolffd@0 3600 }
wolffd@0 3601
wolffd@0 3602 kernel_cache->elems=0; /* initialize cache */
wolffd@0 3603 for(i=0;i<totdoc;i++) {
wolffd@0 3604 kernel_cache->index[i]=-1;
wolffd@0 3605 kernel_cache->lru[i]=0;
wolffd@0 3606 }
wolffd@0 3607 for(i=0;i<totdoc;i++) {
wolffd@0 3608 kernel_cache->occu[i]=0;
wolffd@0 3609 kernel_cache->invindex[i]=-1;
wolffd@0 3610 }
wolffd@0 3611
wolffd@0 3612 kernel_cache->activenum=totdoc;;
wolffd@0 3613 for(i=0;i<totdoc;i++) {
wolffd@0 3614 kernel_cache->active2totdoc[i]=i;
wolffd@0 3615 kernel_cache->totdoc2active[i]=i;
wolffd@0 3616 }
wolffd@0 3617
wolffd@0 3618 kernel_cache->time=0;
wolffd@0 3619
wolffd@0 3620 return(kernel_cache);
wolffd@0 3621 }
wolffd@0 3622
wolffd@0 3623 void kernel_cache_reset_lru(KERNEL_CACHE *kernel_cache)
wolffd@0 3624 {
wolffd@0 3625 long maxlru=0,k;
wolffd@0 3626
wolffd@0 3627 for(k=0;k<kernel_cache->max_elems;k++) {
wolffd@0 3628 if(maxlru < kernel_cache->lru[k])
wolffd@0 3629 maxlru=kernel_cache->lru[k];
wolffd@0 3630 }
wolffd@0 3631 for(k=0;k<kernel_cache->max_elems;k++) {
wolffd@0 3632 kernel_cache->lru[k]-=maxlru;
wolffd@0 3633 }
wolffd@0 3634 }
wolffd@0 3635
wolffd@0 3636 void kernel_cache_cleanup(KERNEL_CACHE *kernel_cache)
wolffd@0 3637 {
wolffd@0 3638 free(kernel_cache->index);
wolffd@0 3639 free(kernel_cache->occu);
wolffd@0 3640 free(kernel_cache->lru);
wolffd@0 3641 free(kernel_cache->invindex);
wolffd@0 3642 free(kernel_cache->active2totdoc);
wolffd@0 3643 free(kernel_cache->totdoc2active);
wolffd@0 3644 free(kernel_cache->buffer);
wolffd@0 3645 free(kernel_cache);
wolffd@0 3646 }
wolffd@0 3647
wolffd@0 3648 long kernel_cache_malloc(KERNEL_CACHE *kernel_cache)
wolffd@0 3649 {
wolffd@0 3650 long i;
wolffd@0 3651
wolffd@0 3652 if(kernel_cache_space_available(kernel_cache)) {
wolffd@0 3653 for(i=0;i<kernel_cache->max_elems;i++) {
wolffd@0 3654 if(!kernel_cache->occu[i]) {
wolffd@0 3655 kernel_cache->occu[i]=1;
wolffd@0 3656 kernel_cache->elems++;
wolffd@0 3657 return(i);
wolffd@0 3658 }
wolffd@0 3659 }
wolffd@0 3660 }
wolffd@0 3661 return(-1);
wolffd@0 3662 }
wolffd@0 3663
wolffd@0 3664 void kernel_cache_free(KERNEL_CACHE *kernel_cache, long int i)
wolffd@0 3665 {
wolffd@0 3666 kernel_cache->occu[i]=0;
wolffd@0 3667 kernel_cache->elems--;
wolffd@0 3668 }
wolffd@0 3669
wolffd@0 3670 long kernel_cache_free_lru(KERNEL_CACHE *kernel_cache)
wolffd@0 3671 /* remove least recently used cache element */
wolffd@0 3672 {
wolffd@0 3673 register long k,least_elem=-1,least_time;
wolffd@0 3674
wolffd@0 3675 least_time=kernel_cache->time+1;
wolffd@0 3676 for(k=0;k<kernel_cache->max_elems;k++) {
wolffd@0 3677 if(kernel_cache->invindex[k] != -1) {
wolffd@0 3678 if(kernel_cache->lru[k]<least_time) {
wolffd@0 3679 least_time=kernel_cache->lru[k];
wolffd@0 3680 least_elem=k;
wolffd@0 3681 }
wolffd@0 3682 }
wolffd@0 3683 }
wolffd@0 3684 if(least_elem != -1) {
wolffd@0 3685 kernel_cache_free(kernel_cache,least_elem);
wolffd@0 3686 kernel_cache->index[kernel_cache->invindex[least_elem]]=-1;
wolffd@0 3687 kernel_cache->invindex[least_elem]=-1;
wolffd@0 3688 return(1);
wolffd@0 3689 }
wolffd@0 3690 return(0);
wolffd@0 3691 }
wolffd@0 3692
wolffd@0 3693
wolffd@0 3694 CFLOAT *kernel_cache_clean_and_malloc(KERNEL_CACHE *kernel_cache,
wolffd@0 3695 long int docnum)
wolffd@0 3696 /* Get a free cache entry. In case cache is full, the lru element
wolffd@0 3697 is removed. */
wolffd@0 3698 {
wolffd@0 3699 long result;
wolffd@0 3700 if((result = kernel_cache_malloc(kernel_cache)) == -1) {
wolffd@0 3701 if(kernel_cache_free_lru(kernel_cache)) {
wolffd@0 3702 result = kernel_cache_malloc(kernel_cache);
wolffd@0 3703 }
wolffd@0 3704 }
wolffd@0 3705 kernel_cache->index[docnum]=result;
wolffd@0 3706 if(result == -1) {
wolffd@0 3707 return(0);
wolffd@0 3708 }
wolffd@0 3709 kernel_cache->invindex[result]=docnum;
wolffd@0 3710 kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */
wolffd@0 3711 return((CFLOAT *)((long)kernel_cache->buffer
wolffd@0 3712 +(kernel_cache->activenum*sizeof(CFLOAT)*
wolffd@0 3713 kernel_cache->index[docnum])));
wolffd@0 3714 }
wolffd@0 3715
wolffd@0 3716 long kernel_cache_touch(KERNEL_CACHE *kernel_cache, long int docnum)
wolffd@0 3717 /* Update lru time to avoid removal from cache. */
wolffd@0 3718 {
wolffd@0 3719 if(kernel_cache && kernel_cache->index[docnum] != -1) {
wolffd@0 3720 kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */
wolffd@0 3721 return(1);
wolffd@0 3722 }
wolffd@0 3723 return(0);
wolffd@0 3724 }
wolffd@0 3725
wolffd@0 3726 long kernel_cache_check(KERNEL_CACHE *kernel_cache, long int docnum)
wolffd@0 3727 /* Is that row cached? */
wolffd@0 3728 {
wolffd@0 3729 return(kernel_cache->index[docnum] != -1);
wolffd@0 3730 }
wolffd@0 3731
wolffd@0 3732 long kernel_cache_space_available(KERNEL_CACHE *kernel_cache)
wolffd@0 3733 /* Is there room for one more row? */
wolffd@0 3734 {
wolffd@0 3735 return(kernel_cache->elems < kernel_cache->max_elems);
wolffd@0 3736 }
wolffd@0 3737
wolffd@0 3738 /************************** Compute estimates ******************************/
wolffd@0 3739
wolffd@0 3740 void compute_xa_estimates(MODEL *model, long int *label,
wolffd@0 3741 long int *unlabeled, long int totdoc,
wolffd@0 3742 DOC **docs, double *lin, double *a,
wolffd@0 3743 KERNEL_PARM *kernel_parm,
wolffd@0 3744 LEARN_PARM *learn_parm, double *error,
wolffd@0 3745 double *recall, double *precision)
wolffd@0 3746 /* Computes xa-estimate of error rate, recall, and precision. See
wolffd@0 3747 T. Joachims, Estimating the Generalization Performance of an SVM
wolffd@0 3748 Efficiently, IMCL, 2000. */
wolffd@0 3749 {
wolffd@0 3750 long i,looerror,looposerror,loonegerror;
wolffd@0 3751 long totex,totposex;
wolffd@0 3752 double xi,r_delta,r_delta_sq,sim=0;
wolffd@0 3753 long *sv2dnum=NULL,*sv=NULL,svnum;
wolffd@0 3754
wolffd@0 3755 r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
wolffd@0 3756 r_delta_sq=r_delta*r_delta;
wolffd@0 3757
wolffd@0 3758 looerror=0;
wolffd@0 3759 looposerror=0;
wolffd@0 3760 loonegerror=0;
wolffd@0 3761 totex=0;
wolffd@0 3762 totposex=0;
wolffd@0 3763 svnum=0;
wolffd@0 3764
wolffd@0 3765 if(learn_parm->xa_depth > 0) {
wolffd@0 3766 sv = (long *)my_malloc(sizeof(long)*(totdoc+11));
wolffd@0 3767 for(i=0;i<totdoc;i++)
wolffd@0 3768 sv[i]=0;
wolffd@0 3769 for(i=1;i<model->sv_num;i++)
wolffd@0 3770 if(a[model->supvec[i]->docnum]
wolffd@0 3771 < (learn_parm->svm_cost[model->supvec[i]->docnum]
wolffd@0 3772 -learn_parm->epsilon_a)) {
wolffd@0 3773 sv[model->supvec[i]->docnum]=1;
wolffd@0 3774 svnum++;
wolffd@0 3775 }
wolffd@0 3776 sv2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
wolffd@0 3777 clear_index(sv2dnum);
wolffd@0 3778 compute_index(sv,totdoc,sv2dnum);
wolffd@0 3779 }
wolffd@0 3780
wolffd@0 3781 for(i=0;i<totdoc;i++) {
wolffd@0 3782 if(unlabeled[i]) {
wolffd@0 3783 /* ignore it */
wolffd@0 3784 }
wolffd@0 3785 else {
wolffd@0 3786 xi=1.0-((lin[i]-model->b)*(double)label[i]);
wolffd@0 3787 if(xi<0) xi=0;
wolffd@0 3788 if(label[i]>0) {
wolffd@0 3789 totposex++;
wolffd@0 3790 }
wolffd@0 3791 if((learn_parm->rho*a[i]*r_delta_sq+xi) >= 1.0) {
wolffd@0 3792 if(learn_parm->xa_depth > 0) { /* makes assumptions */
wolffd@0 3793 sim=distribute_alpha_t_greedily(sv2dnum,svnum,docs,a,i,label,
wolffd@0 3794 kernel_parm,learn_parm,
wolffd@0 3795 (double)((1.0-xi-a[i]*r_delta_sq)/(2.0*a[i])));
wolffd@0 3796 }
wolffd@0 3797 if((learn_parm->xa_depth == 0) ||
wolffd@0 3798 ((a[i]*kernel(kernel_parm,docs[i],docs[i])+a[i]*2.0*sim+xi) >= 1.0)) {
wolffd@0 3799 looerror++;
wolffd@0 3800 if(label[i]>0) {
wolffd@0 3801 looposerror++;
wolffd@0 3802 }
wolffd@0 3803 else {
wolffd@0 3804 loonegerror++;
wolffd@0 3805 }
wolffd@0 3806 }
wolffd@0 3807 }
wolffd@0 3808 totex++;
wolffd@0 3809 }
wolffd@0 3810 }
wolffd@0 3811
wolffd@0 3812 (*error)=((double)looerror/(double)totex)*100.0;
wolffd@0 3813 (*recall)=(1.0-(double)looposerror/(double)totposex)*100.0;
wolffd@0 3814 (*precision)=(((double)totposex-(double)looposerror)
wolffd@0 3815 /((double)totposex-(double)looposerror+(double)loonegerror))*100.0;
wolffd@0 3816
wolffd@0 3817 free(sv);
wolffd@0 3818 free(sv2dnum);
wolffd@0 3819 }
wolffd@0 3820
wolffd@0 3821
wolffd@0 3822 double distribute_alpha_t_greedily(long int *sv2dnum, long int svnum,
wolffd@0 3823 DOC **docs, double *a,
wolffd@0 3824 long int docnum,
wolffd@0 3825 long int *label,
wolffd@0 3826 KERNEL_PARM *kernel_parm,
wolffd@0 3827 LEARN_PARM *learn_parm, double thresh)
wolffd@0 3828 /* Experimental Code improving plain XiAlpha Estimates by
wolffd@0 3829 computing a better bound using a greedy optimzation strategy. */
wolffd@0 3830 {
wolffd@0 3831 long best_depth=0;
wolffd@0 3832 long i,j,k,d,skip,allskip;
wolffd@0 3833 double best,best_val[101],val,init_val_sq,init_val_lin;
wolffd@0 3834 long best_ex[101];
wolffd@0 3835 CFLOAT *cache,*trow;
wolffd@0 3836
wolffd@0 3837 cache=(CFLOAT *)my_malloc(sizeof(CFLOAT)*learn_parm->xa_depth*svnum);
wolffd@0 3838 trow = (CFLOAT *)my_malloc(sizeof(CFLOAT)*svnum);
wolffd@0 3839
wolffd@0 3840 for(k=0;k<svnum;k++) {
wolffd@0 3841 trow[k]=kernel(kernel_parm,docs[docnum],docs[sv2dnum[k]]);
wolffd@0 3842 }
wolffd@0 3843
wolffd@0 3844 init_val_sq=0;
wolffd@0 3845 init_val_lin=0;
wolffd@0 3846 best=0;
wolffd@0 3847
wolffd@0 3848 for(d=0;d<learn_parm->xa_depth;d++) {
wolffd@0 3849 allskip=1;
wolffd@0 3850 if(d>=1) {
wolffd@0 3851 init_val_sq+=cache[best_ex[d-1]+svnum*(d-1)];
wolffd@0 3852 for(k=0;k<d-1;k++) {
wolffd@0 3853 init_val_sq+=2.0*cache[best_ex[k]+svnum*(d-1)];
wolffd@0 3854 }
wolffd@0 3855 init_val_lin+=trow[best_ex[d-1]];
wolffd@0 3856 }
wolffd@0 3857 for(i=0;i<svnum;i++) {
wolffd@0 3858 skip=0;
wolffd@0 3859 if(sv2dnum[i] == docnum) skip=1;
wolffd@0 3860 for(j=0;j<d;j++) {
wolffd@0 3861 if(i == best_ex[j]) skip=1;
wolffd@0 3862 }
wolffd@0 3863
wolffd@0 3864 if(!skip) {
wolffd@0 3865 val=init_val_sq;
wolffd@0 3866 if(kernel_parm->kernel_type == LINEAR)
wolffd@0 3867 val+=docs[sv2dnum[i]]->fvec->twonorm_sq;
wolffd@0 3868 else
wolffd@0 3869 val+=kernel(kernel_parm,docs[sv2dnum[i]],docs[sv2dnum[i]]);
wolffd@0 3870 for(j=0;j<d;j++) {
wolffd@0 3871 val+=2.0*cache[i+j*svnum];
wolffd@0 3872 }
wolffd@0 3873 val*=(1.0/(2.0*(d+1.0)*(d+1.0)));
wolffd@0 3874 val-=((init_val_lin+trow[i])/(d+1.0));
wolffd@0 3875
wolffd@0 3876 if(allskip || (val < best_val[d])) {
wolffd@0 3877 best_val[d]=val;
wolffd@0 3878 best_ex[d]=i;
wolffd@0 3879 }
wolffd@0 3880 allskip=0;
wolffd@0 3881 if(val < thresh) {
wolffd@0 3882 i=svnum;
wolffd@0 3883 /* printf("EARLY"); */
wolffd@0 3884 }
wolffd@0 3885 }
wolffd@0 3886 }
wolffd@0 3887 if(!allskip) {
wolffd@0 3888 for(k=0;k<svnum;k++) {
wolffd@0 3889 cache[d*svnum+k]=kernel(kernel_parm,
wolffd@0 3890 docs[sv2dnum[best_ex[d]]],
wolffd@0 3891 docs[sv2dnum[k]]);
wolffd@0 3892 }
wolffd@0 3893 }
wolffd@0 3894 if((!allskip) && ((best_val[d] < best) || (d == 0))) {
wolffd@0 3895 best=best_val[d];
wolffd@0 3896 best_depth=d;
wolffd@0 3897 }
wolffd@0 3898 if(allskip || (best < thresh)) {
wolffd@0 3899 d=learn_parm->xa_depth;
wolffd@0 3900 }
wolffd@0 3901 }
wolffd@0 3902
wolffd@0 3903 free(cache);
wolffd@0 3904 free(trow);
wolffd@0 3905
wolffd@0 3906 /* printf("Distribute[%ld](%ld)=%f, ",docnum,best_depth,best); */
wolffd@0 3907 return(best);
wolffd@0 3908 }
wolffd@0 3909
wolffd@0 3910
wolffd@0 3911 void estimate_transduction_quality(MODEL *model, long int *label,
wolffd@0 3912 long int *unlabeled,
wolffd@0 3913 long int totdoc, DOC **docs, double *lin)
wolffd@0 3914 /* Loo-bound based on observation that loo-errors must have an
wolffd@0 3915 equal distribution in both training and test examples, given
wolffd@0 3916 that the test examples are classified correctly. Compare
wolffd@0 3917 chapter "Constraints on the Transductive Hyperplane" in my
wolffd@0 3918 Dissertation. */
wolffd@0 3919 {
wolffd@0 3920 long i,j,l=0,ulab=0,lab=0,labpos=0,labneg=0,ulabpos=0,ulabneg=0,totulab=0;
wolffd@0 3921 double totlab=0,totlabpos=0,totlabneg=0,labsum=0,ulabsum=0;
wolffd@0 3922 double r_delta,r_delta_sq,xi,xisum=0,asum=0;
wolffd@0 3923
wolffd@0 3924 r_delta=estimate_r_delta(docs,totdoc,&(model->kernel_parm));
wolffd@0 3925 r_delta_sq=r_delta*r_delta;
wolffd@0 3926
wolffd@0 3927 for(j=0;j<totdoc;j++) {
wolffd@0 3928 if(unlabeled[j]) {
wolffd@0 3929 totulab++;
wolffd@0 3930 }
wolffd@0 3931 else {
wolffd@0 3932 totlab++;
wolffd@0 3933 if(label[j] > 0)
wolffd@0 3934 totlabpos++;
wolffd@0 3935 else
wolffd@0 3936 totlabneg++;
wolffd@0 3937 }
wolffd@0 3938 }
wolffd@0 3939 for(j=1;j<model->sv_num;j++) {
wolffd@0 3940 i=model->supvec[j]->docnum;
wolffd@0 3941 xi=1.0-((lin[i]-model->b)*(double)label[i]);
wolffd@0 3942 if(xi<0) xi=0;
wolffd@0 3943
wolffd@0 3944 xisum+=xi;
wolffd@0 3945 asum+=fabs(model->alpha[j]);
wolffd@0 3946 if(unlabeled[i]) {
wolffd@0 3947 ulabsum+=(fabs(model->alpha[j])*r_delta_sq+xi);
wolffd@0 3948 }
wolffd@0 3949 else {
wolffd@0 3950 labsum+=(fabs(model->alpha[j])*r_delta_sq+xi);
wolffd@0 3951 }
wolffd@0 3952 if((fabs(model->alpha[j])*r_delta_sq+xi) >= 1) {
wolffd@0 3953 l++;
wolffd@0 3954 if(unlabeled[model->supvec[j]->docnum]) {
wolffd@0 3955 ulab++;
wolffd@0 3956 if(model->alpha[j] > 0)
wolffd@0 3957 ulabpos++;
wolffd@0 3958 else
wolffd@0 3959 ulabneg++;
wolffd@0 3960 }
wolffd@0 3961 else {
wolffd@0 3962 lab++;
wolffd@0 3963 if(model->alpha[j] > 0)
wolffd@0 3964 labpos++;
wolffd@0 3965 else
wolffd@0 3966 labneg++;
wolffd@0 3967 }
wolffd@0 3968 }
wolffd@0 3969 }
wolffd@0 3970 printf("xacrit>=1: labeledpos=%.5f labeledneg=%.5f default=%.5f\n",(double)labpos/(double)totlab*100.0,(double)labneg/(double)totlab*100.0,(double)totlabpos/(double)(totlab)*100.0);
wolffd@0 3971 printf("xacrit>=1: unlabelpos=%.5f unlabelneg=%.5f\n",(double)ulabpos/(double)totulab*100.0,(double)ulabneg/(double)totulab*100.0);
wolffd@0 3972 printf("xacrit>=1: labeled=%.5f unlabled=%.5f all=%.5f\n",(double)lab/(double)totlab*100.0,(double)ulab/(double)totulab*100.0,(double)l/(double)(totdoc)*100.0);
wolffd@0 3973 printf("xacritsum: labeled=%.5f unlabled=%.5f all=%.5f\n",(double)labsum/(double)totlab*100.0,(double)ulabsum/(double)totulab*100.0,(double)(labsum+ulabsum)/(double)(totdoc)*100.0);
wolffd@0 3974 printf("r_delta_sq=%.5f xisum=%.5f asum=%.5f\n",r_delta_sq,xisum,asum);
wolffd@0 3975 }
wolffd@0 3976
wolffd@0 3977 double estimate_margin_vcdim(MODEL *model, double w, double R,
wolffd@0 3978 KERNEL_PARM *kernel_parm)
wolffd@0 3979 /* optional: length of model vector in feature space */
wolffd@0 3980 /* optional: radius of ball containing the data */
wolffd@0 3981 {
wolffd@0 3982 double h;
wolffd@0 3983
wolffd@0 3984 /* follows chapter 5.6.4 in [Vapnik/95] */
wolffd@0 3985
wolffd@0 3986 if(w<0) {
wolffd@0 3987 w=model_length_s(model,kernel_parm);
wolffd@0 3988 }
wolffd@0 3989 if(R<0) {
wolffd@0 3990 R=estimate_sphere(model,kernel_parm);
wolffd@0 3991 }
wolffd@0 3992 h = w*w * R*R +1;
wolffd@0 3993 return(h);
wolffd@0 3994 }
wolffd@0 3995
wolffd@0 3996 double estimate_sphere(MODEL *model, KERNEL_PARM *kernel_parm)
wolffd@0 3997 /* Approximates the radius of the ball containing */
wolffd@0 3998 /* the support vectors by bounding it with the */
wolffd@0 3999 { /* length of the longest support vector. This is */
wolffd@0 4000 register long j; /* pretty good for text categorization, since all */
wolffd@0 4001 double xlen,maxxlen=0; /* documents have feature vectors of length 1. It */
wolffd@0 4002 DOC *nulldoc; /* assumes that the center of the ball is at the */
wolffd@0 4003 WORD nullword; /* origin of the space. */
wolffd@0 4004
wolffd@0 4005 nullword.wnum=0;
wolffd@0 4006 nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
wolffd@0 4007
wolffd@0 4008 for(j=1;j<model->sv_num;j++) {
wolffd@0 4009 xlen=sqrt(kernel(kernel_parm,model->supvec[j],model->supvec[j])
wolffd@0 4010 -2*kernel(kernel_parm,model->supvec[j],nulldoc)
wolffd@0 4011 +kernel(kernel_parm,nulldoc,nulldoc));
wolffd@0 4012 if(xlen>maxxlen) {
wolffd@0 4013 maxxlen=xlen;
wolffd@0 4014 }
wolffd@0 4015 }
wolffd@0 4016
wolffd@0 4017 free_example(nulldoc,1);
wolffd@0 4018 return(maxxlen);
wolffd@0 4019 }
wolffd@0 4020
wolffd@0 4021 double estimate_r_delta(DOC **docs, long int totdoc, KERNEL_PARM *kernel_parm)
wolffd@0 4022 {
wolffd@0 4023 long i;
wolffd@0 4024 double maxxlen,xlen;
wolffd@0 4025 DOC *nulldoc; /* assumes that the center of the ball is at the */
wolffd@0 4026 WORD nullword; /* origin of the space. */
wolffd@0 4027
wolffd@0 4028 nullword.wnum=0;
wolffd@0 4029 nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
wolffd@0 4030
wolffd@0 4031 maxxlen=0;
wolffd@0 4032 for(i=0;i<totdoc;i++) {
wolffd@0 4033 xlen=sqrt(kernel(kernel_parm,docs[i],docs[i])
wolffd@0 4034 -2*kernel(kernel_parm,docs[i],nulldoc)
wolffd@0 4035 +kernel(kernel_parm,nulldoc,nulldoc));
wolffd@0 4036 if(xlen>maxxlen) {
wolffd@0 4037 maxxlen=xlen;
wolffd@0 4038 }
wolffd@0 4039 }
wolffd@0 4040
wolffd@0 4041 free_example(nulldoc,1);
wolffd@0 4042 return(maxxlen);
wolffd@0 4043 }
wolffd@0 4044
wolffd@0 4045 double estimate_r_delta_average(DOC **docs, long int totdoc,
wolffd@0 4046 KERNEL_PARM *kernel_parm)
wolffd@0 4047 {
wolffd@0 4048 long i;
wolffd@0 4049 double avgxlen;
wolffd@0 4050 DOC *nulldoc; /* assumes that the center of the ball is at the */
wolffd@0 4051 WORD nullword; /* origin of the space. */
wolffd@0 4052
wolffd@0 4053 nullword.wnum=0;
wolffd@0 4054 nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
wolffd@0 4055
wolffd@0 4056 avgxlen=0;
wolffd@0 4057 for(i=0;i<totdoc;i++) {
wolffd@0 4058 avgxlen+=sqrt(kernel(kernel_parm,docs[i],docs[i])
wolffd@0 4059 -2*kernel(kernel_parm,docs[i],nulldoc)
wolffd@0 4060 +kernel(kernel_parm,nulldoc,nulldoc));
wolffd@0 4061 }
wolffd@0 4062
wolffd@0 4063 free_example(nulldoc,1);
wolffd@0 4064 return(avgxlen/totdoc);
wolffd@0 4065 }
wolffd@0 4066
wolffd@0 4067 double length_of_longest_document_vector(DOC **docs, long int totdoc,
wolffd@0 4068 KERNEL_PARM *kernel_parm)
wolffd@0 4069 {
wolffd@0 4070 long i;
wolffd@0 4071 double maxxlen,xlen;
wolffd@0 4072
wolffd@0 4073 maxxlen=0;
wolffd@0 4074 for(i=0;i<totdoc;i++) {
wolffd@0 4075 xlen=sqrt(kernel(kernel_parm,docs[i],docs[i]));
wolffd@0 4076 if(xlen>maxxlen) {
wolffd@0 4077 maxxlen=xlen;
wolffd@0 4078 }
wolffd@0 4079 }
wolffd@0 4080
wolffd@0 4081 return(maxxlen);
wolffd@0 4082 }
wolffd@0 4083
wolffd@0 4084 /****************************** IO-handling **********************************/
wolffd@0 4085
wolffd@0 4086 void write_prediction(char *predfile, MODEL *model, double *lin,
wolffd@0 4087 double *a, long int *unlabeled,
wolffd@0 4088 long int *label, long int totdoc,
wolffd@0 4089 LEARN_PARM *learn_parm)
wolffd@0 4090 {
wolffd@0 4091 FILE *predfl;
wolffd@0 4092 long i;
wolffd@0 4093 double dist,a_max;
wolffd@0 4094
wolffd@0 4095 if(verbosity>=1) {
wolffd@0 4096 printf("Writing prediction file..."); fflush(stdout);
wolffd@0 4097 }
wolffd@0 4098 if ((predfl = fopen (predfile, "w")) == NULL)
wolffd@0 4099 { perror (predfile); exit (1); }
wolffd@0 4100 a_max=learn_parm->epsilon_a;
wolffd@0 4101 for(i=0;i<totdoc;i++) {
wolffd@0 4102 if((unlabeled[i]) && (a[i]>a_max)) {
wolffd@0 4103 a_max=a[i];
wolffd@0 4104 }
wolffd@0 4105 }
wolffd@0 4106 for(i=0;i<totdoc;i++) {
wolffd@0 4107 if(unlabeled[i]) {
wolffd@0 4108 if((a[i]>(learn_parm->epsilon_a))) {
wolffd@0 4109 dist=(double)label[i]*(1.0-learn_parm->epsilon_crit-a[i]/(a_max*2.0));
wolffd@0 4110 }
wolffd@0 4111 else {
wolffd@0 4112 dist=(lin[i]-model->b);
wolffd@0 4113 }
wolffd@0 4114 if(dist>0) {
wolffd@0 4115 fprintf(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist);
wolffd@0 4116 }
wolffd@0 4117 else {
wolffd@0 4118 fprintf(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist);
wolffd@0 4119 }
wolffd@0 4120 }
wolffd@0 4121 }
wolffd@0 4122 fclose(predfl);
wolffd@0 4123 if(verbosity>=1) {
wolffd@0 4124 printf("done\n");
wolffd@0 4125 }
wolffd@0 4126 }
wolffd@0 4127
wolffd@0 4128 void write_alphas(char *alphafile, double *a,
wolffd@0 4129 long int *label, long int totdoc)
wolffd@0 4130 {
wolffd@0 4131 FILE *alphafl;
wolffd@0 4132 long i;
wolffd@0 4133
wolffd@0 4134 if(verbosity>=1) {
wolffd@0 4135 printf("Writing alpha file..."); fflush(stdout);
wolffd@0 4136 }
wolffd@0 4137 if ((alphafl = fopen (alphafile, "w")) == NULL)
wolffd@0 4138 { perror (alphafile); exit (1); }
wolffd@0 4139 for(i=0;i<totdoc;i++) {
wolffd@0 4140 fprintf(alphafl,"%.18g\n",a[i]*(double)label[i]);
wolffd@0 4141 }
wolffd@0 4142 fclose(alphafl);
wolffd@0 4143 if(verbosity>=1) {
wolffd@0 4144 printf("done\n");
wolffd@0 4145 }
wolffd@0 4146 }
wolffd@0 4147