| wolffd@0 | 1 /***********************************************************************/ | 
| wolffd@0 | 2 /*                                                                     */ | 
| wolffd@0 | 3 /*   svm_learn.c                                                       */ | 
| wolffd@0 | 4 /*                                                                     */ | 
| wolffd@0 | 5 /*   Learning module of Support Vector Machine.                        */ | 
| wolffd@0 | 6 /*                                                                     */ | 
| wolffd@0 | 7 /*   Author: Thorsten Joachims                                         */ | 
| wolffd@0 | 8 /*   Date: 02.07.02                                                    */ | 
| wolffd@0 | 9 /*                                                                     */ | 
| wolffd@0 | 10 /*   Copyright (c) 2002  Thorsten Joachims - All rights reserved       */ | 
| wolffd@0 | 11 /*                                                                     */ | 
| wolffd@0 | 12 /*   This software is available for non-commercial use only. It must   */ | 
| wolffd@0 | 13 /*   not be modified and distributed without prior permission of the   */ | 
| wolffd@0 | 14 /*   author. The author is not responsible for implications from the   */ | 
| wolffd@0 | 15 /*   use of this software.                                             */ | 
| wolffd@0 | 16 /*                                                                     */ | 
| wolffd@0 | 17 /***********************************************************************/ | 
| wolffd@0 | 18 | 
| wolffd@0 | 19 | 
| wolffd@0 | 20 # include "svm_common.h" | 
| wolffd@0 | 21 # include "svm_learn.h" | 
| wolffd@0 | 22 | 
| wolffd@0 | 23 | 
/* Interface to the external QP-solver: solves the quadratic subproblem
   described by QP and returns the optimal alpha values for the working
   set.  NOTE(review): the solver implementation lives in a separate
   translation unit not visible here — confirm which solver the build
   links in before relying on its numerical properties. */
double *optimize_qp(QP *, double *, long, double *, LEARN_PARM *);
| wolffd@0 | 26 | 
| wolffd@0 | 27 /*---------------------------------------------------------------------------*/ | 
| wolffd@0 | 28 | 
| wolffd@0 | 29 /* Learns an SVM classification model based on the training data in | 
| wolffd@0 | 30    docs/label. The resulting model is returned in the structure | 
| wolffd@0 | 31    model. */ | 
| wolffd@0 | 32 | 
void svm_learn_classification(DOC **docs, double *class, long int
			      totdoc, long int totwords,
			      LEARN_PARM *learn_parm,
			      KERNEL_PARM *kernel_parm,
			      KERNEL_CACHE *kernel_cache,
			      MODEL *model,
			      double *alpha)
     /* docs:        Training vectors (x-part) */
     /* class:       Training labels (y-part, zero if test example for
                     transduction) */
     /* totdoc:      Number of examples in docs/label */
     /* totwords:    Number of features (i.e. highest feature index) */
     /* learn_parm:  Learning parameters */
     /* kernel_parm: Kernel parameters */
     /* kernel_cache:Initialized Cache of size totdoc, if using a kernel.
                     NULL if linear.*/
     /* model:       Returns learning result (assumed empty before called) */
     /* alpha:       Start values for the alpha variables or NULL
	             pointer. The new alpha values are returned after
		     optimization if not NULL. Array must be of size totdoc. */
{
  long *inconsistent,i,*label;
  long inconsistentnum;
  long misclassified,upsupvecnum;
  double loss,model_length,example_length;
  double maxdiff,*lin,*a,*c;
  long runtime_start,runtime_end;
  long iterations;
  long *unlabeled,transduction;
  long heldout;
  long loo_count=0,loo_count_pos=0,loo_count_neg=0,trainpos=0,trainneg=0;
  long loocomputed=0,runtime_start_loo=0,runtime_start_xa=0;
  double heldout_c=0,r_delta_sq=0,r_delta,r_delta_avg;
  long *index,*index2dnum;
  double *weights;
  CFLOAT *aicache;  /* buffer to keep one row of hessian */

  double *xi_fullset; /* buffer for storing xi on full sample in loo */
  double *a_fullset;  /* buffer for storing alpha on full sample in loo */
  TIMING timing_profile;
  SHRINK_STATE shrink_state;

  /* Reset all timing counters and kernel statistics before training.
     NOTE(review): the runtime values are later divided by 100.0 when
     printed as cpu-seconds, so get_runtime() presumably returns
     centiseconds — confirm against svm_common. */
  runtime_start=get_runtime();
  timing_profile.time_kernel=0;
  timing_profile.time_opti=0;
  timing_profile.time_shrink=0;
  timing_profile.time_update=0;
  timing_profile.time_model=0;
  timing_profile.time_check=0;
  timing_profile.time_select=0;
  kernel_cache_statistic=0;

  learn_parm->totwords=totwords;

  /* make sure -n value is reasonable */
  if((learn_parm->svm_newvarsinqp < 2)
     || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
    learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
  }

  init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);

  /* Working buffers, one slot per training example; the model arrays
     get totdoc+2 slots because element 0 is reserved (see below) and
     the optimizer may append one extra entry. */
  label = (long *)my_malloc(sizeof(long)*totdoc);
  inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
  unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
  c = (double *)my_malloc(sizeof(double)*totdoc);
  a = (double *)my_malloc(sizeof(double)*totdoc);
  a_fullset = (double *)my_malloc(sizeof(double)*totdoc);
  xi_fullset = (double *)my_malloc(sizeof(double)*totdoc);
  lin = (double *)my_malloc(sizeof(double)*totdoc);
  learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
  model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
  model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));

  /* Initialize the (assumed empty) model; -1 marks the loo/xa quality
     estimates as "not computed". */
  model->at_upper_bound=0;
  model->b=0;
  model->supvec[0]=0;  /* element 0 reserved and empty for now */
  model->alpha[0]=0;
  model->lin_weights=NULL;
  model->totwords=totwords;
  model->totdoc=totdoc;
  model->kernel_parm=(*kernel_parm);
  model->sv_num=1;
  model->loo_error=-1;
  model->loo_recall=-1;
  model->loo_precision=-1;
  model->xa_error=-1;
  model->xa_recall=-1;
  model->xa_precision=-1;
  inconsistentnum=0;
  transduction=0;

  /* r_delta estimates the radius of the smallest sphere enclosing the
     examples; its square is used below by the leave-one-out bound. */
  r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
  r_delta_sq=r_delta*r_delta;

  r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
  if(learn_parm->svm_c == 0.0) {  /* default value for C */
    learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
    if(verbosity>=1)
      printf("Setting default regularization parameter C=%.4f\n",
	     learn_parm->svm_c);
  }

  learn_parm->eps=-1.0;      /* equivalent regression epsilon for
				classification */

  /* Per-example setup: label in {-1,0,+1}, per-example cost bound
     (C * costratio * costfactor for positives, C * costfactor for
     negatives, 0 for unlabeled), and detection of transduction mode
     (any example with class 0 switches it on). */
  for(i=0;i<totdoc;i++) {    /* various inits */
    docs[i]->docnum=i;
    inconsistent[i]=0;
    a[i]=0;
    lin[i]=0;
    c[i]=0.0;
    unlabeled[i]=0;
    if(class[i] == 0) {
      unlabeled[i]=1;
      label[i]=0;
      transduction=1;
    }
    if(class[i] > 0) {
      learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
	docs[i]->costfactor;
      label[i]=1;
      trainpos++;
    }
    else if(class[i] < 0) {
      learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor;
      label[i]=-1;
      trainneg++;
    }
    else {
      learn_parm->svm_cost[i]=0;
    }
  }
  if(verbosity>=2) {
    printf("%ld positive, %ld negative, and %ld unlabeled examples.\n",trainpos,trainneg,totdoc-trainpos-trainneg); fflush(stdout);
  }

  /* caching makes no sense for linear kernel */
  if(kernel_parm->kernel_type == LINEAR) {
    kernel_cache = NULL;
  }

  /* compute starting state for initial alpha values */
  if(alpha) {
    if(verbosity>=1) {
      printf("Computing starting state..."); fflush(stdout);
    }
    index = (long *)my_malloc(sizeof(long)*totdoc);
    index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
    weights=(double *)my_malloc(sizeof(double)*(totwords+1));
    aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
    for(i=0;i<totdoc;i++) {    /* create full index and clip alphas */
      index[i]=1;
      alpha[i]=fabs(alpha[i]);
      /* NOTE(review): the <0 guard below is dead code — fabs() above
	 already guarantees a non-negative value. Kept for fidelity. */
      if(alpha[i]<0) alpha[i]=0;
      if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
    }
    if(kernel_parm->kernel_type != LINEAR) {
      /* Warm the cache with unbounded SVs first (they are touched most
	 often by the optimizer), then bounded SVs, until space runs out. */
      for(i=0;i<totdoc;i++)     /* fill kernel cache with unbounded SV */
	if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
	   && (kernel_cache_space_available(kernel_cache)))
	  cache_kernel_row(kernel_cache,docs,i,kernel_parm);
      for(i=0;i<totdoc;i++)     /* fill rest of kernel cache with bounded SV */
	if((alpha[i]==learn_parm->svm_cost[i])
	   && (kernel_cache_space_available(kernel_cache)))
	  cache_kernel_row(kernel_cache,docs,i,kernel_parm);
    }
    /* Bring lin[] and the model in sync with the supplied alphas so the
       optimizer starts from a consistent state. */
    (void)compute_index(index,totdoc,index2dnum);
    update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
			    totwords,kernel_parm,kernel_cache,lin,aicache,
			    weights);
    (void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c,
			      learn_parm,index2dnum,index2dnum,model);
    for(i=0;i<totdoc;i++) {    /* copy initial alphas */
      a[i]=alpha[i];
    }
    free(index);
    free(index2dnum);
    free(weights);
    free(aicache);
    if(verbosity>=1) {
      printf("done.\n");  fflush(stdout);
    }
  }

  /* Transduction is incompatible with shrinking and with leave-one-out;
     disable both rather than produce wrong results. */
  if(transduction) {
    learn_parm->svm_iter_to_shrink=99999999;
    if(verbosity >= 1)
      printf("\nDeactivating Shrinking due to an incompatibility with the transductive \nlearner in the current version.\n\n");
  }

  if(transduction && learn_parm->compute_loo) {
    learn_parm->compute_loo=0;
    if(verbosity >= 1)
      printf("\nCannot compute leave-one-out estimates for transductive learner.\n\n");
  }

  if(learn_parm->remove_inconsistent && learn_parm->compute_loo) {
    learn_parm->compute_loo=0;
    printf("\nCannot compute leave-one-out estimates when removing inconsistent examples.\n\n");
  }

  if(learn_parm->compute_loo && ((trainpos == 1) || (trainneg == 1))) {
    learn_parm->compute_loo=0;
    printf("\nCannot compute leave-one-out with only one example in one class.\n\n");
  }


  if(verbosity==1) {
    printf("Optimizing"); fflush(stdout);
  }

  /* train the svm */
  iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
				     kernel_parm,kernel_cache,&shrink_state,model,
				     inconsistent,unlabeled,a,lin,
				     c,&timing_profile,
				     &maxdiff,(long)-1,
				     (long)1);

  if(verbosity>=1) {
    if(verbosity==1) printf("done. (%ld iterations)\n",iterations);

    /* Training-set error: an example is misclassified when its margin
       (lin[i]-b)*y_i is not strictly positive. */
    misclassified=0;
    for(i=0;(i<totdoc);i++) { /* get final statistic */
      if((lin[i]-model->b)*(double)label[i] <= 0.0)
	misclassified++;
    }

    printf("Optimization finished (%ld misclassified, maxdiff=%.5f).\n",
	   misclassified,maxdiff);

    runtime_end=get_runtime();
    if(verbosity>=2) {
      printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
        ((float)runtime_end-(float)runtime_start)/100.0,
        (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),
	(100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),
	(100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));
    }
    else {
      printf("Runtime in cpu-seconds: %.2f\n",
	     (runtime_end-runtime_start)/100.0);
    }

    if(learn_parm->remove_inconsistent) {
      inconsistentnum=0;
      for(i=0;i<totdoc;i++)
	if(inconsistent[i])
	  inconsistentnum++;
      printf("Number of SV: %ld (plus %ld inconsistent examples)\n",
	     model->sv_num-1,inconsistentnum);
    }
    else {
      /* Count SVs sitting at the upper bound C_i (within epsilon_a);
	 sv_num-1 because model element 0 is the reserved empty slot. */
      upsupvecnum=0;
      for(i=1;i<model->sv_num;i++) {
	if(fabs(model->alpha[i]) >=
	   (learn_parm->svm_cost[(model->supvec[i])->docnum]-
	    learn_parm->epsilon_a))
	  upsupvecnum++;
      }
      printf("Number of SV: %ld (including %ld at upper bound)\n",
	     model->sv_num-1,upsupvecnum);
    }

    if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
      /* L1 hinge loss and |w| on the training set. NOTE(review):
	 model_length accumulates sum_i a_i*y_i*lin[i], presumably equal
	 to w'w when lin[i] is the kernel expansion at example i —
	 confirm against optimize_to_convergence's maintenance of lin. */
      loss=0;
      model_length=0;
      for(i=0;i<totdoc;i++) {
	if((lin[i]-model->b)*(double)label[i] < 1.0-learn_parm->epsilon_crit)
	  loss+=1.0-(lin[i]-model->b)*(double)label[i];
	model_length+=a[i]*label[i]*lin[i];
      }
      model_length=sqrt(model_length);
      fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
      fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
      example_length=estimate_sphere(model,kernel_parm);
      fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
	      length_of_longest_document_vector(docs,totdoc,kernel_parm));
      fprintf(stdout,"Estimated VCdim of classifier: VCdim<=%.5f\n",
	      estimate_margin_vcdim(model,model_length,example_length,
				    kernel_parm));
      if((!learn_parm->remove_inconsistent) && (!transduction)) {
	runtime_start_xa=get_runtime();
	if(verbosity>=1) {
	  printf("Computing XiAlpha-estimates..."); fflush(stdout);
	}
	compute_xa_estimates(model,label,unlabeled,totdoc,docs,lin,a,
			     kernel_parm,learn_parm,&(model->xa_error),
			     &(model->xa_recall),&(model->xa_precision));
	if(verbosity>=1) {
	  printf("done\n");
	}
	printf("Runtime for XiAlpha-estimates in cpu-seconds: %.2f\n",
	       (get_runtime()-runtime_start_xa)/100.0);

	fprintf(stdout,"XiAlpha-estimate of the error: error<=%.2f%% (rho=%.2f,depth=%ld)\n",
		model->xa_error,learn_parm->rho,learn_parm->xa_depth);
	fprintf(stdout,"XiAlpha-estimate of the recall: recall=>%.2f%% (rho=%.2f,depth=%ld)\n",
		model->xa_recall,learn_parm->rho,learn_parm->xa_depth);
	fprintf(stdout,"XiAlpha-estimate of the precision: precision=>%.2f%% (rho=%.2f,depth=%ld)\n",
		model->xa_precision,learn_parm->rho,learn_parm->xa_depth);
      }
      else if(!learn_parm->remove_inconsistent) {
	estimate_transduction_quality(model,label,unlabeled,totdoc,docs,lin);
      }
    }
    if(verbosity>=1) {
      printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
    }
  }


  /* leave-one-out testing starts now */
  if(learn_parm->compute_loo) {
    /* save results of training on full dataset for leave-one-out */
    runtime_start_loo=get_runtime();
    for(i=0;i<totdoc;i++) {
      /* slack xi_i = max(0, 1 - y_i*(lin_i - b)) on the full model */
      xi_fullset[i]=1.0-((lin[i]-model->b)*(double)label[i]);
      if(xi_fullset[i]<0) xi_fullset[i]=0;
      a_fullset[i]=a[i];
    }
    if(verbosity>=1) {
      printf("Computing leave-one-out");
    }

    /* Repeat this loop for every held-out example. Two screening rules
       avoid retraining: rho*a_i*R^2 + xi_i < 1 guarantees no loo-error,
       and xi_i > 1 guarantees a loo-error; only the undecided middle
       band triggers an actual retrain with the example's cost set to 0. */
    for(heldout=0;(heldout<totdoc);heldout++) {
      if(learn_parm->rho*a_fullset[heldout]*r_delta_sq+xi_fullset[heldout]
	 < 1.0) {
	/* guaranteed to not produce a leave-one-out error */
	if(verbosity==1) {
	  printf("+"); fflush(stdout);
	}
      }
      else if(xi_fullset[heldout] > 1.0) {
	/* guaranteed to produce a leave-one-out error */
	loo_count++;
	if(label[heldout] > 0)  loo_count_pos++; else loo_count_neg++;
	if(verbosity==1) {
	  printf("-"); fflush(stdout);
	}
      }
      else {
	loocomputed++;
	heldout_c=learn_parm->svm_cost[heldout]; /* set upper bound to zero */
	learn_parm->svm_cost[heldout]=0;
	/* make sure heldout example is not currently  */
	/* shrunk away. Assumes that lin is up to date! */
	shrink_state.active[heldout]=1;
	if(verbosity>=2)
	  printf("\nLeave-One-Out test on example %ld\n",heldout);
	if(verbosity>=1) {
	  printf("(?[%ld]",heldout); fflush(stdout);
	}

	optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
				kernel_parm,
				kernel_cache,&shrink_state,model,inconsistent,unlabeled,
				a,lin,c,&timing_profile,
				&maxdiff,heldout,(long)2);

	/* printf("%.20f\n",(lin[heldout]-model->b)*(double)label[heldout]); */

	if(((lin[heldout]-model->b)*(double)label[heldout]) <= 0.0) {
	  loo_count++;                            /* there was a loo-error */
	  if(label[heldout] > 0)  loo_count_pos++; else loo_count_neg++;
	  if(verbosity>=1) {
	    printf("-)"); fflush(stdout);
	  }
	}
	else {
	  if(verbosity>=1) {
	    printf("+)"); fflush(stdout);
	  }
	}
	/* now we need to restore the original data set*/
	learn_parm->svm_cost[heldout]=heldout_c; /* restore upper bound */
      }
    } /* end of leave-one-out loop */


    /* Retrain on the full problem so the returned model reflects all
       examples again (the loo loop left the model in a perturbed state). */
    if(verbosity>=1) {
      printf("\nRetrain on full problem"); fflush(stdout);
    }
    optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
			    kernel_parm,
			    kernel_cache,&shrink_state,model,inconsistent,unlabeled,
			    a,lin,c,&timing_profile,
			    &maxdiff,(long)-1,(long)1);
    if(verbosity >= 1)
      printf("done.\n");


    /* after all leave-one-out computed */
    model->loo_error=100.0*loo_count/(double)totdoc;
    model->loo_recall=(1.0-(double)loo_count_pos/(double)trainpos)*100.0;
    model->loo_precision=(trainpos-loo_count_pos)/
      (double)(trainpos-loo_count_pos+loo_count_neg)*100.0;
    if(verbosity >= 1) {
      fprintf(stdout,"Leave-one-out estimate of the error: error=%.2f%%\n",
	      model->loo_error);
      fprintf(stdout,"Leave-one-out estimate of the recall: recall=%.2f%%\n",
	      model->loo_recall);
      fprintf(stdout,"Leave-one-out estimate of the precision: precision=%.2f%%\n",
	      model->loo_precision);
      fprintf(stdout,"Actual leave-one-outs computed:  %ld (rho=%.2f)\n",
	      loocomputed,learn_parm->rho);
      printf("Runtime for leave-one-out in cpu-seconds: %.2f\n",
	     (double)(get_runtime()-runtime_start_loo)/100.0);
    }
  }

  if(learn_parm->alphafile[0])
    write_alphas(learn_parm->alphafile,a,label,totdoc);

  /* Free all working buffers. The model->supvec/alpha/index arrays are
     deliberately NOT freed here — they are the returned result and are
     owned by the caller via the model structure. */
  shrink_state_cleanup(&shrink_state);
  free(label);
  free(inconsistent);
  free(unlabeled);
  free(c);
  free(a);
  free(a_fullset);
  free(xi_fullset);
  free(lin);
  free(learn_parm->svm_cost);
}
| wolffd@0 | 465 | 
| wolffd@0 | 466 | 
| wolffd@0 | 467 /* Learns an SVM regression model based on the training data in | 
| wolffd@0 | 468    docs/label. The resulting model is returned in the structure | 
| wolffd@0 | 469    model. */ | 
| wolffd@0 | 470 | 
| wolffd@0 | 471 void svm_learn_regression(DOC **docs, double *value, long int totdoc, | 
| wolffd@0 | 472 			  long int totwords, LEARN_PARM *learn_parm, | 
| wolffd@0 | 473 			  KERNEL_PARM *kernel_parm, | 
| wolffd@0 | 474 			  KERNEL_CACHE **kernel_cache, MODEL *model) | 
| wolffd@0 | 475      /* docs:        Training vectors (x-part) */ | 
| wolffd@0 | 476      /* class:       Training value (y-part) */ | 
| wolffd@0 | 477      /* totdoc:      Number of examples in docs/label */ | 
| wolffd@0 | 478      /* totwords:    Number of features (i.e. highest feature index) */ | 
| wolffd@0 | 479      /* learn_parm:  Learning paramenters */ | 
| wolffd@0 | 480      /* kernel_parm: Kernel paramenters */ | 
| wolffd@0 | 481      /* kernel_cache:Initialized Cache, if using a kernel. NULL if | 
| wolffd@0 | 482                      linear. Note that it will be free'd and reassigned */ | 
| wolffd@0 | 483      /* model:       Returns learning result (assumed empty before called) */ | 
| wolffd@0 | 484 { | 
| wolffd@0 | 485   long *inconsistent,i,j; | 
| wolffd@0 | 486   long inconsistentnum; | 
| wolffd@0 | 487   long upsupvecnum; | 
| wolffd@0 | 488   double loss,model_length,example_length; | 
| wolffd@0 | 489   double maxdiff,*lin,*a,*c; | 
| wolffd@0 | 490   long runtime_start,runtime_end; | 
| wolffd@0 | 491   long iterations,kernel_cache_size; | 
| wolffd@0 | 492   long *unlabeled; | 
| wolffd@0 | 493   double r_delta_sq=0,r_delta,r_delta_avg; | 
| wolffd@0 | 494   double *xi_fullset; /* buffer for storing xi on full sample in loo */ | 
| wolffd@0 | 495   double *a_fullset;  /* buffer for storing alpha on full sample in loo */ | 
| wolffd@0 | 496   TIMING timing_profile; | 
| wolffd@0 | 497   SHRINK_STATE shrink_state; | 
| wolffd@0 | 498   DOC **docs_org; | 
| wolffd@0 | 499   long *label; | 
| wolffd@0 | 500 | 
| wolffd@0 | 501   /* set up regression problem in standard form */ | 
| wolffd@0 | 502   docs_org=docs; | 
| wolffd@0 | 503   docs = (DOC **)my_malloc(sizeof(DOC)*2*totdoc); | 
| wolffd@0 | 504   label = (long *)my_malloc(sizeof(long)*2*totdoc); | 
| wolffd@0 | 505   c = (double *)my_malloc(sizeof(double)*2*totdoc); | 
| wolffd@0 | 506   for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 507     j=2*totdoc-1-i; | 
| wolffd@0 | 508     docs[i]=create_example(i,0,0,docs_org[i]->costfactor,docs_org[i]->fvec); | 
| wolffd@0 | 509     label[i]=+1; | 
| wolffd@0 | 510     c[i]=value[i]; | 
| wolffd@0 | 511     docs[j]=create_example(j,0,0,docs_org[i]->costfactor,docs_org[i]->fvec); | 
| wolffd@0 | 512     label[j]=-1; | 
| wolffd@0 | 513     c[j]=value[i]; | 
| wolffd@0 | 514   } | 
| wolffd@0 | 515   totdoc*=2; | 
| wolffd@0 | 516 | 
| wolffd@0 | 517   /* need to get a bigger kernel cache */ | 
| wolffd@0 | 518   if(*kernel_cache) { | 
| wolffd@0 | 519     kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024); | 
| wolffd@0 | 520     kernel_cache_cleanup(*kernel_cache); | 
| wolffd@0 | 521     (*kernel_cache)=kernel_cache_init(totdoc,kernel_cache_size); | 
| wolffd@0 | 522   } | 
| wolffd@0 | 523 | 
| wolffd@0 | 524   runtime_start=get_runtime(); | 
| wolffd@0 | 525   timing_profile.time_kernel=0; | 
| wolffd@0 | 526   timing_profile.time_opti=0; | 
| wolffd@0 | 527   timing_profile.time_shrink=0; | 
| wolffd@0 | 528   timing_profile.time_update=0; | 
| wolffd@0 | 529   timing_profile.time_model=0; | 
| wolffd@0 | 530   timing_profile.time_check=0; | 
| wolffd@0 | 531   timing_profile.time_select=0; | 
| wolffd@0 | 532   kernel_cache_statistic=0; | 
| wolffd@0 | 533 | 
| wolffd@0 | 534   learn_parm->totwords=totwords; | 
| wolffd@0 | 535 | 
| wolffd@0 | 536   /* make sure -n value is reasonable */ | 
| wolffd@0 | 537   if((learn_parm->svm_newvarsinqp < 2) | 
| wolffd@0 | 538      || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) { | 
| wolffd@0 | 539     learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; | 
| wolffd@0 | 540   } | 
| wolffd@0 | 541 | 
| wolffd@0 | 542   init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK); | 
| wolffd@0 | 543 | 
| wolffd@0 | 544   inconsistent = (long *)my_malloc(sizeof(long)*totdoc); | 
| wolffd@0 | 545   unlabeled = (long *)my_malloc(sizeof(long)*totdoc); | 
| wolffd@0 | 546   a = (double *)my_malloc(sizeof(double)*totdoc); | 
| wolffd@0 | 547   a_fullset = (double *)my_malloc(sizeof(double)*totdoc); | 
| wolffd@0 | 548   xi_fullset = (double *)my_malloc(sizeof(double)*totdoc); | 
| wolffd@0 | 549   lin = (double *)my_malloc(sizeof(double)*totdoc); | 
| wolffd@0 | 550   learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc); | 
| wolffd@0 | 551   model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2)); | 
| wolffd@0 | 552   model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2)); | 
| wolffd@0 | 553   model->index = (long *)my_malloc(sizeof(long)*(totdoc+2)); | 
| wolffd@0 | 554 | 
| wolffd@0 | 555   model->at_upper_bound=0; | 
| wolffd@0 | 556   model->b=0; | 
| wolffd@0 | 557   model->supvec[0]=0;  /* element 0 reserved and empty for now */ | 
| wolffd@0 | 558   model->alpha[0]=0; | 
| wolffd@0 | 559   model->lin_weights=NULL; | 
| wolffd@0 | 560   model->totwords=totwords; | 
| wolffd@0 | 561   model->totdoc=totdoc; | 
| wolffd@0 | 562   model->kernel_parm=(*kernel_parm); | 
| wolffd@0 | 563   model->sv_num=1; | 
| wolffd@0 | 564   model->loo_error=-1; | 
| wolffd@0 | 565   model->loo_recall=-1; | 
| wolffd@0 | 566   model->loo_precision=-1; | 
| wolffd@0 | 567   model->xa_error=-1; | 
| wolffd@0 | 568   model->xa_recall=-1; | 
| wolffd@0 | 569   model->xa_precision=-1; | 
| wolffd@0 | 570   inconsistentnum=0; | 
| wolffd@0 | 571 | 
| wolffd@0 | 572   r_delta=estimate_r_delta(docs,totdoc,kernel_parm); | 
| wolffd@0 | 573   r_delta_sq=r_delta*r_delta; | 
| wolffd@0 | 574 | 
| wolffd@0 | 575   r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm); | 
| wolffd@0 | 576   if(learn_parm->svm_c == 0.0) {  /* default value for C */ | 
| wolffd@0 | 577     learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg); | 
| wolffd@0 | 578     if(verbosity>=1) | 
| wolffd@0 | 579       printf("Setting default regularization parameter C=%.4f\n", | 
| wolffd@0 | 580 	     learn_parm->svm_c); | 
| wolffd@0 | 581   } | 
| wolffd@0 | 582 | 
| wolffd@0 | 583   for(i=0;i<totdoc;i++) {    /* various inits */ | 
| wolffd@0 | 584     inconsistent[i]=0; | 
| wolffd@0 | 585     a[i]=0; | 
| wolffd@0 | 586     lin[i]=0; | 
| wolffd@0 | 587     unlabeled[i]=0; | 
| wolffd@0 | 588     if(label[i] > 0) { | 
| wolffd@0 | 589       learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio* | 
| wolffd@0 | 590 	docs[i]->costfactor; | 
| wolffd@0 | 591     } | 
| wolffd@0 | 592     else if(label[i] < 0) { | 
| wolffd@0 | 593       learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor; | 
| wolffd@0 | 594     } | 
| wolffd@0 | 595   } | 
| wolffd@0 | 596 | 
| wolffd@0 | 597   /* caching makes no sense for linear kernel */ | 
| wolffd@0 | 598   if((kernel_parm->kernel_type == LINEAR) && (*kernel_cache)) { | 
| wolffd@0 | 599     printf("WARNING: Using a kernel cache for linear case will slow optimization down!\n"); | 
| wolffd@0 | 600   } | 
| wolffd@0 | 601 | 
| wolffd@0 | 602   if(verbosity==1) { | 
| wolffd@0 | 603     printf("Optimizing"); fflush(stdout); | 
| wolffd@0 | 604   } | 
| wolffd@0 | 605 | 
| wolffd@0 | 606   /* train the svm */ | 
| wolffd@0 | 607   iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm, | 
| wolffd@0 | 608 				     kernel_parm,*kernel_cache,&shrink_state, | 
| wolffd@0 | 609 				     model,inconsistent,unlabeled,a,lin,c, | 
| wolffd@0 | 610 				     &timing_profile,&maxdiff,(long)-1, | 
| wolffd@0 | 611 				     (long)1); | 
| wolffd@0 | 612 | 
| wolffd@0 | 613   if(verbosity>=1) { | 
| wolffd@0 | 614     if(verbosity==1) printf("done. (%ld iterations)\n",iterations); | 
| wolffd@0 | 615 | 
| wolffd@0 | 616     printf("Optimization finished (maxdiff=%.5f).\n",maxdiff); | 
| wolffd@0 | 617 | 
| wolffd@0 | 618     runtime_end=get_runtime(); | 
| wolffd@0 | 619     if(verbosity>=2) { | 
| wolffd@0 | 620       printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n", | 
| wolffd@0 | 621         ((float)runtime_end-(float)runtime_start)/100.0, | 
| wolffd@0 | 622         (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start), | 
| wolffd@0 | 623 	(100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start), | 
| wolffd@0 | 624 	(100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start), | 
| wolffd@0 | 625         (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start), | 
| wolffd@0 | 626         (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start), | 
| wolffd@0 | 627         (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start), | 
| wolffd@0 | 628         (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start)); | 
| wolffd@0 | 629     } | 
| wolffd@0 | 630     else { | 
| wolffd@0 | 631       printf("Runtime in cpu-seconds: %.2f\n", | 
| wolffd@0 | 632 	     (runtime_end-runtime_start)/100.0); | 
| wolffd@0 | 633     } | 
| wolffd@0 | 634 | 
| wolffd@0 | 635     if(learn_parm->remove_inconsistent) { | 
| wolffd@0 | 636       inconsistentnum=0; | 
| wolffd@0 | 637       for(i=0;i<totdoc;i++) | 
| wolffd@0 | 638 	if(inconsistent[i]) | 
| wolffd@0 | 639 	  inconsistentnum++; | 
| wolffd@0 | 640       printf("Number of SV: %ld (plus %ld inconsistent examples)\n", | 
| wolffd@0 | 641 	     model->sv_num-1,inconsistentnum); | 
| wolffd@0 | 642     } | 
| wolffd@0 | 643     else { | 
| wolffd@0 | 644       upsupvecnum=0; | 
| wolffd@0 | 645       for(i=1;i<model->sv_num;i++) { | 
| wolffd@0 | 646 	if(fabs(model->alpha[i]) >= | 
| wolffd@0 | 647 	   (learn_parm->svm_cost[(model->supvec[i])->docnum]- | 
| wolffd@0 | 648 	    learn_parm->epsilon_a)) | 
| wolffd@0 | 649 	  upsupvecnum++; | 
| wolffd@0 | 650       } | 
| wolffd@0 | 651       printf("Number of SV: %ld (including %ld at upper bound)\n", | 
| wolffd@0 | 652 	     model->sv_num-1,upsupvecnum); | 
| wolffd@0 | 653     } | 
| wolffd@0 | 654 | 
| wolffd@0 | 655     if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) { | 
| wolffd@0 | 656       loss=0; | 
| wolffd@0 | 657       model_length=0; | 
| wolffd@0 | 658       for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 659 	if((lin[i]-model->b)*(double)label[i] < (-learn_parm->eps+(double)label[i]*c[i])-learn_parm->epsilon_crit) | 
| wolffd@0 | 660 	  loss+=-learn_parm->eps+(double)label[i]*c[i]-(lin[i]-model->b)*(double)label[i]; | 
| wolffd@0 | 661 	model_length+=a[i]*label[i]*lin[i]; | 
| wolffd@0 | 662       } | 
| wolffd@0 | 663       model_length=sqrt(model_length); | 
| wolffd@0 | 664       fprintf(stdout,"L1 loss: loss=%.5f\n",loss); | 
| wolffd@0 | 665       fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length); | 
| wolffd@0 | 666       example_length=estimate_sphere(model,kernel_parm); | 
| wolffd@0 | 667       fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n", | 
| wolffd@0 | 668 	      length_of_longest_document_vector(docs,totdoc,kernel_parm)); | 
| wolffd@0 | 669     } | 
| wolffd@0 | 670     if(verbosity>=1) { | 
| wolffd@0 | 671       printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic); | 
| wolffd@0 | 672     } | 
| wolffd@0 | 673   } | 
| wolffd@0 | 674 | 
| wolffd@0 | 675   if(learn_parm->alphafile[0]) | 
| wolffd@0 | 676     write_alphas(learn_parm->alphafile,a,label,totdoc); | 
| wolffd@0 | 677 | 
| wolffd@0 | 678   /* this makes sure the model we return does not contain pointers to the | 
| wolffd@0 | 679      temporary documents */ | 
| wolffd@0 | 680   for(i=1;i<model->sv_num;i++) { | 
| wolffd@0 | 681     j=model->supvec[i]->docnum; | 
| wolffd@0 | 682     if(j >= (totdoc/2)) { | 
| wolffd@0 | 683       j=totdoc-j-1; | 
| wolffd@0 | 684     } | 
| wolffd@0 | 685     model->supvec[i]=docs_org[j]; | 
| wolffd@0 | 686   } | 
| wolffd@0 | 687 | 
| wolffd@0 | 688   shrink_state_cleanup(&shrink_state); | 
| wolffd@0 | 689   for(i=0;i<totdoc;i++) | 
| wolffd@0 | 690     free_example(docs[i],0); | 
| wolffd@0 | 691   free(docs); | 
| wolffd@0 | 692   free(label); | 
| wolffd@0 | 693   free(inconsistent); | 
| wolffd@0 | 694   free(unlabeled); | 
| wolffd@0 | 695   free(c); | 
| wolffd@0 | 696   free(a); | 
| wolffd@0 | 697   free(a_fullset); | 
| wolffd@0 | 698   free(xi_fullset); | 
| wolffd@0 | 699   free(lin); | 
| wolffd@0 | 700   free(learn_parm->svm_cost); | 
| wolffd@0 | 701 } | 
| wolffd@0 | 702 | 
| wolffd@0 | 703 void svm_learn_ranking(DOC **docs, double *rankvalue, long int totdoc, | 
| wolffd@0 | 704 		       long int totwords, LEARN_PARM *learn_parm, | 
| wolffd@0 | 705 		       KERNEL_PARM *kernel_parm, KERNEL_CACHE **kernel_cache, | 
| wolffd@0 | 706 		       MODEL *model) | 
| wolffd@0 | 707      /* docs:        Training vectors (x-part) */ | 
| wolffd@0 | 708      /* rankvalue:   Training target values that determine the ranking */ | 
| wolffd@0 | 709      /* totdoc:      Number of examples in docs/label */ | 
| wolffd@0 | 710      /* totwords:    Number of features (i.e. highest feature index) */ | 
| wolffd@0 | 711      /* learn_parm:  Learning paramenters */ | 
| wolffd@0 | 712      /* kernel_parm: Kernel paramenters */ | 
| wolffd@0 | 713      /* kernel_cache:Initialized pointer to Cache of size 1*totdoc, if | 
| wolffd@0 | 714 	             using a kernel. NULL if linear. NOTE: Cache is | 
| wolffd@0 | 715                      getting reinitialized in this function */ | 
| wolffd@0 | 716      /* model:       Returns learning result (assumed empty before called) */ | 
| wolffd@0 | 717 { | 
| wolffd@0 | 718   DOC **docdiff; | 
| wolffd@0 | 719   long i,j,k,totpair,kernel_cache_size; | 
| wolffd@0 | 720   double *target,*alpha,cost; | 
| wolffd@0 | 721   long *greater,*lesser; | 
| wolffd@0 | 722   MODEL *pairmodel; | 
| wolffd@0 | 723   SVECTOR *flow,*fhigh; | 
| wolffd@0 | 724 | 
| wolffd@0 | 725   totpair=0; | 
| wolffd@0 | 726   for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 727     for(j=i+1;j<totdoc;j++) { | 
| wolffd@0 | 728       if((docs[i]->queryid==docs[j]->queryid) && (rankvalue[i] != rankvalue[j])) { | 
| wolffd@0 | 729 	totpair++; | 
| wolffd@0 | 730       } | 
| wolffd@0 | 731     } | 
| wolffd@0 | 732   } | 
| wolffd@0 | 733 | 
| wolffd@0 | 734   printf("Constructing %ld rank constraints...",totpair); fflush(stdout); | 
| wolffd@0 | 735   docdiff=(DOC **)my_malloc(sizeof(DOC)*totpair); | 
| wolffd@0 | 736   target=(double *)my_malloc(sizeof(double)*totpair); | 
| wolffd@0 | 737   greater=(long *)my_malloc(sizeof(long)*totpair); | 
| wolffd@0 | 738   lesser=(long *)my_malloc(sizeof(long)*totpair); | 
| wolffd@0 | 739 | 
| wolffd@0 | 740   k=0; | 
| wolffd@0 | 741   for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 742     for(j=i+1;j<totdoc;j++) { | 
| wolffd@0 | 743       if(docs[i]->queryid == docs[j]->queryid) { | 
| wolffd@0 | 744 	cost=(docs[i]->costfactor+docs[j]->costfactor)/2.0; | 
| wolffd@0 | 745 	if(rankvalue[i] > rankvalue[j]) { | 
| wolffd@0 | 746 	  if(kernel_parm->kernel_type == LINEAR) | 
| wolffd@0 | 747 	    docdiff[k]=create_example(k,0,0,cost, | 
| wolffd@0 | 748 				      sub_ss(docs[i]->fvec,docs[j]->fvec)); | 
| wolffd@0 | 749 	  else { | 
| wolffd@0 | 750 	    flow=copy_svector(docs[j]->fvec); | 
| wolffd@0 | 751 	    flow->factor=-1.0; | 
| wolffd@0 | 752 	    flow->next=NULL; | 
| wolffd@0 | 753 	    fhigh=copy_svector(docs[i]->fvec); | 
| wolffd@0 | 754 	    fhigh->factor=1.0; | 
| wolffd@0 | 755 	    fhigh->next=flow; | 
| wolffd@0 | 756 	    docdiff[k]=create_example(k,0,0,cost,fhigh); | 
| wolffd@0 | 757 	  } | 
| wolffd@0 | 758 	  target[k]=1; | 
| wolffd@0 | 759 	  greater[k]=i; | 
| wolffd@0 | 760 	  lesser[k]=j; | 
| wolffd@0 | 761 	  k++; | 
| wolffd@0 | 762 	} | 
| wolffd@0 | 763 	else if(rankvalue[i] < rankvalue[j]) { | 
| wolffd@0 | 764 	  if(kernel_parm->kernel_type == LINEAR) | 
| wolffd@0 | 765 	    docdiff[k]=create_example(k,0,0,cost, | 
| wolffd@0 | 766 				      sub_ss(docs[i]->fvec,docs[j]->fvec)); | 
| wolffd@0 | 767 	  else { | 
| wolffd@0 | 768 	    flow=copy_svector(docs[j]->fvec); | 
| wolffd@0 | 769 	    flow->factor=-1.0; | 
| wolffd@0 | 770 	    flow->next=NULL; | 
| wolffd@0 | 771 	    fhigh=copy_svector(docs[i]->fvec); | 
| wolffd@0 | 772 	    fhigh->factor=1.0; | 
| wolffd@0 | 773 	    fhigh->next=flow; | 
| wolffd@0 | 774 	    docdiff[k]=create_example(k,0,0,cost,fhigh); | 
| wolffd@0 | 775 	  } | 
| wolffd@0 | 776 	  target[k]=-1; | 
| wolffd@0 | 777 	  greater[k]=i; | 
| wolffd@0 | 778 	  lesser[k]=j; | 
| wolffd@0 | 779 	  k++; | 
| wolffd@0 | 780 	} | 
| wolffd@0 | 781       } | 
| wolffd@0 | 782     } | 
| wolffd@0 | 783   } | 
| wolffd@0 | 784   printf("done.\n"); fflush(stdout); | 
| wolffd@0 | 785 | 
| wolffd@0 | 786   /* need to get a bigger kernel cache */ | 
| wolffd@0 | 787   if(*kernel_cache) { | 
| wolffd@0 | 788     kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024); | 
| wolffd@0 | 789     kernel_cache_cleanup(*kernel_cache); | 
| wolffd@0 | 790     (*kernel_cache)=kernel_cache_init(totpair,kernel_cache_size); | 
| wolffd@0 | 791   } | 
| wolffd@0 | 792 | 
| wolffd@0 | 793   /* must use unbiased hyperplane on difference vectors */ | 
| wolffd@0 | 794   learn_parm->biased_hyperplane=0; | 
| wolffd@0 | 795   pairmodel=(MODEL *)my_malloc(sizeof(MODEL)); | 
| wolffd@0 | 796   svm_learn_classification(docdiff,target,totpair,totwords,learn_parm, | 
| wolffd@0 | 797 			   kernel_parm,(*kernel_cache),pairmodel,NULL); | 
| wolffd@0 | 798 | 
| wolffd@0 | 799   /* Transfer the result into a more compact model. If you would like | 
| wolffd@0 | 800      to output the original model on pairs of documents, see below. */ | 
| wolffd@0 | 801   alpha=(double *)my_malloc(sizeof(double)*totdoc); | 
| wolffd@0 | 802   for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 803     alpha[i]=0; | 
| wolffd@0 | 804   } | 
| wolffd@0 | 805   for(i=1;i<pairmodel->sv_num;i++) { | 
| wolffd@0 | 806     alpha[lesser[(pairmodel->supvec[i])->docnum]]-=pairmodel->alpha[i]; | 
| wolffd@0 | 807     alpha[greater[(pairmodel->supvec[i])->docnum]]+=pairmodel->alpha[i]; | 
| wolffd@0 | 808   } | 
| wolffd@0 | 809   model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2)); | 
| wolffd@0 | 810   model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2)); | 
| wolffd@0 | 811   model->index = (long *)my_malloc(sizeof(long)*(totdoc+2)); | 
| wolffd@0 | 812   model->supvec[0]=0;  /* element 0 reserved and empty for now */ | 
| wolffd@0 | 813   model->alpha[0]=0; | 
| wolffd@0 | 814   model->sv_num=1; | 
| wolffd@0 | 815   for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 816     if(alpha[i]) { | 
| wolffd@0 | 817       model->supvec[model->sv_num]=docs[i]; | 
| wolffd@0 | 818       model->alpha[model->sv_num]=alpha[i]; | 
| wolffd@0 | 819       model->index[i]=model->sv_num; | 
| wolffd@0 | 820       model->sv_num++; | 
| wolffd@0 | 821     } | 
| wolffd@0 | 822     else { | 
| wolffd@0 | 823       model->index[i]=-1; | 
| wolffd@0 | 824     } | 
| wolffd@0 | 825   } | 
| wolffd@0 | 826   model->at_upper_bound=0; | 
| wolffd@0 | 827   model->b=0; | 
| wolffd@0 | 828   model->lin_weights=NULL; | 
| wolffd@0 | 829   model->totwords=totwords; | 
| wolffd@0 | 830   model->totdoc=totdoc; | 
| wolffd@0 | 831   model->kernel_parm=(*kernel_parm); | 
| wolffd@0 | 832   model->loo_error=-1; | 
| wolffd@0 | 833   model->loo_recall=-1; | 
| wolffd@0 | 834   model->loo_precision=-1; | 
| wolffd@0 | 835   model->xa_error=-1; | 
| wolffd@0 | 836   model->xa_recall=-1; | 
| wolffd@0 | 837   model->xa_precision=-1; | 
| wolffd@0 | 838 | 
| wolffd@0 | 839   free(alpha); | 
| wolffd@0 | 840   free(greater); | 
| wolffd@0 | 841   free(lesser); | 
| wolffd@0 | 842   free(target); | 
| wolffd@0 | 843 | 
| wolffd@0 | 844   /* If you would like to output the original model on pairs of | 
| wolffd@0 | 845      document, replace the following lines with '(*model)=(*pairmodel);' */ | 
| wolffd@0 | 846   for(i=0;i<totpair;i++) | 
| wolffd@0 | 847     free_example(docdiff[i],1); | 
| wolffd@0 | 848   free(docdiff); | 
| wolffd@0 | 849   free_model(pairmodel,0); | 
| wolffd@0 | 850 } | 
| wolffd@0 | 851 | 
| wolffd@0 | 852 | 
| wolffd@0 | 853 /* The following solves a freely defined and given set of | 
| wolffd@0 | 854    inequalities. The optimization problem is of the following form: | 
| wolffd@0 | 855 | 
| wolffd@0 | 856    min 0.5 w*w + C sum_i C_i \xi_i | 
| wolffd@0 | 857    s.t. x_i * w > rhs_i - \xi_i | 
| wolffd@0 | 858 | 
| wolffd@0 | 859    This corresponds to the -z o option. */ | 
| wolffd@0 | 860 | 
| wolffd@0 | 861 void svm_learn_optimization(DOC **docs, double *rhs, long int | 
| wolffd@0 | 862 			    totdoc, long int totwords, | 
| wolffd@0 | 863 			    LEARN_PARM *learn_parm, | 
| wolffd@0 | 864 			    KERNEL_PARM *kernel_parm, | 
| wolffd@0 | 865 			    KERNEL_CACHE *kernel_cache, MODEL *model, | 
| wolffd@0 | 866 			    double *alpha) | 
| wolffd@0 | 867      /* docs:        Left-hand side of inequalities (x-part) */ | 
| wolffd@0 | 868      /* rhs:         Right-hand side of inequalities */ | 
| wolffd@0 | 869      /* totdoc:      Number of examples in docs/label */ | 
| wolffd@0 | 870      /* totwords:    Number of features (i.e. highest feature index) */ | 
| wolffd@0 | 871      /* learn_parm:  Learning paramenters */ | 
| wolffd@0 | 872      /* kernel_parm: Kernel paramenters */ | 
| wolffd@0 | 873      /* kernel_cache:Initialized Cache of size 1*totdoc, if using a kernel. | 
| wolffd@0 | 874                      NULL if linear.*/ | 
| wolffd@0 | 875      /* model:       Returns solution as SV expansion (assumed empty before called) */ | 
| wolffd@0 | 876      /* alpha:       Start values for the alpha variables or NULL | 
| wolffd@0 | 877 	             pointer. The new alpha values are returned after | 
| wolffd@0 | 878 		     optimization if not NULL. Array must be of size totdoc. */ | 
| wolffd@0 | 879 { | 
| wolffd@0 | 880   long i,*label; | 
| wolffd@0 | 881   long misclassified,upsupvecnum; | 
| wolffd@0 | 882   double loss,model_length,example_length; | 
| wolffd@0 | 883   double maxdiff,*lin,*a,*c; | 
| wolffd@0 | 884   long runtime_start,runtime_end; | 
| wolffd@0 | 885   long iterations,maxslackid,svsetnum; | 
| wolffd@0 | 886   long *unlabeled,*inconsistent; | 
| wolffd@0 | 887   double r_delta_sq=0,r_delta,r_delta_avg; | 
| wolffd@0 | 888   long *index,*index2dnum; | 
| wolffd@0 | 889   double *weights,*slack,*alphaslack; | 
| wolffd@0 | 890   CFLOAT *aicache;  /* buffer to keep one row of hessian */ | 
| wolffd@0 | 891 | 
| wolffd@0 | 892   TIMING timing_profile; | 
| wolffd@0 | 893   SHRINK_STATE shrink_state; | 
| wolffd@0 | 894 | 
| wolffd@0 | 895   runtime_start=get_runtime(); | 
| wolffd@0 | 896   timing_profile.time_kernel=0; | 
| wolffd@0 | 897   timing_profile.time_opti=0; | 
| wolffd@0 | 898   timing_profile.time_shrink=0; | 
| wolffd@0 | 899   timing_profile.time_update=0; | 
| wolffd@0 | 900   timing_profile.time_model=0; | 
| wolffd@0 | 901   timing_profile.time_check=0; | 
| wolffd@0 | 902   timing_profile.time_select=0; | 
| wolffd@0 | 903   kernel_cache_statistic=0; | 
| wolffd@0 | 904 | 
| wolffd@0 | 905   learn_parm->totwords=totwords; | 
| wolffd@0 | 906 | 
| wolffd@0 | 907   /* make sure -n value is reasonable */ | 
| wolffd@0 | 908   if((learn_parm->svm_newvarsinqp < 2) | 
| wolffd@0 | 909      || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) { | 
| wolffd@0 | 910     learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; | 
| wolffd@0 | 911   } | 
| wolffd@0 | 912 | 
| wolffd@0 | 913   init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK); | 
| wolffd@0 | 914 | 
| wolffd@0 | 915   label = (long *)my_malloc(sizeof(long)*totdoc); | 
| wolffd@0 | 916   unlabeled = (long *)my_malloc(sizeof(long)*totdoc); | 
| wolffd@0 | 917   inconsistent = (long *)my_malloc(sizeof(long)*totdoc); | 
| wolffd@0 | 918   c = (double *)my_malloc(sizeof(double)*totdoc); | 
| wolffd@0 | 919   a = (double *)my_malloc(sizeof(double)*totdoc); | 
| wolffd@0 | 920   lin = (double *)my_malloc(sizeof(double)*totdoc); | 
| wolffd@0 | 921   learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc); | 
| wolffd@0 | 922   model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2)); | 
| wolffd@0 | 923   model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2)); | 
| wolffd@0 | 924   model->index = (long *)my_malloc(sizeof(long)*(totdoc+2)); | 
| wolffd@0 | 925 | 
| wolffd@0 | 926   model->at_upper_bound=0; | 
| wolffd@0 | 927   model->b=0; | 
| wolffd@0 | 928   model->supvec[0]=0;  /* element 0 reserved and empty for now */ | 
| wolffd@0 | 929   model->alpha[0]=0; | 
| wolffd@0 | 930   model->lin_weights=NULL; | 
| wolffd@0 | 931   model->totwords=totwords; | 
| wolffd@0 | 932   model->totdoc=totdoc; | 
| wolffd@0 | 933   model->kernel_parm=(*kernel_parm); | 
| wolffd@0 | 934   model->sv_num=1; | 
| wolffd@0 | 935   model->loo_error=-1; | 
| wolffd@0 | 936   model->loo_recall=-1; | 
| wolffd@0 | 937   model->loo_precision=-1; | 
| wolffd@0 | 938   model->xa_error=-1; | 
| wolffd@0 | 939   model->xa_recall=-1; | 
| wolffd@0 | 940   model->xa_precision=-1; | 
| wolffd@0 | 941 | 
| wolffd@0 | 942   r_delta=estimate_r_delta(docs,totdoc,kernel_parm); | 
| wolffd@0 | 943   r_delta_sq=r_delta*r_delta; | 
| wolffd@0 | 944 | 
| wolffd@0 | 945   r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm); | 
| wolffd@0 | 946   if(learn_parm->svm_c == 0.0) {  /* default value for C */ | 
| wolffd@0 | 947     learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg); | 
| wolffd@0 | 948     if(verbosity>=1) | 
| wolffd@0 | 949       printf("Setting default regularization parameter C=%.4f\n", | 
| wolffd@0 | 950 	     learn_parm->svm_c); | 
| wolffd@0 | 951   } | 
| wolffd@0 | 952 | 
| wolffd@0 | 953   learn_parm->biased_hyperplane=0; /* learn an unbiased hyperplane */ | 
| wolffd@0 | 954 | 
| wolffd@0 | 955   learn_parm->eps=0.0;      /* No margin, unless explicitly handcoded | 
| wolffd@0 | 956                                in the right-hand side in the training | 
| wolffd@0 | 957                                set.  */ | 
| wolffd@0 | 958 | 
| wolffd@0 | 959   for(i=0;i<totdoc;i++) {    /* various inits */ | 
| wolffd@0 | 960     docs[i]->docnum=i; | 
| wolffd@0 | 961     a[i]=0; | 
| wolffd@0 | 962     lin[i]=0; | 
| wolffd@0 | 963     c[i]=rhs[i];       /* set right-hand side */ | 
| wolffd@0 | 964     unlabeled[i]=0; | 
| wolffd@0 | 965     inconsistent[i]=0; | 
| wolffd@0 | 966     learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio* | 
| wolffd@0 | 967       docs[i]->costfactor; | 
| wolffd@0 | 968     label[i]=1; | 
| wolffd@0 | 969   } | 
| wolffd@0 | 970   if(learn_parm->sharedslack) /* if shared slacks are used, they must */ | 
| wolffd@0 | 971     for(i=0;i<totdoc;i++)     /*  be used on every constraint */ | 
| wolffd@0 | 972       if(!docs[i]->slackid) { | 
| wolffd@0 | 973 	perror("Error: Missing shared slacks definitions in some of the examples."); | 
| wolffd@0 | 974 	exit(0); | 
| wolffd@0 | 975       } | 
| wolffd@0 | 976 | 
| wolffd@0 | 977   /* compute starting state for initial alpha values */ | 
| wolffd@0 | 978   if(alpha) { | 
| wolffd@0 | 979     if(verbosity>=1) { | 
| wolffd@0 | 980       printf("Computing starting state..."); fflush(stdout); | 
| wolffd@0 | 981     } | 
| wolffd@0 | 982     index = (long *)my_malloc(sizeof(long)*totdoc); | 
| wolffd@0 | 983     index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); | 
| wolffd@0 | 984     weights=(double *)my_malloc(sizeof(double)*(totwords+1)); | 
| wolffd@0 | 985     aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc); | 
| wolffd@0 | 986     for(i=0;i<totdoc;i++) {    /* create full index and clip alphas */ | 
| wolffd@0 | 987       index[i]=1; | 
| wolffd@0 | 988       alpha[i]=fabs(alpha[i]); | 
| wolffd@0 | 989       if(alpha[i]<0) alpha[i]=0; | 
| wolffd@0 | 990       if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i]; | 
| wolffd@0 | 991     } | 
| wolffd@0 | 992     if(kernel_parm->kernel_type != LINEAR) { | 
| wolffd@0 | 993       for(i=0;i<totdoc;i++)     /* fill kernel cache with unbounded SV */ | 
| wolffd@0 | 994 	if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i]) | 
| wolffd@0 | 995 	   && (kernel_cache_space_available(kernel_cache))) | 
| wolffd@0 | 996 	  cache_kernel_row(kernel_cache,docs,i,kernel_parm); | 
| wolffd@0 | 997       for(i=0;i<totdoc;i++)     /* fill rest of kernel cache with bounded SV */ | 
| wolffd@0 | 998 	if((alpha[i]==learn_parm->svm_cost[i]) | 
| wolffd@0 | 999 	   && (kernel_cache_space_available(kernel_cache))) | 
| wolffd@0 | 1000 	  cache_kernel_row(kernel_cache,docs,i,kernel_parm); | 
| wolffd@0 | 1001     } | 
| wolffd@0 | 1002     (void)compute_index(index,totdoc,index2dnum); | 
| wolffd@0 | 1003     update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc, | 
| wolffd@0 | 1004 			    totwords,kernel_parm,kernel_cache,lin,aicache, | 
| wolffd@0 | 1005 			    weights); | 
| wolffd@0 | 1006     (void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c, | 
| wolffd@0 | 1007 			      learn_parm,index2dnum,index2dnum,model); | 
| wolffd@0 | 1008     for(i=0;i<totdoc;i++) {    /* copy initial alphas */ | 
| wolffd@0 | 1009       a[i]=alpha[i]; | 
| wolffd@0 | 1010     } | 
| wolffd@0 | 1011     free(index); | 
| wolffd@0 | 1012     free(index2dnum); | 
| wolffd@0 | 1013     free(weights); | 
| wolffd@0 | 1014     free(aicache); | 
| wolffd@0 | 1015     if(verbosity>=1) { | 
| wolffd@0 | 1016       printf("done.\n");  fflush(stdout); | 
| wolffd@0 | 1017     } | 
| wolffd@0 | 1018   } | 
| wolffd@0 | 1019 | 
| wolffd@0 | 1020   /* removing inconsistent does not work for general optimization problem */ | 
| wolffd@0 | 1021   if(learn_parm->remove_inconsistent) { | 
| wolffd@0 | 1022     learn_parm->remove_inconsistent = 0; | 
| wolffd@0 | 1023     printf("'remove inconsistent' not available in this mode. Switching option off!"); fflush(stdout); | 
| wolffd@0 | 1024   } | 
| wolffd@0 | 1025 | 
| wolffd@0 | 1026   /* caching makes no sense for linear kernel */ | 
| wolffd@0 | 1027   if(kernel_parm->kernel_type == LINEAR) { | 
| wolffd@0 | 1028     kernel_cache = NULL; | 
| wolffd@0 | 1029   } | 
| wolffd@0 | 1030 | 
| wolffd@0 | 1031   if(verbosity==1) { | 
| wolffd@0 | 1032     printf("Optimizing"); fflush(stdout); | 
| wolffd@0 | 1033   } | 
| wolffd@0 | 1034 | 
| wolffd@0 | 1035   /* train the svm */ | 
| wolffd@0 | 1036   if(learn_parm->sharedslack) | 
| wolffd@0 | 1037     iterations=optimize_to_convergence_sharedslack(docs,label,totdoc, | 
| wolffd@0 | 1038 				     totwords,learn_parm,kernel_parm, | 
| wolffd@0 | 1039 				     kernel_cache,&shrink_state,model, | 
| wolffd@0 | 1040 				     a,lin,c,&timing_profile, | 
| wolffd@0 | 1041 				     &maxdiff); | 
| wolffd@0 | 1042   else | 
| wolffd@0 | 1043     iterations=optimize_to_convergence(docs,label,totdoc, | 
| wolffd@0 | 1044 				     totwords,learn_parm,kernel_parm, | 
| wolffd@0 | 1045 				     kernel_cache,&shrink_state,model, | 
| wolffd@0 | 1046 				     inconsistent,unlabeled, | 
| wolffd@0 | 1047 				     a,lin,c,&timing_profile, | 
| wolffd@0 | 1048 				     &maxdiff,(long)-1,(long)1); | 
| wolffd@0 | 1049 | 
| wolffd@0 | 1050   if(verbosity>=1) { | 
| wolffd@0 | 1051     if(verbosity==1) printf("done. (%ld iterations)\n",iterations); | 
| wolffd@0 | 1052 | 
| wolffd@0 | 1053     misclassified=0; | 
| wolffd@0 | 1054     for(i=0;(i<totdoc);i++) { /* get final statistic */ | 
| wolffd@0 | 1055       if((lin[i]-model->b)*(double)label[i] <= 0.0) | 
| wolffd@0 | 1056 	misclassified++; | 
| wolffd@0 | 1057     } | 
| wolffd@0 | 1058 | 
| wolffd@0 | 1059     printf("Optimization finished (maxdiff=%.5f).\n",maxdiff); | 
| wolffd@0 | 1060 | 
| wolffd@0 | 1061     runtime_end=get_runtime(); | 
| wolffd@0 | 1062     if(verbosity>=2) { | 
| wolffd@0 | 1063       printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n", | 
| wolffd@0 | 1064         ((float)runtime_end-(float)runtime_start)/100.0, | 
| wolffd@0 | 1065         (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start), | 
| wolffd@0 | 1066 	(100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start), | 
| wolffd@0 | 1067 	(100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start), | 
| wolffd@0 | 1068         (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start), | 
| wolffd@0 | 1069         (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start), | 
| wolffd@0 | 1070         (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start), | 
| wolffd@0 | 1071         (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start)); | 
| wolffd@0 | 1072     } | 
| wolffd@0 | 1073     else { | 
| wolffd@0 | 1074       printf("Runtime in cpu-seconds: %.2f\n", | 
| wolffd@0 | 1075 	     (runtime_end-runtime_start)/100.0); | 
| wolffd@0 | 1076     } | 
| wolffd@0 | 1077   } | 
| wolffd@0 | 1078   if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) { | 
| wolffd@0 | 1079     loss=0; | 
| wolffd@0 | 1080     model_length=0; | 
| wolffd@0 | 1081     for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 1082       if((lin[i]-model->b)*(double)label[i] < c[i]-learn_parm->epsilon_crit) | 
| wolffd@0 | 1083 	loss+=c[i]-(lin[i]-model->b)*(double)label[i]; | 
| wolffd@0 | 1084       model_length+=a[i]*label[i]*lin[i]; | 
| wolffd@0 | 1085     } | 
| wolffd@0 | 1086     model_length=sqrt(model_length); | 
| wolffd@0 | 1087     fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length); | 
| wolffd@0 | 1088   } | 
| wolffd@0 | 1089 | 
| wolffd@0 | 1090   if(learn_parm->sharedslack) { | 
| wolffd@0 | 1091     index = (long *)my_malloc(sizeof(long)*totdoc); | 
| wolffd@0 | 1092     index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); | 
| wolffd@0 | 1093     maxslackid=0; | 
| wolffd@0 | 1094     for(i=0;i<totdoc;i++) {    /* create full index */ | 
| wolffd@0 | 1095       index[i]=1; | 
| wolffd@0 | 1096       if(maxslackid<docs[i]->slackid) | 
| wolffd@0 | 1097 	maxslackid=docs[i]->slackid; | 
| wolffd@0 | 1098     } | 
| wolffd@0 | 1099     (void)compute_index(index,totdoc,index2dnum); | 
| wolffd@0 | 1100     slack=(double *)my_malloc(sizeof(double)*(maxslackid+1)); | 
| wolffd@0 | 1101     alphaslack=(double *)my_malloc(sizeof(double)*(maxslackid+1)); | 
| wolffd@0 | 1102     for(i=0;i<=maxslackid;i++) {    /* init shared slacks */ | 
| wolffd@0 | 1103       slack[i]=0; | 
| wolffd@0 | 1104       alphaslack[i]=0; | 
| wolffd@0 | 1105     } | 
| wolffd@0 | 1106     compute_shared_slacks(docs,label,a,lin,c,index2dnum,learn_parm, | 
| wolffd@0 | 1107 			  slack,alphaslack); | 
| wolffd@0 | 1108     loss=0; | 
| wolffd@0 | 1109     model->at_upper_bound=0; | 
| wolffd@0 | 1110     svsetnum=0; | 
| wolffd@0 | 1111     for(i=0;i<=maxslackid;i++) {    /* create full index */ | 
| wolffd@0 | 1112       loss+=slack[i]; | 
| wolffd@0 | 1113       if(alphaslack[i] > (learn_parm->svm_c - learn_parm->epsilon_a)) | 
| wolffd@0 | 1114 	model->at_upper_bound++; | 
| wolffd@0 | 1115       if(alphaslack[i] > learn_parm->epsilon_a) | 
| wolffd@0 | 1116 	svsetnum++; | 
| wolffd@0 | 1117     } | 
| wolffd@0 | 1118     free(index); | 
| wolffd@0 | 1119     free(index2dnum); | 
| wolffd@0 | 1120     free(slack); | 
| wolffd@0 | 1121     free(alphaslack); | 
| wolffd@0 | 1122   } | 
| wolffd@0 | 1123 | 
| wolffd@0 | 1124   if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) { | 
| wolffd@0 | 1125     if(learn_parm->sharedslack) { | 
| wolffd@0 | 1126       printf("Number of SV: %ld\n", | 
| wolffd@0 | 1127 	     model->sv_num-1); | 
| wolffd@0 | 1128       printf("Number of non-zero slack variables: %ld (out of %ld)\n", | 
| wolffd@0 | 1129 	     model->at_upper_bound,svsetnum); | 
| wolffd@0 | 1130       fprintf(stdout,"L1 loss: loss=%.5f\n",loss); | 
| wolffd@0 | 1131     } | 
| wolffd@0 | 1132     else { | 
| wolffd@0 | 1133       upsupvecnum=0; | 
| wolffd@0 | 1134       for(i=1;i<model->sv_num;i++) { | 
| wolffd@0 | 1135 	if(fabs(model->alpha[i]) >= | 
| wolffd@0 | 1136 	   (learn_parm->svm_cost[(model->supvec[i])->docnum]- | 
| wolffd@0 | 1137 	    learn_parm->epsilon_a)) | 
| wolffd@0 | 1138 	  upsupvecnum++; | 
| wolffd@0 | 1139       } | 
| wolffd@0 | 1140       printf("Number of SV: %ld (including %ld at upper bound)\n", | 
| wolffd@0 | 1141 	     model->sv_num-1,upsupvecnum); | 
| wolffd@0 | 1142       fprintf(stdout,"L1 loss: loss=%.5f\n",loss); | 
| wolffd@0 | 1143     } | 
| wolffd@0 | 1144     example_length=estimate_sphere(model,kernel_parm); | 
| wolffd@0 | 1145     fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n", | 
| wolffd@0 | 1146 	    length_of_longest_document_vector(docs,totdoc,kernel_parm)); | 
| wolffd@0 | 1147   } | 
| wolffd@0 | 1148   if(verbosity>=1) { | 
| wolffd@0 | 1149     printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic); | 
| wolffd@0 | 1150   } | 
| wolffd@0 | 1151 | 
| wolffd@0 | 1152   if(alpha) { | 
| wolffd@0 | 1153     for(i=0;i<totdoc;i++) {    /* copy final alphas */ | 
| wolffd@0 | 1154       alpha[i]=a[i]; | 
| wolffd@0 | 1155     } | 
| wolffd@0 | 1156   } | 
| wolffd@0 | 1157 | 
| wolffd@0 | 1158   if(learn_parm->alphafile[0]) | 
| wolffd@0 | 1159     write_alphas(learn_parm->alphafile,a,label,totdoc); | 
| wolffd@0 | 1160 | 
| wolffd@0 | 1161   shrink_state_cleanup(&shrink_state); | 
| wolffd@0 | 1162   free(label); | 
| wolffd@0 | 1163   free(unlabeled); | 
| wolffd@0 | 1164   free(inconsistent); | 
| wolffd@0 | 1165   free(c); | 
| wolffd@0 | 1166   free(a); | 
| wolffd@0 | 1167   free(lin); | 
| wolffd@0 | 1168   free(learn_parm->svm_cost); | 
| wolffd@0 | 1169 } | 
| wolffd@0 | 1170 | 
| wolffd@0 | 1171 | 
| wolffd@0 | 1172 long optimize_to_convergence(DOC **docs, long int *label, long int totdoc, | 
| wolffd@0 | 1173 			     long int totwords, LEARN_PARM *learn_parm, | 
| wolffd@0 | 1174 			     KERNEL_PARM *kernel_parm, | 
| wolffd@0 | 1175 			     KERNEL_CACHE *kernel_cache, | 
| wolffd@0 | 1176 			     SHRINK_STATE *shrink_state, MODEL *model, | 
| wolffd@0 | 1177 			     long int *inconsistent, long int *unlabeled, | 
| wolffd@0 | 1178 			     double *a, double *lin, double *c, | 
| wolffd@0 | 1179 			     TIMING *timing_profile, double *maxdiff, | 
| wolffd@0 | 1180 			     long int heldout, long int retrain) | 
| wolffd@0 | 1181      /* docs: Training vectors (x-part) */ | 
| wolffd@0 | 1182      /* label: Training labels/value (y-part, zero if test example for | 
| wolffd@0 | 1183 			      transduction) */ | 
| wolffd@0 | 1184      /* totdoc: Number of examples in docs/label */ | 
| wolffd@0 | 1185      /* totwords: Number of features (i.e. highest feature index) */ | 
| wolffd@0 | 1186      /* laern_parm: Learning paramenters */ | 
| wolffd@0 | 1187      /* kernel_parm: Kernel paramenters */ | 
| wolffd@0 | 1188      /* kernel_cache: Initialized/partly filled Cache, if using a kernel. | 
| wolffd@0 | 1189                       NULL if linear. */ | 
| wolffd@0 | 1190      /* shrink_state: State of active variables */ | 
| wolffd@0 | 1191      /* model: Returns learning result */ | 
| wolffd@0 | 1192      /* inconsistent: examples thrown out as inconstistent */ | 
| wolffd@0 | 1193      /* unlabeled: test examples for transduction */ | 
| wolffd@0 | 1194      /* a: alphas */ | 
| wolffd@0 | 1195      /* lin: linear component of gradient */ | 
| wolffd@0 | 1196      /* c: right hand side of inequalities (margin) */ | 
| wolffd@0 | 1197      /* maxdiff: returns maximum violation of KT-conditions */ | 
| wolffd@0 | 1198      /* heldout: marks held-out example for leave-one-out (or -1) */ | 
| wolffd@0 | 1199      /* retrain: selects training mode (1=regular / 2=holdout) */ | 
| wolffd@0 | 1200 { | 
| wolffd@0 | 1201   long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink; | 
| wolffd@0 | 1202   long inconsistentnum,choosenum,already_chosen=0,iteration; | 
| wolffd@0 | 1203   long misclassified,supvecnum=0,*active2dnum,inactivenum; | 
| wolffd@0 | 1204   long *working2dnum,*selexam; | 
| wolffd@0 | 1205   long activenum; | 
| wolffd@0 | 1206   double criterion,eq; | 
| wolffd@0 | 1207   double *a_old; | 
| wolffd@0 | 1208   long t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */ | 
| wolffd@0 | 1209   long transductcycle; | 
| wolffd@0 | 1210   long transduction; | 
| wolffd@0 | 1211   double epsilon_crit_org; | 
| wolffd@0 | 1212   double bestmaxdiff; | 
| wolffd@0 | 1213   long   bestmaxdiffiter,terminate; | 
| wolffd@0 | 1214 | 
| wolffd@0 | 1215   double *selcrit;  /* buffer for sorting */ | 
| wolffd@0 | 1216   CFLOAT *aicache;  /* buffer to keep one row of hessian */ | 
| wolffd@0 | 1217   double *weights;  /* buffer for weight vector in linear case */ | 
| wolffd@0 | 1218   QP qp;            /* buffer for one quadratic program */ | 
| wolffd@0 | 1219 | 
| wolffd@0 | 1220   epsilon_crit_org=learn_parm->epsilon_crit; /* save org */ | 
| wolffd@0 | 1221   if(kernel_parm->kernel_type == LINEAR) { | 
| wolffd@0 | 1222     learn_parm->epsilon_crit=2.0; | 
| wolffd@0 | 1223     kernel_cache=NULL;   /* caching makes no sense for linear kernel */ | 
| wolffd@0 | 1224   } | 
| wolffd@0 | 1225   learn_parm->epsilon_shrink=2; | 
| wolffd@0 | 1226   (*maxdiff)=1; | 
| wolffd@0 | 1227 | 
| wolffd@0 | 1228   learn_parm->totwords=totwords; | 
| wolffd@0 | 1229 | 
| wolffd@0 | 1230   chosen = (long *)my_malloc(sizeof(long)*totdoc); | 
| wolffd@0 | 1231   last_suboptimal_at = (long *)my_malloc(sizeof(long)*totdoc); | 
| wolffd@0 | 1232   key = (long *)my_malloc(sizeof(long)*(totdoc+11)); | 
| wolffd@0 | 1233   selcrit = (double *)my_malloc(sizeof(double)*totdoc); | 
| wolffd@0 | 1234   selexam = (long *)my_malloc(sizeof(long)*totdoc); | 
| wolffd@0 | 1235   a_old = (double *)my_malloc(sizeof(double)*totdoc); | 
| wolffd@0 | 1236   aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc); | 
| wolffd@0 | 1237   working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); | 
| wolffd@0 | 1238   active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); | 
| wolffd@0 | 1239   qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); | 
| wolffd@0 | 1240   qp.opt_ce0 = (double *)my_malloc(sizeof(double)); | 
| wolffd@0 | 1241   qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize | 
| wolffd@0 | 1242 				 *learn_parm->svm_maxqpsize); | 
| wolffd@0 | 1243   qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); | 
| wolffd@0 | 1244   qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); | 
| wolffd@0 | 1245   qp.opt_low=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); | 
| wolffd@0 | 1246   qp.opt_up=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); | 
| wolffd@0 | 1247   weights=(double *)my_malloc(sizeof(double)*(totwords+1)); | 
| wolffd@0 | 1248 | 
| wolffd@0 | 1249   choosenum=0; | 
| wolffd@0 | 1250   inconsistentnum=0; | 
| wolffd@0 | 1251   transductcycle=0; | 
| wolffd@0 | 1252   transduction=0; | 
| wolffd@0 | 1253   if(!retrain) retrain=1; | 
| wolffd@0 | 1254   iteration=1; | 
| wolffd@0 | 1255   bestmaxdiffiter=1; | 
| wolffd@0 | 1256   bestmaxdiff=999999999; | 
| wolffd@0 | 1257   terminate=0; | 
| wolffd@0 | 1258 | 
| wolffd@0 | 1259   if(kernel_cache) { | 
| wolffd@0 | 1260     kernel_cache->time=iteration;  /* for lru cache */ | 
| wolffd@0 | 1261     kernel_cache_reset_lru(kernel_cache); | 
| wolffd@0 | 1262   } | 
| wolffd@0 | 1263 | 
| wolffd@0 | 1264   for(i=0;i<totdoc;i++) {    /* various inits */ | 
| wolffd@0 | 1265     chosen[i]=0; | 
| wolffd@0 | 1266     a_old[i]=a[i]; | 
| wolffd@0 | 1267     last_suboptimal_at[i]=1; | 
| wolffd@0 | 1268     if(inconsistent[i]) | 
| wolffd@0 | 1269       inconsistentnum++; | 
| wolffd@0 | 1270     if(unlabeled[i]) { | 
| wolffd@0 | 1271       transduction=1; | 
| wolffd@0 | 1272     } | 
| wolffd@0 | 1273   } | 
| wolffd@0 | 1274   activenum=compute_index(shrink_state->active,totdoc,active2dnum); | 
| wolffd@0 | 1275   inactivenum=totdoc-activenum; | 
| wolffd@0 | 1276   clear_index(working2dnum); | 
| wolffd@0 | 1277 | 
| wolffd@0 | 1278                             /* repeat this loop until we have convergence */ | 
| wolffd@0 | 1279   for(;retrain && (!terminate);iteration++) { | 
| wolffd@0 | 1280 | 
| wolffd@0 | 1281     if(kernel_cache) | 
| wolffd@0 | 1282       kernel_cache->time=iteration;  /* for lru cache */ | 
| wolffd@0 | 1283     if(verbosity>=2) { | 
| wolffd@0 | 1284       printf( | 
| wolffd@0 | 1285 	"Iteration %ld: ",iteration); fflush(stdout); | 
| wolffd@0 | 1286     } | 
| wolffd@0 | 1287     else if(verbosity==1) { | 
| wolffd@0 | 1288       printf("."); fflush(stdout); | 
| wolffd@0 | 1289     } | 
| wolffd@0 | 1290 | 
| wolffd@0 | 1291     if(verbosity>=2) t0=get_runtime(); | 
| wolffd@0 | 1292     if(verbosity>=3) { | 
| wolffd@0 | 1293       printf("\nSelecting working set... "); fflush(stdout); | 
| wolffd@0 | 1294     } | 
| wolffd@0 | 1295 | 
| wolffd@0 | 1296     if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize) | 
| wolffd@0 | 1297       learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; | 
| wolffd@0 | 1298 | 
| wolffd@0 | 1299     i=0; | 
| wolffd@0 | 1300     for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */ | 
| wolffd@0 | 1301       if((chosen[j]>=(learn_parm->svm_maxqpsize/ | 
| wolffd@0 | 1302 		      minl(learn_parm->svm_maxqpsize, | 
| wolffd@0 | 1303 			   learn_parm->svm_newvarsinqp))) | 
| wolffd@0 | 1304 	 || (inconsistent[j]) | 
| wolffd@0 | 1305 	 || (j == heldout)) { | 
| wolffd@0 | 1306 	chosen[j]=0; | 
| wolffd@0 | 1307 	choosenum--; | 
| wolffd@0 | 1308       } | 
| wolffd@0 | 1309       else { | 
| wolffd@0 | 1310 	chosen[j]++; | 
| wolffd@0 | 1311 	working2dnum[i++]=j; | 
| wolffd@0 | 1312       } | 
| wolffd@0 | 1313     } | 
| wolffd@0 | 1314     working2dnum[i]=-1; | 
| wolffd@0 | 1315 | 
| wolffd@0 | 1316     if(retrain == 2) { | 
| wolffd@0 | 1317       choosenum=0; | 
| wolffd@0 | 1318       for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* fully clear working set */ | 
| wolffd@0 | 1319 	chosen[j]=0; | 
| wolffd@0 | 1320       } | 
| wolffd@0 | 1321       clear_index(working2dnum); | 
| wolffd@0 | 1322       for(i=0;i<totdoc;i++) { /* set inconsistent examples to zero (-i 1) */ | 
| wolffd@0 | 1323 	if((inconsistent[i] || (heldout==i)) && (a[i] != 0.0)) { | 
| wolffd@0 | 1324 	  chosen[i]=99999; | 
| wolffd@0 | 1325 	  choosenum++; | 
| wolffd@0 | 1326 	  a[i]=0; | 
| wolffd@0 | 1327 	} | 
| wolffd@0 | 1328       } | 
| wolffd@0 | 1329       if(learn_parm->biased_hyperplane) { | 
| wolffd@0 | 1330 	eq=0; | 
| wolffd@0 | 1331 	for(i=0;i<totdoc;i++) { /* make sure we fulfill equality constraint */ | 
| wolffd@0 | 1332 	  eq+=a[i]*label[i]; | 
| wolffd@0 | 1333 	} | 
| wolffd@0 | 1334 	for(i=0;(i<totdoc) && (fabs(eq) > learn_parm->epsilon_a);i++) { | 
| wolffd@0 | 1335 	  if((eq*label[i] > 0) && (a[i] > 0)) { | 
| wolffd@0 | 1336 	    chosen[i]=88888; | 
| wolffd@0 | 1337 	    choosenum++; | 
| wolffd@0 | 1338 	    if((eq*label[i]) > a[i]) { | 
| wolffd@0 | 1339 	      eq-=(a[i]*label[i]); | 
| wolffd@0 | 1340 	      a[i]=0; | 
| wolffd@0 | 1341 	    } | 
| wolffd@0 | 1342 	    else { | 
| wolffd@0 | 1343 	      a[i]-=(eq*label[i]); | 
| wolffd@0 | 1344 	      eq=0; | 
| wolffd@0 | 1345 	    } | 
| wolffd@0 | 1346 	  } | 
| wolffd@0 | 1347 	} | 
| wolffd@0 | 1348       } | 
| wolffd@0 | 1349       compute_index(chosen,totdoc,working2dnum); | 
| wolffd@0 | 1350     } | 
| wolffd@0 | 1351     else {      /* select working set according to steepest gradient */ | 
| wolffd@0 | 1352       if(iteration % 101) { | 
| wolffd@0 | 1353         already_chosen=0; | 
| wolffd@0 | 1354 	if((minl(learn_parm->svm_newvarsinqp, | 
| wolffd@0 | 1355 		 learn_parm->svm_maxqpsize-choosenum)>=4) | 
| wolffd@0 | 1356 	   && (kernel_parm->kernel_type != LINEAR)) { | 
| wolffd@0 | 1357 	  /* select part of the working set from cache */ | 
| wolffd@0 | 1358 	  already_chosen=select_next_qp_subproblem_grad( | 
| wolffd@0 | 1359 			      label,unlabeled,a,lin,c,totdoc, | 
| wolffd@0 | 1360 			      (long)(minl(learn_parm->svm_maxqpsize-choosenum, | 
| wolffd@0 | 1361 					  learn_parm->svm_newvarsinqp) | 
| wolffd@0 | 1362 				     /2), | 
| wolffd@0 | 1363 			      learn_parm,inconsistent,active2dnum, | 
| wolffd@0 | 1364 			      working2dnum,selcrit,selexam,kernel_cache,1, | 
| wolffd@0 | 1365 			      key,chosen); | 
| wolffd@0 | 1366 	  choosenum+=already_chosen; | 
| wolffd@0 | 1367 	} | 
| wolffd@0 | 1368 	choosenum+=select_next_qp_subproblem_grad( | 
| wolffd@0 | 1369                               label,unlabeled,a,lin,c,totdoc, | 
| wolffd@0 | 1370                               minl(learn_parm->svm_maxqpsize-choosenum, | 
| wolffd@0 | 1371 				   learn_parm->svm_newvarsinqp-already_chosen), | 
| wolffd@0 | 1372                               learn_parm,inconsistent,active2dnum, | 
| wolffd@0 | 1373 			      working2dnum,selcrit,selexam,kernel_cache,0,key, | 
| wolffd@0 | 1374 			      chosen); | 
| wolffd@0 | 1375       } | 
| wolffd@0 | 1376       else { /* once in a while, select a somewhat random working set | 
| wolffd@0 | 1377 		to get unlocked of infinite loops due to numerical | 
| wolffd@0 | 1378 		inaccuracies in the core qp-solver */ | 
| wolffd@0 | 1379 	choosenum+=select_next_qp_subproblem_rand( | 
| wolffd@0 | 1380                               label,unlabeled,a,lin,c,totdoc, | 
| wolffd@0 | 1381                               minl(learn_parm->svm_maxqpsize-choosenum, | 
| wolffd@0 | 1382 				   learn_parm->svm_newvarsinqp), | 
| wolffd@0 | 1383                               learn_parm,inconsistent,active2dnum, | 
| wolffd@0 | 1384 			      working2dnum,selcrit,selexam,kernel_cache,key, | 
| wolffd@0 | 1385 			      chosen,iteration); | 
| wolffd@0 | 1386       } | 
| wolffd@0 | 1387     } | 
| wolffd@0 | 1388 | 
| wolffd@0 | 1389     if(verbosity>=2) { | 
| wolffd@0 | 1390       printf(" %ld vectors chosen\n",choosenum); fflush(stdout); | 
| wolffd@0 | 1391     } | 
| wolffd@0 | 1392 | 
| wolffd@0 | 1393     if(verbosity>=2) t1=get_runtime(); | 
| wolffd@0 | 1394 | 
| wolffd@0 | 1395     if(kernel_cache) | 
| wolffd@0 | 1396       cache_multiple_kernel_rows(kernel_cache,docs,working2dnum, | 
| wolffd@0 | 1397 				 choosenum,kernel_parm); | 
| wolffd@0 | 1398 | 
| wolffd@0 | 1399     if(verbosity>=2) t2=get_runtime(); | 
| wolffd@0 | 1400     if(retrain != 2) { | 
| wolffd@0 | 1401       optimize_svm(docs,label,unlabeled,inconsistent,0.0,chosen,active2dnum, | 
| wolffd@0 | 1402 		   model,totdoc,working2dnum,choosenum,a,lin,c,learn_parm, | 
| wolffd@0 | 1403 		   aicache,kernel_parm,&qp,&epsilon_crit_org); | 
| wolffd@0 | 1404     } | 
| wolffd@0 | 1405 | 
| wolffd@0 | 1406     if(verbosity>=2) t3=get_runtime(); | 
| wolffd@0 | 1407     update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc, | 
| wolffd@0 | 1408 			    totwords,kernel_parm,kernel_cache,lin,aicache, | 
| wolffd@0 | 1409 			    weights); | 
| wolffd@0 | 1410 | 
| wolffd@0 | 1411     if(verbosity>=2) t4=get_runtime(); | 
| wolffd@0 | 1412     supvecnum=calculate_svm_model(docs,label,unlabeled,lin,a,a_old,c, | 
| wolffd@0 | 1413 		                  learn_parm,working2dnum,active2dnum,model); | 
| wolffd@0 | 1414 | 
| wolffd@0 | 1415     if(verbosity>=2) t5=get_runtime(); | 
| wolffd@0 | 1416 | 
| wolffd@0 | 1417     /* The following computation of the objective function works only */ | 
| wolffd@0 | 1418     /* relative to the active variables */ | 
| wolffd@0 | 1419     if(verbosity>=3) { | 
| wolffd@0 | 1420       criterion=compute_objective_function(a,lin,c,learn_parm->eps,label, | 
| wolffd@0 | 1421 		                           active2dnum); | 
| wolffd@0 | 1422       printf("Objective function (over active variables): %.16f\n",criterion); | 
| wolffd@0 | 1423       fflush(stdout); | 
| wolffd@0 | 1424     } | 
| wolffd@0 | 1425 | 
| wolffd@0 | 1426     for(jj=0;(i=working2dnum[jj])>=0;jj++) { | 
| wolffd@0 | 1427       a_old[i]=a[i]; | 
| wolffd@0 | 1428     } | 
| wolffd@0 | 1429 | 
| wolffd@0 | 1430     if(retrain == 2) {  /* reset inconsistent unlabeled examples */ | 
| wolffd@0 | 1431       for(i=0;(i<totdoc);i++) { | 
| wolffd@0 | 1432 	if(inconsistent[i] && unlabeled[i]) { | 
| wolffd@0 | 1433 	  inconsistent[i]=0; | 
| wolffd@0 | 1434 	  label[i]=0; | 
| wolffd@0 | 1435 	} | 
| wolffd@0 | 1436       } | 
| wolffd@0 | 1437     } | 
| wolffd@0 | 1438 | 
| wolffd@0 | 1439     retrain=check_optimality(model,label,unlabeled,a,lin,c,totdoc,learn_parm, | 
| wolffd@0 | 1440 			     maxdiff,epsilon_crit_org,&misclassified, | 
| wolffd@0 | 1441 			     inconsistent,active2dnum,last_suboptimal_at, | 
| wolffd@0 | 1442 			     iteration,kernel_parm); | 
| wolffd@0 | 1443 | 
| wolffd@0 | 1444     if(verbosity>=2) { | 
| wolffd@0 | 1445       t6=get_runtime(); | 
| wolffd@0 | 1446       timing_profile->time_select+=t1-t0; | 
| wolffd@0 | 1447       timing_profile->time_kernel+=t2-t1; | 
| wolffd@0 | 1448       timing_profile->time_opti+=t3-t2; | 
| wolffd@0 | 1449       timing_profile->time_update+=t4-t3; | 
| wolffd@0 | 1450       timing_profile->time_model+=t5-t4; | 
| wolffd@0 | 1451       timing_profile->time_check+=t6-t5; | 
| wolffd@0 | 1452     } | 
| wolffd@0 | 1453 | 
| wolffd@0 | 1454     /* checking whether optimizer got stuck */ | 
| wolffd@0 | 1455     if((*maxdiff) < bestmaxdiff) { | 
| wolffd@0 | 1456       bestmaxdiff=(*maxdiff); | 
| wolffd@0 | 1457       bestmaxdiffiter=iteration; | 
| wolffd@0 | 1458     } | 
| wolffd@0 | 1459     if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) { | 
| wolffd@0 | 1460       /* long time no progress? */ | 
| wolffd@0 | 1461       terminate=1; | 
| wolffd@0 | 1462       retrain=0; | 
| wolffd@0 | 1463       if(verbosity>=1) | 
| wolffd@0 | 1464 	printf("\nWARNING: Relaxing KT-Conditions due to slow progress! Terminating!\n"); | 
| wolffd@0 | 1465     } | 
| wolffd@0 | 1466 | 
| wolffd@0 | 1467     noshrink=0; | 
| wolffd@0 | 1468     if((!retrain) && (inactivenum>0) | 
| wolffd@0 | 1469        && ((!learn_parm->skip_final_opt_check) | 
| wolffd@0 | 1470 	   || (kernel_parm->kernel_type == LINEAR))) { | 
| wolffd@0 | 1471       if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR)) | 
| wolffd@0 | 1472 	 || (verbosity>=2)) { | 
| wolffd@0 | 1473 	if(verbosity==1) { | 
| wolffd@0 | 1474 	  printf("\n"); | 
| wolffd@0 | 1475 	} | 
| wolffd@0 | 1476 	printf(" Checking optimality of inactive variables..."); | 
| wolffd@0 | 1477 	fflush(stdout); | 
| wolffd@0 | 1478       } | 
| wolffd@0 | 1479       t1=get_runtime(); | 
| wolffd@0 | 1480       reactivate_inactive_examples(label,unlabeled,a,shrink_state,lin,c,totdoc, | 
| wolffd@0 | 1481 				   totwords,iteration,learn_parm,inconsistent, | 
| wolffd@0 | 1482 				   docs,kernel_parm,kernel_cache,model,aicache, | 
| wolffd@0 | 1483 				   weights,maxdiff); | 
| wolffd@0 | 1484       /* Update to new active variables. */ | 
| wolffd@0 | 1485       activenum=compute_index(shrink_state->active,totdoc,active2dnum); | 
| wolffd@0 | 1486       inactivenum=totdoc-activenum; | 
| wolffd@0 | 1487       /* reset watchdog */ | 
| wolffd@0 | 1488       bestmaxdiff=(*maxdiff); | 
| wolffd@0 | 1489       bestmaxdiffiter=iteration; | 
| wolffd@0 | 1490       /* termination criterion */ | 
| wolffd@0 | 1491       noshrink=1; | 
| wolffd@0 | 1492       retrain=0; | 
| wolffd@0 | 1493       if((*maxdiff) > learn_parm->epsilon_crit) | 
| wolffd@0 | 1494 	retrain=1; | 
| wolffd@0 | 1495       timing_profile->time_shrink+=get_runtime()-t1; | 
| wolffd@0 | 1496       if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR)) | 
| wolffd@0 | 1497 	 || (verbosity>=2)) { | 
| wolffd@0 | 1498 	printf("done.\n");  fflush(stdout); | 
| wolffd@0 | 1499         printf(" Number of inactive variables = %ld\n",inactivenum); | 
| wolffd@0 | 1500       } | 
| wolffd@0 | 1501     } | 
| wolffd@0 | 1502 | 
| wolffd@0 | 1503     if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff))) | 
| wolffd@0 | 1504       learn_parm->epsilon_crit=(*maxdiff); | 
| wolffd@0 | 1505     if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) { | 
| wolffd@0 | 1506       learn_parm->epsilon_crit/=2.0; | 
| wolffd@0 | 1507       retrain=1; | 
| wolffd@0 | 1508       noshrink=1; | 
| wolffd@0 | 1509     } | 
| wolffd@0 | 1510     if(learn_parm->epsilon_crit<epsilon_crit_org) | 
| wolffd@0 | 1511       learn_parm->epsilon_crit=epsilon_crit_org; | 
| wolffd@0 | 1512 | 
| wolffd@0 | 1513     if(verbosity>=2) { | 
| wolffd@0 | 1514       printf(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n", | 
| wolffd@0 | 1515 	     supvecnum,model->at_upper_bound,(*maxdiff)); | 
| wolffd@0 | 1516       fflush(stdout); | 
| wolffd@0 | 1517     } | 
| wolffd@0 | 1518     if(verbosity>=3) { | 
| wolffd@0 | 1519       printf("\n"); | 
| wolffd@0 | 1520     } | 
| wolffd@0 | 1521 | 
| wolffd@0 | 1522     if((!retrain) && (transduction)) { | 
| wolffd@0 | 1523       for(i=0;(i<totdoc);i++) { | 
| wolffd@0 | 1524 	shrink_state->active[i]=1; | 
| wolffd@0 | 1525       } | 
| wolffd@0 | 1526       activenum=compute_index(shrink_state->active,totdoc,active2dnum); | 
| wolffd@0 | 1527       inactivenum=0; | 
| wolffd@0 | 1528       if(verbosity==1) printf("done\n"); | 
| wolffd@0 | 1529       retrain=incorporate_unlabeled_examples(model,label,inconsistent, | 
| wolffd@0 | 1530 					     unlabeled,a,lin,totdoc, | 
| wolffd@0 | 1531 					     selcrit,selexam,key, | 
| wolffd@0 | 1532 					     transductcycle,kernel_parm, | 
| wolffd@0 | 1533 					     learn_parm); | 
| wolffd@0 | 1534       epsilon_crit_org=learn_parm->epsilon_crit; | 
| wolffd@0 | 1535       if(kernel_parm->kernel_type == LINEAR) | 
| wolffd@0 | 1536 	learn_parm->epsilon_crit=1; | 
| wolffd@0 | 1537       transductcycle++; | 
| wolffd@0 | 1538       /* reset watchdog */ | 
| wolffd@0 | 1539       bestmaxdiff=(*maxdiff); | 
| wolffd@0 | 1540       bestmaxdiffiter=iteration; | 
| wolffd@0 | 1541     } | 
| wolffd@0 | 1542     else if(((iteration % 10) == 0) && (!noshrink)) { | 
| wolffd@0 | 1543       activenum=shrink_problem(docs,learn_parm,shrink_state,kernel_parm, | 
| wolffd@0 | 1544 			       active2dnum,last_suboptimal_at,iteration,totdoc, | 
| wolffd@0 | 1545 			       maxl((long)(activenum/10), | 
| wolffd@0 | 1546 				    maxl((long)(totdoc/500),100)), | 
| wolffd@0 | 1547 			       a,inconsistent); | 
| wolffd@0 | 1548       inactivenum=totdoc-activenum; | 
| wolffd@0 | 1549       if((kernel_cache) | 
| wolffd@0 | 1550 	 && (supvecnum>kernel_cache->max_elems) | 
| wolffd@0 | 1551 	 && ((kernel_cache->activenum-activenum)>maxl((long)(activenum/10),500))) { | 
| wolffd@0 | 1552 	kernel_cache_shrink(kernel_cache,totdoc, | 
| wolffd@0 | 1553 			    minl((kernel_cache->activenum-activenum), | 
| wolffd@0 | 1554 				 (kernel_cache->activenum-supvecnum)), | 
| wolffd@0 | 1555 			    shrink_state->active); | 
| wolffd@0 | 1556       } | 
| wolffd@0 | 1557     } | 
| wolffd@0 | 1558 | 
| wolffd@0 | 1559     if((!retrain) && learn_parm->remove_inconsistent) { | 
| wolffd@0 | 1560       if(verbosity>=1) { | 
| wolffd@0 | 1561 	printf(" Moving training errors to inconsistent examples..."); | 
| wolffd@0 | 1562 	fflush(stdout); | 
| wolffd@0 | 1563       } | 
| wolffd@0 | 1564       if(learn_parm->remove_inconsistent == 1) { | 
| wolffd@0 | 1565 	retrain=identify_inconsistent(a,label,unlabeled,totdoc,learn_parm, | 
| wolffd@0 | 1566 				      &inconsistentnum,inconsistent); | 
| wolffd@0 | 1567       } | 
| wolffd@0 | 1568       else if(learn_parm->remove_inconsistent == 2) { | 
| wolffd@0 | 1569 	retrain=identify_misclassified(lin,label,unlabeled,totdoc, | 
| wolffd@0 | 1570 				       model,&inconsistentnum,inconsistent); | 
| wolffd@0 | 1571       } | 
| wolffd@0 | 1572       else if(learn_parm->remove_inconsistent == 3) { | 
| wolffd@0 | 1573 	retrain=identify_one_misclassified(lin,label,unlabeled,totdoc, | 
| wolffd@0 | 1574 				   model,&inconsistentnum,inconsistent); | 
| wolffd@0 | 1575       } | 
| wolffd@0 | 1576       if(retrain) { | 
| wolffd@0 | 1577 	if(kernel_parm->kernel_type == LINEAR) { /* reinit shrinking */ | 
| wolffd@0 | 1578 	  learn_parm->epsilon_crit=2.0; | 
| wolffd@0 | 1579 	} | 
| wolffd@0 | 1580       } | 
| wolffd@0 | 1581       if(verbosity>=1) { | 
| wolffd@0 | 1582 	printf("done.\n"); | 
| wolffd@0 | 1583 	if(retrain) { | 
| wolffd@0 | 1584 	  printf(" Now %ld inconsistent examples.\n",inconsistentnum); | 
| wolffd@0 | 1585 	} | 
| wolffd@0 | 1586       } | 
| wolffd@0 | 1587     } | 
| wolffd@0 | 1588   } /* end of loop */ | 
| wolffd@0 | 1589 | 
| wolffd@0 | 1590   free(chosen); | 
| wolffd@0 | 1591   free(last_suboptimal_at); | 
| wolffd@0 | 1592   free(key); | 
| wolffd@0 | 1593   free(selcrit); | 
| wolffd@0 | 1594   free(selexam); | 
| wolffd@0 | 1595   free(a_old); | 
| wolffd@0 | 1596   free(aicache); | 
| wolffd@0 | 1597   free(working2dnum); | 
| wolffd@0 | 1598   free(active2dnum); | 
| wolffd@0 | 1599   free(qp.opt_ce); | 
| wolffd@0 | 1600   free(qp.opt_ce0); | 
| wolffd@0 | 1601   free(qp.opt_g); | 
| wolffd@0 | 1602   free(qp.opt_g0); | 
| wolffd@0 | 1603   free(qp.opt_xinit); | 
| wolffd@0 | 1604   free(qp.opt_low); | 
| wolffd@0 | 1605   free(qp.opt_up); | 
| wolffd@0 | 1606   free(weights); | 
| wolffd@0 | 1607 | 
| wolffd@0 | 1608   learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */ | 
| wolffd@0 | 1609   model->maxdiff=(*maxdiff); | 
| wolffd@0 | 1610 | 
| wolffd@0 | 1611   return(iteration); | 
| wolffd@0 | 1612 } | 
| wolffd@0 | 1613 | 
| wolffd@0 | 1614 long optimize_to_convergence_sharedslack(DOC **docs, long int *label, | 
| wolffd@0 | 1615 			     long int totdoc, | 
| wolffd@0 | 1616 			     long int totwords, LEARN_PARM *learn_parm, | 
| wolffd@0 | 1617 			     KERNEL_PARM *kernel_parm, | 
| wolffd@0 | 1618 			     KERNEL_CACHE *kernel_cache, | 
| wolffd@0 | 1619 			     SHRINK_STATE *shrink_state, MODEL *model, | 
| wolffd@0 | 1620 			     double *a, double *lin, double *c, | 
| wolffd@0 | 1621 			     TIMING *timing_profile, double *maxdiff) | 
| wolffd@0 | 1622      /* docs: Training vectors (x-part) */ | 
| wolffd@0 | 1623      /* label: Training labels/value (y-part, zero if test example for | 
| wolffd@0 | 1624 			      transduction) */ | 
| wolffd@0 | 1625      /* totdoc: Number of examples in docs/label */ | 
| wolffd@0 | 1626      /* totwords: Number of features (i.e. highest feature index) */ | 
| wolffd@0 | 1627      /* learn_parm: Learning paramenters */ | 
| wolffd@0 | 1628      /* kernel_parm: Kernel paramenters */ | 
| wolffd@0 | 1629      /* kernel_cache: Initialized/partly filled Cache, if using a kernel. | 
| wolffd@0 | 1630                       NULL if linear. */ | 
| wolffd@0 | 1631      /* shrink_state: State of active variables */ | 
| wolffd@0 | 1632      /* model: Returns learning result */ | 
| wolffd@0 | 1633      /* a: alphas */ | 
| wolffd@0 | 1634      /* lin: linear component of gradient */ | 
| wolffd@0 | 1635      /* c: right hand side of inequalities (margin) */ | 
| wolffd@0 | 1636      /* maxdiff: returns maximum violation of KT-conditions */ | 
| wolffd@0 | 1637 { | 
| wolffd@0 | 1638   long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink,*unlabeled; | 
| wolffd@0 | 1639   long *inconsistent,choosenum,already_chosen=0,iteration; | 
| wolffd@0 | 1640   long misclassified,supvecnum=0,*active2dnum,inactivenum; | 
| wolffd@0 | 1641   long *working2dnum,*selexam,*ignore; | 
| wolffd@0 | 1642   long activenum,retrain,maxslackid,slackset,jointstep; | 
| wolffd@0 | 1643   double criterion,eq_target; | 
| wolffd@0 | 1644   double *a_old,*alphaslack; | 
| wolffd@0 | 1645   long t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */ | 
| wolffd@0 | 1646   double epsilon_crit_org,maxsharedviol; | 
| wolffd@0 | 1647   double bestmaxdiff; | 
| wolffd@0 | 1648   long   bestmaxdiffiter,terminate; | 
| wolffd@0 | 1649 | 
| wolffd@0 | 1650   double *selcrit;  /* buffer for sorting */ | 
| wolffd@0 | 1651   CFLOAT *aicache;  /* buffer to keep one row of hessian */ | 
| wolffd@0 | 1652   double *weights;  /* buffer for weight vector in linear case */ | 
| wolffd@0 | 1653   QP qp;            /* buffer for one quadratic program */ | 
| wolffd@0 | 1654   double *slack;    /* vector of slack variables for optimization with | 
| wolffd@0 | 1655 		       shared slacks */ | 
| wolffd@0 | 1656 | 
| wolffd@0 | 1657   epsilon_crit_org=learn_parm->epsilon_crit; /* save org */ | 
| wolffd@0 | 1658   if(kernel_parm->kernel_type == LINEAR) { | 
| wolffd@0 | 1659     learn_parm->epsilon_crit=2.0; | 
| wolffd@0 | 1660     kernel_cache=NULL;   /* caching makes no sense for linear kernel */ | 
| wolffd@0 | 1661   } | 
| wolffd@0 | 1662   learn_parm->epsilon_shrink=2; | 
| wolffd@0 | 1663   (*maxdiff)=1; | 
| wolffd@0 | 1664 | 
| wolffd@0 | 1665   learn_parm->totwords=totwords; | 
| wolffd@0 | 1666 | 
| wolffd@0 | 1667   chosen = (long *)my_malloc(sizeof(long)*totdoc); | 
| wolffd@0 | 1668   unlabeled = (long *)my_malloc(sizeof(long)*totdoc); | 
| wolffd@0 | 1669   inconsistent = (long *)my_malloc(sizeof(long)*totdoc); | 
| wolffd@0 | 1670   ignore = (long *)my_malloc(sizeof(long)*totdoc); | 
| wolffd@0 | 1671   key = (long *)my_malloc(sizeof(long)*(totdoc+11)); | 
| wolffd@0 | 1672   selcrit = (double *)my_malloc(sizeof(double)*totdoc); | 
| wolffd@0 | 1673   selexam = (long *)my_malloc(sizeof(long)*totdoc); | 
| wolffd@0 | 1674   a_old = (double *)my_malloc(sizeof(double)*totdoc); | 
| wolffd@0 | 1675   aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc); | 
| wolffd@0 | 1676   working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); | 
| wolffd@0 | 1677   active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); | 
| wolffd@0 | 1678   qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); | 
| wolffd@0 | 1679   qp.opt_ce0 = (double *)my_malloc(sizeof(double)); | 
| wolffd@0 | 1680   qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize | 
| wolffd@0 | 1681 				 *learn_parm->svm_maxqpsize); | 
| wolffd@0 | 1682   qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); | 
| wolffd@0 | 1683   qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); | 
| wolffd@0 | 1684   qp.opt_low=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); | 
| wolffd@0 | 1685   qp.opt_up=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); | 
| wolffd@0 | 1686   weights=(double *)my_malloc(sizeof(double)*(totwords+1)); | 
| wolffd@0 | 1687   maxslackid=0; | 
| wolffd@0 | 1688   for(i=0;i<totdoc;i++) {    /* determine size of slack array */ | 
| wolffd@0 | 1689     if(maxslackid<docs[i]->slackid) | 
| wolffd@0 | 1690       maxslackid=docs[i]->slackid; | 
| wolffd@0 | 1691   } | 
| wolffd@0 | 1692   slack=(double *)my_malloc(sizeof(double)*(maxslackid+1)); | 
| wolffd@0 | 1693   alphaslack=(double *)my_malloc(sizeof(double)*(maxslackid+1)); | 
| wolffd@0 | 1694   last_suboptimal_at = (long *)my_malloc(sizeof(long)*(maxslackid+1)); | 
| wolffd@0 | 1695   for(i=0;i<=maxslackid;i++) {    /* init shared slacks */ | 
| wolffd@0 | 1696     slack[i]=0; | 
| wolffd@0 | 1697     alphaslack[i]=0; | 
| wolffd@0 | 1698     last_suboptimal_at[i]=1; | 
| wolffd@0 | 1699   } | 
| wolffd@0 | 1700 | 
| wolffd@0 | 1701   choosenum=0; | 
| wolffd@0 | 1702   retrain=1; | 
| wolffd@0 | 1703   iteration=1; | 
| wolffd@0 | 1704   bestmaxdiffiter=1; | 
| wolffd@0 | 1705   bestmaxdiff=999999999; | 
| wolffd@0 | 1706   terminate=0; | 
| wolffd@0 | 1707 | 
| wolffd@0 | 1708   if(kernel_cache) { | 
| wolffd@0 | 1709     kernel_cache->time=iteration;  /* for lru cache */ | 
| wolffd@0 | 1710     kernel_cache_reset_lru(kernel_cache); | 
| wolffd@0 | 1711   } | 
| wolffd@0 | 1712 | 
| wolffd@0 | 1713   for(i=0;i<totdoc;i++) {    /* various inits */ | 
| wolffd@0 | 1714     chosen[i]=0; | 
| wolffd@0 | 1715     unlabeled[i]=0; | 
| wolffd@0 | 1716     inconsistent[i]=0; | 
| wolffd@0 | 1717     ignore[i]=0; | 
| wolffd@0 | 1718     a_old[i]=a[i]; | 
| wolffd@0 | 1719   } | 
| wolffd@0 | 1720   activenum=compute_index(shrink_state->active,totdoc,active2dnum); | 
| wolffd@0 | 1721   inactivenum=totdoc-activenum; | 
| wolffd@0 | 1722   clear_index(working2dnum); | 
| wolffd@0 | 1723 | 
| wolffd@0 | 1724   /* call to init slack and alphaslack */ | 
| wolffd@0 | 1725   compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm, | 
| wolffd@0 | 1726 			slack,alphaslack); | 
| wolffd@0 | 1727 | 
| wolffd@0 | 1728                             /* repeat this loop until we have convergence */ | 
| wolffd@0 | 1729   for(;retrain && (!terminate);iteration++) { | 
| wolffd@0 | 1730 | 
| wolffd@0 | 1731     if(kernel_cache) | 
| wolffd@0 | 1732       kernel_cache->time=iteration;  /* for lru cache */ | 
| wolffd@0 | 1733     if(verbosity>=2) { | 
| wolffd@0 | 1734       printf( | 
| wolffd@0 | 1735 	"Iteration %ld: ",iteration); fflush(stdout); | 
| wolffd@0 | 1736     } | 
| wolffd@0 | 1737     else if(verbosity==1) { | 
| wolffd@0 | 1738       printf("."); fflush(stdout); | 
| wolffd@0 | 1739     } | 
| wolffd@0 | 1740 | 
| wolffd@0 | 1741     if(verbosity>=2) t0=get_runtime(); | 
| wolffd@0 | 1742     if(verbosity>=3) { | 
| wolffd@0 | 1743       printf("\nSelecting working set... "); fflush(stdout); | 
| wolffd@0 | 1744     } | 
| wolffd@0 | 1745 | 
| wolffd@0 | 1746     if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize) | 
| wolffd@0 | 1747       learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; | 
| wolffd@0 | 1748 | 
| wolffd@0 | 1749     /* select working set according to steepest gradient */ | 
| wolffd@0 | 1750     jointstep=0; | 
| wolffd@0 | 1751     eq_target=0; | 
| wolffd@0 | 1752     if(iteration % 101) { | 
| wolffd@0 | 1753       slackset=select_next_qp_slackset(docs,label,a,lin,slack,alphaslack,c, | 
| wolffd@0 | 1754 				       learn_parm,active2dnum,&maxsharedviol); | 
| wolffd@0 | 1755       if((iteration % 2) | 
| wolffd@0 | 1756 	 || (!slackset) || (maxsharedviol<learn_parm->epsilon_crit)){ | 
| wolffd@0 | 1757 	/* do a step with examples from different slack sets */ | 
| wolffd@0 | 1758 	if(verbosity >= 2) { | 
| wolffd@0 | 1759 	  printf("(i-step)"); fflush(stdout); | 
| wolffd@0 | 1760 	} | 
| wolffd@0 | 1761 	i=0; | 
| wolffd@0 | 1762 	for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear old part of working set */ | 
| wolffd@0 | 1763 	  if((chosen[j]>=(learn_parm->svm_maxqpsize/ | 
| wolffd@0 | 1764 			  minl(learn_parm->svm_maxqpsize, | 
| wolffd@0 | 1765 			       learn_parm->svm_newvarsinqp)))) { | 
| wolffd@0 | 1766 	    chosen[j]=0; | 
| wolffd@0 | 1767 	    choosenum--; | 
| wolffd@0 | 1768 	  } | 
| wolffd@0 | 1769 	  else { | 
| wolffd@0 | 1770 	    chosen[j]++; | 
| wolffd@0 | 1771 	    working2dnum[i++]=j; | 
| wolffd@0 | 1772 	  } | 
| wolffd@0 | 1773 	} | 
| wolffd@0 | 1774 	working2dnum[i]=-1; | 
| wolffd@0 | 1775 | 
| wolffd@0 | 1776 	already_chosen=0; | 
| wolffd@0 | 1777 	if((minl(learn_parm->svm_newvarsinqp, | 
| wolffd@0 | 1778 		 learn_parm->svm_maxqpsize-choosenum)>=4) | 
| wolffd@0 | 1779 	   && (kernel_parm->kernel_type != LINEAR)) { | 
| wolffd@0 | 1780 	  /* select part of the working set from cache */ | 
| wolffd@0 | 1781 	  already_chosen=select_next_qp_subproblem_grad( | 
| wolffd@0 | 1782 			      label,unlabeled,a,lin,c,totdoc, | 
| wolffd@0 | 1783 			      (long)(minl(learn_parm->svm_maxqpsize-choosenum, | 
| wolffd@0 | 1784 					  learn_parm->svm_newvarsinqp) | 
| wolffd@0 | 1785 				     /2), | 
| wolffd@0 | 1786 			      learn_parm,inconsistent,active2dnum, | 
| wolffd@0 | 1787 			      working2dnum,selcrit,selexam,kernel_cache, | 
| wolffd@0 | 1788 			      (long)1,key,chosen); | 
| wolffd@0 | 1789 	  choosenum+=already_chosen; | 
| wolffd@0 | 1790 	} | 
| wolffd@0 | 1791 	choosenum+=select_next_qp_subproblem_grad( | 
| wolffd@0 | 1792                               label,unlabeled,a,lin,c,totdoc, | 
| wolffd@0 | 1793                               minl(learn_parm->svm_maxqpsize-choosenum, | 
| wolffd@0 | 1794 				   learn_parm->svm_newvarsinqp-already_chosen), | 
| wolffd@0 | 1795                               learn_parm,inconsistent,active2dnum, | 
| wolffd@0 | 1796 			      working2dnum,selcrit,selexam,kernel_cache, | 
| wolffd@0 | 1797 			      (long)0,key,chosen); | 
| wolffd@0 | 1798       } | 
| wolffd@0 | 1799       else { /* do a step with all examples from same slack set */ | 
| wolffd@0 | 1800 	if(verbosity >= 2) { | 
| wolffd@0 | 1801 	  printf("(j-step on %ld)",slackset); fflush(stdout); | 
| wolffd@0 | 1802 	} | 
| wolffd@0 | 1803 	jointstep=1; | 
| wolffd@0 | 1804 	for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */ | 
| wolffd@0 | 1805 	    chosen[j]=0; | 
| wolffd@0 | 1806 	} | 
| wolffd@0 | 1807 	working2dnum[0]=-1; | 
| wolffd@0 | 1808 	eq_target=alphaslack[slackset]; | 
| wolffd@0 | 1809 	for(j=0;j<totdoc;j++) {                  /* mask all but slackset */ | 
| wolffd@0 | 1810 	  /* for(jj=0;(j=active2dnum[jj])>=0;jj++) { */ | 
| wolffd@0 | 1811 	  if(docs[j]->slackid != slackset) | 
| wolffd@0 | 1812 	    ignore[j]=1; | 
| wolffd@0 | 1813 	  else { | 
| wolffd@0 | 1814 	    ignore[j]=0; | 
| wolffd@0 | 1815 	    learn_parm->svm_cost[j]=learn_parm->svm_c; | 
| wolffd@0 | 1816 	    /* printf("Inslackset(%ld,%ld)",j,shrink_state->active[j]); */ | 
| wolffd@0 | 1817 	  } | 
| wolffd@0 | 1818 	} | 
| wolffd@0 | 1819 	learn_parm->biased_hyperplane=1; | 
| wolffd@0 | 1820 	choosenum=select_next_qp_subproblem_grad( | 
| wolffd@0 | 1821                               label,unlabeled,a,lin,c,totdoc, | 
| wolffd@0 | 1822                               learn_parm->svm_maxqpsize, | 
| wolffd@0 | 1823                               learn_parm,ignore,active2dnum, | 
| wolffd@0 | 1824 			      working2dnum,selcrit,selexam,kernel_cache, | 
| wolffd@0 | 1825 			      (long)0,key,chosen); | 
| wolffd@0 | 1826 	learn_parm->biased_hyperplane=0; | 
| wolffd@0 | 1827       } | 
| wolffd@0 | 1828     } | 
| wolffd@0 | 1829     else { /* once in a while, select a somewhat random working set | 
| wolffd@0 | 1830 	      to get unlocked of infinite loops due to numerical | 
| wolffd@0 | 1831 	      inaccuracies in the core qp-solver */ | 
| wolffd@0 | 1832       choosenum+=select_next_qp_subproblem_rand( | 
| wolffd@0 | 1833                               label,unlabeled,a,lin,c,totdoc, | 
| wolffd@0 | 1834                               minl(learn_parm->svm_maxqpsize-choosenum, | 
| wolffd@0 | 1835 				   learn_parm->svm_newvarsinqp), | 
| wolffd@0 | 1836                               learn_parm,inconsistent,active2dnum, | 
| wolffd@0 | 1837 			      working2dnum,selcrit,selexam,kernel_cache,key, | 
| wolffd@0 | 1838 			      chosen,iteration); | 
| wolffd@0 | 1839     } | 
| wolffd@0 | 1840 | 
| wolffd@0 | 1841     if(verbosity>=2) { | 
| wolffd@0 | 1842       printf(" %ld vectors chosen\n",choosenum); fflush(stdout); | 
| wolffd@0 | 1843     } | 
| wolffd@0 | 1844 | 
| wolffd@0 | 1845     if(verbosity>=2) t1=get_runtime(); | 
| wolffd@0 | 1846 | 
| wolffd@0 | 1847     if(kernel_cache) | 
| wolffd@0 | 1848       cache_multiple_kernel_rows(kernel_cache,docs,working2dnum, | 
| wolffd@0 | 1849 				 choosenum,kernel_parm); | 
| wolffd@0 | 1850 | 
| wolffd@0 | 1851     if(verbosity>=2) t2=get_runtime(); | 
| wolffd@0 | 1852     if(jointstep) learn_parm->biased_hyperplane=1; | 
| wolffd@0 | 1853     optimize_svm(docs,label,unlabeled,ignore,eq_target,chosen,active2dnum, | 
| wolffd@0 | 1854 		 model,totdoc,working2dnum,choosenum,a,lin,c,learn_parm, | 
| wolffd@0 | 1855 		 aicache,kernel_parm,&qp,&epsilon_crit_org); | 
| wolffd@0 | 1856     learn_parm->biased_hyperplane=0; | 
| wolffd@0 | 1857 | 
| wolffd@0 | 1858     for(jj=0;(i=working2dnum[jj])>=0;jj++)   /* recompute sums of alphas */ | 
| wolffd@0 | 1859       alphaslack[docs[i]->slackid]+=(a[i]-a_old[i]); | 
| wolffd@0 | 1860     for(jj=0;(i=working2dnum[jj])>=0;jj++) { /* reduce alpha to fulfill | 
| wolffd@0 | 1861 						constraints */ | 
| wolffd@0 | 1862       if(alphaslack[docs[i]->slackid] > learn_parm->svm_c) { | 
| wolffd@0 | 1863 	if(a[i] < (alphaslack[docs[i]->slackid]-learn_parm->svm_c)) { | 
| wolffd@0 | 1864 	  alphaslack[docs[i]->slackid]-=a[i]; | 
| wolffd@0 | 1865 	  a[i]=0; | 
| wolffd@0 | 1866 	} | 
| wolffd@0 | 1867 	else { | 
| wolffd@0 | 1868 	  a[i]-=(alphaslack[docs[i]->slackid]-learn_parm->svm_c); | 
| wolffd@0 | 1869 	  alphaslack[docs[i]->slackid]=learn_parm->svm_c; | 
| wolffd@0 | 1870 	} | 
| wolffd@0 | 1871       } | 
| wolffd@0 | 1872     } | 
| wolffd@0 | 1873     for(jj=0;(i=active2dnum[jj])>=0;jj++) | 
| wolffd@0 | 1874       learn_parm->svm_cost[i]=a[i]+(learn_parm->svm_c | 
| wolffd@0 | 1875 				    -alphaslack[docs[i]->slackid]); | 
| wolffd@0 | 1876 | 
| wolffd@0 | 1877     if(verbosity>=2) t3=get_runtime(); | 
| wolffd@0 | 1878     update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc, | 
| wolffd@0 | 1879 			    totwords,kernel_parm,kernel_cache,lin,aicache, | 
| wolffd@0 | 1880 			    weights); | 
| wolffd@0 | 1881     compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm, | 
| wolffd@0 | 1882 			  slack,alphaslack); | 
| wolffd@0 | 1883 | 
| wolffd@0 | 1884     if(verbosity>=2) t4=get_runtime(); | 
| wolffd@0 | 1885     supvecnum=calculate_svm_model(docs,label,unlabeled,lin,a,a_old,c, | 
| wolffd@0 | 1886 		                  learn_parm,working2dnum,active2dnum,model); | 
| wolffd@0 | 1887 | 
| wolffd@0 | 1888     if(verbosity>=2) t5=get_runtime(); | 
| wolffd@0 | 1889 | 
| wolffd@0 | 1890     /* The following computation of the objective function works only */ | 
| wolffd@0 | 1891     /* relative to the active variables */ | 
| wolffd@0 | 1892     if(verbosity>=3) { | 
| wolffd@0 | 1893       criterion=compute_objective_function(a,lin,c,learn_parm->eps,label, | 
| wolffd@0 | 1894 		                           active2dnum); | 
| wolffd@0 | 1895       printf("Objective function (over active variables): %.16f\n",criterion); | 
| wolffd@0 | 1896       fflush(stdout); | 
| wolffd@0 | 1897     } | 
| wolffd@0 | 1898 | 
| wolffd@0 | 1899     for(jj=0;(i=working2dnum[jj])>=0;jj++) { | 
| wolffd@0 | 1900       a_old[i]=a[i]; | 
| wolffd@0 | 1901     } | 
| wolffd@0 | 1902 | 
| wolffd@0 | 1903     retrain=check_optimality_sharedslack(docs,model,label,a,lin,c, | 
| wolffd@0 | 1904                              slack,alphaslack,totdoc,learn_parm, | 
| wolffd@0 | 1905 			     maxdiff,epsilon_crit_org,&misclassified, | 
| wolffd@0 | 1906 			     active2dnum,last_suboptimal_at, | 
| wolffd@0 | 1907 			     iteration,kernel_parm); | 
| wolffd@0 | 1908 | 
| wolffd@0 | 1909     if(verbosity>=2) { | 
| wolffd@0 | 1910       t6=get_runtime(); | 
| wolffd@0 | 1911       timing_profile->time_select+=t1-t0; | 
| wolffd@0 | 1912       timing_profile->time_kernel+=t2-t1; | 
| wolffd@0 | 1913       timing_profile->time_opti+=t3-t2; | 
| wolffd@0 | 1914       timing_profile->time_update+=t4-t3; | 
| wolffd@0 | 1915       timing_profile->time_model+=t5-t4; | 
| wolffd@0 | 1916       timing_profile->time_check+=t6-t5; | 
| wolffd@0 | 1917     } | 
| wolffd@0 | 1918 | 
| wolffd@0 | 1919     /* checking whether optimizer got stuck */ | 
| wolffd@0 | 1920     if((*maxdiff) < bestmaxdiff) { | 
| wolffd@0 | 1921       bestmaxdiff=(*maxdiff); | 
| wolffd@0 | 1922       bestmaxdiffiter=iteration; | 
| wolffd@0 | 1923     } | 
| wolffd@0 | 1924     if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) { | 
| wolffd@0 | 1925       /* long time no progress? */ | 
| wolffd@0 | 1926       terminate=1; | 
| wolffd@0 | 1927       retrain=0; | 
| wolffd@0 | 1928       if(verbosity>=1) | 
| wolffd@0 | 1929 	printf("\nWARNING: Relaxing KT-Conditions due to slow progress! Terminating!\n"); | 
| wolffd@0 | 1930     } | 
| wolffd@0 | 1931 | 
| wolffd@0 | 1932     noshrink=0; | 
| wolffd@0 | 1933 | 
| wolffd@0 | 1934     if((!retrain) && (inactivenum>0) | 
| wolffd@0 | 1935        && ((!learn_parm->skip_final_opt_check) | 
| wolffd@0 | 1936 	   || (kernel_parm->kernel_type == LINEAR))) { | 
| wolffd@0 | 1937       if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR)) | 
| wolffd@0 | 1938 	 || (verbosity>=2)) { | 
| wolffd@0 | 1939 	if(verbosity==1) { | 
| wolffd@0 | 1940 	  printf("\n"); | 
| wolffd@0 | 1941 	} | 
| wolffd@0 | 1942 	printf(" Checking optimality of inactive variables..."); | 
| wolffd@0 | 1943 	fflush(stdout); | 
| wolffd@0 | 1944       } | 
| wolffd@0 | 1945       t1=get_runtime(); | 
| wolffd@0 | 1946       reactivate_inactive_examples(label,unlabeled,a,shrink_state,lin,c,totdoc, | 
| wolffd@0 | 1947 				   totwords,iteration,learn_parm,inconsistent, | 
| wolffd@0 | 1948 				   docs,kernel_parm,kernel_cache,model,aicache, | 
| wolffd@0 | 1949 				   weights,maxdiff); | 
| wolffd@0 | 1950       /* Update to new active variables. */ | 
| wolffd@0 | 1951       activenum=compute_index(shrink_state->active,totdoc,active2dnum); | 
| wolffd@0 | 1952       inactivenum=totdoc-activenum; | 
| wolffd@0 | 1953       /* check optimality, since check in reactivate does not work for | 
| wolffd@0 | 1954 	 sharedslacks */ | 
| wolffd@0 | 1955       retrain=check_optimality_sharedslack(docs,model,label,a,lin,c, | 
| wolffd@0 | 1956 			     slack,alphaslack,totdoc,learn_parm, | 
| wolffd@0 | 1957 			     maxdiff,epsilon_crit_org,&misclassified, | 
| wolffd@0 | 1958 			     active2dnum,last_suboptimal_at, | 
| wolffd@0 | 1959 			     iteration,kernel_parm); | 
| wolffd@0 | 1960 | 
| wolffd@0 | 1961       /* reset watchdog */ | 
| wolffd@0 | 1962       bestmaxdiff=(*maxdiff); | 
| wolffd@0 | 1963       bestmaxdiffiter=iteration; | 
| wolffd@0 | 1964       /* termination criterion */ | 
| wolffd@0 | 1965       noshrink=1; | 
| wolffd@0 | 1966       retrain=0; | 
| wolffd@0 | 1967       if((*maxdiff) > learn_parm->epsilon_crit) | 
| wolffd@0 | 1968 	retrain=1; | 
| wolffd@0 | 1969       timing_profile->time_shrink+=get_runtime()-t1; | 
| wolffd@0 | 1970       if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR)) | 
| wolffd@0 | 1971 	 || (verbosity>=2)) { | 
| wolffd@0 | 1972 	printf("done.\n");  fflush(stdout); | 
| wolffd@0 | 1973         printf(" Number of inactive variables = %ld\n",inactivenum); | 
| wolffd@0 | 1974       } | 
| wolffd@0 | 1975     } | 
| wolffd@0 | 1976 | 
| wolffd@0 | 1977     if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff))) | 
| wolffd@0 | 1978       learn_parm->epsilon_crit=(*maxdiff); | 
| wolffd@0 | 1979     if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) { | 
| wolffd@0 | 1980       learn_parm->epsilon_crit/=2.0; | 
| wolffd@0 | 1981       retrain=1; | 
| wolffd@0 | 1982       noshrink=1; | 
| wolffd@0 | 1983     } | 
| wolffd@0 | 1984     if(learn_parm->epsilon_crit<epsilon_crit_org) | 
| wolffd@0 | 1985       learn_parm->epsilon_crit=epsilon_crit_org; | 
| wolffd@0 | 1986 | 
| wolffd@0 | 1987     if(verbosity>=2) { | 
| wolffd@0 | 1988       printf(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n", | 
| wolffd@0 | 1989 	     supvecnum,model->at_upper_bound,(*maxdiff)); | 
| wolffd@0 | 1990       fflush(stdout); | 
| wolffd@0 | 1991     } | 
| wolffd@0 | 1992     if(verbosity>=3) { | 
| wolffd@0 | 1993       printf("\n"); | 
| wolffd@0 | 1994     } | 
| wolffd@0 | 1995 | 
| wolffd@0 | 1996     if(((iteration % 10) == 0) && (!noshrink)) { | 
| wolffd@0 | 1997       activenum=shrink_problem(docs,learn_parm,shrink_state, | 
| wolffd@0 | 1998 			       kernel_parm,active2dnum, | 
| wolffd@0 | 1999 			       last_suboptimal_at,iteration,totdoc, | 
| wolffd@0 | 2000 			       maxl((long)(activenum/10), | 
| wolffd@0 | 2001 				    maxl((long)(totdoc/500),100)), | 
| wolffd@0 | 2002 			       a,inconsistent); | 
| wolffd@0 | 2003       inactivenum=totdoc-activenum; | 
| wolffd@0 | 2004       if((kernel_cache) | 
| wolffd@0 | 2005 	 && (supvecnum>kernel_cache->max_elems) | 
| wolffd@0 | 2006 	 && ((kernel_cache->activenum-activenum)>maxl((long)(activenum/10),500))) { | 
| wolffd@0 | 2007 	kernel_cache_shrink(kernel_cache,totdoc, | 
| wolffd@0 | 2008 			    minl((kernel_cache->activenum-activenum), | 
| wolffd@0 | 2009 				 (kernel_cache->activenum-supvecnum)), | 
| wolffd@0 | 2010 			    shrink_state->active); | 
| wolffd@0 | 2011       } | 
| wolffd@0 | 2012     } | 
| wolffd@0 | 2013 | 
| wolffd@0 | 2014   } /* end of loop */ | 
| wolffd@0 | 2015 | 
| wolffd@0 | 2016 | 
| wolffd@0 | 2017   free(alphaslack); | 
| wolffd@0 | 2018   free(slack); | 
| wolffd@0 | 2019   free(chosen); | 
| wolffd@0 | 2020   free(unlabeled); | 
| wolffd@0 | 2021   free(inconsistent); | 
| wolffd@0 | 2022   free(ignore); | 
| wolffd@0 | 2023   free(last_suboptimal_at); | 
| wolffd@0 | 2024   free(key); | 
| wolffd@0 | 2025   free(selcrit); | 
| wolffd@0 | 2026   free(selexam); | 
| wolffd@0 | 2027   free(a_old); | 
| wolffd@0 | 2028   free(aicache); | 
| wolffd@0 | 2029   free(working2dnum); | 
| wolffd@0 | 2030   free(active2dnum); | 
| wolffd@0 | 2031   free(qp.opt_ce); | 
| wolffd@0 | 2032   free(qp.opt_ce0); | 
| wolffd@0 | 2033   free(qp.opt_g); | 
| wolffd@0 | 2034   free(qp.opt_g0); | 
| wolffd@0 | 2035   free(qp.opt_xinit); | 
| wolffd@0 | 2036   free(qp.opt_low); | 
| wolffd@0 | 2037   free(qp.opt_up); | 
| wolffd@0 | 2038   free(weights); | 
| wolffd@0 | 2039 | 
| wolffd@0 | 2040   learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */ | 
| wolffd@0 | 2041   model->maxdiff=(*maxdiff); | 
| wolffd@0 | 2042 | 
| wolffd@0 | 2043   return(iteration); | 
| wolffd@0 | 2044 } | 
| wolffd@0 | 2045 | 
| wolffd@0 | 2046 | 
| wolffd@0 | 2047 double compute_objective_function(double *a, double *lin, double *c, | 
| wolffd@0 | 2048 				  double eps, long int *label, | 
| wolffd@0 | 2049 				  long int *active2dnum) | 
| wolffd@0 | 2050      /* Return value of objective function. */ | 
| wolffd@0 | 2051      /* Works only relative to the active variables! */ | 
| wolffd@0 | 2052 { | 
| wolffd@0 | 2053   long i,ii; | 
| wolffd@0 | 2054   double criterion; | 
| wolffd@0 | 2055   /* calculate value of objective function */ | 
| wolffd@0 | 2056   criterion=0; | 
| wolffd@0 | 2057   for(ii=0;active2dnum[ii]>=0;ii++) { | 
| wolffd@0 | 2058     i=active2dnum[ii]; | 
| wolffd@0 | 2059     criterion=criterion+(eps-(double)label[i]*c[i])*a[i]+0.5*a[i]*label[i]*lin[i]; | 
| wolffd@0 | 2060   } | 
| wolffd@0 | 2061   return(criterion); | 
| wolffd@0 | 2062 } | 
| wolffd@0 | 2063 | 
| wolffd@0 | 2064 void clear_index(long int *index) | 
| wolffd@0 | 2065               /* initializes and empties index */ | 
| wolffd@0 | 2066 { | 
| wolffd@0 | 2067   index[0]=-1; | 
| wolffd@0 | 2068 } | 
| wolffd@0 | 2069 | 
| wolffd@0 | 2070 void add_to_index(long int *index, long int elem) | 
| wolffd@0 | 2071      /* initializes and empties index */ | 
| wolffd@0 | 2072 { | 
| wolffd@0 | 2073   register long i; | 
| wolffd@0 | 2074   for(i=0;index[i] != -1;i++); | 
| wolffd@0 | 2075   index[i]=elem; | 
| wolffd@0 | 2076   index[i+1]=-1; | 
| wolffd@0 | 2077 } | 
| wolffd@0 | 2078 | 
| wolffd@0 | 2079 long compute_index(long int *binfeature, long int range, long int *index) | 
| wolffd@0 | 2080      /* create an inverted index of binfeature */ | 
| wolffd@0 | 2081 { | 
| wolffd@0 | 2082   register long i,ii; | 
| wolffd@0 | 2083 | 
| wolffd@0 | 2084   ii=0; | 
| wolffd@0 | 2085   for(i=0;i<range;i++) { | 
| wolffd@0 | 2086     if(binfeature[i]) { | 
| wolffd@0 | 2087       index[ii]=i; | 
| wolffd@0 | 2088       ii++; | 
| wolffd@0 | 2089     } | 
| wolffd@0 | 2090   } | 
| wolffd@0 | 2091   for(i=0;i<4;i++) { | 
| wolffd@0 | 2092     index[ii+i]=-1; | 
| wolffd@0 | 2093   } | 
| wolffd@0 | 2094   return(ii); | 
| wolffd@0 | 2095 } | 
| wolffd@0 | 2096 | 
| wolffd@0 | 2097 | 
| wolffd@0 | 2098 void optimize_svm(DOC **docs, long int *label, long int *unlabeled, | 
| wolffd@0 | 2099 		  long int *exclude_from_eq_const, double eq_target, | 
| wolffd@0 | 2100 		  long int *chosen, long int *active2dnum, MODEL *model, | 
| wolffd@0 | 2101 		  long int totdoc, long int *working2dnum, long int varnum, | 
| wolffd@0 | 2102 		  double *a, double *lin, double *c, LEARN_PARM *learn_parm, | 
| wolffd@0 | 2103 		  CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp, | 
| wolffd@0 | 2104 		  double *epsilon_crit_target) | 
| wolffd@0 | 2105      /* Do optimization on the working set. */ | 
| wolffd@0 | 2106 { | 
| wolffd@0 | 2107     long i; | 
| wolffd@0 | 2108     double *a_v; | 
| wolffd@0 | 2109 | 
| wolffd@0 | 2110     compute_matrices_for_optimization(docs,label,unlabeled, | 
| wolffd@0 | 2111 				      exclude_from_eq_const,eq_target,chosen, | 
| wolffd@0 | 2112 				      active2dnum,working2dnum,model,a,lin,c, | 
| wolffd@0 | 2113 				      varnum,totdoc,learn_parm,aicache, | 
| wolffd@0 | 2114 				      kernel_parm,qp); | 
| wolffd@0 | 2115 | 
| wolffd@0 | 2116     if(verbosity>=3) { | 
| wolffd@0 | 2117       printf("Running optimizer..."); fflush(stdout); | 
| wolffd@0 | 2118     } | 
| wolffd@0 | 2119     /* call the qp-subsolver */ | 
| wolffd@0 | 2120     a_v=optimize_qp(qp,epsilon_crit_target, | 
| wolffd@0 | 2121 		    learn_parm->svm_maxqpsize, | 
| wolffd@0 | 2122 		    &(model->b),   /* in case the optimizer gives us */ | 
| wolffd@0 | 2123                                    /* the threshold for free. otherwise */ | 
| wolffd@0 | 2124                                    /* b is calculated in calculate_model. */ | 
| wolffd@0 | 2125 		    learn_parm); | 
| wolffd@0 | 2126     if(verbosity>=3) { | 
| wolffd@0 | 2127       printf("done\n"); | 
| wolffd@0 | 2128     } | 
| wolffd@0 | 2129 | 
| wolffd@0 | 2130     for(i=0;i<varnum;i++) { | 
| wolffd@0 | 2131       a[working2dnum[i]]=a_v[i]; | 
| wolffd@0 | 2132       /* | 
| wolffd@0 | 2133       if(a_v[i]<=(0+learn_parm->epsilon_a)) { | 
| wolffd@0 | 2134 	a[working2dnum[i]]=0; | 
| wolffd@0 | 2135       } | 
| wolffd@0 | 2136       else if(a_v[i]>=(learn_parm->svm_cost[working2dnum[i]]-learn_parm->epsilon_a)) { | 
| wolffd@0 | 2137 	a[working2dnum[i]]=learn_parm->svm_cost[working2dnum[i]]; | 
| wolffd@0 | 2138       } | 
| wolffd@0 | 2139       */ | 
| wolffd@0 | 2140     } | 
| wolffd@0 | 2141 } | 
| wolffd@0 | 2142 | 
| wolffd@0 | 2143 void compute_matrices_for_optimization(DOC **docs, long int *label, | 
| wolffd@0 | 2144           long int *unlabeled, long *exclude_from_eq_const, double eq_target, | 
| wolffd@0 | 2145 	  long int *chosen, long int *active2dnum, | 
| wolffd@0 | 2146           long int *key, MODEL *model, double *a, double *lin, double *c, | 
| wolffd@0 | 2147 	  long int varnum, long int totdoc, LEARN_PARM *learn_parm, | 
| wolffd@0 | 2148           CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp) | 
| wolffd@0 | 2149 { | 
| wolffd@0 | 2150   register long ki,kj,i,j; | 
| wolffd@0 | 2151   register double kernel_temp; | 
| wolffd@0 | 2152 | 
| wolffd@0 | 2153   if(verbosity>=3) { | 
| wolffd@0 | 2154     fprintf(stdout,"Computing qp-matrices (type %ld kernel [degree %ld, rbf_gamma %f, coef_lin %f, coef_const %f])...",kernel_parm->kernel_type,kernel_parm->poly_degree,kernel_parm->rbf_gamma,kernel_parm->coef_lin,kernel_parm->coef_const); | 
| wolffd@0 | 2155     fflush(stdout); | 
| wolffd@0 | 2156   } | 
| wolffd@0 | 2157 | 
| wolffd@0 | 2158   qp->opt_n=varnum; | 
| wolffd@0 | 2159   qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */ | 
| wolffd@0 | 2160   for(j=1;j<model->sv_num;j++) { /* start at 1 */ | 
| wolffd@0 | 2161     if((!chosen[(model->supvec[j])->docnum]) | 
| wolffd@0 | 2162        && (!exclude_from_eq_const[(model->supvec[j])->docnum])) { | 
| wolffd@0 | 2163       qp->opt_ce0[0]+=model->alpha[j]; | 
| wolffd@0 | 2164     } | 
| wolffd@0 | 2165   } | 
| wolffd@0 | 2166   if(learn_parm->biased_hyperplane) | 
| wolffd@0 | 2167     qp->opt_m=1; | 
| wolffd@0 | 2168   else | 
| wolffd@0 | 2169     qp->opt_m=0;  /* eq-constraint will be ignored */ | 
| wolffd@0 | 2170 | 
| wolffd@0 | 2171   /* init linear part of objective function */ | 
| wolffd@0 | 2172   for(i=0;i<varnum;i++) { | 
| wolffd@0 | 2173     qp->opt_g0[i]=lin[key[i]]; | 
| wolffd@0 | 2174   } | 
| wolffd@0 | 2175 | 
| wolffd@0 | 2176   for(i=0;i<varnum;i++) { | 
| wolffd@0 | 2177     ki=key[i]; | 
| wolffd@0 | 2178 | 
| wolffd@0 | 2179     /* Compute the matrix for equality constraints */ | 
| wolffd@0 | 2180     qp->opt_ce[i]=label[ki]; | 
| wolffd@0 | 2181     qp->opt_low[i]=0; | 
| wolffd@0 | 2182     qp->opt_up[i]=learn_parm->svm_cost[ki]; | 
| wolffd@0 | 2183 | 
| wolffd@0 | 2184     kernel_temp=(double)kernel(kernel_parm,docs[ki],docs[ki]); | 
| wolffd@0 | 2185     /* compute linear part of objective function */ | 
| wolffd@0 | 2186     qp->opt_g0[i]-=(kernel_temp*a[ki]*(double)label[ki]); | 
| wolffd@0 | 2187     /* compute quadratic part of objective function */ | 
| wolffd@0 | 2188     qp->opt_g[varnum*i+i]=kernel_temp; | 
| wolffd@0 | 2189     for(j=i+1;j<varnum;j++) { | 
| wolffd@0 | 2190       kj=key[j]; | 
| wolffd@0 | 2191       kernel_temp=(double)kernel(kernel_parm,docs[ki],docs[kj]); | 
| wolffd@0 | 2192       /* compute linear part of objective function */ | 
| wolffd@0 | 2193       qp->opt_g0[i]-=(kernel_temp*a[kj]*(double)label[kj]); | 
| wolffd@0 | 2194       qp->opt_g0[j]-=(kernel_temp*a[ki]*(double)label[ki]); | 
| wolffd@0 | 2195       /* compute quadratic part of objective function */ | 
| wolffd@0 | 2196       qp->opt_g[varnum*i+j]=(double)label[ki]*(double)label[kj]*kernel_temp; | 
| wolffd@0 | 2197       qp->opt_g[varnum*j+i]=(double)label[ki]*(double)label[kj]*kernel_temp; | 
| wolffd@0 | 2198     } | 
| wolffd@0 | 2199 | 
| wolffd@0 | 2200     if(verbosity>=3) { | 
| wolffd@0 | 2201       if(i % 20 == 0) { | 
| wolffd@0 | 2202 	fprintf(stdout,"%ld..",i); fflush(stdout); | 
| wolffd@0 | 2203       } | 
| wolffd@0 | 2204     } | 
| wolffd@0 | 2205   } | 
| wolffd@0 | 2206 | 
| wolffd@0 | 2207   for(i=0;i<varnum;i++) { | 
| wolffd@0 | 2208     /* assure starting at feasible point */ | 
| wolffd@0 | 2209     qp->opt_xinit[i]=a[key[i]]; | 
| wolffd@0 | 2210     /* set linear part of objective function */ | 
| wolffd@0 | 2211     qp->opt_g0[i]=(learn_parm->eps-(double)label[key[i]]*c[key[i]])+qp->opt_g0[i]*(double)label[key[i]]; | 
| wolffd@0 | 2212   } | 
| wolffd@0 | 2213 | 
| wolffd@0 | 2214   if(verbosity>=3) { | 
| wolffd@0 | 2215     fprintf(stdout,"done\n"); | 
| wolffd@0 | 2216   } | 
| wolffd@0 | 2217 } | 
| wolffd@0 | 2218 | 
| wolffd@0 | 2219 long calculate_svm_model(DOC **docs, long int *label, long int *unlabeled, | 
| wolffd@0 | 2220 			 double *lin, double *a, double *a_old, double *c, | 
| wolffd@0 | 2221 			 LEARN_PARM *learn_parm, long int *working2dnum, | 
| wolffd@0 | 2222 			 long int *active2dnum, MODEL *model) | 
| wolffd@0 | 2223      /* Compute decision function based on current values */ | 
| wolffd@0 | 2224      /* of alpha. */ | 
| wolffd@0 | 2225 { | 
| wolffd@0 | 2226   long i,ii,pos,b_calculated=0,first_low,first_high; | 
| wolffd@0 | 2227   double ex_c,b_temp,b_low,b_high; | 
| wolffd@0 | 2228 | 
| wolffd@0 | 2229   if(verbosity>=3) { | 
| wolffd@0 | 2230     printf("Calculating model..."); fflush(stdout); | 
| wolffd@0 | 2231   } | 
| wolffd@0 | 2232 | 
| wolffd@0 | 2233   if(!learn_parm->biased_hyperplane) { | 
| wolffd@0 | 2234     model->b=0; | 
| wolffd@0 | 2235     b_calculated=1; | 
| wolffd@0 | 2236   } | 
| wolffd@0 | 2237 | 
| wolffd@0 | 2238   for(ii=0;(i=working2dnum[ii])>=0;ii++) { | 
| wolffd@0 | 2239     if((a_old[i]>0) && (a[i]==0)) { /* remove from model */ | 
| wolffd@0 | 2240       pos=model->index[i]; | 
| wolffd@0 | 2241       model->index[i]=-1; | 
| wolffd@0 | 2242       (model->sv_num)--; | 
| wolffd@0 | 2243       model->supvec[pos]=model->supvec[model->sv_num]; | 
| wolffd@0 | 2244       model->alpha[pos]=model->alpha[model->sv_num]; | 
| wolffd@0 | 2245       model->index[(model->supvec[pos])->docnum]=pos; | 
| wolffd@0 | 2246     } | 
| wolffd@0 | 2247     else if((a_old[i]==0) && (a[i]>0)) { /* add to model */ | 
| wolffd@0 | 2248       model->supvec[model->sv_num]=docs[i]; | 
| wolffd@0 | 2249       model->alpha[model->sv_num]=a[i]*(double)label[i]; | 
| wolffd@0 | 2250       model->index[i]=model->sv_num; | 
| wolffd@0 | 2251       (model->sv_num)++; | 
| wolffd@0 | 2252     } | 
| wolffd@0 | 2253     else if(a_old[i]==a[i]) { /* nothing to do */ | 
| wolffd@0 | 2254     } | 
| wolffd@0 | 2255     else {  /* just update alpha */ | 
| wolffd@0 | 2256       model->alpha[model->index[i]]=a[i]*(double)label[i]; | 
| wolffd@0 | 2257     } | 
| wolffd@0 | 2258 | 
| wolffd@0 | 2259     ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; | 
| wolffd@0 | 2260     if((a_old[i]>=ex_c) && (a[i]<ex_c)) { | 
| wolffd@0 | 2261       (model->at_upper_bound)--; | 
| wolffd@0 | 2262     } | 
| wolffd@0 | 2263     else if((a_old[i]<ex_c) && (a[i]>=ex_c)) { | 
| wolffd@0 | 2264       (model->at_upper_bound)++; | 
| wolffd@0 | 2265     } | 
| wolffd@0 | 2266 | 
| wolffd@0 | 2267     if((!b_calculated) | 
| wolffd@0 | 2268        && (a[i]>learn_parm->epsilon_a) && (a[i]<ex_c)) {   /* calculate b */ | 
| wolffd@0 | 2269      	model->b=((double)label[i]*learn_parm->eps-c[i]+lin[i]); | 
| wolffd@0 | 2270 	/* model->b=(-(double)label[i]+lin[i]); */ | 
| wolffd@0 | 2271 	b_calculated=1; | 
| wolffd@0 | 2272     } | 
| wolffd@0 | 2273   } | 
| wolffd@0 | 2274 | 
| wolffd@0 | 2275   /* No alpha in the working set not at bounds, so b was not | 
| wolffd@0 | 2276      calculated in the usual way. The following handles this special | 
| wolffd@0 | 2277      case. */ | 
| wolffd@0 | 2278   if(learn_parm->biased_hyperplane | 
| wolffd@0 | 2279      && (!b_calculated) | 
| wolffd@0 | 2280      && (model->sv_num-1 == model->at_upper_bound)) { | 
| wolffd@0 | 2281     first_low=1; | 
| wolffd@0 | 2282     first_high=1; | 
| wolffd@0 | 2283     b_low=0; | 
| wolffd@0 | 2284     b_high=0; | 
| wolffd@0 | 2285     for(ii=0;(i=active2dnum[ii])>=0;ii++) { | 
| wolffd@0 | 2286       ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; | 
| wolffd@0 | 2287       if(a[i]<ex_c) { | 
| wolffd@0 | 2288 	if(label[i]>0)  { | 
| wolffd@0 | 2289 	  b_temp=-(learn_parm->eps-c[i]+lin[i]); | 
| wolffd@0 | 2290 	  if((b_temp>b_low) || (first_low)) { | 
| wolffd@0 | 2291 	    b_low=b_temp; | 
| wolffd@0 | 2292 	    first_low=0; | 
| wolffd@0 | 2293 	  } | 
| wolffd@0 | 2294 	} | 
| wolffd@0 | 2295 	else { | 
| wolffd@0 | 2296 	  b_temp=-(-learn_parm->eps-c[i]+lin[i]); | 
| wolffd@0 | 2297 	  if((b_temp<b_high) || (first_high)) { | 
| wolffd@0 | 2298 	    b_high=b_temp; | 
| wolffd@0 | 2299 	    first_high=0; | 
| wolffd@0 | 2300 	  } | 
| wolffd@0 | 2301 	} | 
| wolffd@0 | 2302       } | 
| wolffd@0 | 2303       else { | 
| wolffd@0 | 2304 	if(label[i]<0)  { | 
| wolffd@0 | 2305 	  b_temp=-(-learn_parm->eps-c[i]+lin[i]); | 
| wolffd@0 | 2306 	  if((b_temp>b_low) || (first_low)) { | 
| wolffd@0 | 2307 	    b_low=b_temp; | 
| wolffd@0 | 2308 	    first_low=0; | 
| wolffd@0 | 2309 	  } | 
| wolffd@0 | 2310 	} | 
| wolffd@0 | 2311 	else { | 
| wolffd@0 | 2312 	  b_temp=-(learn_parm->eps-c[i]+lin[i]); | 
| wolffd@0 | 2313 	  if((b_temp<b_high) || (first_high)) { | 
| wolffd@0 | 2314 	    b_high=b_temp; | 
| wolffd@0 | 2315 	    first_high=0; | 
| wolffd@0 | 2316 	  } | 
| wolffd@0 | 2317 	} | 
| wolffd@0 | 2318       } | 
| wolffd@0 | 2319     } | 
| wolffd@0 | 2320     if(first_high) { | 
| wolffd@0 | 2321       model->b=-b_low; | 
| wolffd@0 | 2322     } | 
| wolffd@0 | 2323     else if(first_low) { | 
| wolffd@0 | 2324       model->b=-b_high; | 
| wolffd@0 | 2325     } | 
| wolffd@0 | 2326     else { | 
| wolffd@0 | 2327       model->b=-(b_high+b_low)/2.0;  /* select b as the middle of range */ | 
| wolffd@0 | 2328       /* printf("\nb_low=%f, b_high=%f,b=%f\n",b_low,b_high,model->b); */ | 
| wolffd@0 | 2329     } | 
| wolffd@0 | 2330   } | 
| wolffd@0 | 2331 | 
| wolffd@0 | 2332   if(verbosity>=3) { | 
| wolffd@0 | 2333     printf("done\n"); fflush(stdout); | 
| wolffd@0 | 2334   } | 
| wolffd@0 | 2335 | 
| wolffd@0 | 2336   return(model->sv_num-1); /* have to substract one, since element 0 is empty*/ | 
| wolffd@0 | 2337 } | 
| wolffd@0 | 2338 | 
| wolffd@0 | 2339 long check_optimality(MODEL *model, long int *label, long int *unlabeled, | 
| wolffd@0 | 2340 		      double *a, double *lin, double *c, long int totdoc, | 
| wolffd@0 | 2341 		      LEARN_PARM *learn_parm, double *maxdiff, | 
| wolffd@0 | 2342 		      double epsilon_crit_org, long int *misclassified, | 
| wolffd@0 | 2343 		      long int *inconsistent, long int *active2dnum, | 
| wolffd@0 | 2344 		      long int *last_suboptimal_at, | 
| wolffd@0 | 2345 		      long int iteration, KERNEL_PARM *kernel_parm) | 
| wolffd@0 | 2346      /* Check KT-conditions */ | 
| wolffd@0 | 2347 { | 
| wolffd@0 | 2348   long i,ii,retrain; | 
| wolffd@0 | 2349   double dist,ex_c,target; | 
| wolffd@0 | 2350 | 
| wolffd@0 | 2351   if(kernel_parm->kernel_type == LINEAR) {  /* be optimistic */ | 
| wolffd@0 | 2352     learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org; | 
| wolffd@0 | 2353   } | 
| wolffd@0 | 2354   else {  /* be conservative */ | 
| wolffd@0 | 2355     learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3; | 
| wolffd@0 | 2356   } | 
| wolffd@0 | 2357   retrain=0; | 
| wolffd@0 | 2358   (*maxdiff)=0; | 
| wolffd@0 | 2359   (*misclassified)=0; | 
| wolffd@0 | 2360   for(ii=0;(i=active2dnum[ii])>=0;ii++) { | 
| wolffd@0 | 2361     if((!inconsistent[i]) && label[i]) { | 
| wolffd@0 | 2362       dist=(lin[i]-model->b)*(double)label[i];/* 'distance' from | 
| wolffd@0 | 2363 						 hyperplane*/ | 
| wolffd@0 | 2364       target=-(learn_parm->eps-(double)label[i]*c[i]); | 
| wolffd@0 | 2365       ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; | 
| wolffd@0 | 2366       if(dist <= 0) { | 
| wolffd@0 | 2367 	(*misclassified)++;  /* does not work due to deactivation of var */ | 
| wolffd@0 | 2368       } | 
| wolffd@0 | 2369       if((a[i]>learn_parm->epsilon_a) && (dist > target)) { | 
| wolffd@0 | 2370 	if((dist-target)>(*maxdiff))  /* largest violation */ | 
| wolffd@0 | 2371 	  (*maxdiff)=dist-target; | 
| wolffd@0 | 2372       } | 
| wolffd@0 | 2373       else if((a[i]<ex_c) && (dist < target)) { | 
| wolffd@0 | 2374 	if((target-dist)>(*maxdiff))  /* largest violation */ | 
| wolffd@0 | 2375 	  (*maxdiff)=target-dist; | 
| wolffd@0 | 2376       } | 
| wolffd@0 | 2377       /* Count how long a variable was at lower/upper bound (and optimal).*/ | 
| wolffd@0 | 2378       /* Variables, which were at the bound and optimal for a long */ | 
| wolffd@0 | 2379       /* time are unlikely to become support vectors. In case our */ | 
| wolffd@0 | 2380       /* cache is filled up, those variables are excluded to save */ | 
| wolffd@0 | 2381       /* kernel evaluations. (See chapter 'Shrinking').*/ | 
| wolffd@0 | 2382       if((a[i]>(learn_parm->epsilon_a)) | 
| wolffd@0 | 2383 	 && (a[i]<ex_c)) { | 
| wolffd@0 | 2384 	last_suboptimal_at[i]=iteration;         /* not at bound */ | 
| wolffd@0 | 2385       } | 
| wolffd@0 | 2386       else if((a[i]<=(learn_parm->epsilon_a)) | 
| wolffd@0 | 2387 	      && (dist < (target+learn_parm->epsilon_shrink))) { | 
| wolffd@0 | 2388 	last_suboptimal_at[i]=iteration;         /* not likely optimal */ | 
| wolffd@0 | 2389       } | 
| wolffd@0 | 2390       else if((a[i]>=ex_c) | 
| wolffd@0 | 2391 	      && (dist > (target-learn_parm->epsilon_shrink)))  { | 
| wolffd@0 | 2392 	last_suboptimal_at[i]=iteration;         /* not likely optimal */ | 
| wolffd@0 | 2393       } | 
| wolffd@0 | 2394     } | 
| wolffd@0 | 2395   } | 
| wolffd@0 | 2396   /* termination criterion */ | 
| wolffd@0 | 2397   if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) { | 
| wolffd@0 | 2398     retrain=1; | 
| wolffd@0 | 2399   } | 
| wolffd@0 | 2400   return(retrain); | 
| wolffd@0 | 2401 } | 
| wolffd@0 | 2402 | 
| wolffd@0 | 2403 long check_optimality_sharedslack(DOC **docs, MODEL *model, long int *label, | 
| wolffd@0 | 2404 		      double *a, double *lin, double *c, double *slack, | 
| wolffd@0 | 2405 		      double *alphaslack, | 
| wolffd@0 | 2406 		      long int totdoc, | 
| wolffd@0 | 2407 		      LEARN_PARM *learn_parm, double *maxdiff, | 
| wolffd@0 | 2408 		      double epsilon_crit_org, long int *misclassified, | 
| wolffd@0 | 2409 		      long int *active2dnum, | 
| wolffd@0 | 2410 		      long int *last_suboptimal_at, | 
| wolffd@0 | 2411 		      long int iteration, KERNEL_PARM *kernel_parm) | 
| wolffd@0 | 2412      /* Check KT-conditions */ | 
| wolffd@0 | 2413 { | 
| wolffd@0 | 2414   long i,ii,retrain; | 
| wolffd@0 | 2415   double dist,ex_c=0,target; | 
| wolffd@0 | 2416 | 
| wolffd@0 | 2417   if(kernel_parm->kernel_type == LINEAR) {  /* be optimistic */ | 
| wolffd@0 | 2418     learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org; | 
| wolffd@0 | 2419   } | 
| wolffd@0 | 2420   else {  /* be conservative */ | 
| wolffd@0 | 2421     learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3; | 
| wolffd@0 | 2422   } | 
| wolffd@0 | 2423 | 
| wolffd@0 | 2424   retrain=0; | 
| wolffd@0 | 2425   (*maxdiff)=0; | 
| wolffd@0 | 2426   (*misclassified)=0; | 
| wolffd@0 | 2427   for(ii=0;(i=active2dnum[ii])>=0;ii++) { | 
| wolffd@0 | 2428     /* 'distance' from hyperplane*/ | 
| wolffd@0 | 2429     dist=(lin[i]-model->b)*(double)label[i]+slack[docs[i]->slackid]; | 
| wolffd@0 | 2430     target=-(learn_parm->eps-(double)label[i]*c[i]); | 
| wolffd@0 | 2431     ex_c=learn_parm->svm_c-learn_parm->epsilon_a; | 
| wolffd@0 | 2432     if((a[i]>learn_parm->epsilon_a) && (dist > target)) { | 
| wolffd@0 | 2433       if((dist-target)>(*maxdiff)) {  /* largest violation */ | 
| wolffd@0 | 2434 	(*maxdiff)=dist-target; | 
| wolffd@0 | 2435 	if(verbosity>=5) printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, alphaslack=%f\n",docs[i]->slackid,dist,target,slack[docs[i]->slackid],a[i],alphaslack[docs[i]->slackid]); | 
| wolffd@0 | 2436 	if(verbosity>=5) printf(" (single %f)\n",(*maxdiff)); | 
| wolffd@0 | 2437       } | 
| wolffd@0 | 2438     } | 
| wolffd@0 | 2439     if((alphaslack[docs[i]->slackid]<ex_c) && (slack[docs[i]->slackid]>0)) { | 
| wolffd@0 | 2440       if((slack[docs[i]->slackid])>(*maxdiff)) { /* largest violation */ | 
| wolffd@0 | 2441 	(*maxdiff)=slack[docs[i]->slackid]; | 
| wolffd@0 | 2442 	if(verbosity>=5) printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, alphaslack=%f\n",docs[i]->slackid,dist,target,slack[docs[i]->slackid],a[i],alphaslack[docs[i]->slackid]); | 
| wolffd@0 | 2443 	if(verbosity>=5) printf(" (joint %f)\n",(*maxdiff)); | 
| wolffd@0 | 2444       } | 
| wolffd@0 | 2445     } | 
| wolffd@0 | 2446     /* Count how long a variable was at lower/upper bound (and optimal).*/ | 
| wolffd@0 | 2447     /* Variables, which were at the bound and optimal for a long */ | 
| wolffd@0 | 2448     /* time are unlikely to become support vectors. In case our */ | 
| wolffd@0 | 2449     /* cache is filled up, those variables are excluded to save */ | 
| wolffd@0 | 2450     /* kernel evaluations. (See chapter 'Shrinking').*/ | 
| wolffd@0 | 2451     if((a[i]>(learn_parm->epsilon_a)) | 
| wolffd@0 | 2452        && (a[i]<ex_c)) { | 
| wolffd@0 | 2453       last_suboptimal_at[docs[i]->slackid]=iteration;  /* not at bound */ | 
| wolffd@0 | 2454     } | 
| wolffd@0 | 2455     else if((a[i]<=(learn_parm->epsilon_a)) | 
| wolffd@0 | 2456 	    && (dist < (target+learn_parm->epsilon_shrink))) { | 
| wolffd@0 | 2457       last_suboptimal_at[docs[i]->slackid]=iteration;  /* not likely optimal */ | 
| wolffd@0 | 2458     } | 
| wolffd@0 | 2459     else if((a[i]>=ex_c) | 
| wolffd@0 | 2460 	    && (slack[docs[i]->slackid] < learn_parm->epsilon_shrink))  { | 
| wolffd@0 | 2461       last_suboptimal_at[docs[i]->slackid]=iteration;  /* not likely optimal */ | 
| wolffd@0 | 2462     } | 
| wolffd@0 | 2463   } | 
| wolffd@0 | 2464   /* termination criterion */ | 
| wolffd@0 | 2465   if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) { | 
| wolffd@0 | 2466     retrain=1; | 
| wolffd@0 | 2467   } | 
| wolffd@0 | 2468   return(retrain); | 
| wolffd@0 | 2469 } | 
| wolffd@0 | 2470 | 
| wolffd@0 | 2471 void compute_shared_slacks(DOC **docs, long int *label, | 
| wolffd@0 | 2472 			   double *a, double *lin, | 
| wolffd@0 | 2473 			   double *c, long int *active2dnum, | 
| wolffd@0 | 2474 			   LEARN_PARM *learn_parm, | 
| wolffd@0 | 2475 			   double *slack, double *alphaslack) | 
| wolffd@0 | 2476      /* compute the value of shared slacks and the joint alphas */ | 
| wolffd@0 | 2477 { | 
| wolffd@0 | 2478   long jj,i; | 
| wolffd@0 | 2479   double dist,target; | 
| wolffd@0 | 2480 | 
| wolffd@0 | 2481   for(jj=0;(i=active2dnum[jj])>=0;jj++) { /* clear slack variables */ | 
| wolffd@0 | 2482     slack[docs[i]->slackid]=0.0; | 
| wolffd@0 | 2483     alphaslack[docs[i]->slackid]=0.0; | 
| wolffd@0 | 2484   } | 
| wolffd@0 | 2485   for(jj=0;(i=active2dnum[jj])>=0;jj++) { /* recompute slack variables */ | 
| wolffd@0 | 2486     dist=(lin[i])*(double)label[i]; | 
| wolffd@0 | 2487     target=-(learn_parm->eps-(double)label[i]*c[i]); | 
| wolffd@0 | 2488     if((target-dist) > slack[docs[i]->slackid]) | 
| wolffd@0 | 2489       slack[docs[i]->slackid]=target-dist; | 
| wolffd@0 | 2490     alphaslack[docs[i]->slackid]+=a[i]; | 
| wolffd@0 | 2491   } | 
| wolffd@0 | 2492 } | 
| wolffd@0 | 2493 | 
| wolffd@0 | 2494 | 
| wolffd@0 | 2495 long identify_inconsistent(double *a, long int *label, | 
| wolffd@0 | 2496 			   long int *unlabeled, long int totdoc, | 
| wolffd@0 | 2497 			   LEARN_PARM *learn_parm, | 
| wolffd@0 | 2498 			   long int *inconsistentnum, long int *inconsistent) | 
| wolffd@0 | 2499 { | 
| wolffd@0 | 2500   long i,retrain; | 
| wolffd@0 | 2501 | 
| wolffd@0 | 2502   /* Throw out examples with multipliers at upper bound. This */ | 
| wolffd@0 | 2503   /* corresponds to the -i 1 option. */ | 
| wolffd@0 | 2504   /* ATTENTION: this is just a heuristic for finding a close */ | 
| wolffd@0 | 2505   /*            to minimum number of examples to exclude to */ | 
| wolffd@0 | 2506   /*            make the problem separable with desired margin */ | 
| wolffd@0 | 2507   retrain=0; | 
| wolffd@0 | 2508   for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 2509     if((!inconsistent[i]) && (!unlabeled[i]) | 
| wolffd@0 | 2510        && (a[i]>=(learn_parm->svm_cost[i]-learn_parm->epsilon_a))) { | 
| wolffd@0 | 2511 	(*inconsistentnum)++; | 
| wolffd@0 | 2512 	inconsistent[i]=1;  /* never choose again */ | 
| wolffd@0 | 2513 	retrain=2;          /* start over */ | 
| wolffd@0 | 2514 	if(verbosity>=3) { | 
| wolffd@0 | 2515 	  printf("inconsistent(%ld)..",i); fflush(stdout); | 
| wolffd@0 | 2516 	} | 
| wolffd@0 | 2517     } | 
| wolffd@0 | 2518   } | 
| wolffd@0 | 2519   return(retrain); | 
| wolffd@0 | 2520 } | 
| wolffd@0 | 2521 | 
| wolffd@0 | 2522 long identify_misclassified(double *lin, long int *label, | 
| wolffd@0 | 2523 			    long int *unlabeled, long int totdoc, | 
| wolffd@0 | 2524 			    MODEL *model, long int *inconsistentnum, | 
| wolffd@0 | 2525 			    long int *inconsistent) | 
| wolffd@0 | 2526 { | 
| wolffd@0 | 2527   long i,retrain; | 
| wolffd@0 | 2528   double dist; | 
| wolffd@0 | 2529 | 
| wolffd@0 | 2530   /* Throw out misclassified examples. This */ | 
| wolffd@0 | 2531   /* corresponds to the -i 2 option. */ | 
| wolffd@0 | 2532   /* ATTENTION: this is just a heuristic for finding a close */ | 
| wolffd@0 | 2533   /*            to minimum number of examples to exclude to */ | 
| wolffd@0 | 2534   /*            make the problem separable with desired margin */ | 
| wolffd@0 | 2535   retrain=0; | 
| wolffd@0 | 2536   for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 2537     dist=(lin[i]-model->b)*(double)label[i]; /* 'distance' from hyperplane*/ | 
| wolffd@0 | 2538     if((!inconsistent[i]) && (!unlabeled[i]) && (dist <= 0)) { | 
| wolffd@0 | 2539 	(*inconsistentnum)++; | 
| wolffd@0 | 2540 	inconsistent[i]=1;  /* never choose again */ | 
| wolffd@0 | 2541 	retrain=2;          /* start over */ | 
| wolffd@0 | 2542 	if(verbosity>=3) { | 
| wolffd@0 | 2543 	  printf("inconsistent(%ld)..",i); fflush(stdout); | 
| wolffd@0 | 2544 	} | 
| wolffd@0 | 2545     } | 
| wolffd@0 | 2546   } | 
| wolffd@0 | 2547   return(retrain); | 
| wolffd@0 | 2548 } | 
| wolffd@0 | 2549 | 
| wolffd@0 | 2550 long identify_one_misclassified(double *lin, long int *label, | 
| wolffd@0 | 2551 				long int *unlabeled, | 
| wolffd@0 | 2552 				long int totdoc, MODEL *model, | 
| wolffd@0 | 2553 				long int *inconsistentnum, | 
| wolffd@0 | 2554 				long int *inconsistent) | 
| wolffd@0 | 2555 { | 
| wolffd@0 | 2556   long i,retrain,maxex=-1; | 
| wolffd@0 | 2557   double dist,maxdist=0; | 
| wolffd@0 | 2558 | 
| wolffd@0 | 2559   /* Throw out the 'most misclassified' example. This */ | 
| wolffd@0 | 2560   /* corresponds to the -i 3 option. */ | 
| wolffd@0 | 2561   /* ATTENTION: this is just a heuristic for finding a close */ | 
| wolffd@0 | 2562   /*            to minimum number of examples to exclude to */ | 
| wolffd@0 | 2563   /*            make the problem separable with desired margin */ | 
| wolffd@0 | 2564   retrain=0; | 
| wolffd@0 | 2565   for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 2566     if((!inconsistent[i]) && (!unlabeled[i])) { | 
| wolffd@0 | 2567       dist=(lin[i]-model->b)*(double)label[i];/* 'distance' from hyperplane*/ | 
| wolffd@0 | 2568       if(dist<maxdist) { | 
| wolffd@0 | 2569 	maxdist=dist; | 
| wolffd@0 | 2570 	maxex=i; | 
| wolffd@0 | 2571       } | 
| wolffd@0 | 2572     } | 
| wolffd@0 | 2573   } | 
| wolffd@0 | 2574   if(maxex>=0) { | 
| wolffd@0 | 2575     (*inconsistentnum)++; | 
| wolffd@0 | 2576     inconsistent[maxex]=1;  /* never choose again */ | 
| wolffd@0 | 2577     retrain=2;          /* start over */ | 
| wolffd@0 | 2578     if(verbosity>=3) { | 
| wolffd@0 | 2579       printf("inconsistent(%ld)..",i); fflush(stdout); | 
| wolffd@0 | 2580     } | 
| wolffd@0 | 2581   } | 
| wolffd@0 | 2582   return(retrain); | 
| wolffd@0 | 2583 } | 
| wolffd@0 | 2584 | 
| wolffd@0 | 2585 void update_linear_component(DOC **docs, long int *label, | 
| wolffd@0 | 2586 			     long int *active2dnum, double *a, | 
| wolffd@0 | 2587 			     double *a_old, long int *working2dnum, | 
| wolffd@0 | 2588 			     long int totdoc, long int totwords, | 
| wolffd@0 | 2589 			     KERNEL_PARM *kernel_parm, | 
| wolffd@0 | 2590 			     KERNEL_CACHE *kernel_cache, | 
| wolffd@0 | 2591 			     double *lin, CFLOAT *aicache, double *weights) | 
| wolffd@0 | 2592      /* keep track of the linear component */ | 
| wolffd@0 | 2593      /* lin of the gradient etc. by updating */ | 
| wolffd@0 | 2594      /* based on the change of the variables */ | 
| wolffd@0 | 2595      /* in the current working set */ | 
| wolffd@0 | 2596 { | 
| wolffd@0 | 2597   register long i,ii,j,jj; | 
| wolffd@0 | 2598   register double tec; | 
| wolffd@0 | 2599   SVECTOR *f; | 
| wolffd@0 | 2600 | 
| wolffd@0 | 2601   if(kernel_parm->kernel_type==0) { /* special linear case */ | 
| wolffd@0 | 2602     clear_vector_n(weights,totwords); | 
| wolffd@0 | 2603     for(ii=0;(i=working2dnum[ii])>=0;ii++) { | 
| wolffd@0 | 2604       if(a[i] != a_old[i]) { | 
| wolffd@0 | 2605 	for(f=docs[i]->fvec;f;f=f->next) | 
| wolffd@0 | 2606 	  add_vector_ns(weights,f, | 
| wolffd@0 | 2607 			f->factor*((a[i]-a_old[i])*(double)label[i])); | 
| wolffd@0 | 2608       } | 
| wolffd@0 | 2609     } | 
| wolffd@0 | 2610     for(jj=0;(j=active2dnum[jj])>=0;jj++) { | 
| wolffd@0 | 2611       for(f=docs[j]->fvec;f;f=f->next) | 
| wolffd@0 | 2612 	lin[j]+=f->factor*sprod_ns(weights,f); | 
| wolffd@0 | 2613     } | 
| wolffd@0 | 2614   } | 
| wolffd@0 | 2615   else {                            /* general case */ | 
| wolffd@0 | 2616     for(jj=0;(i=working2dnum[jj])>=0;jj++) { | 
| wolffd@0 | 2617       if(a[i] != a_old[i]) { | 
| wolffd@0 | 2618 	get_kernel_row(kernel_cache,docs,i,totdoc,active2dnum,aicache, | 
| wolffd@0 | 2619 		       kernel_parm); | 
| wolffd@0 | 2620 	for(ii=0;(j=active2dnum[ii])>=0;ii++) { | 
| wolffd@0 | 2621 	  tec=aicache[j]; | 
| wolffd@0 | 2622 	  lin[j]+=(((a[i]*tec)-(a_old[i]*tec))*(double)label[i]); | 
| wolffd@0 | 2623 	} | 
| wolffd@0 | 2624       } | 
| wolffd@0 | 2625     } | 
| wolffd@0 | 2626   } | 
| wolffd@0 | 2627 } | 
| wolffd@0 | 2628 | 
| wolffd@0 | 2629 | 
| wolffd@0 | 2630 long incorporate_unlabeled_examples(MODEL *model, long int *label, | 
| wolffd@0 | 2631 				    long int *inconsistent, | 
| wolffd@0 | 2632 				    long int *unlabeled, | 
| wolffd@0 | 2633 				    double *a, double *lin, | 
| wolffd@0 | 2634 				    long int totdoc, double *selcrit, | 
| wolffd@0 | 2635 				    long int *select, long int *key, | 
| wolffd@0 | 2636 				    long int transductcycle, | 
| wolffd@0 | 2637 				    KERNEL_PARM *kernel_parm, | 
| wolffd@0 | 2638 				    LEARN_PARM *learn_parm) | 
| wolffd@0 | 2639 { | 
| wolffd@0 | 2640   long i,j,k,j1,j2,j3,j4,unsupaddnum1=0,unsupaddnum2=0; | 
| wolffd@0 | 2641   long pos,neg,upos,uneg,orgpos,orgneg,nolabel,newpos,newneg,allunlab; | 
| wolffd@0 | 2642   double dist,model_length,posratio,negratio; | 
| wolffd@0 | 2643   long check_every=2; | 
| wolffd@0 | 2644   double loss; | 
| wolffd@0 | 2645   static double switchsens=0.0,switchsensorg=0.0; | 
| wolffd@0 | 2646   double umin,umax,sumalpha; | 
| wolffd@0 | 2647   long imin=0,imax=0; | 
| wolffd@0 | 2648   static long switchnum=0; | 
| wolffd@0 | 2649 | 
| wolffd@0 | 2650   switchsens/=1.2; | 
| wolffd@0 | 2651 | 
| wolffd@0 | 2652   /* assumes that lin[] is up to date -> no inactive vars */ | 
| wolffd@0 | 2653 | 
| wolffd@0 | 2654   orgpos=0; | 
| wolffd@0 | 2655   orgneg=0; | 
| wolffd@0 | 2656   newpos=0; | 
| wolffd@0 | 2657   newneg=0; | 
| wolffd@0 | 2658   nolabel=0; | 
| wolffd@0 | 2659   allunlab=0; | 
| wolffd@0 | 2660   for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 2661     if(!unlabeled[i]) { | 
| wolffd@0 | 2662       if(label[i] > 0) { | 
| wolffd@0 | 2663 	orgpos++; | 
| wolffd@0 | 2664       } | 
| wolffd@0 | 2665       else { | 
| wolffd@0 | 2666 	orgneg++; | 
| wolffd@0 | 2667       } | 
| wolffd@0 | 2668     } | 
| wolffd@0 | 2669     else { | 
| wolffd@0 | 2670       allunlab++; | 
| wolffd@0 | 2671       if(unlabeled[i]) { | 
| wolffd@0 | 2672 	if(label[i] > 0) { | 
| wolffd@0 | 2673 	  newpos++; | 
| wolffd@0 | 2674 	} | 
| wolffd@0 | 2675 	else if(label[i] < 0) { | 
| wolffd@0 | 2676 	  newneg++; | 
| wolffd@0 | 2677 	} | 
| wolffd@0 | 2678       } | 
| wolffd@0 | 2679     } | 
| wolffd@0 | 2680     if(label[i]==0) { | 
| wolffd@0 | 2681       nolabel++; | 
| wolffd@0 | 2682     } | 
| wolffd@0 | 2683   } | 
| wolffd@0 | 2684 | 
| wolffd@0 | 2685   if(learn_parm->transduction_posratio >= 0) { | 
| wolffd@0 | 2686     posratio=learn_parm->transduction_posratio; | 
| wolffd@0 | 2687   } | 
| wolffd@0 | 2688   else { | 
| wolffd@0 | 2689     posratio=(double)orgpos/(double)(orgpos+orgneg); /* use ratio of pos/neg */ | 
| wolffd@0 | 2690   }                                                  /* in training data */ | 
| wolffd@0 | 2691   negratio=1.0-posratio; | 
| wolffd@0 | 2692 | 
| wolffd@0 | 2693   learn_parm->svm_costratio=1.0;                     /* global */ | 
| wolffd@0 | 2694   if(posratio>0) { | 
| wolffd@0 | 2695     learn_parm->svm_costratio_unlab=negratio/posratio; | 
| wolffd@0 | 2696   } | 
| wolffd@0 | 2697   else { | 
| wolffd@0 | 2698     learn_parm->svm_costratio_unlab=1.0; | 
| wolffd@0 | 2699   } | 
| wolffd@0 | 2700 | 
| wolffd@0 | 2701   pos=0; | 
| wolffd@0 | 2702   neg=0; | 
| wolffd@0 | 2703   upos=0; | 
| wolffd@0 | 2704   uneg=0; | 
| wolffd@0 | 2705   for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 2706     dist=(lin[i]-model->b);  /* 'distance' from hyperplane*/ | 
| wolffd@0 | 2707     if(dist>0) { | 
| wolffd@0 | 2708       pos++; | 
| wolffd@0 | 2709     } | 
| wolffd@0 | 2710     else { | 
| wolffd@0 | 2711       neg++; | 
| wolffd@0 | 2712     } | 
| wolffd@0 | 2713     if(unlabeled[i]) { | 
| wolffd@0 | 2714       if(dist>0) { | 
| wolffd@0 | 2715 	upos++; | 
| wolffd@0 | 2716       } | 
| wolffd@0 | 2717       else { | 
| wolffd@0 | 2718 	uneg++; | 
| wolffd@0 | 2719       } | 
| wolffd@0 | 2720     } | 
| wolffd@0 | 2721     if((!unlabeled[i]) && (a[i]>(learn_parm->svm_cost[i]-learn_parm->epsilon_a))) { | 
| wolffd@0 | 2722       /*      printf("Ubounded %ld (class %ld, unlabeled %ld)\n",i,label[i],unlabeled[i]); */ | 
| wolffd@0 | 2723     } | 
| wolffd@0 | 2724   } | 
| wolffd@0 | 2725   if(verbosity>=2) { | 
| wolffd@0 | 2726     printf("POS=%ld, ORGPOS=%ld, ORGNEG=%ld\n",pos,orgpos,orgneg); | 
| wolffd@0 | 2727     printf("POS=%ld, NEWPOS=%ld, NEWNEG=%ld\n",pos,newpos,newneg); | 
| wolffd@0 | 2728     printf("pos ratio = %f (%f).\n",(double)(upos)/(double)(allunlab),posratio); | 
| wolffd@0 | 2729     fflush(stdout); | 
| wolffd@0 | 2730   } | 
| wolffd@0 | 2731 | 
| wolffd@0 | 2732   if(transductcycle == 0) { | 
| wolffd@0 | 2733     j1=0; | 
| wolffd@0 | 2734     j2=0; | 
| wolffd@0 | 2735     j4=0; | 
| wolffd@0 | 2736     for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 2737       dist=(lin[i]-model->b);  /* 'distance' from hyperplane*/ | 
| wolffd@0 | 2738       if((label[i]==0) && (unlabeled[i])) { | 
| wolffd@0 | 2739 	selcrit[j4]=dist; | 
| wolffd@0 | 2740 	key[j4]=i; | 
| wolffd@0 | 2741 	j4++; | 
| wolffd@0 | 2742       } | 
| wolffd@0 | 2743     } | 
| wolffd@0 | 2744     unsupaddnum1=0; | 
| wolffd@0 | 2745     unsupaddnum2=0; | 
| wolffd@0 | 2746     select_top_n(selcrit,j4,select,(long)(allunlab*posratio+0.5)); | 
| wolffd@0 | 2747     for(k=0;(k<(long)(allunlab*posratio+0.5));k++) { | 
| wolffd@0 | 2748       i=key[select[k]]; | 
| wolffd@0 | 2749       label[i]=1; | 
| wolffd@0 | 2750       unsupaddnum1++; | 
| wolffd@0 | 2751       j1++; | 
| wolffd@0 | 2752     } | 
| wolffd@0 | 2753     for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 2754       if((label[i]==0) && (unlabeled[i])) { | 
| wolffd@0 | 2755 	label[i]=-1; | 
| wolffd@0 | 2756 	j2++; | 
| wolffd@0 | 2757 	unsupaddnum2++; | 
| wolffd@0 | 2758       } | 
| wolffd@0 | 2759     } | 
| wolffd@0 | 2760     for(i=0;i<totdoc;i++) {  /* set upper bounds on vars */ | 
| wolffd@0 | 2761       if(unlabeled[i]) { | 
| wolffd@0 | 2762 	if(label[i] == 1) { | 
| wolffd@0 | 2763 	  learn_parm->svm_cost[i]=learn_parm->svm_c* | 
| wolffd@0 | 2764 	    learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound; | 
| wolffd@0 | 2765 	} | 
| wolffd@0 | 2766 	else if(label[i] == -1) { | 
| wolffd@0 | 2767 	  learn_parm->svm_cost[i]=learn_parm->svm_c* | 
| wolffd@0 | 2768 	    learn_parm->svm_unlabbound; | 
| wolffd@0 | 2769 	} | 
| wolffd@0 | 2770       } | 
| wolffd@0 | 2771     } | 
| wolffd@0 | 2772     if(verbosity>=1) { | 
| wolffd@0 | 2773       /* printf("costratio %f, costratio_unlab %f, unlabbound %f\n", | 
| wolffd@0 | 2774 	 learn_parm->svm_costratio,learn_parm->svm_costratio_unlab, | 
| wolffd@0 | 2775 	 learn_parm->svm_unlabbound); */ | 
| wolffd@0 | 2776       printf("Classifying unlabeled data as %ld POS / %ld NEG.\n", | 
| wolffd@0 | 2777 	     unsupaddnum1,unsupaddnum2); | 
| wolffd@0 | 2778       fflush(stdout); | 
| wolffd@0 | 2779     } | 
| wolffd@0 | 2780     if(verbosity >= 1) | 
| wolffd@0 | 2781       printf("Retraining."); | 
| wolffd@0 | 2782     if(verbosity >= 2) printf("\n"); | 
| wolffd@0 | 2783     return((long)3); | 
| wolffd@0 | 2784   } | 
| wolffd@0 | 2785   if((transductcycle % check_every) == 0) { | 
| wolffd@0 | 2786     if(verbosity >= 1) | 
| wolffd@0 | 2787       printf("Retraining."); | 
| wolffd@0 | 2788     if(verbosity >= 2) printf("\n"); | 
| wolffd@0 | 2789     j1=0; | 
| wolffd@0 | 2790     j2=0; | 
| wolffd@0 | 2791     unsupaddnum1=0; | 
| wolffd@0 | 2792     unsupaddnum2=0; | 
| wolffd@0 | 2793     for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 2794       if((unlabeled[i] == 2)) { | 
| wolffd@0 | 2795 	unlabeled[i]=1; | 
| wolffd@0 | 2796 	label[i]=1; | 
| wolffd@0 | 2797 	j1++; | 
| wolffd@0 | 2798 	unsupaddnum1++; | 
| wolffd@0 | 2799       } | 
| wolffd@0 | 2800       else if((unlabeled[i] == 3)) { | 
| wolffd@0 | 2801 	unlabeled[i]=1; | 
| wolffd@0 | 2802 	label[i]=-1; | 
| wolffd@0 | 2803 	j2++; | 
| wolffd@0 | 2804 	unsupaddnum2++; | 
| wolffd@0 | 2805       } | 
| wolffd@0 | 2806     } | 
| wolffd@0 | 2807     for(i=0;i<totdoc;i++) {  /* set upper bounds on vars */ | 
| wolffd@0 | 2808       if(unlabeled[i]) { | 
| wolffd@0 | 2809 	if(label[i] == 1) { | 
| wolffd@0 | 2810 	  learn_parm->svm_cost[i]=learn_parm->svm_c* | 
| wolffd@0 | 2811 	    learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound; | 
| wolffd@0 | 2812 	} | 
| wolffd@0 | 2813 	else if(label[i] == -1) { | 
| wolffd@0 | 2814 	  learn_parm->svm_cost[i]=learn_parm->svm_c* | 
| wolffd@0 | 2815 	    learn_parm->svm_unlabbound; | 
| wolffd@0 | 2816 	} | 
| wolffd@0 | 2817       } | 
| wolffd@0 | 2818     } | 
| wolffd@0 | 2819 | 
| wolffd@0 | 2820     if(verbosity>=2) { | 
| wolffd@0 | 2821       /* printf("costratio %f, costratio_unlab %f, unlabbound %f\n", | 
| wolffd@0 | 2822 	     learn_parm->svm_costratio,learn_parm->svm_costratio_unlab, | 
| wolffd@0 | 2823 	     learn_parm->svm_unlabbound); */ | 
| wolffd@0 | 2824       printf("%ld positive -> Added %ld POS / %ld NEG unlabeled examples.\n", | 
| wolffd@0 | 2825 	     upos,unsupaddnum1,unsupaddnum2); | 
| wolffd@0 | 2826       fflush(stdout); | 
| wolffd@0 | 2827     } | 
| wolffd@0 | 2828 | 
| wolffd@0 | 2829     if(learn_parm->svm_unlabbound == 1) { | 
| wolffd@0 | 2830       learn_parm->epsilon_crit=0.001; /* do the last run right */ | 
| wolffd@0 | 2831     } | 
| wolffd@0 | 2832     else { | 
| wolffd@0 | 2833       learn_parm->epsilon_crit=0.01; /* otherwise, no need to be so picky */ | 
| wolffd@0 | 2834     } | 
| wolffd@0 | 2835 | 
| wolffd@0 | 2836     return((long)3); | 
| wolffd@0 | 2837   } | 
| wolffd@0 | 2838   else if(((transductcycle % check_every) < check_every)) { | 
| wolffd@0 | 2839     model_length=0; | 
| wolffd@0 | 2840     sumalpha=0; | 
| wolffd@0 | 2841     loss=0; | 
| wolffd@0 | 2842     for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 2843       model_length+=a[i]*label[i]*lin[i]; | 
| wolffd@0 | 2844       sumalpha+=a[i]; | 
| wolffd@0 | 2845       dist=(lin[i]-model->b);  /* 'distance' from hyperplane*/ | 
| wolffd@0 | 2846       if((label[i]*dist)<(1.0-learn_parm->epsilon_crit)) { | 
| wolffd@0 | 2847 	loss+=(1.0-(label[i]*dist))*learn_parm->svm_cost[i]; | 
| wolffd@0 | 2848       } | 
| wolffd@0 | 2849     } | 
| wolffd@0 | 2850     model_length=sqrt(model_length); | 
| wolffd@0 | 2851     if(verbosity>=2) { | 
| wolffd@0 | 2852       printf("Model-length = %f (%f), loss = %f, objective = %f\n", | 
| wolffd@0 | 2853 	     model_length,sumalpha,loss,loss+0.5*model_length*model_length); | 
| wolffd@0 | 2854       fflush(stdout); | 
| wolffd@0 | 2855     } | 
| wolffd@0 | 2856     j1=0; | 
| wolffd@0 | 2857     j2=0; | 
| wolffd@0 | 2858     j3=0; | 
| wolffd@0 | 2859     j4=0; | 
| wolffd@0 | 2860     unsupaddnum1=0; | 
| wolffd@0 | 2861     unsupaddnum2=0; | 
| wolffd@0 | 2862     umin=99999; | 
| wolffd@0 | 2863     umax=-99999; | 
| wolffd@0 | 2864     j4=1; | 
| wolffd@0 | 2865     while(j4) { | 
| wolffd@0 | 2866       umin=99999; | 
| wolffd@0 | 2867       umax=-99999; | 
| wolffd@0 | 2868       for(i=0;(i<totdoc);i++) { | 
| wolffd@0 | 2869 	dist=(lin[i]-model->b); | 
| wolffd@0 | 2870 	if((label[i]>0) && (unlabeled[i]) && (!inconsistent[i]) | 
| wolffd@0 | 2871 	   && (dist<umin)) { | 
| wolffd@0 | 2872 	  umin=dist; | 
| wolffd@0 | 2873 	  imin=i; | 
| wolffd@0 | 2874 	} | 
| wolffd@0 | 2875 	if((label[i]<0) && (unlabeled[i])  && (!inconsistent[i]) | 
| wolffd@0 | 2876 	   && (dist>umax)) { | 
| wolffd@0 | 2877 	  umax=dist; | 
| wolffd@0 | 2878 	  imax=i; | 
| wolffd@0 | 2879 	} | 
| wolffd@0 | 2880       } | 
| wolffd@0 | 2881       if((umin < (umax+switchsens-1E-4))) { | 
| wolffd@0 | 2882 	j1++; | 
| wolffd@0 | 2883 	j2++; | 
| wolffd@0 | 2884 	unsupaddnum1++; | 
| wolffd@0 | 2885 	unlabeled[imin]=3; | 
| wolffd@0 | 2886 	inconsistent[imin]=1; | 
| wolffd@0 | 2887 	unsupaddnum2++; | 
| wolffd@0 | 2888 	unlabeled[imax]=2; | 
| wolffd@0 | 2889 	inconsistent[imax]=1; | 
| wolffd@0 | 2890       } | 
| wolffd@0 | 2891       else | 
| wolffd@0 | 2892 	j4=0; | 
| wolffd@0 | 2893       j4=0; | 
| wolffd@0 | 2894     } | 
| wolffd@0 | 2895     for(j=0;(j<totdoc);j++) { | 
| wolffd@0 | 2896       if(unlabeled[j] && (!inconsistent[j])) { | 
| wolffd@0 | 2897 	if(label[j]>0) { | 
| wolffd@0 | 2898 	  unlabeled[j]=2; | 
| wolffd@0 | 2899 	} | 
| wolffd@0 | 2900 	else if(label[j]<0) { | 
| wolffd@0 | 2901 	  unlabeled[j]=3; | 
| wolffd@0 | 2902 	} | 
| wolffd@0 | 2903 	/* inconsistent[j]=1; */ | 
| wolffd@0 | 2904 	j3++; | 
| wolffd@0 | 2905       } | 
| wolffd@0 | 2906     } | 
| wolffd@0 | 2907     switchnum+=unsupaddnum1+unsupaddnum2; | 
| wolffd@0 | 2908 | 
| wolffd@0 | 2909     /* stop and print out current margin | 
| wolffd@0 | 2910        printf("switchnum %ld %ld\n",switchnum,kernel_parm->poly_degree); | 
| wolffd@0 | 2911        if(switchnum == 2*kernel_parm->poly_degree) { | 
| wolffd@0 | 2912        learn_parm->svm_unlabbound=1; | 
| wolffd@0 | 2913        } | 
| wolffd@0 | 2914        */ | 
| wolffd@0 | 2915 | 
| wolffd@0 | 2916     if((!unsupaddnum1) && (!unsupaddnum2)) { | 
| wolffd@0 | 2917       if((learn_parm->svm_unlabbound>=1) && ((newpos+newneg) == allunlab)) { | 
| wolffd@0 | 2918 	for(j=0;(j<totdoc);j++) { | 
| wolffd@0 | 2919 	  inconsistent[j]=0; | 
| wolffd@0 | 2920 	  if(unlabeled[j]) unlabeled[j]=1; | 
| wolffd@0 | 2921 	} | 
| wolffd@0 | 2922 	write_prediction(learn_parm->predfile,model,lin,a,unlabeled,label, | 
| wolffd@0 | 2923 			 totdoc,learn_parm); | 
| wolffd@0 | 2924 	if(verbosity>=1) | 
| wolffd@0 | 2925 	  printf("Number of switches: %ld\n",switchnum); | 
| wolffd@0 | 2926 	return((long)0); | 
| wolffd@0 | 2927       } | 
| wolffd@0 | 2928       switchsens=switchsensorg; | 
| wolffd@0 | 2929       learn_parm->svm_unlabbound*=1.5; | 
| wolffd@0 | 2930       if(learn_parm->svm_unlabbound>1) { | 
| wolffd@0 | 2931 	learn_parm->svm_unlabbound=1; | 
| wolffd@0 | 2932       } | 
| wolffd@0 | 2933       model->at_upper_bound=0; /* since upper bound increased */ | 
| wolffd@0 | 2934       if(verbosity>=1) | 
| wolffd@0 | 2935 	printf("Increasing influence of unlabeled examples to %f%% .", | 
| wolffd@0 | 2936 	       learn_parm->svm_unlabbound*100.0); | 
| wolffd@0 | 2937     } | 
| wolffd@0 | 2938     else if(verbosity>=1) { | 
| wolffd@0 | 2939       printf("%ld positive -> Switching labels of %ld POS / %ld NEG unlabeled examples.", | 
| wolffd@0 | 2940 	     upos,unsupaddnum1,unsupaddnum2); | 
| wolffd@0 | 2941       fflush(stdout); | 
| wolffd@0 | 2942     } | 
| wolffd@0 | 2943 | 
| wolffd@0 | 2944     if(verbosity >= 2) printf("\n"); | 
| wolffd@0 | 2945 | 
| wolffd@0 | 2946     learn_parm->epsilon_crit=0.5; /* don't need to be so picky */ | 
| wolffd@0 | 2947 | 
| wolffd@0 | 2948     for(i=0;i<totdoc;i++) {  /* set upper bounds on vars */ | 
| wolffd@0 | 2949       if(unlabeled[i]) { | 
| wolffd@0 | 2950 	if(label[i] == 1) { | 
| wolffd@0 | 2951 	  learn_parm->svm_cost[i]=learn_parm->svm_c* | 
| wolffd@0 | 2952 	    learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound; | 
| wolffd@0 | 2953 	} | 
| wolffd@0 | 2954 	else if(label[i] == -1) { | 
| wolffd@0 | 2955 	  learn_parm->svm_cost[i]=learn_parm->svm_c* | 
| wolffd@0 | 2956 	    learn_parm->svm_unlabbound; | 
| wolffd@0 | 2957 	} | 
| wolffd@0 | 2958       } | 
| wolffd@0 | 2959     } | 
| wolffd@0 | 2960 | 
| wolffd@0 | 2961     return((long)2); | 
| wolffd@0 | 2962   } | 
| wolffd@0 | 2963 | 
| wolffd@0 | 2964   return((long)0); | 
| wolffd@0 | 2965 } | 
| wolffd@0 | 2966 | 
| wolffd@0 | 2967 /*************************** Working set selection ***************************/ | 
| wolffd@0 | 2968 | 
| wolffd@0 | 2969 long select_next_qp_subproblem_grad(long int *label, | 
| wolffd@0 | 2970 				    long int *unlabeled, | 
| wolffd@0 | 2971 				    double *a, double *lin, | 
| wolffd@0 | 2972 				    double *c, long int totdoc, | 
| wolffd@0 | 2973 				    long int qp_size, | 
| wolffd@0 | 2974 				    LEARN_PARM *learn_parm, | 
| wolffd@0 | 2975 				    long int *inconsistent, | 
| wolffd@0 | 2976 				    long int *active2dnum, | 
| wolffd@0 | 2977 				    long int *working2dnum, | 
| wolffd@0 | 2978 				    double *selcrit, | 
| wolffd@0 | 2979 				    long int *select, | 
| wolffd@0 | 2980 				    KERNEL_CACHE *kernel_cache, | 
| wolffd@0 | 2981 				    long int cache_only, | 
| wolffd@0 | 2982 				    long int *key, long int *chosen) | 
| wolffd@0 | 2983      /* Use the feasible direction approach to select the next | 
| wolffd@0 | 2984       qp-subproblem (see chapter 'Selecting a good working set'). If | 
| wolffd@0 | 2985       'cache_only' is true, then the variables are selected only among | 
| wolffd@0 | 2986       those for which the kernel evaluations are cached. */ | 
| wolffd@0 | 2987 { | 
| wolffd@0 | 2988   long choosenum,i,j,k,activedoc,inum,valid; | 
| wolffd@0 | 2989   double s; | 
| wolffd@0 | 2990 | 
| wolffd@0 | 2991   for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */ | 
| wolffd@0 | 2992   choosenum=0; | 
| wolffd@0 | 2993   activedoc=0; | 
| wolffd@0 | 2994   for(i=0;(j=active2dnum[i])>=0;i++) { | 
| wolffd@0 | 2995     s=-label[j]; | 
| wolffd@0 | 2996     if(kernel_cache && cache_only) | 
| wolffd@0 | 2997       valid=(kernel_cache->index[j]>=0); | 
| wolffd@0 | 2998     else | 
| wolffd@0 | 2999       valid=1; | 
| wolffd@0 | 3000     if(valid | 
| wolffd@0 | 3001        && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0))) | 
| wolffd@0 | 3002        && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) | 
| wolffd@0 | 3003 	     && (s>0))) | 
| wolffd@0 | 3004        && (!chosen[j]) | 
| wolffd@0 | 3005        && (label[j]) | 
| wolffd@0 | 3006        && (!inconsistent[j])) | 
| wolffd@0 | 3007       { | 
| wolffd@0 | 3008       selcrit[activedoc]=(double)label[j]*(learn_parm->eps-(double)label[j]*c[j]+(double)label[j]*lin[j]); | 
| wolffd@0 | 3009       /*      selcrit[activedoc]=(double)label[j]*(-1.0+(double)label[j]*lin[j]); */ | 
| wolffd@0 | 3010       key[activedoc]=j; | 
| wolffd@0 | 3011       activedoc++; | 
| wolffd@0 | 3012     } | 
| wolffd@0 | 3013   } | 
| wolffd@0 | 3014   select_top_n(selcrit,activedoc,select,(long)(qp_size/2)); | 
| wolffd@0 | 3015   for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) { | 
| wolffd@0 | 3016     /* if(learn_parm->biased_hyperplane || (selcrit[select[k]] > 0)) { */ | 
| wolffd@0 | 3017       i=key[select[k]]; | 
| wolffd@0 | 3018       chosen[i]=1; | 
| wolffd@0 | 3019       working2dnum[inum+choosenum]=i; | 
| wolffd@0 | 3020       choosenum+=1; | 
| wolffd@0 | 3021       if(kernel_cache) | 
| wolffd@0 | 3022 	kernel_cache_touch(kernel_cache,i); /* make sure it does not get | 
| wolffd@0 | 3023 					       kicked out of cache */ | 
| wolffd@0 | 3024       /* } */ | 
| wolffd@0 | 3025   } | 
| wolffd@0 | 3026 | 
| wolffd@0 | 3027   activedoc=0; | 
| wolffd@0 | 3028   for(i=0;(j=active2dnum[i])>=0;i++) { | 
| wolffd@0 | 3029     s=label[j]; | 
| wolffd@0 | 3030     if(kernel_cache && cache_only) | 
| wolffd@0 | 3031       valid=(kernel_cache->index[j]>=0); | 
| wolffd@0 | 3032     else | 
| wolffd@0 | 3033       valid=1; | 
| wolffd@0 | 3034     if(valid | 
| wolffd@0 | 3035        && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0))) | 
| wolffd@0 | 3036        && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) | 
| wolffd@0 | 3037 	     && (s>0))) | 
| wolffd@0 | 3038        && (!chosen[j]) | 
| wolffd@0 | 3039        && (label[j]) | 
| wolffd@0 | 3040        && (!inconsistent[j])) | 
| wolffd@0 | 3041       { | 
| wolffd@0 | 3042       selcrit[activedoc]=-(double)label[j]*(learn_parm->eps-(double)label[j]*c[j]+(double)label[j]*lin[j]); | 
| wolffd@0 | 3043       /*  selcrit[activedoc]=-(double)(label[j]*(-1.0+(double)label[j]*lin[j])); */ | 
| wolffd@0 | 3044       key[activedoc]=j; | 
| wolffd@0 | 3045       activedoc++; | 
| wolffd@0 | 3046     } | 
| wolffd@0 | 3047   } | 
| wolffd@0 | 3048   select_top_n(selcrit,activedoc,select,(long)(qp_size/2)); | 
| wolffd@0 | 3049   for(k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) { | 
| wolffd@0 | 3050     /* if(learn_parm->biased_hyperplane || (selcrit[select[k]] > 0)) { */ | 
| wolffd@0 | 3051       i=key[select[k]]; | 
| wolffd@0 | 3052       chosen[i]=1; | 
| wolffd@0 | 3053       working2dnum[inum+choosenum]=i; | 
| wolffd@0 | 3054       choosenum+=1; | 
| wolffd@0 | 3055       if(kernel_cache) | 
| wolffd@0 | 3056 	kernel_cache_touch(kernel_cache,i); /* make sure it does not get | 
| wolffd@0 | 3057 					       kicked out of cache */ | 
| wolffd@0 | 3058       /* } */ | 
| wolffd@0 | 3059   } | 
| wolffd@0 | 3060   working2dnum[inum+choosenum]=-1; /* complete index */ | 
| wolffd@0 | 3061   return(choosenum); | 
| wolffd@0 | 3062 } | 
| wolffd@0 | 3063 | 
| wolffd@0 | 3064 long select_next_qp_subproblem_rand(long int *label, | 
| wolffd@0 | 3065 				    long int *unlabeled, | 
| wolffd@0 | 3066 				    double *a, double *lin, | 
| wolffd@0 | 3067 				    double *c, long int totdoc, | 
| wolffd@0 | 3068 				    long int qp_size, | 
| wolffd@0 | 3069 				    LEARN_PARM *learn_parm, | 
| wolffd@0 | 3070 				    long int *inconsistent, | 
| wolffd@0 | 3071 				    long int *active2dnum, | 
| wolffd@0 | 3072 				    long int *working2dnum, | 
| wolffd@0 | 3073 				    double *selcrit, | 
| wolffd@0 | 3074 				    long int *select, | 
| wolffd@0 | 3075 				    KERNEL_CACHE *kernel_cache, | 
| wolffd@0 | 3076 				    long int *key, | 
| wolffd@0 | 3077 				    long int *chosen, | 
| wolffd@0 | 3078 				    long int iteration) | 
| wolffd@0 | 3079 /* Use the feasible direction approach to select the next | 
| wolffd@0 | 3080    qp-subproblem (see section 'Selecting a good working set'). Chooses | 
| wolffd@0 | 3081    a feasible direction at (pseudo) random to help jump over numerical | 
| wolffd@0 | 3082    problem. */ | 
| wolffd@0 | 3083 { | 
| wolffd@0 | 3084   long choosenum,i,j,k,activedoc,inum; | 
| wolffd@0 | 3085   double s; | 
| wolffd@0 | 3086 | 
| wolffd@0 | 3087   for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */ | 
| wolffd@0 | 3088   choosenum=0; | 
| wolffd@0 | 3089   activedoc=0; | 
| wolffd@0 | 3090   for(i=0;(j=active2dnum[i])>=0;i++) { | 
| wolffd@0 | 3091     s=-label[j]; | 
| wolffd@0 | 3092     if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0))) | 
| wolffd@0 | 3093        && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) | 
| wolffd@0 | 3094 	     && (s>0))) | 
| wolffd@0 | 3095        && (!inconsistent[j]) | 
| wolffd@0 | 3096        && (label[j]) | 
| wolffd@0 | 3097        && (!chosen[j])) { | 
| wolffd@0 | 3098       selcrit[activedoc]=(j+iteration) % totdoc; | 
| wolffd@0 | 3099       key[activedoc]=j; | 
| wolffd@0 | 3100       activedoc++; | 
| wolffd@0 | 3101     } | 
| wolffd@0 | 3102   } | 
| wolffd@0 | 3103   select_top_n(selcrit,activedoc,select,(long)(qp_size/2)); | 
| wolffd@0 | 3104   for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) { | 
| wolffd@0 | 3105     i=key[select[k]]; | 
| wolffd@0 | 3106     chosen[i]=1; | 
| wolffd@0 | 3107     working2dnum[inum+choosenum]=i; | 
| wolffd@0 | 3108     choosenum+=1; | 
| wolffd@0 | 3109     kernel_cache_touch(kernel_cache,i); /* make sure it does not get kicked */ | 
| wolffd@0 | 3110                                         /* out of cache */ | 
| wolffd@0 | 3111   } | 
| wolffd@0 | 3112 | 
| wolffd@0 | 3113   activedoc=0; | 
| wolffd@0 | 3114   for(i=0;(j=active2dnum[i])>=0;i++) { | 
| wolffd@0 | 3115     s=label[j]; | 
| wolffd@0 | 3116     if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0))) | 
| wolffd@0 | 3117        && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) | 
| wolffd@0 | 3118 	     && (s>0))) | 
| wolffd@0 | 3119        && (!inconsistent[j]) | 
| wolffd@0 | 3120        && (label[j]) | 
| wolffd@0 | 3121        && (!chosen[j])) { | 
| wolffd@0 | 3122       selcrit[activedoc]=(j+iteration) % totdoc; | 
| wolffd@0 | 3123       key[activedoc]=j; | 
| wolffd@0 | 3124       activedoc++; | 
| wolffd@0 | 3125     } | 
| wolffd@0 | 3126   } | 
| wolffd@0 | 3127   select_top_n(selcrit,activedoc,select,(long)(qp_size/2)); | 
| wolffd@0 | 3128   for(k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) { | 
| wolffd@0 | 3129     i=key[select[k]]; | 
| wolffd@0 | 3130     chosen[i]=1; | 
| wolffd@0 | 3131     working2dnum[inum+choosenum]=i; | 
| wolffd@0 | 3132     choosenum+=1; | 
| wolffd@0 | 3133     kernel_cache_touch(kernel_cache,i); /* make sure it does not get kicked */ | 
| wolffd@0 | 3134                                         /* out of cache */ | 
| wolffd@0 | 3135   } | 
| wolffd@0 | 3136   working2dnum[inum+choosenum]=-1; /* complete index */ | 
| wolffd@0 | 3137   return(choosenum); | 
| wolffd@0 | 3138 } | 
| wolffd@0 | 3139 | 
| wolffd@0 | 3140 long select_next_qp_slackset(DOC **docs, long int *label, | 
| wolffd@0 | 3141 			     double *a, double *lin, | 
| wolffd@0 | 3142 			     double *slack, double *alphaslack, | 
| wolffd@0 | 3143 			     double *c, | 
| wolffd@0 | 3144 			     LEARN_PARM *learn_parm, | 
| wolffd@0 | 3145 			     long int *active2dnum, double *maxviol) | 
| wolffd@0 | 3146      /* returns the slackset with the largest internal violation */ | 
| wolffd@0 | 3147 { | 
| wolffd@0 | 3148   long i,ii,maxdiffid; | 
| wolffd@0 | 3149   double dist,target,maxdiff,ex_c; | 
| wolffd@0 | 3150 | 
| wolffd@0 | 3151   maxdiff=0; | 
| wolffd@0 | 3152   maxdiffid=0; | 
| wolffd@0 | 3153   for(ii=0;(i=active2dnum[ii])>=0;ii++) { | 
| wolffd@0 | 3154     ex_c=learn_parm->svm_c-learn_parm->epsilon_a; | 
| wolffd@0 | 3155     if(alphaslack[docs[i]->slackid] >= ex_c) { | 
| wolffd@0 | 3156       dist=(lin[i])*(double)label[i]+slack[docs[i]->slackid]; /* distance */ | 
| wolffd@0 | 3157       target=-(learn_parm->eps-(double)label[i]*c[i]); /* rhs of constraint */ | 
| wolffd@0 | 3158       if((a[i]>learn_parm->epsilon_a) && (dist > target)) { | 
| wolffd@0 | 3159 	if((dist-target)>maxdiff) { /* largest violation */ | 
| wolffd@0 | 3160 	  maxdiff=dist-target; | 
| wolffd@0 | 3161 	  maxdiffid=docs[i]->slackid; | 
| wolffd@0 | 3162 	} | 
| wolffd@0 | 3163       } | 
| wolffd@0 | 3164     } | 
| wolffd@0 | 3165   } | 
| wolffd@0 | 3166   (*maxviol)=maxdiff; | 
| wolffd@0 | 3167   return(maxdiffid); | 
| wolffd@0 | 3168 } | 
| wolffd@0 | 3169 | 
| wolffd@0 | 3170 | 
| wolffd@0 | 3171 void select_top_n(double *selcrit, long int range, long int *select, | 
| wolffd@0 | 3172 		  long int n) | 
| wolffd@0 | 3173 { | 
| wolffd@0 | 3174   register long i,j; | 
| wolffd@0 | 3175 | 
| wolffd@0 | 3176   for(i=0;(i<n) && (i<range);i++) { /* Initialize with the first n elements */ | 
| wolffd@0 | 3177     for(j=i;j>=0;j--) { | 
| wolffd@0 | 3178       if((j>0) && (selcrit[select[j-1]]<selcrit[i])){ | 
| wolffd@0 | 3179 	select[j]=select[j-1]; | 
| wolffd@0 | 3180       } | 
| wolffd@0 | 3181       else { | 
| wolffd@0 | 3182 	select[j]=i; | 
| wolffd@0 | 3183 	j=-1; | 
| wolffd@0 | 3184       } | 
| wolffd@0 | 3185     } | 
| wolffd@0 | 3186   } | 
| wolffd@0 | 3187   if(n>0) { | 
| wolffd@0 | 3188     for(i=n;i<range;i++) { | 
| wolffd@0 | 3189       if(selcrit[i]>selcrit[select[n-1]]) { | 
| wolffd@0 | 3190 	for(j=n-1;j>=0;j--) { | 
| wolffd@0 | 3191 	  if((j>0) && (selcrit[select[j-1]]<selcrit[i])) { | 
| wolffd@0 | 3192 	    select[j]=select[j-1]; | 
| wolffd@0 | 3193 	  } | 
| wolffd@0 | 3194 	  else { | 
| wolffd@0 | 3195 	    select[j]=i; | 
| wolffd@0 | 3196 	    j=-1; | 
| wolffd@0 | 3197 	  } | 
| wolffd@0 | 3198 	} | 
| wolffd@0 | 3199       } | 
| wolffd@0 | 3200     } | 
| wolffd@0 | 3201   } | 
| wolffd@0 | 3202 } | 
| wolffd@0 | 3203 | 
| wolffd@0 | 3204 | 
| wolffd@0 | 3205 /******************************** Shrinking  *********************************/ | 
| wolffd@0 | 3206 | 
| wolffd@0 | 3207 void init_shrink_state(SHRINK_STATE *shrink_state, long int totdoc, | 
| wolffd@0 | 3208 		       long int maxhistory) | 
| wolffd@0 | 3209 { | 
| wolffd@0 | 3210   long i; | 
| wolffd@0 | 3211 | 
| wolffd@0 | 3212   shrink_state->deactnum=0; | 
| wolffd@0 | 3213   shrink_state->active = (long *)my_malloc(sizeof(long)*totdoc); | 
| wolffd@0 | 3214   shrink_state->inactive_since = (long *)my_malloc(sizeof(long)*totdoc); | 
| wolffd@0 | 3215   shrink_state->a_history = (double **)my_malloc(sizeof(double *)*maxhistory); | 
| wolffd@0 | 3216   shrink_state->maxhistory=maxhistory; | 
| wolffd@0 | 3217   shrink_state->last_lin = (double *)my_malloc(sizeof(double)*totdoc); | 
| wolffd@0 | 3218   shrink_state->last_a = (double *)my_malloc(sizeof(double)*totdoc); | 
| wolffd@0 | 3219 | 
| wolffd@0 | 3220   for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 3221     shrink_state->active[i]=1; | 
| wolffd@0 | 3222     shrink_state->inactive_since[i]=0; | 
| wolffd@0 | 3223     shrink_state->last_a[i]=0; | 
| wolffd@0 | 3224     shrink_state->last_lin[i]=0; | 
| wolffd@0 | 3225   } | 
| wolffd@0 | 3226 } | 
| wolffd@0 | 3227 | 
| wolffd@0 | 3228 void shrink_state_cleanup(SHRINK_STATE *shrink_state) | 
| wolffd@0 | 3229 { | 
| wolffd@0 | 3230   free(shrink_state->active); | 
| wolffd@0 | 3231   free(shrink_state->inactive_since); | 
| wolffd@0 | 3232   if(shrink_state->deactnum > 0) | 
| wolffd@0 | 3233     free(shrink_state->a_history[shrink_state->deactnum-1]); | 
| wolffd@0 | 3234   free(shrink_state->a_history); | 
| wolffd@0 | 3235   free(shrink_state->last_a); | 
| wolffd@0 | 3236   free(shrink_state->last_lin); | 
| wolffd@0 | 3237 } | 
| wolffd@0 | 3238 | 
| wolffd@0 | 3239 long shrink_problem(DOC **docs, | 
| wolffd@0 | 3240 		    LEARN_PARM *learn_parm, | 
| wolffd@0 | 3241 		    SHRINK_STATE *shrink_state, | 
| wolffd@0 | 3242 		    KERNEL_PARM *kernel_parm, | 
| wolffd@0 | 3243 		    long int *active2dnum, | 
| wolffd@0 | 3244 		    long int *last_suboptimal_at, | 
| wolffd@0 | 3245 		    long int iteration, | 
| wolffd@0 | 3246 		    long int totdoc, | 
| wolffd@0 | 3247 		    long int minshrink, | 
| wolffd@0 | 3248 		    double *a, | 
| wolffd@0 | 3249 		    long int *inconsistent) | 
| wolffd@0 | 3250      /* Shrink some variables away.  Do the shrinking only if at least | 
| wolffd@0 | 3251         minshrink variables can be removed. */ | 
| wolffd@0 | 3252 { | 
| wolffd@0 | 3253   long i,ii,change,activenum,lastiter; | 
| wolffd@0 | 3254   double *a_old; | 
| wolffd@0 | 3255 | 
| wolffd@0 | 3256   activenum=0; | 
| wolffd@0 | 3257   change=0; | 
| wolffd@0 | 3258   for(ii=0;active2dnum[ii]>=0;ii++) { | 
| wolffd@0 | 3259     i=active2dnum[ii]; | 
| wolffd@0 | 3260     activenum++; | 
| wolffd@0 | 3261     if(learn_parm->sharedslack) | 
| wolffd@0 | 3262       lastiter=last_suboptimal_at[docs[i]->slackid]; | 
| wolffd@0 | 3263     else | 
| wolffd@0 | 3264       lastiter=last_suboptimal_at[i]; | 
| wolffd@0 | 3265     if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink) | 
| wolffd@0 | 3266        || (inconsistent[i])) { | 
| wolffd@0 | 3267       change++; | 
| wolffd@0 | 3268     } | 
| wolffd@0 | 3269   } | 
| wolffd@0 | 3270   if((change>=minshrink) /* shrink only if sufficiently many candidates */ | 
| wolffd@0 | 3271      && (shrink_state->deactnum<shrink_state->maxhistory)) { /* and enough memory */ | 
| wolffd@0 | 3272     /* Shrink problem by removing those variables which are */ | 
| wolffd@0 | 3273     /* optimal at a bound for a minimum number of iterations */ | 
| wolffd@0 | 3274     if(verbosity>=2) { | 
| wolffd@0 | 3275       printf(" Shrinking..."); fflush(stdout); | 
| wolffd@0 | 3276     } | 
| wolffd@0 | 3277     if(kernel_parm->kernel_type != LINEAR) { /*  non-linear case save alphas */ | 
| wolffd@0 | 3278       a_old=(double *)my_malloc(sizeof(double)*totdoc); | 
| wolffd@0 | 3279       shrink_state->a_history[shrink_state->deactnum]=a_old; | 
| wolffd@0 | 3280       for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 3281 	a_old[i]=a[i]; | 
| wolffd@0 | 3282       } | 
| wolffd@0 | 3283     } | 
| wolffd@0 | 3284     for(ii=0;active2dnum[ii]>=0;ii++) { | 
| wolffd@0 | 3285       i=active2dnum[ii]; | 
| wolffd@0 | 3286       if(learn_parm->sharedslack) | 
| wolffd@0 | 3287 	lastiter=last_suboptimal_at[docs[i]->slackid]; | 
| wolffd@0 | 3288       else | 
| wolffd@0 | 3289 	lastiter=last_suboptimal_at[i]; | 
| wolffd@0 | 3290       if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink) | 
| wolffd@0 | 3291 	 || (inconsistent[i])) { | 
| wolffd@0 | 3292 	shrink_state->active[i]=0; | 
| wolffd@0 | 3293 	shrink_state->inactive_since[i]=shrink_state->deactnum; | 
| wolffd@0 | 3294       } | 
| wolffd@0 | 3295     } | 
| wolffd@0 | 3296     activenum=compute_index(shrink_state->active,totdoc,active2dnum); | 
| wolffd@0 | 3297     shrink_state->deactnum++; | 
| wolffd@0 | 3298     if(kernel_parm->kernel_type == LINEAR) { | 
| wolffd@0 | 3299       shrink_state->deactnum=0; | 
| wolffd@0 | 3300     } | 
| wolffd@0 | 3301     if(verbosity>=2) { | 
| wolffd@0 | 3302       printf("done.\n"); fflush(stdout); | 
| wolffd@0 | 3303       printf(" Number of inactive variables = %ld\n",totdoc-activenum); | 
| wolffd@0 | 3304     } | 
| wolffd@0 | 3305   } | 
| wolffd@0 | 3306   return(activenum); | 
| wolffd@0 | 3307 } | 
| wolffd@0 | 3308 | 
| wolffd@0 | 3309 | 
| wolffd@0 | 3310 void reactivate_inactive_examples(long int *label, | 
| wolffd@0 | 3311 				  long int *unlabeled, | 
| wolffd@0 | 3312 				  double *a, | 
| wolffd@0 | 3313 				  SHRINK_STATE *shrink_state, | 
| wolffd@0 | 3314 				  double *lin, | 
| wolffd@0 | 3315 				  double *c, | 
| wolffd@0 | 3316 				  long int totdoc, | 
| wolffd@0 | 3317 				  long int totwords, | 
| wolffd@0 | 3318 				  long int iteration, | 
| wolffd@0 | 3319 				  LEARN_PARM *learn_parm, | 
| wolffd@0 | 3320 				  long int *inconsistent, | 
| wolffd@0 | 3321 				  DOC **docs, | 
| wolffd@0 | 3322 				  KERNEL_PARM *kernel_parm, | 
| wolffd@0 | 3323 				  KERNEL_CACHE *kernel_cache, | 
| wolffd@0 | 3324 				  MODEL *model, | 
| wolffd@0 | 3325 				  CFLOAT *aicache, | 
| wolffd@0 | 3326 				  double *weights, | 
| wolffd@0 | 3327 				  double *maxdiff) | 
| wolffd@0 | 3328      /* Make all variables active again which had been removed by | 
| wolffd@0 | 3329         shrinking. */ | 
| wolffd@0 | 3330      /* Computes lin for those variables from scratch. */ | 
| wolffd@0 | 3331 { | 
| wolffd@0 | 3332   register long i,j,ii,jj,t,*changed2dnum,*inactive2dnum; | 
| wolffd@0 | 3333   long *changed,*inactive; | 
| wolffd@0 | 3334   register double kernel_val,*a_old,dist; | 
| wolffd@0 | 3335   double ex_c,target; | 
| wolffd@0 | 3336   SVECTOR *f; | 
| wolffd@0 | 3337 | 
| wolffd@0 | 3338   if(kernel_parm->kernel_type == LINEAR) { /* special linear case */ | 
| wolffd@0 | 3339     a_old=shrink_state->last_a; | 
| wolffd@0 | 3340     clear_vector_n(weights,totwords); | 
| wolffd@0 | 3341     for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 3342       if(a[i] != a_old[i]) { | 
| wolffd@0 | 3343 	for(f=docs[i]->fvec;f;f=f->next) | 
| wolffd@0 | 3344 	  add_vector_ns(weights,f, | 
| wolffd@0 | 3345 			f->factor*((a[i]-a_old[i])*(double)label[i])); | 
| wolffd@0 | 3346 	a_old[i]=a[i]; | 
| wolffd@0 | 3347       } | 
| wolffd@0 | 3348     } | 
| wolffd@0 | 3349     for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 3350       if(!shrink_state->active[i]) { | 
| wolffd@0 | 3351 	for(f=docs[i]->fvec;f;f=f->next) | 
| wolffd@0 | 3352 	  lin[i]=shrink_state->last_lin[i]+f->factor*sprod_ns(weights,f); | 
| wolffd@0 | 3353       } | 
| wolffd@0 | 3354       shrink_state->last_lin[i]=lin[i]; | 
| wolffd@0 | 3355     } | 
| wolffd@0 | 3356   } | 
| wolffd@0 | 3357   else { | 
| wolffd@0 | 3358     changed=(long *)my_malloc(sizeof(long)*totdoc); | 
| wolffd@0 | 3359     changed2dnum=(long *)my_malloc(sizeof(long)*(totdoc+11)); | 
| wolffd@0 | 3360     inactive=(long *)my_malloc(sizeof(long)*totdoc); | 
| wolffd@0 | 3361     inactive2dnum=(long *)my_malloc(sizeof(long)*(totdoc+11)); | 
| wolffd@0 | 3362     for(t=shrink_state->deactnum-1;(t>=0) && shrink_state->a_history[t];t--) { | 
| wolffd@0 | 3363       if(verbosity>=2) { | 
| wolffd@0 | 3364 	printf("%ld..",t); fflush(stdout); | 
| wolffd@0 | 3365       } | 
| wolffd@0 | 3366       a_old=shrink_state->a_history[t]; | 
| wolffd@0 | 3367       for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 3368 	inactive[i]=((!shrink_state->active[i]) | 
| wolffd@0 | 3369 		     && (shrink_state->inactive_since[i] == t)); | 
| wolffd@0 | 3370 	changed[i]= (a[i] != a_old[i]); | 
| wolffd@0 | 3371       } | 
| wolffd@0 | 3372       compute_index(inactive,totdoc,inactive2dnum); | 
| wolffd@0 | 3373       compute_index(changed,totdoc,changed2dnum); | 
| wolffd@0 | 3374 | 
| wolffd@0 | 3375       for(ii=0;(i=changed2dnum[ii])>=0;ii++) { | 
| wolffd@0 | 3376 	get_kernel_row(kernel_cache,docs,i,totdoc,inactive2dnum,aicache, | 
| wolffd@0 | 3377 		       kernel_parm); | 
| wolffd@0 | 3378 	for(jj=0;(j=inactive2dnum[jj])>=0;jj++) { | 
| wolffd@0 | 3379 	  kernel_val=aicache[j]; | 
| wolffd@0 | 3380 	  lin[j]+=(((a[i]*kernel_val)-(a_old[i]*kernel_val))*(double)label[i]); | 
| wolffd@0 | 3381 	} | 
| wolffd@0 | 3382       } | 
| wolffd@0 | 3383     } | 
| wolffd@0 | 3384     free(changed); | 
| wolffd@0 | 3385     free(changed2dnum); | 
| wolffd@0 | 3386     free(inactive); | 
| wolffd@0 | 3387     free(inactive2dnum); | 
| wolffd@0 | 3388   } | 
| wolffd@0 | 3389   (*maxdiff)=0; | 
| wolffd@0 | 3390   for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 3391     shrink_state->inactive_since[i]=shrink_state->deactnum-1; | 
| wolffd@0 | 3392     if(!inconsistent[i]) { | 
| wolffd@0 | 3393       dist=(lin[i]-model->b)*(double)label[i]; | 
| wolffd@0 | 3394       target=-(learn_parm->eps-(double)label[i]*c[i]); | 
| wolffd@0 | 3395       ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; | 
| wolffd@0 | 3396       if((a[i]>learn_parm->epsilon_a) && (dist > target)) { | 
| wolffd@0 | 3397 	if((dist-target)>(*maxdiff))  /* largest violation */ | 
| wolffd@0 | 3398 	  (*maxdiff)=dist-target; | 
| wolffd@0 | 3399       } | 
| wolffd@0 | 3400       else if((a[i]<ex_c) && (dist < target)) { | 
| wolffd@0 | 3401 	if((target-dist)>(*maxdiff))  /* largest violation */ | 
| wolffd@0 | 3402 	  (*maxdiff)=target-dist; | 
| wolffd@0 | 3403       } | 
| wolffd@0 | 3404       if((a[i]>(0+learn_parm->epsilon_a)) | 
| wolffd@0 | 3405 	 && (a[i]<ex_c)) { | 
| wolffd@0 | 3406 	shrink_state->active[i]=1;                         /* not at bound */ | 
| wolffd@0 | 3407       } | 
| wolffd@0 | 3408       else if((a[i]<=(0+learn_parm->epsilon_a)) && (dist < (target+learn_parm->epsilon_shrink))) { | 
| wolffd@0 | 3409 	shrink_state->active[i]=1; | 
| wolffd@0 | 3410       } | 
| wolffd@0 | 3411       else if((a[i]>=ex_c) | 
| wolffd@0 | 3412 	      && (dist > (target-learn_parm->epsilon_shrink))) { | 
| wolffd@0 | 3413 	shrink_state->active[i]=1; | 
| wolffd@0 | 3414       } | 
| wolffd@0 | 3415       else if(learn_parm->sharedslack) { /* make all active when sharedslack */ | 
| wolffd@0 | 3416 	shrink_state->active[i]=1; | 
| wolffd@0 | 3417       } | 
| wolffd@0 | 3418     } | 
| wolffd@0 | 3419   } | 
| wolffd@0 | 3420   if(kernel_parm->kernel_type != LINEAR) { /* update history for non-linear */ | 
| wolffd@0 | 3421     for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 3422       (shrink_state->a_history[shrink_state->deactnum-1])[i]=a[i]; | 
| wolffd@0 | 3423     } | 
| wolffd@0 | 3424     for(t=shrink_state->deactnum-2;(t>=0) && shrink_state->a_history[t];t--) { | 
| wolffd@0 | 3425       free(shrink_state->a_history[t]); | 
| wolffd@0 | 3426       shrink_state->a_history[t]=0; | 
| wolffd@0 | 3427     } | 
| wolffd@0 | 3428   } | 
| wolffd@0 | 3429 } | 
| wolffd@0 | 3430 | 
| wolffd@0 | 3431 /****************************** Cache handling *******************************/ | 
| wolffd@0 | 3432 | 
| wolffd@0 | 3433 void get_kernel_row(KERNEL_CACHE *kernel_cache, DOC **docs, | 
| wolffd@0 | 3434 		    long int docnum, long int totdoc, | 
| wolffd@0 | 3435 		    long int *active2dnum, CFLOAT *buffer, | 
| wolffd@0 | 3436 		    KERNEL_PARM *kernel_parm) | 
| wolffd@0 | 3437      /* Get's a row of the matrix of kernel values This matrix has the | 
| wolffd@0 | 3438       same form as the Hessian, just that the elements are not | 
| wolffd@0 | 3439       multiplied by */ | 
| wolffd@0 | 3440      /* y_i * y_j * a_i * a_j */ | 
| wolffd@0 | 3441      /* Takes the values from the cache if available. */ | 
| wolffd@0 | 3442 { | 
| wolffd@0 | 3443   register long i,j,start; | 
| wolffd@0 | 3444   DOC *ex; | 
| wolffd@0 | 3445 | 
| wolffd@0 | 3446   ex=docs[docnum]; | 
| wolffd@0 | 3447 | 
| wolffd@0 | 3448   if(kernel_cache->index[docnum] != -1) { /* row is cached? */ | 
| wolffd@0 | 3449     kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */ | 
| wolffd@0 | 3450     start=kernel_cache->activenum*kernel_cache->index[docnum]; | 
| wolffd@0 | 3451     for(i=0;(j=active2dnum[i])>=0;i++) { | 
| wolffd@0 | 3452       if(kernel_cache->totdoc2active[j] >= 0) { /* column is cached? */ | 
| wolffd@0 | 3453 	buffer[j]=kernel_cache->buffer[start+kernel_cache->totdoc2active[j]]; | 
| wolffd@0 | 3454       } | 
| wolffd@0 | 3455       else { | 
| wolffd@0 | 3456 	buffer[j]=(CFLOAT)kernel(kernel_parm,ex,docs[j]); | 
| wolffd@0 | 3457       } | 
| wolffd@0 | 3458     } | 
| wolffd@0 | 3459   } | 
| wolffd@0 | 3460   else { | 
| wolffd@0 | 3461     for(i=0;(j=active2dnum[i])>=0;i++) { | 
| wolffd@0 | 3462       buffer[j]=(CFLOAT)kernel(kernel_parm,ex,docs[j]); | 
| wolffd@0 | 3463     } | 
| wolffd@0 | 3464   } | 
| wolffd@0 | 3465 } | 
| wolffd@0 | 3466 | 
| wolffd@0 | 3467 | 
| wolffd@0 | 3468 void cache_kernel_row(KERNEL_CACHE *kernel_cache, DOC **docs, | 
| wolffd@0 | 3469 		      long int m, KERNEL_PARM *kernel_parm) | 
| wolffd@0 | 3470      /* Fills cache for the row m */ | 
| wolffd@0 | 3471 { | 
| wolffd@0 | 3472   register DOC *ex; | 
| wolffd@0 | 3473   register long j,k,l; | 
| wolffd@0 | 3474   register CFLOAT *cache; | 
| wolffd@0 | 3475 | 
| wolffd@0 | 3476   if(!kernel_cache_check(kernel_cache,m)) {  /* not cached yet*/ | 
| wolffd@0 | 3477     cache = kernel_cache_clean_and_malloc(kernel_cache,m); | 
| wolffd@0 | 3478     if(cache) { | 
| wolffd@0 | 3479       l=kernel_cache->totdoc2active[m]; | 
| wolffd@0 | 3480       ex=docs[m]; | 
| wolffd@0 | 3481       for(j=0;j<kernel_cache->activenum;j++) {  /* fill cache */ | 
| wolffd@0 | 3482 	k=kernel_cache->active2totdoc[j]; | 
| wolffd@0 | 3483 	if((kernel_cache->index[k] != -1) && (l != -1) && (k != m)) { | 
| wolffd@0 | 3484 	  cache[j]=kernel_cache->buffer[kernel_cache->activenum | 
| wolffd@0 | 3485 				       *kernel_cache->index[k]+l]; | 
| wolffd@0 | 3486 	} | 
| wolffd@0 | 3487 	else { | 
| wolffd@0 | 3488 	  cache[j]=kernel(kernel_parm,ex,docs[k]); | 
| wolffd@0 | 3489 	} | 
| wolffd@0 | 3490       } | 
| wolffd@0 | 3491     } | 
| wolffd@0 | 3492     else { | 
| wolffd@0 | 3493       perror("Error: Kernel cache full! => increase cache size"); | 
| wolffd@0 | 3494     } | 
| wolffd@0 | 3495   } | 
| wolffd@0 | 3496 } | 
| wolffd@0 | 3497 | 
| wolffd@0 | 3498 | 
| wolffd@0 | 3499 void cache_multiple_kernel_rows(KERNEL_CACHE *kernel_cache, DOC **docs, | 
| wolffd@0 | 3500 				long int *key, long int varnum, | 
| wolffd@0 | 3501 				KERNEL_PARM *kernel_parm) | 
| wolffd@0 | 3502      /* Fills cache for the rows in key */ | 
| wolffd@0 | 3503 { | 
| wolffd@0 | 3504   register long i; | 
| wolffd@0 | 3505 | 
| wolffd@0 | 3506   for(i=0;i<varnum;i++) {  /* fill up kernel cache */ | 
| wolffd@0 | 3507     cache_kernel_row(kernel_cache,docs,key[i],kernel_parm); | 
| wolffd@0 | 3508   } | 
| wolffd@0 | 3509 } | 
| wolffd@0 | 3510 | 
| wolffd@0 | 3511 | 
| wolffd@0 | 3512 void kernel_cache_shrink(KERNEL_CACHE *kernel_cache, long int totdoc, | 
| wolffd@0 | 3513 			 long int numshrink, long int *after) | 
| wolffd@0 | 3514      /* Remove numshrink columns in the cache which correspond to | 
| wolffd@0 | 3515         examples marked 0 in after. */ | 
| wolffd@0 | 3516 { | 
| wolffd@0 | 3517   register long i,j,jj,from=0,to=0,scount; | 
| wolffd@0 | 3518   long *keep; | 
| wolffd@0 | 3519 | 
| wolffd@0 | 3520   if(verbosity>=2) { | 
| wolffd@0 | 3521     printf(" Reorganizing cache..."); fflush(stdout); | 
| wolffd@0 | 3522   } | 
| wolffd@0 | 3523 | 
| wolffd@0 | 3524   keep=(long *)my_malloc(sizeof(long)*totdoc); | 
| wolffd@0 | 3525   for(j=0;j<totdoc;j++) { | 
| wolffd@0 | 3526     keep[j]=1; | 
| wolffd@0 | 3527   } | 
| wolffd@0 | 3528   scount=0; | 
| wolffd@0 | 3529   for(jj=0;(jj<kernel_cache->activenum) && (scount<numshrink);jj++) { | 
| wolffd@0 | 3530     j=kernel_cache->active2totdoc[jj]; | 
| wolffd@0 | 3531     if(!after[j]) { | 
| wolffd@0 | 3532       scount++; | 
| wolffd@0 | 3533       keep[j]=0; | 
| wolffd@0 | 3534     } | 
| wolffd@0 | 3535   } | 
| wolffd@0 | 3536 | 
| wolffd@0 | 3537   for(i=0;i<kernel_cache->max_elems;i++) { | 
| wolffd@0 | 3538     for(jj=0;jj<kernel_cache->activenum;jj++) { | 
| wolffd@0 | 3539       j=kernel_cache->active2totdoc[jj]; | 
| wolffd@0 | 3540       if(!keep[j]) { | 
| wolffd@0 | 3541 	from++; | 
| wolffd@0 | 3542       } | 
| wolffd@0 | 3543       else { | 
| wolffd@0 | 3544 	kernel_cache->buffer[to]=kernel_cache->buffer[from]; | 
| wolffd@0 | 3545 	to++; | 
| wolffd@0 | 3546 	from++; | 
| wolffd@0 | 3547       } | 
| wolffd@0 | 3548     } | 
| wolffd@0 | 3549   } | 
| wolffd@0 | 3550 | 
| wolffd@0 | 3551   kernel_cache->activenum=0; | 
| wolffd@0 | 3552   for(j=0;j<totdoc;j++) { | 
| wolffd@0 | 3553     if((keep[j]) && (kernel_cache->totdoc2active[j] != -1)) { | 
| wolffd@0 | 3554       kernel_cache->active2totdoc[kernel_cache->activenum]=j; | 
| wolffd@0 | 3555       kernel_cache->totdoc2active[j]=kernel_cache->activenum; | 
| wolffd@0 | 3556       kernel_cache->activenum++; | 
| wolffd@0 | 3557     } | 
| wolffd@0 | 3558     else { | 
| wolffd@0 | 3559       kernel_cache->totdoc2active[j]=-1; | 
| wolffd@0 | 3560     } | 
| wolffd@0 | 3561   } | 
| wolffd@0 | 3562 | 
| wolffd@0 | 3563   kernel_cache->max_elems=(long)(kernel_cache->buffsize/kernel_cache->activenum); | 
| wolffd@0 | 3564   if(kernel_cache->max_elems>totdoc) { | 
| wolffd@0 | 3565     kernel_cache->max_elems=totdoc; | 
| wolffd@0 | 3566   } | 
| wolffd@0 | 3567 | 
| wolffd@0 | 3568   free(keep); | 
| wolffd@0 | 3569 | 
| wolffd@0 | 3570   if(verbosity>=2) { | 
| wolffd@0 | 3571     printf("done.\n"); fflush(stdout); | 
| wolffd@0 | 3572     printf(" Cache-size in rows = %ld\n",kernel_cache->max_elems); | 
| wolffd@0 | 3573   } | 
| wolffd@0 | 3574 } | 
| wolffd@0 | 3575 | 
| wolffd@0 | 3576 KERNEL_CACHE *kernel_cache_init(long int totdoc, long int buffsize) | 
| wolffd@0 | 3577 { | 
| wolffd@0 | 3578   long i; | 
| wolffd@0 | 3579   KERNEL_CACHE *kernel_cache; | 
| wolffd@0 | 3580 | 
| wolffd@0 | 3581   kernel_cache=(KERNEL_CACHE *)my_malloc(sizeof(KERNEL_CACHE)); | 
| wolffd@0 | 3582   kernel_cache->index = (long *)my_malloc(sizeof(long)*totdoc); | 
| wolffd@0 | 3583   kernel_cache->occu = (long *)my_malloc(sizeof(long)*totdoc); | 
| wolffd@0 | 3584   kernel_cache->lru = (long *)my_malloc(sizeof(long)*totdoc); | 
| wolffd@0 | 3585   kernel_cache->invindex = (long *)my_malloc(sizeof(long)*totdoc); | 
| wolffd@0 | 3586   kernel_cache->active2totdoc = (long *)my_malloc(sizeof(long)*totdoc); | 
| wolffd@0 | 3587   kernel_cache->totdoc2active = (long *)my_malloc(sizeof(long)*totdoc); | 
| wolffd@0 | 3588   kernel_cache->buffer = (CFLOAT *)my_malloc((size_t)(buffsize)*1024*1024); | 
| wolffd@0 | 3589 | 
| wolffd@0 | 3590   kernel_cache->buffsize=(long)(buffsize/sizeof(CFLOAT)*1024*1024); | 
| wolffd@0 | 3591 | 
| wolffd@0 | 3592   kernel_cache->max_elems=(long)(kernel_cache->buffsize/totdoc); | 
| wolffd@0 | 3593   if(kernel_cache->max_elems>totdoc) { | 
| wolffd@0 | 3594     kernel_cache->max_elems=totdoc; | 
| wolffd@0 | 3595   } | 
| wolffd@0 | 3596 | 
| wolffd@0 | 3597   if(verbosity>=2) { | 
| wolffd@0 | 3598     printf(" Cache-size in rows = %ld\n",kernel_cache->max_elems); | 
| wolffd@0 | 3599     printf(" Kernel evals so far: %ld\n",kernel_cache_statistic); | 
| wolffd@0 | 3600   } | 
| wolffd@0 | 3601 | 
| wolffd@0 | 3602   kernel_cache->elems=0;   /* initialize cache */ | 
| wolffd@0 | 3603   for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 3604     kernel_cache->index[i]=-1; | 
| wolffd@0 | 3605     kernel_cache->lru[i]=0; | 
| wolffd@0 | 3606   } | 
| wolffd@0 | 3607   for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 3608     kernel_cache->occu[i]=0; | 
| wolffd@0 | 3609     kernel_cache->invindex[i]=-1; | 
| wolffd@0 | 3610   } | 
| wolffd@0 | 3611 | 
| wolffd@0 | 3612   kernel_cache->activenum=totdoc;; | 
| wolffd@0 | 3613   for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 3614       kernel_cache->active2totdoc[i]=i; | 
| wolffd@0 | 3615       kernel_cache->totdoc2active[i]=i; | 
| wolffd@0 | 3616   } | 
| wolffd@0 | 3617 | 
| wolffd@0 | 3618   kernel_cache->time=0; | 
| wolffd@0 | 3619 | 
| wolffd@0 | 3620   return(kernel_cache); | 
| wolffd@0 | 3621 } | 
| wolffd@0 | 3622 | 
| wolffd@0 | 3623 void kernel_cache_reset_lru(KERNEL_CACHE *kernel_cache) | 
| wolffd@0 | 3624 { | 
| wolffd@0 | 3625   long maxlru=0,k; | 
| wolffd@0 | 3626 | 
| wolffd@0 | 3627   for(k=0;k<kernel_cache->max_elems;k++) { | 
| wolffd@0 | 3628     if(maxlru < kernel_cache->lru[k]) | 
| wolffd@0 | 3629       maxlru=kernel_cache->lru[k]; | 
| wolffd@0 | 3630   } | 
| wolffd@0 | 3631   for(k=0;k<kernel_cache->max_elems;k++) { | 
| wolffd@0 | 3632       kernel_cache->lru[k]-=maxlru; | 
| wolffd@0 | 3633   } | 
| wolffd@0 | 3634 } | 
| wolffd@0 | 3635 | 
| wolffd@0 | 3636 void kernel_cache_cleanup(KERNEL_CACHE *kernel_cache) | 
| wolffd@0 | 3637 { | 
| wolffd@0 | 3638   free(kernel_cache->index); | 
| wolffd@0 | 3639   free(kernel_cache->occu); | 
| wolffd@0 | 3640   free(kernel_cache->lru); | 
| wolffd@0 | 3641   free(kernel_cache->invindex); | 
| wolffd@0 | 3642   free(kernel_cache->active2totdoc); | 
| wolffd@0 | 3643   free(kernel_cache->totdoc2active); | 
| wolffd@0 | 3644   free(kernel_cache->buffer); | 
| wolffd@0 | 3645   free(kernel_cache); | 
| wolffd@0 | 3646 } | 
| wolffd@0 | 3647 | 
| wolffd@0 | 3648 long kernel_cache_malloc(KERNEL_CACHE *kernel_cache) | 
| wolffd@0 | 3649 { | 
| wolffd@0 | 3650   long i; | 
| wolffd@0 | 3651 | 
| wolffd@0 | 3652   if(kernel_cache_space_available(kernel_cache)) { | 
| wolffd@0 | 3653     for(i=0;i<kernel_cache->max_elems;i++) { | 
| wolffd@0 | 3654       if(!kernel_cache->occu[i]) { | 
| wolffd@0 | 3655 	kernel_cache->occu[i]=1; | 
| wolffd@0 | 3656 	kernel_cache->elems++; | 
| wolffd@0 | 3657 	return(i); | 
| wolffd@0 | 3658       } | 
| wolffd@0 | 3659     } | 
| wolffd@0 | 3660   } | 
| wolffd@0 | 3661   return(-1); | 
| wolffd@0 | 3662 } | 
| wolffd@0 | 3663 | 
| wolffd@0 | 3664 void kernel_cache_free(KERNEL_CACHE *kernel_cache, long int i) | 
| wolffd@0 | 3665 { | 
| wolffd@0 | 3666   kernel_cache->occu[i]=0; | 
| wolffd@0 | 3667   kernel_cache->elems--; | 
| wolffd@0 | 3668 } | 
| wolffd@0 | 3669 | 
| wolffd@0 | 3670 long kernel_cache_free_lru(KERNEL_CACHE *kernel_cache) | 
| wolffd@0 | 3671      /* remove least recently used cache element */ | 
| wolffd@0 | 3672 { | 
| wolffd@0 | 3673   register long k,least_elem=-1,least_time; | 
| wolffd@0 | 3674 | 
| wolffd@0 | 3675   least_time=kernel_cache->time+1; | 
| wolffd@0 | 3676   for(k=0;k<kernel_cache->max_elems;k++) { | 
| wolffd@0 | 3677     if(kernel_cache->invindex[k] != -1) { | 
| wolffd@0 | 3678       if(kernel_cache->lru[k]<least_time) { | 
| wolffd@0 | 3679 	least_time=kernel_cache->lru[k]; | 
| wolffd@0 | 3680 	least_elem=k; | 
| wolffd@0 | 3681       } | 
| wolffd@0 | 3682     } | 
| wolffd@0 | 3683   } | 
| wolffd@0 | 3684   if(least_elem != -1) { | 
| wolffd@0 | 3685     kernel_cache_free(kernel_cache,least_elem); | 
| wolffd@0 | 3686     kernel_cache->index[kernel_cache->invindex[least_elem]]=-1; | 
| wolffd@0 | 3687     kernel_cache->invindex[least_elem]=-1; | 
| wolffd@0 | 3688     return(1); | 
| wolffd@0 | 3689   } | 
| wolffd@0 | 3690   return(0); | 
| wolffd@0 | 3691 } | 
| wolffd@0 | 3692 | 
| wolffd@0 | 3693 | 
| wolffd@0 | 3694 CFLOAT *kernel_cache_clean_and_malloc(KERNEL_CACHE *kernel_cache, | 
| wolffd@0 | 3695 				      long int docnum) | 
| wolffd@0 | 3696      /* Get a free cache entry. In case cache is full, the lru element | 
| wolffd@0 | 3697         is removed. */ | 
| wolffd@0 | 3698 { | 
| wolffd@0 | 3699   long result; | 
| wolffd@0 | 3700   if((result = kernel_cache_malloc(kernel_cache)) == -1) { | 
| wolffd@0 | 3701     if(kernel_cache_free_lru(kernel_cache)) { | 
| wolffd@0 | 3702       result = kernel_cache_malloc(kernel_cache); | 
| wolffd@0 | 3703     } | 
| wolffd@0 | 3704   } | 
| wolffd@0 | 3705   kernel_cache->index[docnum]=result; | 
| wolffd@0 | 3706   if(result == -1) { | 
| wolffd@0 | 3707     return(0); | 
| wolffd@0 | 3708   } | 
| wolffd@0 | 3709   kernel_cache->invindex[result]=docnum; | 
| wolffd@0 | 3710   kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */ | 
| wolffd@0 | 3711   return((CFLOAT *)((long)kernel_cache->buffer | 
| wolffd@0 | 3712 		    +(kernel_cache->activenum*sizeof(CFLOAT)* | 
| wolffd@0 | 3713 		      kernel_cache->index[docnum]))); | 
| wolffd@0 | 3714 } | 
| wolffd@0 | 3715 | 
| wolffd@0 | 3716 long kernel_cache_touch(KERNEL_CACHE *kernel_cache, long int docnum) | 
| wolffd@0 | 3717      /* Update lru time to avoid removal from cache. */ | 
| wolffd@0 | 3718 { | 
| wolffd@0 | 3719   if(kernel_cache && kernel_cache->index[docnum] != -1) { | 
| wolffd@0 | 3720     kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */ | 
| wolffd@0 | 3721     return(1); | 
| wolffd@0 | 3722   } | 
| wolffd@0 | 3723   return(0); | 
| wolffd@0 | 3724 } | 
| wolffd@0 | 3725 | 
| wolffd@0 | 3726 long kernel_cache_check(KERNEL_CACHE *kernel_cache, long int docnum) | 
| wolffd@0 | 3727      /* Is that row cached? */ | 
| wolffd@0 | 3728 { | 
| wolffd@0 | 3729   return(kernel_cache->index[docnum] != -1); | 
| wolffd@0 | 3730 } | 
| wolffd@0 | 3731 | 
| wolffd@0 | 3732 long kernel_cache_space_available(KERNEL_CACHE *kernel_cache) | 
| wolffd@0 | 3733      /* Is there room for one more row? */ | 
| wolffd@0 | 3734 { | 
| wolffd@0 | 3735   return(kernel_cache->elems < kernel_cache->max_elems); | 
| wolffd@0 | 3736 } | 
| wolffd@0 | 3737 | 
| wolffd@0 | 3738 /************************** Compute estimates ******************************/ | 
| wolffd@0 | 3739 | 
| wolffd@0 | 3740 void compute_xa_estimates(MODEL *model, long int *label, | 
| wolffd@0 | 3741 			  long int *unlabeled, long int totdoc, | 
| wolffd@0 | 3742 			  DOC **docs, double *lin, double *a, | 
| wolffd@0 | 3743 			  KERNEL_PARM *kernel_parm, | 
| wolffd@0 | 3744 			  LEARN_PARM *learn_parm, double *error, | 
| wolffd@0 | 3745 			  double *recall, double *precision) | 
| wolffd@0 | 3746      /* Computes xa-estimate of error rate, recall, and precision. See | 
| wolffd@0 | 3747         T. Joachims, Estimating the Generalization Performance of an SVM | 
| wolffd@0 | 3748         Efficiently, IMCL, 2000. */ | 
| wolffd@0 | 3749 { | 
| wolffd@0 | 3750   long i,looerror,looposerror,loonegerror; | 
| wolffd@0 | 3751   long totex,totposex; | 
| wolffd@0 | 3752   double xi,r_delta,r_delta_sq,sim=0; | 
| wolffd@0 | 3753   long *sv2dnum=NULL,*sv=NULL,svnum; | 
| wolffd@0 | 3754 | 
| wolffd@0 | 3755   r_delta=estimate_r_delta(docs,totdoc,kernel_parm); | 
| wolffd@0 | 3756   r_delta_sq=r_delta*r_delta; | 
| wolffd@0 | 3757 | 
| wolffd@0 | 3758   looerror=0; | 
| wolffd@0 | 3759   looposerror=0; | 
| wolffd@0 | 3760   loonegerror=0; | 
| wolffd@0 | 3761   totex=0; | 
| wolffd@0 | 3762   totposex=0; | 
| wolffd@0 | 3763   svnum=0; | 
| wolffd@0 | 3764 | 
| wolffd@0 | 3765   if(learn_parm->xa_depth > 0) { | 
| wolffd@0 | 3766     sv = (long *)my_malloc(sizeof(long)*(totdoc+11)); | 
| wolffd@0 | 3767     for(i=0;i<totdoc;i++) | 
| wolffd@0 | 3768       sv[i]=0; | 
| wolffd@0 | 3769     for(i=1;i<model->sv_num;i++) | 
| wolffd@0 | 3770       if(a[model->supvec[i]->docnum] | 
| wolffd@0 | 3771 	 < (learn_parm->svm_cost[model->supvec[i]->docnum] | 
| wolffd@0 | 3772 	    -learn_parm->epsilon_a)) { | 
| wolffd@0 | 3773 	sv[model->supvec[i]->docnum]=1; | 
| wolffd@0 | 3774 	svnum++; | 
| wolffd@0 | 3775       } | 
| wolffd@0 | 3776     sv2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); | 
| wolffd@0 | 3777     clear_index(sv2dnum); | 
| wolffd@0 | 3778     compute_index(sv,totdoc,sv2dnum); | 
| wolffd@0 | 3779   } | 
| wolffd@0 | 3780 | 
| wolffd@0 | 3781   for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 3782     if(unlabeled[i]) { | 
| wolffd@0 | 3783       /* ignore it */ | 
| wolffd@0 | 3784     } | 
| wolffd@0 | 3785     else { | 
| wolffd@0 | 3786       xi=1.0-((lin[i]-model->b)*(double)label[i]); | 
| wolffd@0 | 3787       if(xi<0) xi=0; | 
| wolffd@0 | 3788       if(label[i]>0) { | 
| wolffd@0 | 3789 	totposex++; | 
| wolffd@0 | 3790       } | 
| wolffd@0 | 3791       if((learn_parm->rho*a[i]*r_delta_sq+xi) >= 1.0) { | 
| wolffd@0 | 3792 	if(learn_parm->xa_depth > 0) {  /* makes assumptions */ | 
| wolffd@0 | 3793 	  sim=distribute_alpha_t_greedily(sv2dnum,svnum,docs,a,i,label, | 
| wolffd@0 | 3794 					  kernel_parm,learn_parm, | 
| wolffd@0 | 3795 		            (double)((1.0-xi-a[i]*r_delta_sq)/(2.0*a[i]))); | 
| wolffd@0 | 3796 	} | 
| wolffd@0 | 3797 	if((learn_parm->xa_depth == 0) || | 
| wolffd@0 | 3798 	   ((a[i]*kernel(kernel_parm,docs[i],docs[i])+a[i]*2.0*sim+xi) >= 1.0)) { | 
| wolffd@0 | 3799 	  looerror++; | 
| wolffd@0 | 3800 	  if(label[i]>0) { | 
| wolffd@0 | 3801 	    looposerror++; | 
| wolffd@0 | 3802 	  } | 
| wolffd@0 | 3803 	  else { | 
| wolffd@0 | 3804 	    loonegerror++; | 
| wolffd@0 | 3805 	  } | 
| wolffd@0 | 3806 	} | 
| wolffd@0 | 3807       } | 
| wolffd@0 | 3808       totex++; | 
| wolffd@0 | 3809     } | 
| wolffd@0 | 3810   } | 
| wolffd@0 | 3811 | 
| wolffd@0 | 3812   (*error)=((double)looerror/(double)totex)*100.0; | 
| wolffd@0 | 3813   (*recall)=(1.0-(double)looposerror/(double)totposex)*100.0; | 
| wolffd@0 | 3814   (*precision)=(((double)totposex-(double)looposerror) | 
| wolffd@0 | 3815     /((double)totposex-(double)looposerror+(double)loonegerror))*100.0; | 
| wolffd@0 | 3816 | 
| wolffd@0 | 3817   free(sv); | 
| wolffd@0 | 3818   free(sv2dnum); | 
| wolffd@0 | 3819 } | 
| wolffd@0 | 3820 | 
| wolffd@0 | 3821 | 
| wolffd@0 | 3822 double distribute_alpha_t_greedily(long int *sv2dnum, long int svnum, | 
| wolffd@0 | 3823 				   DOC **docs, double *a, | 
| wolffd@0 | 3824 				   long int docnum, | 
| wolffd@0 | 3825 				   long int *label, | 
| wolffd@0 | 3826 				   KERNEL_PARM *kernel_parm, | 
| wolffd@0 | 3827 				   LEARN_PARM *learn_parm, double thresh) | 
| wolffd@0 | 3828      /* Experimental Code improving plain XiAlpha Estimates by | 
| wolffd@0 | 3829 	computing a better bound using a greedy optimzation strategy. */ | 
| wolffd@0 | 3830 { | 
| wolffd@0 | 3831   long best_depth=0; | 
| wolffd@0 | 3832   long i,j,k,d,skip,allskip; | 
| wolffd@0 | 3833   double best,best_val[101],val,init_val_sq,init_val_lin; | 
| wolffd@0 | 3834   long best_ex[101]; | 
| wolffd@0 | 3835   CFLOAT *cache,*trow; | 
| wolffd@0 | 3836 | 
| wolffd@0 | 3837   cache=(CFLOAT *)my_malloc(sizeof(CFLOAT)*learn_parm->xa_depth*svnum); | 
| wolffd@0 | 3838   trow = (CFLOAT *)my_malloc(sizeof(CFLOAT)*svnum); | 
| wolffd@0 | 3839 | 
| wolffd@0 | 3840   for(k=0;k<svnum;k++) { | 
| wolffd@0 | 3841     trow[k]=kernel(kernel_parm,docs[docnum],docs[sv2dnum[k]]); | 
| wolffd@0 | 3842   } | 
| wolffd@0 | 3843 | 
| wolffd@0 | 3844   init_val_sq=0; | 
| wolffd@0 | 3845   init_val_lin=0; | 
| wolffd@0 | 3846   best=0; | 
| wolffd@0 | 3847 | 
| wolffd@0 | 3848   for(d=0;d<learn_parm->xa_depth;d++) { | 
| wolffd@0 | 3849     allskip=1; | 
| wolffd@0 | 3850     if(d>=1) { | 
| wolffd@0 | 3851       init_val_sq+=cache[best_ex[d-1]+svnum*(d-1)]; | 
| wolffd@0 | 3852       for(k=0;k<d-1;k++) { | 
| wolffd@0 | 3853         init_val_sq+=2.0*cache[best_ex[k]+svnum*(d-1)]; | 
| wolffd@0 | 3854       } | 
| wolffd@0 | 3855       init_val_lin+=trow[best_ex[d-1]]; | 
| wolffd@0 | 3856     } | 
| wolffd@0 | 3857     for(i=0;i<svnum;i++) { | 
| wolffd@0 | 3858       skip=0; | 
| wolffd@0 | 3859       if(sv2dnum[i] == docnum) skip=1; | 
| wolffd@0 | 3860       for(j=0;j<d;j++) { | 
| wolffd@0 | 3861 	if(i == best_ex[j]) skip=1; | 
| wolffd@0 | 3862       } | 
| wolffd@0 | 3863 | 
| wolffd@0 | 3864       if(!skip) { | 
| wolffd@0 | 3865 	val=init_val_sq; | 
| wolffd@0 | 3866 	if(kernel_parm->kernel_type == LINEAR) | 
| wolffd@0 | 3867 	  val+=docs[sv2dnum[i]]->fvec->twonorm_sq; | 
| wolffd@0 | 3868 	else | 
| wolffd@0 | 3869 	  val+=kernel(kernel_parm,docs[sv2dnum[i]],docs[sv2dnum[i]]); | 
| wolffd@0 | 3870 	for(j=0;j<d;j++) { | 
| wolffd@0 | 3871 	  val+=2.0*cache[i+j*svnum]; | 
| wolffd@0 | 3872 	} | 
| wolffd@0 | 3873 	val*=(1.0/(2.0*(d+1.0)*(d+1.0))); | 
| wolffd@0 | 3874 	val-=((init_val_lin+trow[i])/(d+1.0)); | 
| wolffd@0 | 3875 | 
| wolffd@0 | 3876 	if(allskip || (val < best_val[d])) { | 
| wolffd@0 | 3877 	  best_val[d]=val; | 
| wolffd@0 | 3878 	  best_ex[d]=i; | 
| wolffd@0 | 3879 	} | 
| wolffd@0 | 3880 	allskip=0; | 
| wolffd@0 | 3881 	if(val < thresh) { | 
| wolffd@0 | 3882 	  i=svnum; | 
| wolffd@0 | 3883 	  /*	  printf("EARLY"); */ | 
| wolffd@0 | 3884 	} | 
| wolffd@0 | 3885       } | 
| wolffd@0 | 3886     } | 
| wolffd@0 | 3887     if(!allskip) { | 
| wolffd@0 | 3888       for(k=0;k<svnum;k++) { | 
| wolffd@0 | 3889 	  cache[d*svnum+k]=kernel(kernel_parm, | 
| wolffd@0 | 3890 				  docs[sv2dnum[best_ex[d]]], | 
| wolffd@0 | 3891 				  docs[sv2dnum[k]]); | 
| wolffd@0 | 3892       } | 
| wolffd@0 | 3893     } | 
| wolffd@0 | 3894     if((!allskip) && ((best_val[d] < best) || (d == 0))) { | 
| wolffd@0 | 3895       best=best_val[d]; | 
| wolffd@0 | 3896       best_depth=d; | 
| wolffd@0 | 3897     } | 
| wolffd@0 | 3898     if(allskip || (best < thresh)) { | 
| wolffd@0 | 3899       d=learn_parm->xa_depth; | 
| wolffd@0 | 3900     } | 
| wolffd@0 | 3901   } | 
| wolffd@0 | 3902 | 
| wolffd@0 | 3903   free(cache); | 
| wolffd@0 | 3904   free(trow); | 
| wolffd@0 | 3905 | 
| wolffd@0 | 3906   /*  printf("Distribute[%ld](%ld)=%f, ",docnum,best_depth,best); */ | 
| wolffd@0 | 3907   return(best); | 
| wolffd@0 | 3908 } | 
| wolffd@0 | 3909 | 
| wolffd@0 | 3910 | 
| wolffd@0 | 3911 void estimate_transduction_quality(MODEL *model, long int *label, | 
| wolffd@0 | 3912 				   long int *unlabeled, | 
| wolffd@0 | 3913 				   long int totdoc, DOC **docs, double *lin) | 
| wolffd@0 | 3914      /* Loo-bound based on observation that loo-errors must have an | 
| wolffd@0 | 3915 	equal distribution in both training and test examples, given | 
| wolffd@0 | 3916 	that the test examples are classified correctly. Compare | 
| wolffd@0 | 3917 	chapter "Constraints on the Transductive Hyperplane" in my | 
| wolffd@0 | 3918 	Dissertation. */ | 
| wolffd@0 | 3919 { | 
| wolffd@0 | 3920   long i,j,l=0,ulab=0,lab=0,labpos=0,labneg=0,ulabpos=0,ulabneg=0,totulab=0; | 
| wolffd@0 | 3921   double totlab=0,totlabpos=0,totlabneg=0,labsum=0,ulabsum=0; | 
| wolffd@0 | 3922   double r_delta,r_delta_sq,xi,xisum=0,asum=0; | 
| wolffd@0 | 3923 | 
| wolffd@0 | 3924   r_delta=estimate_r_delta(docs,totdoc,&(model->kernel_parm)); | 
| wolffd@0 | 3925   r_delta_sq=r_delta*r_delta; | 
| wolffd@0 | 3926 | 
| wolffd@0 | 3927   for(j=0;j<totdoc;j++) { | 
| wolffd@0 | 3928     if(unlabeled[j]) { | 
| wolffd@0 | 3929       totulab++; | 
| wolffd@0 | 3930     } | 
| wolffd@0 | 3931     else { | 
| wolffd@0 | 3932       totlab++; | 
| wolffd@0 | 3933       if(label[j] > 0) | 
| wolffd@0 | 3934 	totlabpos++; | 
| wolffd@0 | 3935       else | 
| wolffd@0 | 3936 	totlabneg++; | 
| wolffd@0 | 3937     } | 
| wolffd@0 | 3938   } | 
| wolffd@0 | 3939   for(j=1;j<model->sv_num;j++) { | 
| wolffd@0 | 3940     i=model->supvec[j]->docnum; | 
| wolffd@0 | 3941     xi=1.0-((lin[i]-model->b)*(double)label[i]); | 
| wolffd@0 | 3942     if(xi<0) xi=0; | 
| wolffd@0 | 3943 | 
| wolffd@0 | 3944     xisum+=xi; | 
| wolffd@0 | 3945     asum+=fabs(model->alpha[j]); | 
| wolffd@0 | 3946     if(unlabeled[i]) { | 
| wolffd@0 | 3947       ulabsum+=(fabs(model->alpha[j])*r_delta_sq+xi); | 
| wolffd@0 | 3948     } | 
| wolffd@0 | 3949     else { | 
| wolffd@0 | 3950       labsum+=(fabs(model->alpha[j])*r_delta_sq+xi); | 
| wolffd@0 | 3951     } | 
| wolffd@0 | 3952     if((fabs(model->alpha[j])*r_delta_sq+xi) >= 1) { | 
| wolffd@0 | 3953       l++; | 
| wolffd@0 | 3954       if(unlabeled[model->supvec[j]->docnum]) { | 
| wolffd@0 | 3955 	ulab++; | 
| wolffd@0 | 3956 	if(model->alpha[j] > 0) | 
| wolffd@0 | 3957 	  ulabpos++; | 
| wolffd@0 | 3958 	else | 
| wolffd@0 | 3959 	  ulabneg++; | 
| wolffd@0 | 3960       } | 
| wolffd@0 | 3961       else { | 
| wolffd@0 | 3962 	lab++; | 
| wolffd@0 | 3963 	if(model->alpha[j] > 0) | 
| wolffd@0 | 3964 	  labpos++; | 
| wolffd@0 | 3965 	else | 
| wolffd@0 | 3966 	  labneg++; | 
| wolffd@0 | 3967       } | 
| wolffd@0 | 3968     } | 
| wolffd@0 | 3969   } | 
| wolffd@0 | 3970   printf("xacrit>=1: labeledpos=%.5f labeledneg=%.5f default=%.5f\n",(double)labpos/(double)totlab*100.0,(double)labneg/(double)totlab*100.0,(double)totlabpos/(double)(totlab)*100.0); | 
| wolffd@0 | 3971   printf("xacrit>=1: unlabelpos=%.5f unlabelneg=%.5f\n",(double)ulabpos/(double)totulab*100.0,(double)ulabneg/(double)totulab*100.0); | 
| wolffd@0 | 3972   printf("xacrit>=1: labeled=%.5f unlabled=%.5f all=%.5f\n",(double)lab/(double)totlab*100.0,(double)ulab/(double)totulab*100.0,(double)l/(double)(totdoc)*100.0); | 
| wolffd@0 | 3973   printf("xacritsum: labeled=%.5f unlabled=%.5f all=%.5f\n",(double)labsum/(double)totlab*100.0,(double)ulabsum/(double)totulab*100.0,(double)(labsum+ulabsum)/(double)(totdoc)*100.0); | 
| wolffd@0 | 3974   printf("r_delta_sq=%.5f xisum=%.5f asum=%.5f\n",r_delta_sq,xisum,asum); | 
| wolffd@0 | 3975 } | 
| wolffd@0 | 3976 | 
| wolffd@0 | 3977 double estimate_margin_vcdim(MODEL *model, double w, double R, | 
| wolffd@0 | 3978 			     KERNEL_PARM *kernel_parm) | 
| wolffd@0 | 3979      /* optional: length of model vector in feature space */ | 
| wolffd@0 | 3980      /* optional: radius of ball containing the data */ | 
| wolffd@0 | 3981 { | 
| wolffd@0 | 3982   double h; | 
| wolffd@0 | 3983 | 
| wolffd@0 | 3984   /* follows chapter 5.6.4 in [Vapnik/95] */ | 
| wolffd@0 | 3985 | 
| wolffd@0 | 3986   if(w<0) { | 
| wolffd@0 | 3987     w=model_length_s(model,kernel_parm); | 
| wolffd@0 | 3988   } | 
| wolffd@0 | 3989   if(R<0) { | 
| wolffd@0 | 3990     R=estimate_sphere(model,kernel_parm); | 
| wolffd@0 | 3991   } | 
| wolffd@0 | 3992   h = w*w * R*R +1; | 
| wolffd@0 | 3993   return(h); | 
| wolffd@0 | 3994 } | 
| wolffd@0 | 3995 | 
| wolffd@0 | 3996 double estimate_sphere(MODEL *model, KERNEL_PARM *kernel_parm) | 
| wolffd@0 | 3997                           /* Approximates the radius of the ball containing */ | 
| wolffd@0 | 3998                           /* the support vectors by bounding it with the */ | 
| wolffd@0 | 3999 {                         /* length of the longest support vector. This is */ | 
| wolffd@0 | 4000   register long j;        /* pretty good for text categorization, since all */ | 
| wolffd@0 | 4001   double xlen,maxxlen=0;  /* documents have feature vectors of length 1. It */ | 
| wolffd@0 | 4002   DOC *nulldoc;           /* assumes that the center of the ball is at the */ | 
| wolffd@0 | 4003   WORD nullword;          /* origin of the space. */ | 
| wolffd@0 | 4004 | 
| wolffd@0 | 4005   nullword.wnum=0; | 
| wolffd@0 | 4006   nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0)); | 
| wolffd@0 | 4007 | 
| wolffd@0 | 4008   for(j=1;j<model->sv_num;j++) { | 
| wolffd@0 | 4009     xlen=sqrt(kernel(kernel_parm,model->supvec[j],model->supvec[j]) | 
| wolffd@0 | 4010 	      -2*kernel(kernel_parm,model->supvec[j],nulldoc) | 
| wolffd@0 | 4011 	      +kernel(kernel_parm,nulldoc,nulldoc)); | 
| wolffd@0 | 4012     if(xlen>maxxlen) { | 
| wolffd@0 | 4013       maxxlen=xlen; | 
| wolffd@0 | 4014     } | 
| wolffd@0 | 4015   } | 
| wolffd@0 | 4016 | 
| wolffd@0 | 4017   free_example(nulldoc,1); | 
| wolffd@0 | 4018   return(maxxlen); | 
| wolffd@0 | 4019 } | 
| wolffd@0 | 4020 | 
| wolffd@0 | 4021 double estimate_r_delta(DOC **docs, long int totdoc, KERNEL_PARM *kernel_parm) | 
| wolffd@0 | 4022 { | 
| wolffd@0 | 4023   long i; | 
| wolffd@0 | 4024   double maxxlen,xlen; | 
| wolffd@0 | 4025   DOC *nulldoc;           /* assumes that the center of the ball is at the */ | 
| wolffd@0 | 4026   WORD nullword;          /* origin of the space. */ | 
| wolffd@0 | 4027 | 
| wolffd@0 | 4028   nullword.wnum=0; | 
| wolffd@0 | 4029   nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0)); | 
| wolffd@0 | 4030 | 
| wolffd@0 | 4031   maxxlen=0; | 
| wolffd@0 | 4032   for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 4033     xlen=sqrt(kernel(kernel_parm,docs[i],docs[i]) | 
| wolffd@0 | 4034 	      -2*kernel(kernel_parm,docs[i],nulldoc) | 
| wolffd@0 | 4035 	      +kernel(kernel_parm,nulldoc,nulldoc)); | 
| wolffd@0 | 4036     if(xlen>maxxlen) { | 
| wolffd@0 | 4037       maxxlen=xlen; | 
| wolffd@0 | 4038     } | 
| wolffd@0 | 4039   } | 
| wolffd@0 | 4040 | 
| wolffd@0 | 4041   free_example(nulldoc,1); | 
| wolffd@0 | 4042   return(maxxlen); | 
| wolffd@0 | 4043 } | 
| wolffd@0 | 4044 | 
| wolffd@0 | 4045 double estimate_r_delta_average(DOC **docs, long int totdoc, | 
| wolffd@0 | 4046 				KERNEL_PARM *kernel_parm) | 
| wolffd@0 | 4047 { | 
| wolffd@0 | 4048   long i; | 
| wolffd@0 | 4049   double avgxlen; | 
| wolffd@0 | 4050   DOC *nulldoc;           /* assumes that the center of the ball is at the */ | 
| wolffd@0 | 4051   WORD nullword;          /* origin of the space. */ | 
| wolffd@0 | 4052 | 
| wolffd@0 | 4053   nullword.wnum=0; | 
| wolffd@0 | 4054   nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0)); | 
| wolffd@0 | 4055 | 
| wolffd@0 | 4056   avgxlen=0; | 
| wolffd@0 | 4057   for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 4058     avgxlen+=sqrt(kernel(kernel_parm,docs[i],docs[i]) | 
| wolffd@0 | 4059 		  -2*kernel(kernel_parm,docs[i],nulldoc) | 
| wolffd@0 | 4060 		  +kernel(kernel_parm,nulldoc,nulldoc)); | 
| wolffd@0 | 4061   } | 
| wolffd@0 | 4062 | 
| wolffd@0 | 4063   free_example(nulldoc,1); | 
| wolffd@0 | 4064   return(avgxlen/totdoc); | 
| wolffd@0 | 4065 } | 
| wolffd@0 | 4066 | 
| wolffd@0 | 4067 double length_of_longest_document_vector(DOC **docs, long int totdoc, | 
| wolffd@0 | 4068 					 KERNEL_PARM *kernel_parm) | 
| wolffd@0 | 4069 { | 
| wolffd@0 | 4070   long i; | 
| wolffd@0 | 4071   double maxxlen,xlen; | 
| wolffd@0 | 4072 | 
| wolffd@0 | 4073   maxxlen=0; | 
| wolffd@0 | 4074   for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 4075     xlen=sqrt(kernel(kernel_parm,docs[i],docs[i])); | 
| wolffd@0 | 4076     if(xlen>maxxlen) { | 
| wolffd@0 | 4077       maxxlen=xlen; | 
| wolffd@0 | 4078     } | 
| wolffd@0 | 4079   } | 
| wolffd@0 | 4080 | 
| wolffd@0 | 4081   return(maxxlen); | 
| wolffd@0 | 4082 } | 
| wolffd@0 | 4083 | 
| wolffd@0 | 4084 /****************************** IO-handling **********************************/ | 
| wolffd@0 | 4085 | 
| wolffd@0 | 4086 void write_prediction(char *predfile, MODEL *model, double *lin, | 
| wolffd@0 | 4087 		      double *a, long int *unlabeled, | 
| wolffd@0 | 4088 		      long int *label, long int totdoc, | 
| wolffd@0 | 4089 		      LEARN_PARM *learn_parm) | 
| wolffd@0 | 4090 { | 
| wolffd@0 | 4091   FILE *predfl; | 
| wolffd@0 | 4092   long i; | 
| wolffd@0 | 4093   double dist,a_max; | 
| wolffd@0 | 4094 | 
| wolffd@0 | 4095   if(verbosity>=1) { | 
| wolffd@0 | 4096     printf("Writing prediction file..."); fflush(stdout); | 
| wolffd@0 | 4097   } | 
| wolffd@0 | 4098   if ((predfl = fopen (predfile, "w")) == NULL) | 
| wolffd@0 | 4099   { perror (predfile); exit (1); } | 
| wolffd@0 | 4100   a_max=learn_parm->epsilon_a; | 
| wolffd@0 | 4101   for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 4102     if((unlabeled[i]) && (a[i]>a_max)) { | 
| wolffd@0 | 4103       a_max=a[i]; | 
| wolffd@0 | 4104     } | 
| wolffd@0 | 4105   } | 
| wolffd@0 | 4106   for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 4107     if(unlabeled[i]) { | 
| wolffd@0 | 4108       if((a[i]>(learn_parm->epsilon_a))) { | 
| wolffd@0 | 4109 	dist=(double)label[i]*(1.0-learn_parm->epsilon_crit-a[i]/(a_max*2.0)); | 
| wolffd@0 | 4110       } | 
| wolffd@0 | 4111       else { | 
| wolffd@0 | 4112 	dist=(lin[i]-model->b); | 
| wolffd@0 | 4113       } | 
| wolffd@0 | 4114       if(dist>0) { | 
| wolffd@0 | 4115 	fprintf(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist); | 
| wolffd@0 | 4116       } | 
| wolffd@0 | 4117       else { | 
| wolffd@0 | 4118 	fprintf(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist); | 
| wolffd@0 | 4119       } | 
| wolffd@0 | 4120     } | 
| wolffd@0 | 4121   } | 
| wolffd@0 | 4122   fclose(predfl); | 
| wolffd@0 | 4123   if(verbosity>=1) { | 
| wolffd@0 | 4124     printf("done\n"); | 
| wolffd@0 | 4125   } | 
| wolffd@0 | 4126 } | 
| wolffd@0 | 4127 | 
| wolffd@0 | 4128 void write_alphas(char *alphafile, double *a, | 
| wolffd@0 | 4129 		  long int *label, long int totdoc) | 
| wolffd@0 | 4130 { | 
| wolffd@0 | 4131   FILE *alphafl; | 
| wolffd@0 | 4132   long i; | 
| wolffd@0 | 4133 | 
| wolffd@0 | 4134   if(verbosity>=1) { | 
| wolffd@0 | 4135     printf("Writing alpha file..."); fflush(stdout); | 
| wolffd@0 | 4136   } | 
| wolffd@0 | 4137   if ((alphafl = fopen (alphafile, "w")) == NULL) | 
| wolffd@0 | 4138   { perror (alphafile); exit (1); } | 
| wolffd@0 | 4139   for(i=0;i<totdoc;i++) { | 
| wolffd@0 | 4140     fprintf(alphafl,"%.18g\n",a[i]*(double)label[i]); | 
| wolffd@0 | 4141   } | 
| wolffd@0 | 4142   fclose(alphafl); | 
| wolffd@0 | 4143   if(verbosity>=1) { | 
| wolffd@0 | 4144     printf("done\n"); | 
| wolffd@0 | 4145   } | 
| wolffd@0 | 4146 } | 
| wolffd@0 | 4147 |