diff toolboxes/SVM-light/src/svm_learn.c @ 0:e9a9cd732c1e tip
first hg version after svn

author:    wolffd
date:      Tue, 10 Feb 2015 15:05:51 +0000
parents:   (none)
children:  (none)
--- /dev/null                               Thu Jan 01 00:00:00 1970 +0000
+++ b/toolboxes/SVM-light/src/svm_learn.c   Tue Feb 10 15:05:51 2015 +0000
@@ -0,0 +1,4147 @@
+/***********************************************************************/
+/*                                                                     */
+/*   svm_learn.c                                                       */
+/*                                                                     */
+/*   Learning module of Support Vector Machine.                        */
+/*                                                                     */
+/*   Author: Thorsten Joachims                                         */
+/*   Date: 02.07.02                                                    */
+/*                                                                     */
+/*   Copyright (c) 2002  Thorsten Joachims - All rights reserved       */
+/*                                                                     */
+/*   This software is available for non-commercial use only. It must   */
+/*   not be modified and distributed without prior permission of the   */
+/*   author. The author is not responsible for implications from the   */
+/*   use of this software.                                             */
+/*                                                                     */
+/***********************************************************************/
+
+
+# include "svm_common.h"
+# include "svm_learn.h"
+
+
+/* interface to QP-solver */
+double *optimize_qp(QP *, double *, long, double *, LEARN_PARM *);
+
+/*---------------------------------------------------------------------------*/
+
+/* Learns an SVM classification model based on the training data in
+   docs/label. The resulting model is returned in the structure
+   model. */
+
+void svm_learn_classification(DOC **docs, double *class, long int
+                              totdoc, long int totwords,
+                              LEARN_PARM *learn_parm,
+                              KERNEL_PARM *kernel_parm,
+                              KERNEL_CACHE *kernel_cache,
+                              MODEL *model,
+                              double *alpha)
+     /* docs:        Training vectors (x-part) */
+     /* class:       Training labels (y-part, zero if test example for
+                     transduction) */
+     /* totdoc:      Number of examples in docs/label */
+     /* totwords:    Number of features (i.e. highest feature index) */
+     /* learn_parm:  Learning parameters */
+     /* kernel_parm: Kernel parameters */
+     /* kernel_cache:Initialized Cache of size totdoc, if using a kernel.
+                     NULL if linear. */
+     /* model:       Returns learning result (assumed empty before called) */
+     /* alpha:       Start values for the alpha variables or NULL
+                     pointer. The new alpha values are returned after
+                     optimization if not NULL. Array must be of size totdoc. */
+{
+  long *inconsistent,i,*label;
+  long inconsistentnum;
+  long misclassified,upsupvecnum;
+  double loss,model_length,example_length;
+  double maxdiff,*lin,*a,*c;
+  long runtime_start,runtime_end;
+  long iterations;
+  long *unlabeled,transduction;
+  long heldout;
+  long loo_count=0,loo_count_pos=0,loo_count_neg=0,trainpos=0,trainneg=0;
+  long loocomputed=0,runtime_start_loo=0,runtime_start_xa=0;
+  double heldout_c=0,r_delta_sq=0,r_delta,r_delta_avg;
+  long *index,*index2dnum;
+  double *weights;
+  CFLOAT *aicache;  /* buffer to keep one row of hessian */
+
+  double *xi_fullset; /* buffer for storing xi on full sample in loo */
+  double *a_fullset;  /* buffer for storing alpha on full sample in loo */
+  TIMING timing_profile;
+  SHRINK_STATE shrink_state;
+
+  runtime_start=get_runtime();
+  timing_profile.time_kernel=0;
+  timing_profile.time_opti=0;
+  timing_profile.time_shrink=0;
+  timing_profile.time_update=0;
+  timing_profile.time_model=0;
+  timing_profile.time_check=0;
+  timing_profile.time_select=0;
+  kernel_cache_statistic=0;
+
+  learn_parm->totwords=totwords;
+
+  /* make sure -n value is reasonable */
+  if((learn_parm->svm_newvarsinqp < 2)
+     || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
+    learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
+  }
+
+  init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);
+
+  label = (long *)my_malloc(sizeof(long)*totdoc);
+  inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
+  unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
+  c = (double *)my_malloc(sizeof(double)*totdoc);
+  a = (double *)my_malloc(sizeof(double)*totdoc);
+  a_fullset = (double *)my_malloc(sizeof(double)*totdoc);
+  xi_fullset = (double *)my_malloc(sizeof(double)*totdoc);
+  lin = (double *)my_malloc(sizeof(double)*totdoc);
+  learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
+  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
+  model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
+  model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
+
+  model->at_upper_bound=0;
+  model->b=0;
+  model->supvec[0]=0;  /* element 0 reserved and empty for now */
+  model->alpha[0]=0;
+  model->lin_weights=NULL;
+  model->totwords=totwords;
+  model->totdoc=totdoc;
+  model->kernel_parm=(*kernel_parm);
+  model->sv_num=1;
+  model->loo_error=-1;
+  model->loo_recall=-1;
+  model->loo_precision=-1;
+  model->xa_error=-1;
+  model->xa_recall=-1;
+  model->xa_precision=-1;
+  inconsistentnum=0;
+  transduction=0;
+
+  r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
+  r_delta_sq=r_delta*r_delta;
+
+  r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
+  if(learn_parm->svm_c == 0.0) {  /* default value for C */
+    learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
+    if(verbosity>=1)
+      printf("Setting default regularization parameter C=%.4f\n",
+             learn_parm->svm_c);
+  }
+
+  learn_parm->eps=-1.0;   /* equivalent regression epsilon for
+                             classification */
+
+  for(i=0;i<totdoc;i++) {    /* various inits */
+    docs[i]->docnum=i;
+    inconsistent[i]=0;
+    a[i]=0;
+    lin[i]=0;
+    c[i]=0.0;
+    unlabeled[i]=0;
+    if(class[i] == 0) {
+      unlabeled[i]=1;
+      label[i]=0;
+      transduction=1;
+    }
+    if(class[i] > 0) {
+      learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
+        docs[i]->costfactor;
+      label[i]=1;
+      trainpos++;
+    }
+    else if(class[i] < 0) {
+      learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor;
+      label[i]=-1;
+      trainneg++;
+    }
+    else {
+      learn_parm->svm_cost[i]=0;
+    }
+  }
+  if(verbosity>=2) {
+    printf("%ld positive, %ld negative, and %ld unlabeled examples.\n",
+           trainpos,trainneg,totdoc-trainpos-trainneg); fflush(stdout);
+  }
+
+  /* caching makes no sense for linear kernel */
+  if(kernel_parm->kernel_type == LINEAR) {
+    kernel_cache = NULL;
+  }
+
+  /* compute starting state for initial alpha values */
+  if(alpha) {
+    if(verbosity>=1) {
+      printf("Computing starting state..."); fflush(stdout);
+    }
+    index = (long *)my_malloc(sizeof(long)*totdoc);
+    index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
+    weights=(double *)my_malloc(sizeof(double)*(totwords+1));
+    aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
+    for(i=0;i<totdoc;i++) {    /* create full index and clip alphas */
+      index[i]=1;
+      alpha[i]=fabs(alpha[i]);
+      if(alpha[i]<0) alpha[i]=0;
+      if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
+    }
+    if(kernel_parm->kernel_type != LINEAR) {
+      for(i=0;i<totdoc;i++)    /* fill kernel cache with unbounded SV */
+        if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
+           && (kernel_cache_space_available(kernel_cache)))
+          cache_kernel_row(kernel_cache,docs,i,kernel_parm);
+      for(i=0;i<totdoc;i++)    /* fill rest of kernel cache with bounded SV */
+        if((alpha[i]==learn_parm->svm_cost[i])
+           && (kernel_cache_space_available(kernel_cache)))
+          cache_kernel_row(kernel_cache,docs,i,kernel_parm);
+    }
+    (void)compute_index(index,totdoc,index2dnum);
+    update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
+                            totwords,kernel_parm,kernel_cache,lin,aicache,
+                            weights);
+    (void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c,
+                              learn_parm,index2dnum,index2dnum,model);
+    for(i=0;i<totdoc;i++) {    /* copy initial alphas */
+      a[i]=alpha[i];
+    }
+    free(index);
+    free(index2dnum);
+    free(weights);
+    free(aicache);
+    if(verbosity>=1) {
+      printf("done.\n"); fflush(stdout);
+    }
+  }
+
+  if(transduction) {
+    learn_parm->svm_iter_to_shrink=99999999;
+    if(verbosity >= 1)
+      printf("\nDeactivating Shrinking due to an incompatibility with the transductive \nlearner in the current version.\n\n");
+  }
+
+  if(transduction && learn_parm->compute_loo) {
+    learn_parm->compute_loo=0;
+    if(verbosity >= 1)
+      printf("\nCannot compute leave-one-out estimates for transductive learner.\n\n");
+  }
+
+  if(learn_parm->remove_inconsistent && learn_parm->compute_loo) {
+    learn_parm->compute_loo=0;
+    printf("\nCannot compute leave-one-out estimates when removing inconsistent examples.\n\n");
+  }
+
+  if(learn_parm->compute_loo && ((trainpos == 1) || (trainneg == 1))) {
+    learn_parm->compute_loo=0;
+    printf("\nCannot compute leave-one-out with only one example in one class.\n\n");
+  }
+
+
+  if(verbosity==1) {
+    printf("Optimizing"); fflush(stdout);
+  }
+
+  /* train the svm */
+  iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
+                                     kernel_parm,kernel_cache,&shrink_state,
+                                     model,inconsistent,unlabeled,a,lin,
+                                     c,&timing_profile,
+                                     &maxdiff,(long)-1,
+                                     (long)1);
+
+  if(verbosity>=1) {
+    if(verbosity==1) printf("done. (%ld iterations)\n",iterations);
+
+    misclassified=0;
+    for(i=0;(i<totdoc);i++) { /* get final statistic */
+      if((lin[i]-model->b)*(double)label[i] <= 0.0)
+        misclassified++;
+    }
+
+    printf("Optimization finished (%ld misclassified, maxdiff=%.5f).\n",
+           misclassified,maxdiff);
+
+    runtime_end=get_runtime();
+    if(verbosity>=2) {
+      printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
+        ((float)runtime_end-(float)runtime_start)/100.0,
+        (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),
+        (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),
+        (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),
+        (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),
+        (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),
+        (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),
+        (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));
+    }
+    else {
+      printf("Runtime in cpu-seconds: %.2f\n",
+             (runtime_end-runtime_start)/100.0);
+    }
+
+    if(learn_parm->remove_inconsistent) {
+      inconsistentnum=0;
+      for(i=0;i<totdoc;i++)
+        if(inconsistent[i])
+          inconsistentnum++;
+      printf("Number of SV: %ld (plus %ld inconsistent examples)\n",
+             model->sv_num-1,inconsistentnum);
+    }
+    else {
+      upsupvecnum=0;
+      for(i=1;i<model->sv_num;i++) {
+        if(fabs(model->alpha[i]) >=
+           (learn_parm->svm_cost[(model->supvec[i])->docnum]-
+            learn_parm->epsilon_a))
+          upsupvecnum++;
+      }
+      printf("Number of SV: %ld (including %ld at upper bound)\n",
+             model->sv_num-1,upsupvecnum);
+    }
+
+    if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
+      loss=0;
+      model_length=0;
+      for(i=0;i<totdoc;i++) {
+        if((lin[i]-model->b)*(double)label[i] < 1.0-learn_parm->epsilon_crit)
+          loss+=1.0-(lin[i]-model->b)*(double)label[i];
+        model_length+=a[i]*label[i]*lin[i];
+      }
+      model_length=sqrt(model_length);
+      fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
+      fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
+      example_length=estimate_sphere(model,kernel_parm);
+      fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
+              length_of_longest_document_vector(docs,totdoc,kernel_parm));
+      fprintf(stdout,"Estimated VCdim of classifier: VCdim<=%.5f\n",
+              estimate_margin_vcdim(model,model_length,example_length,
+                                    kernel_parm));
+      if((!learn_parm->remove_inconsistent) && (!transduction)) {
+        runtime_start_xa=get_runtime();
+        if(verbosity>=1) {
+          printf("Computing XiAlpha-estimates..."); fflush(stdout);
+        }
+        compute_xa_estimates(model,label,unlabeled,totdoc,docs,lin,a,
+                             kernel_parm,learn_parm,&(model->xa_error),
+                             &(model->xa_recall),&(model->xa_precision));
+        if(verbosity>=1) {
+          printf("done\n");
+        }
+        printf("Runtime for XiAlpha-estimates in cpu-seconds: %.2f\n",
+               (get_runtime()-runtime_start_xa)/100.0);
+
+        fprintf(stdout,"XiAlpha-estimate of the error: error<=%.2f%% (rho=%.2f,depth=%ld)\n",
+                model->xa_error,learn_parm->rho,learn_parm->xa_depth);
+        fprintf(stdout,"XiAlpha-estimate of the recall: recall=>%.2f%% (rho=%.2f,depth=%ld)\n",
+                model->xa_recall,learn_parm->rho,learn_parm->xa_depth);
+        fprintf(stdout,"XiAlpha-estimate of the precision: precision=>%.2f%% (rho=%.2f,depth=%ld)\n",
+                model->xa_precision,learn_parm->rho,learn_parm->xa_depth);
+      }
+      else if(!learn_parm->remove_inconsistent) {
+        estimate_transduction_quality(model,label,unlabeled,totdoc,docs,lin);
+      }
+    }
+    if(verbosity>=1) {
+      printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
+    }
+  }
+
+
+  /* leave-one-out testing starts now */
+  if(learn_parm->compute_loo) {
+    /* save results of training on full dataset for leave-one-out */
+    runtime_start_loo=get_runtime();
+    for(i=0;i<totdoc;i++) {
+      xi_fullset[i]=1.0-((lin[i]-model->b)*(double)label[i]);
+      if(xi_fullset[i]<0) xi_fullset[i]=0;
+      a_fullset[i]=a[i];
+    }
+    if(verbosity>=1) {
+      printf("Computing leave-one-out");
+    }
+
+    /* repeat this loop for every held-out example */
+    for(heldout=0;(heldout<totdoc);heldout++) {
+      if(learn_parm->rho*a_fullset[heldout]*r_delta_sq+xi_fullset[heldout]
+         < 1.0) {
+        /* guaranteed to not produce a leave-one-out error */
+        if(verbosity==1) {
+          printf("+"); fflush(stdout);
+        }
+      }
+      else if(xi_fullset[heldout] > 1.0) {
+        /* guaranteed to produce a leave-one-out error */
+        loo_count++;
+        if(label[heldout] > 0) loo_count_pos++; else loo_count_neg++;
+        if(verbosity==1) {
+          printf("-"); fflush(stdout);
+        }
+      }
+      else {
+        loocomputed++;
+        heldout_c=learn_parm->svm_cost[heldout]; /* set upper bound to zero */
+        learn_parm->svm_cost[heldout]=0;
+        /* make sure heldout example is not currently */
+        /* shrunk away. Assumes that lin is up to date! */
+        shrink_state.active[heldout]=1;
+        if(verbosity>=2)
+          printf("\nLeave-One-Out test on example %ld\n",heldout);
+        if(verbosity>=1) {
+          printf("(?[%ld]",heldout); fflush(stdout);
+        }
+
+        optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
+                                kernel_parm,
+                                kernel_cache,&shrink_state,model,
+                                inconsistent,unlabeled,
+                                a,lin,c,&timing_profile,
+                                &maxdiff,heldout,(long)2);
+
+        /* printf("%.20f\n",(lin[heldout]-model->b)*(double)label[heldout]); */
+
+        if(((lin[heldout]-model->b)*(double)label[heldout]) <= 0.0) {
+          loo_count++;                         /* there was a loo-error */
+          if(label[heldout] > 0) loo_count_pos++; else loo_count_neg++;
+          if(verbosity>=1) {
+            printf("-)"); fflush(stdout);
+          }
+        }
+        else {
+          if(verbosity>=1) {
+            printf("+)"); fflush(stdout);
+          }
+        }
+        /* now we need to restore the original data set */
+        learn_parm->svm_cost[heldout]=heldout_c; /* restore upper bound */
+      }
+    } /* end of leave-one-out loop */
+
+
+    if(verbosity>=1) {
+      printf("\nRetrain on full problem"); fflush(stdout);
+    }
+    optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
+                            kernel_parm,
+                            kernel_cache,&shrink_state,model,
+                            inconsistent,unlabeled,
+                            a,lin,c,&timing_profile,
+                            &maxdiff,(long)-1,(long)1);
+    if(verbosity >= 1)
+      printf("done.\n");
+
+
+    /* after all leave-one-out computed */
+    model->loo_error=100.0*loo_count/(double)totdoc;
+    model->loo_recall=(1.0-(double)loo_count_pos/(double)trainpos)*100.0;
+    model->loo_precision=(trainpos-loo_count_pos)/
+      (double)(trainpos-loo_count_pos+loo_count_neg)*100.0;
+    if(verbosity >= 1) {
+      fprintf(stdout,"Leave-one-out estimate of the error: error=%.2f%%\n",
+              model->loo_error);
+      fprintf(stdout,"Leave-one-out estimate of the recall: recall=%.2f%%\n",
+              model->loo_recall);
+      fprintf(stdout,"Leave-one-out estimate of the precision: precision=%.2f%%\n",
+              model->loo_precision);
+      fprintf(stdout,"Actual leave-one-outs computed: %ld (rho=%.2f)\n",
+              loocomputed,learn_parm->rho);
+      printf("Runtime for leave-one-out in cpu-seconds: %.2f\n",
+             (double)(get_runtime()-runtime_start_loo)/100.0);
+    }
+  }
+
+  if(learn_parm->alphafile[0])
+    write_alphas(learn_parm->alphafile,a,label,totdoc);
+
+  shrink_state_cleanup(&shrink_state);
+  free(label);
+  free(inconsistent);
+  free(unlabeled);
+  free(c);
+  free(a);
+  free(a_fullset);
+  free(xi_fullset);
+  free(lin);
+  free(learn_parm->svm_cost);
+}
+
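The function above is the classification entry point normally driven by svm_learn_main.c. A minimal calling sketch for a linear two-example problem, assuming the declarations from svm_common.h/svm_learn.h (create_example appears with this signature in the ranking code below; create_svector and the WORD terminator convention are taken from svm_common.h; the parameter values are illustrative stand-ins, not the library's documented defaults, which a real caller sets in full the way svm_learn_main.c does):

    #include <string.h>
    #include "svm_common.h"
    #include "svm_learn.h"

    int main()
    {
      WORD words[3];
      DOC *docs[2];
      double label[2]={+1.0,-1.0};
      LEARN_PARM learn_parm;
      KERNEL_PARM kernel_parm;
      MODEL *model=(MODEL *)my_malloc(sizeof(MODEL));

      /* two sparse examples over features 1..2; wnum=0 terminates */
      words[0].wnum=1; words[0].weight=1.0;
      words[1].wnum=2; words[1].weight=0.5;
      words[2].wnum=0; words[2].weight=0.0;
      docs[0]=create_example(0,0,0,1.0,create_svector(words,"",1.0));
      words[0].weight=-1.0; words[1].weight=-0.5;
      docs[1]=create_example(1,0,0,1.0,create_svector(words,"",1.0));

      /* illustrative settings only -- a full caller initializes every
         LEARN_PARM field, as svm_learn_main.c does */
      memset(&learn_parm,0,sizeof(learn_parm));
      memset(&kernel_parm,0,sizeof(kernel_parm));
      verbosity=1;
      learn_parm.svm_c=1.0;
      learn_parm.svm_costratio=1.0;
      learn_parm.biased_hyperplane=1;
      learn_parm.svm_maxqpsize=10;
      learn_parm.svm_newvarsinqp=0;   /* clamped to svm_maxqpsize inside */
      learn_parm.epsilon_crit=0.001;
      learn_parm.epsilon_a=1e-15;
      learn_parm.svm_iter_to_shrink=100;
      learn_parm.maxiter=100000;
      kernel_parm.kernel_type=LINEAR; /* so kernel_cache may be NULL */

      svm_learn_classification(docs,label,2,2,&learn_parm,&kernel_parm,
                               NULL,model,NULL);
      printf("b=%.3f, #SV=%ld\n",model->b,model->sv_num-1);
      return 0;
    }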
+
+/* Learns an SVM regression model based on the training data in
+   docs/label. The resulting model is returned in the structure
+   model. */
+
+void svm_learn_regression(DOC **docs, double *value, long int totdoc,
+                          long int totwords, LEARN_PARM *learn_parm,
+                          KERNEL_PARM *kernel_parm,
+                          KERNEL_CACHE **kernel_cache, MODEL *model)
+     /* docs:        Training vectors (x-part) */
+     /* value:       Training value (y-part) */
+     /* totdoc:      Number of examples in docs/label */
+     /* totwords:    Number of features (i.e. highest feature index) */
+     /* learn_parm:  Learning parameters */
+     /* kernel_parm: Kernel parameters */
+     /* kernel_cache:Initialized Cache, if using a kernel. NULL if
+                     linear. Note that it will be free'd and reassigned */
+     /* model:       Returns learning result (assumed empty before called) */
+{
+  long *inconsistent,i,j;
+  long inconsistentnum;
+  long upsupvecnum;
+  double loss,model_length,example_length;
+  double maxdiff,*lin,*a,*c;
+  long runtime_start,runtime_end;
+  long iterations,kernel_cache_size;
+  long *unlabeled;
+  double r_delta_sq=0,r_delta,r_delta_avg;
+  double *xi_fullset; /* buffer for storing xi on full sample in loo */
+  double *a_fullset;  /* buffer for storing alpha on full sample in loo */
+  TIMING timing_profile;
+  SHRINK_STATE shrink_state;
+  DOC **docs_org;
+  long *label;
+
+  /* set up regression problem in standard form */
+  docs_org=docs;
+  docs = (DOC **)my_malloc(sizeof(DOC)*2*totdoc);
+  label = (long *)my_malloc(sizeof(long)*2*totdoc);
+  c = (double *)my_malloc(sizeof(double)*2*totdoc);
+  for(i=0;i<totdoc;i++) {
+    j=2*totdoc-1-i;
+    docs[i]=create_example(i,0,0,docs_org[i]->costfactor,docs_org[i]->fvec);
+    label[i]=+1;
+    c[i]=value[i];
+    docs[j]=create_example(j,0,0,docs_org[i]->costfactor,docs_org[i]->fvec);
+    label[j]=-1;
+    c[j]=value[i];
+  }
+  totdoc*=2;
+
+  /* need to get a bigger kernel cache */
+  if(*kernel_cache) {
+    kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024);
+    kernel_cache_cleanup(*kernel_cache);
+    (*kernel_cache)=kernel_cache_init(totdoc,kernel_cache_size);
+  }
+
+  runtime_start=get_runtime();
+  timing_profile.time_kernel=0;
+  timing_profile.time_opti=0;
+  timing_profile.time_shrink=0;
+  timing_profile.time_update=0;
+  timing_profile.time_model=0;
+  timing_profile.time_check=0;
+  timing_profile.time_select=0;
+  kernel_cache_statistic=0;
+
+  learn_parm->totwords=totwords;
+
+  /* make sure -n value is reasonable */
+  if((learn_parm->svm_newvarsinqp < 2)
+     || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
+    learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
+  }
+
+  init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);
+
+  inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
+  unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
+  a = (double *)my_malloc(sizeof(double)*totdoc);
+  a_fullset = (double *)my_malloc(sizeof(double)*totdoc);
+  xi_fullset = (double *)my_malloc(sizeof(double)*totdoc);
+  lin = (double *)my_malloc(sizeof(double)*totdoc);
+  learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
+  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
+  model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
+  model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
+
+  model->at_upper_bound=0;
+  model->b=0;
+  model->supvec[0]=0;  /* element 0 reserved and empty for now */
+  model->alpha[0]=0;
+  model->lin_weights=NULL;
+  model->totwords=totwords;
+  model->totdoc=totdoc;
+  model->kernel_parm=(*kernel_parm);
+  model->sv_num=1;
+  model->loo_error=-1;
+  model->loo_recall=-1;
+  model->loo_precision=-1;
+  model->xa_error=-1;
+  model->xa_recall=-1;
+  model->xa_precision=-1;
+  inconsistentnum=0;
+
+  r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
+  r_delta_sq=r_delta*r_delta;
+
+  r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
+  if(learn_parm->svm_c == 0.0) {  /* default value for C */
+    learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
+    if(verbosity>=1)
+      printf("Setting default regularization parameter C=%.4f\n",
+             learn_parm->svm_c);
+  }
+
+  for(i=0;i<totdoc;i++) {    /* various inits */
+    inconsistent[i]=0;
+    a[i]=0;
+    lin[i]=0;
+    unlabeled[i]=0;
+    if(label[i] > 0) {
+      learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
+        docs[i]->costfactor;
+    }
+    else if(label[i] < 0) {
+      learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor;
+    }
+  }
+
+  /* caching makes no sense for linear kernel */
+  if((kernel_parm->kernel_type == LINEAR) && (*kernel_cache)) {
+    printf("WARNING: Using a kernel cache for linear case will slow optimization down!\n");
+  }
+
+  if(verbosity==1) {
+    printf("Optimizing"); fflush(stdout);
+  }
+
+  /* train the svm */
+  iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
+                                     kernel_parm,*kernel_cache,&shrink_state,
+                                     model,inconsistent,unlabeled,a,lin,c,
+                                     &timing_profile,&maxdiff,(long)-1,
+                                     (long)1);
+
+  if(verbosity>=1) {
+    if(verbosity==1) printf("done. (%ld iterations)\n",iterations);
+
+    printf("Optimization finished (maxdiff=%.5f).\n",maxdiff);
+
+    runtime_end=get_runtime();
+    if(verbosity>=2) {
+      printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
+        ((float)runtime_end-(float)runtime_start)/100.0,
+        (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),
+        (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),
+        (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),
+        (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),
+        (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),
+        (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),
+        (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));
+    }
+    else {
+      printf("Runtime in cpu-seconds: %.2f\n",
+             (runtime_end-runtime_start)/100.0);
+    }
+
+    if(learn_parm->remove_inconsistent) {
+      inconsistentnum=0;
+      for(i=0;i<totdoc;i++)
+        if(inconsistent[i])
+          inconsistentnum++;
+      printf("Number of SV: %ld (plus %ld inconsistent examples)\n",
+             model->sv_num-1,inconsistentnum);
+    }
+    else {
+      upsupvecnum=0;
+      for(i=1;i<model->sv_num;i++) {
+        if(fabs(model->alpha[i]) >=
+           (learn_parm->svm_cost[(model->supvec[i])->docnum]-
+            learn_parm->epsilon_a))
+          upsupvecnum++;
+      }
+      printf("Number of SV: %ld (including %ld at upper bound)\n",
+             model->sv_num-1,upsupvecnum);
+    }
+
+    if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
+      loss=0;
+      model_length=0;
+      for(i=0;i<totdoc;i++) {
+        if((lin[i]-model->b)*(double)label[i] < (-learn_parm->eps+(double)label[i]*c[i])-learn_parm->epsilon_crit)
+          loss+=-learn_parm->eps+(double)label[i]*c[i]-(lin[i]-model->b)*(double)label[i];
+        model_length+=a[i]*label[i]*lin[i];
+      }
+      model_length=sqrt(model_length);
+      fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
+      fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
+      example_length=estimate_sphere(model,kernel_parm);
+      fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
+              length_of_longest_document_vector(docs,totdoc,kernel_parm));
+    }
+    if(verbosity>=1) {
+      printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
+    }
+  }
+
+  if(learn_parm->alphafile[0])
+    write_alphas(learn_parm->alphafile,a,label,totdoc);
+
+  /* this makes sure the model we return does not contain pointers to the
+     temporary documents */
+  for(i=1;i<model->sv_num;i++) {
+    j=model->supvec[i]->docnum;
+    if(j >= (totdoc/2)) {
+      j=totdoc-j-1;
+    }
+    model->supvec[i]=docs_org[j];
+  }
+
+  shrink_state_cleanup(&shrink_state);
+  for(i=0;i<totdoc;i++)
+    free_example(docs[i],0);
+  free(docs);
+  free(label);
+  free(inconsistent);
+  free(unlabeled);
+  free(c);
+  free(a);
+  free(a_fullset);
+  free(xi_fullset);
+  free(lin);
+  free(learn_parm->svm_cost);
+}
+
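The setup loop at the top of svm_learn_regression is the standard reduction of epsilon-insensitive regression to classification: each target value[i] yields a +1-labeled copy of the example at slot i and a -1-labeled copy at the mirror slot j=2*totdoc-1-i, with the target carried in c[] so the margin constraints become (w*x_i+b) >= y_i - eps - xi_i and -(w*x_i+b) >= -y_i - eps - xi_i'. A freestanding toy check of the index bookkeeping (hypothetical totdoc):

    #include <stdio.h>

    /* mirror indexing used by svm_learn_regression's setup loop */
    int main()
    {
      long totdoc=3,i;
      for(i=0;i<totdoc;i++)
        printf("example %ld -> +1 copy at %ld, -1 copy at %ld\n",
               i,i,2*totdoc-1-i);
      /* prints pairs (0,5), (1,4), (2,3): the +1 copies fill the front
         half in order, the -1 copies fill the back half in reverse */
      return 0;
    }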
+void svm_learn_ranking(DOC **docs, double *rankvalue, long int totdoc,
+                       long int totwords, LEARN_PARM *learn_parm,
+                       KERNEL_PARM *kernel_parm, KERNEL_CACHE **kernel_cache,
+                       MODEL *model)
+     /* docs:        Training vectors (x-part) */
+     /* rankvalue:   Training target values that determine the ranking */
+     /* totdoc:      Number of examples in docs/label */
+     /* totwords:    Number of features (i.e. highest feature index) */
+     /* learn_parm:  Learning parameters */
+     /* kernel_parm: Kernel parameters */
+     /* kernel_cache:Initialized pointer to Cache of size 1*totdoc, if
+                     using a kernel. NULL if linear. NOTE: Cache is
+                     getting reinitialized in this function */
+     /* model:       Returns learning result (assumed empty before called) */
+{
+  DOC **docdiff;
+  long i,j,k,totpair,kernel_cache_size;
+  double *target,*alpha,cost;
+  long *greater,*lesser;
+  MODEL *pairmodel;
+  SVECTOR *flow,*fhigh;
+
+  totpair=0;
+  for(i=0;i<totdoc;i++) {
+    for(j=i+1;j<totdoc;j++) {
+      if((docs[i]->queryid==docs[j]->queryid) && (rankvalue[i] != rankvalue[j])) {
+        totpair++;
+      }
+    }
+  }
+
+  printf("Constructing %ld rank constraints...",totpair); fflush(stdout);
+  docdiff=(DOC **)my_malloc(sizeof(DOC)*totpair);
+  target=(double *)my_malloc(sizeof(double)*totpair);
+  greater=(long *)my_malloc(sizeof(long)*totpair);
+  lesser=(long *)my_malloc(sizeof(long)*totpair);
+
+  k=0;
+  for(i=0;i<totdoc;i++) {
+    for(j=i+1;j<totdoc;j++) {
+      if(docs[i]->queryid == docs[j]->queryid) {
+        cost=(docs[i]->costfactor+docs[j]->costfactor)/2.0;
+        if(rankvalue[i] > rankvalue[j]) {
+          if(kernel_parm->kernel_type == LINEAR)
+            docdiff[k]=create_example(k,0,0,cost,
+                                      sub_ss(docs[i]->fvec,docs[j]->fvec));
+          else {
+            flow=copy_svector(docs[j]->fvec);
+            flow->factor=-1.0;
+            flow->next=NULL;
+            fhigh=copy_svector(docs[i]->fvec);
+            fhigh->factor=1.0;
+            fhigh->next=flow;
+            docdiff[k]=create_example(k,0,0,cost,fhigh);
+          }
+          target[k]=1;
+          greater[k]=i;
+          lesser[k]=j;
+          k++;
+        }
+        else if(rankvalue[i] < rankvalue[j]) {
+          if(kernel_parm->kernel_type == LINEAR)
+            docdiff[k]=create_example(k,0,0,cost,
+                                      sub_ss(docs[i]->fvec,docs[j]->fvec));
+          else {
+            flow=copy_svector(docs[j]->fvec);
+            flow->factor=-1.0;
+            flow->next=NULL;
+            fhigh=copy_svector(docs[i]->fvec);
+            fhigh->factor=1.0;
+            fhigh->next=flow;
+            docdiff[k]=create_example(k,0,0,cost,fhigh);
+          }
+          target[k]=-1;
+          greater[k]=i;
+          lesser[k]=j;
+          k++;
+        }
+      }
+    }
+  }
+  printf("done.\n"); fflush(stdout);
+
+  /* need to get a bigger kernel cache */
+  if(*kernel_cache) {
+    kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024);
+    kernel_cache_cleanup(*kernel_cache);
+    (*kernel_cache)=kernel_cache_init(totpair,kernel_cache_size);
+  }
+
+  /* must use unbiased hyperplane on difference vectors */
+  learn_parm->biased_hyperplane=0;
+  pairmodel=(MODEL *)my_malloc(sizeof(MODEL));
+  svm_learn_classification(docdiff,target,totpair,totwords,learn_parm,
+                           kernel_parm,(*kernel_cache),pairmodel,NULL);
+
+  /* Transfer the result into a more compact model. If you would like
+     to output the original model on pairs of documents, see below. */
+  alpha=(double *)my_malloc(sizeof(double)*totdoc);
+  for(i=0;i<totdoc;i++) {
+    alpha[i]=0;
+  }
+  for(i=1;i<pairmodel->sv_num;i++) {
+    alpha[lesser[(pairmodel->supvec[i])->docnum]]-=pairmodel->alpha[i];
+    alpha[greater[(pairmodel->supvec[i])->docnum]]+=pairmodel->alpha[i];
+  }
+  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
+  model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
+  model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
+  model->supvec[0]=0;  /* element 0 reserved and empty for now */
+  model->alpha[0]=0;
+  model->sv_num=1;
+  for(i=0;i<totdoc;i++) {
+    if(alpha[i]) {
+      model->supvec[model->sv_num]=docs[i];
+      model->alpha[model->sv_num]=alpha[i];
+      model->index[i]=model->sv_num;
+      model->sv_num++;
+    }
+    else {
+      model->index[i]=-1;
+    }
+  }
+  model->at_upper_bound=0;
+  model->b=0;
+  model->lin_weights=NULL;
+  model->totwords=totwords;
+  model->totdoc=totdoc;
+  model->kernel_parm=(*kernel_parm);
+  model->loo_error=-1;
+  model->loo_recall=-1;
+  model->loo_precision=-1;
+  model->xa_error=-1;
+  model->xa_recall=-1;
+  model->xa_precision=-1;
+
+  free(alpha);
+  free(greater);
+  free(lesser);
+  free(target);
+
+  /* If you would like to output the original model on pairs of
+     documents, replace the following lines with '(*model)=(*pairmodel);' */
+  for(i=0;i<totpair;i++)
+    free_example(docdiff[i],1);
+  free(docdiff);
+  free_model(pairmodel,0);
+}
+
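svm_learn_ranking reduces ordinal ranking to classification on difference vectors: every pair of examples that shares a queryid but differs in rank value contributes one constraint, built with sub_ss for the linear kernel and, for other kernels, as a two-element SVECTOR list with factors +1/-1 so the kernel can be evaluated over the implicit difference. A standalone sketch of the pair count, mirroring the totpair loop above with made-up data:

    #include <stdio.h>

    /* pair counting as in svm_learn_ranking: one constraint per pair of
       examples that share a queryid but differ in rank value */
    typedef struct { long queryid; double rank; } EX;

    int main()
    {
      EX ex[4]={{1,3.0},{1,1.0},{1,1.0},{2,2.0}};
      long i,j,totpair=0;
      for(i=0;i<4;i++)
        for(j=i+1;j<4;j++)
          if((ex[i].queryid==ex[j].queryid) && (ex[i].rank!=ex[j].rank))
            totpair++;
      printf("%ld rank constraints\n",totpair); /* 2: pairs (0,1), (0,2) */
      return 0;
    }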
+
+/* The following solves a freely defined and given set of
+   inequalities. The optimization problem is of the following form:
+
+   min 0.5 w*w + C sum_i C_i \xi_i
+   s.t. x_i * w > rhs_i - \xi_i
+
+   This corresponds to the -z o option. */
+
+void svm_learn_optimization(DOC **docs, double *rhs, long int
+                            totdoc, long int totwords,
+                            LEARN_PARM *learn_parm,
+                            KERNEL_PARM *kernel_parm,
+                            KERNEL_CACHE *kernel_cache, MODEL *model,
+                            double *alpha)
+     /* docs:        Left-hand side of inequalities (x-part) */
+     /* rhs:         Right-hand side of inequalities */
+     /* totdoc:      Number of examples in docs/label */
+     /* totwords:    Number of features (i.e. highest feature index) */
+     /* learn_parm:  Learning parameters */
+     /* kernel_parm: Kernel parameters */
+     /* kernel_cache:Initialized Cache of size 1*totdoc, if using a kernel.
+                     NULL if linear. */
+     /* model:       Returns solution as SV expansion (assumed empty before called) */
+     /* alpha:       Start values for the alpha variables or NULL
+                     pointer. The new alpha values are returned after
+                     optimization if not NULL. Array must be of size totdoc. */
+{
+  long i,*label;
+  long misclassified,upsupvecnum;
+  double loss,model_length,example_length;
+  double maxdiff,*lin,*a,*c;
+  long runtime_start,runtime_end;
+  long iterations,maxslackid,svsetnum;
+  long *unlabeled,*inconsistent;
+  double r_delta_sq=0,r_delta,r_delta_avg;
+  long *index,*index2dnum;
+  double *weights,*slack,*alphaslack;
+  CFLOAT *aicache;  /* buffer to keep one row of hessian */
+
+  TIMING timing_profile;
+  SHRINK_STATE shrink_state;
+
+  runtime_start=get_runtime();
+  timing_profile.time_kernel=0;
+  timing_profile.time_opti=0;
+  timing_profile.time_shrink=0;
+  timing_profile.time_update=0;
+  timing_profile.time_model=0;
+  timing_profile.time_check=0;
+  timing_profile.time_select=0;
+  kernel_cache_statistic=0;
+
+  learn_parm->totwords=totwords;
+
+  /* make sure -n value is reasonable */
+  if((learn_parm->svm_newvarsinqp < 2)
+     || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
+    learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
+  }
+
+  init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);
+
+  label = (long *)my_malloc(sizeof(long)*totdoc);
+  unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
+  inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
+  c = (double *)my_malloc(sizeof(double)*totdoc);
+  a = (double *)my_malloc(sizeof(double)*totdoc);
+  lin = (double *)my_malloc(sizeof(double)*totdoc);
+  learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
+  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
+  model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
+  model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
+
+  model->at_upper_bound=0;
+  model->b=0;
+  model->supvec[0]=0;  /* element 0 reserved and empty for now */
+  model->alpha[0]=0;
+  model->lin_weights=NULL;
+  model->totwords=totwords;
+  model->totdoc=totdoc;
+  model->kernel_parm=(*kernel_parm);
+  model->sv_num=1;
+  model->loo_error=-1;
+  model->loo_recall=-1;
+  model->loo_precision=-1;
+  model->xa_error=-1;
+  model->xa_recall=-1;
+  model->xa_precision=-1;
+
+  r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
+  r_delta_sq=r_delta*r_delta;
+
+  r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
+  if(learn_parm->svm_c == 0.0) {  /* default value for C */
+    learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
+    if(verbosity>=1)
+      printf("Setting default regularization parameter C=%.4f\n",
+             learn_parm->svm_c);
+  }
+
+  learn_parm->biased_hyperplane=0; /* learn an unbiased hyperplane */
+
+  learn_parm->eps=0.0;  /* No margin, unless explicitly handcoded
+                           in the right-hand side in the training
+                           set. */
+
+  for(i=0;i<totdoc;i++) {    /* various inits */
+    docs[i]->docnum=i;
+    a[i]=0;
+    lin[i]=0;
+    c[i]=rhs[i];             /* set right-hand side */
+    unlabeled[i]=0;
+    inconsistent[i]=0;
+    learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
+      docs[i]->costfactor;
+    label[i]=1;
+  }
+  if(learn_parm->sharedslack)  /* if shared slacks are used, they must */
+    for(i=0;i<totdoc;i++)      /* be used on every constraint */
+      if(!docs[i]->slackid) {
+        perror("Error: Missing shared slacks definitions in some of the examples.");
+        exit(0);
+      }
+
+  /* compute starting state for initial alpha values */
+  if(alpha) {
+    if(verbosity>=1) {
+      printf("Computing starting state..."); fflush(stdout);
+    }
+    index = (long *)my_malloc(sizeof(long)*totdoc);
+    index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
+    weights=(double *)my_malloc(sizeof(double)*(totwords+1));
+    aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
+    for(i=0;i<totdoc;i++) {    /* create full index and clip alphas */
+      index[i]=1;
+      alpha[i]=fabs(alpha[i]);
+      if(alpha[i]<0) alpha[i]=0;
+      if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
+    }
+    if(kernel_parm->kernel_type != LINEAR) {
+      for(i=0;i<totdoc;i++)    /* fill kernel cache with unbounded SV */
+        if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
+           && (kernel_cache_space_available(kernel_cache)))
+          cache_kernel_row(kernel_cache,docs,i,kernel_parm);
+      for(i=0;i<totdoc;i++)    /* fill rest of kernel cache with bounded SV */
+        if((alpha[i]==learn_parm->svm_cost[i])
+           && (kernel_cache_space_available(kernel_cache)))
+          cache_kernel_row(kernel_cache,docs,i,kernel_parm);
+    }
+    (void)compute_index(index,totdoc,index2dnum);
+    update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
+                            totwords,kernel_parm,kernel_cache,lin,aicache,
+                            weights);
+    (void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c,
+                              learn_parm,index2dnum,index2dnum,model);
+    for(i=0;i<totdoc;i++) {    /* copy initial alphas */
+      a[i]=alpha[i];
+    }
+    free(index);
+    free(index2dnum);
+    free(weights);
+    free(aicache);
+    if(verbosity>=1) {
+      printf("done.\n"); fflush(stdout);
+    }
+  }
+
+  /* removing inconsistent does not work for general optimization problem */
+  if(learn_parm->remove_inconsistent) {
+    learn_parm->remove_inconsistent = 0;
+    printf("'remove inconsistent' not available in this mode. Switching option off!"); fflush(stdout);
+  }
+
+  /* caching makes no sense for linear kernel */
+  if(kernel_parm->kernel_type == LINEAR) {
+    kernel_cache = NULL;
+  }
+
+  if(verbosity==1) {
+    printf("Optimizing"); fflush(stdout);
+  }
+
+  /* train the svm */
+  if(learn_parm->sharedslack)
+    iterations=optimize_to_convergence_sharedslack(docs,label,totdoc,
+                                     totwords,learn_parm,kernel_parm,
+                                     kernel_cache,&shrink_state,model,
+                                     a,lin,c,&timing_profile,
+                                     &maxdiff);
+  else
+    iterations=optimize_to_convergence(docs,label,totdoc,
+                                     totwords,learn_parm,kernel_parm,
+                                     kernel_cache,&shrink_state,model,
+                                     inconsistent,unlabeled,
+                                     a,lin,c,&timing_profile,
+                                     &maxdiff,(long)-1,(long)1);
+
+  if(verbosity>=1) {
+    if(verbosity==1) printf("done. (%ld iterations)\n",iterations);
+
+    misclassified=0;
+    for(i=0;(i<totdoc);i++) { /* get final statistic */
+      if((lin[i]-model->b)*(double)label[i] <= 0.0)
+        misclassified++;
+    }
+
+    printf("Optimization finished (maxdiff=%.5f).\n",maxdiff);
+
+    runtime_end=get_runtime();
+    if(verbosity>=2) {
+      printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
+        ((float)runtime_end-(float)runtime_start)/100.0,
+        (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),
+        (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),
+        (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),
+        (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),
+        (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),
+        (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),
+        (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));
+    }
+    else {
+      printf("Runtime in cpu-seconds: %.2f\n",
+             (runtime_end-runtime_start)/100.0);
+    }
+  }
+  if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
+    loss=0;
+    model_length=0;
+    for(i=0;i<totdoc;i++) {
+      if((lin[i]-model->b)*(double)label[i] < c[i]-learn_parm->epsilon_crit)
+        loss+=c[i]-(lin[i]-model->b)*(double)label[i];
+      model_length+=a[i]*label[i]*lin[i];
+    }
+    model_length=sqrt(model_length);
+    fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
+  }
+
+  if(learn_parm->sharedslack) {
+    index = (long *)my_malloc(sizeof(long)*totdoc);
+    index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
+    maxslackid=0;
+    for(i=0;i<totdoc;i++) {    /* create full index */
+      index[i]=1;
+      if(maxslackid<docs[i]->slackid)
+        maxslackid=docs[i]->slackid;
+    }
+    (void)compute_index(index,totdoc,index2dnum);
+    slack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
+    alphaslack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
+    for(i=0;i<=maxslackid;i++) {  /* init shared slacks */
+      slack[i]=0;
+      alphaslack[i]=0;
+    }
+    compute_shared_slacks(docs,label,a,lin,c,index2dnum,learn_parm,
+                          slack,alphaslack);
+    loss=0;
+    model->at_upper_bound=0;
+    svsetnum=0;
+    for(i=0;i<=maxslackid;i++) {  /* create full index */
+      loss+=slack[i];
+      if(alphaslack[i] > (learn_parm->svm_c - learn_parm->epsilon_a))
+        model->at_upper_bound++;
+      if(alphaslack[i] > learn_parm->epsilon_a)
+        svsetnum++;
+    }
+    free(index);
+    free(index2dnum);
+    free(slack);
+    free(alphaslack);
+  }
+
+  if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
+    if(learn_parm->sharedslack) {
+      printf("Number of SV: %ld\n",
+             model->sv_num-1);
+      printf("Number of non-zero slack variables: %ld (out of %ld)\n",
+             model->at_upper_bound,svsetnum);
+      fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
+    }
+    else {
+      upsupvecnum=0;
+      for(i=1;i<model->sv_num;i++) {
+        if(fabs(model->alpha[i]) >=
+           (learn_parm->svm_cost[(model->supvec[i])->docnum]-
+            learn_parm->epsilon_a))
+          upsupvecnum++;
+      }
+      printf("Number of SV: %ld (including %ld at upper bound)\n",
+             model->sv_num-1,upsupvecnum);
+      fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
+    }
+    example_length=estimate_sphere(model,kernel_parm);
+    fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
+            length_of_longest_document_vector(docs,totdoc,kernel_parm));
+  }
+  if(verbosity>=1) {
+    printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
+  }
+
+  if(alpha) {
+    for(i=0;i<totdoc;i++) {    /* copy final alphas */
+      alpha[i]=a[i];
+    }
+  }
+
+  if(learn_parm->alphafile[0])
+    write_alphas(learn_parm->alphafile,a,label,totdoc);
+
+  shrink_state_cleanup(&shrink_state);
+  free(label);
+  free(unlabeled);
+  free(inconsistent);
+  free(c);
+  free(a);
+  free(lin);
+  free(learn_parm->svm_cost);
+}
+
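The entry point above expects each DOC to hold the left-hand side x_i of one inequality, rhs[i] the right-hand side, and (optionally) a nonzero slackid, passed as the third argument of create_example, to put a constraint into a shared-slack group; as the check in its body shows, either all examples carry a slackid or none may. A hypothetical two-constraint call, with the parameter structs assumed to be filled in as in the classification sketch further up:

    #include "svm_common.h"
    #include "svm_learn.h"

    /* two soft constraints x_i*w > rhs_i - xi_i, the form solved by
       svm_learn_optimization (option -z o); slackid=0 here, so each
       constraint gets its own slack variable */
    void toy_optimization(LEARN_PARM *learn_parm, KERNEL_PARM *kernel_parm)
    {
      WORD w1[2]={{1,1.0},{0,0.0}};
      WORD w2[2]={{2,1.0},{0,0.0}};
      DOC *docs[2];
      double rhs[2]={1.0,2.0};
      MODEL *model=(MODEL *)my_malloc(sizeof(MODEL));

      docs[0]=create_example(0,0,0,1.0,create_svector(w1,"",1.0));
      docs[1]=create_example(1,0,0,1.0,create_svector(w2,"",1.0));
      svm_learn_optimization(docs,rhs,2,2,learn_parm,kernel_parm,
                             NULL,model,NULL);
    }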
+
+long optimize_to_convergence(DOC **docs, long int *label, long int totdoc,
+                             long int totwords, LEARN_PARM *learn_parm,
+                             KERNEL_PARM *kernel_parm,
+                             KERNEL_CACHE *kernel_cache,
+                             SHRINK_STATE *shrink_state, MODEL *model,
+                             long int *inconsistent, long int *unlabeled,
+                             double *a, double *lin, double *c,
+                             TIMING *timing_profile, double *maxdiff,
+                             long int heldout, long int retrain)
+     /* docs:   Training vectors (x-part) */
+     /* label:  Training labels/value (y-part, zero if test example for
+                transduction) */
+     /* totdoc: Number of examples in docs/label */
+     /* totwords: Number of features (i.e. highest feature index) */
+     /* learn_parm: Learning parameters */
+     /* kernel_parm: Kernel parameters */
+     /* kernel_cache: Initialized/partly filled Cache, if using a kernel.
+                      NULL if linear. */
+     /* shrink_state: State of active variables */
+     /* model:  Returns learning result */
+     /* inconsistent: examples thrown out as inconsistent */
+     /* unlabeled: test examples for transduction */
+     /* a:      alphas */
+     /* lin:    linear component of gradient */
+     /* c:      right hand side of inequalities (margin) */
+     /* maxdiff: returns maximum violation of KT-conditions */
+     /* heldout: marks held-out example for leave-one-out (or -1) */
+     /* retrain: selects training mode (1=regular / 2=holdout) */
+{
+  long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink;
+  long inconsistentnum,choosenum,already_chosen=0,iteration;
+  long misclassified,supvecnum=0,*active2dnum,inactivenum;
+  long *working2dnum,*selexam;
+  long activenum;
+  double criterion,eq;
+  double *a_old;
+  long t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */
+  long transductcycle;
+  long transduction;
+  double epsilon_crit_org;
+  double bestmaxdiff;
+  long bestmaxdiffiter,terminate;
+
+  double *selcrit;  /* buffer for sorting */
+  CFLOAT *aicache;  /* buffer to keep one row of hessian */
+  double *weights;  /* buffer for weight vector in linear case */
+  QP qp;            /* buffer for one quadratic program */
+
+  epsilon_crit_org=learn_parm->epsilon_crit; /* save org */
+  if(kernel_parm->kernel_type == LINEAR) {
+    learn_parm->epsilon_crit=2.0;
+    kernel_cache=NULL;   /* caching makes no sense for linear kernel */
+  }
+  learn_parm->epsilon_shrink=2;
+  (*maxdiff)=1;
+
+  learn_parm->totwords=totwords;
+
+  chosen = (long *)my_malloc(sizeof(long)*totdoc);
+  last_suboptimal_at = (long *)my_malloc(sizeof(long)*totdoc);
+  key = (long *)my_malloc(sizeof(long)*(totdoc+11));
+  selcrit = (double *)my_malloc(sizeof(double)*totdoc);
+  selexam = (long *)my_malloc(sizeof(long)*totdoc);
+  a_old = (double *)my_malloc(sizeof(double)*totdoc);
+  aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
+  working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
+  active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
+  qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
+  qp.opt_ce0 = (double *)my_malloc(sizeof(double));
+  qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize
+                                 *learn_parm->svm_maxqpsize);
+  qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
+  qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
+  qp.opt_low=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
+  qp.opt_up=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
+  weights=(double *)my_malloc(sizeof(double)*(totwords+1));
+
+  choosenum=0;
+  inconsistentnum=0;
+  transductcycle=0;
+  transduction=0;
+  if(!retrain) retrain=1;
+  iteration=1;
+  bestmaxdiffiter=1;
+  bestmaxdiff=999999999;
+  terminate=0;
+
+  if(kernel_cache) {
+    kernel_cache->time=iteration;  /* for lru cache */
+    kernel_cache_reset_lru(kernel_cache);
+  }
+
+  for(i=0;i<totdoc;i++) {    /* various inits */
+    chosen[i]=0;
+    a_old[i]=a[i];
+    last_suboptimal_at[i]=1;
+    if(inconsistent[i])
+      inconsistentnum++;
+    if(unlabeled[i]) {
+      transduction=1;
+    }
+  }
+  activenum=compute_index(shrink_state->active,totdoc,active2dnum);
+  inactivenum=totdoc-activenum;
+  clear_index(working2dnum);
+
+  /* repeat this loop until we have convergence */
+  for(;retrain && (!terminate);iteration++) {
+
+    if(kernel_cache)
+      kernel_cache->time=iteration;  /* for lru cache */
+    if(verbosity>=2) {
+      printf("Iteration %ld: ",iteration); fflush(stdout);
+    }
+    else if(verbosity==1) {
+      printf("."); fflush(stdout);
+    }
+
+    if(verbosity>=2) t0=get_runtime();
+    if(verbosity>=3) {
+      printf("\nSelecting working set... "); fflush(stdout);
+    }
+
+    if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize)
+      learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
+
+    i=0;
+    for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */
+      if((chosen[j]>=(learn_parm->svm_maxqpsize/
+                      minl(learn_parm->svm_maxqpsize,
+                           learn_parm->svm_newvarsinqp)))
+         || (inconsistent[j])
+         || (j == heldout)) {
+        chosen[j]=0;
+        choosenum--;
+      }
+      else {
+        chosen[j]++;
+        working2dnum[i++]=j;
+      }
+    }
+    working2dnum[i]=-1;
+
+    if(retrain == 2) {
+      choosenum=0;
+      for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* fully clear working set */
+        chosen[j]=0;
+      }
+      clear_index(working2dnum);
+      for(i=0;i<totdoc;i++) { /* set inconsistent examples to zero (-i 1) */
+        if((inconsistent[i] || (heldout==i)) && (a[i] != 0.0)) {
+          chosen[i]=99999;
+          choosenum++;
+          a[i]=0;
+        }
+      }
+      if(learn_parm->biased_hyperplane) {
+        eq=0;
+        for(i=0;i<totdoc;i++) { /* make sure we fulfill equality constraint */
+          eq+=a[i]*label[i];
+        }
+        for(i=0;(i<totdoc) && (fabs(eq) > learn_parm->epsilon_a);i++) {
+          if((eq*label[i] > 0) && (a[i] > 0)) {
+            chosen[i]=88888;
+            choosenum++;
+            if((eq*label[i]) > a[i]) {
+              eq-=(a[i]*label[i]);
+              a[i]=0;
+            }
+            else {
+              a[i]-=(eq*label[i]);
+              eq=0;
+            }
+          }
+        }
+      }
+      compute_index(chosen,totdoc,working2dnum);
+    }
+    else {   /* select working set according to steepest gradient */
+      if(iteration % 101) {
+        already_chosen=0;
+        if((minl(learn_parm->svm_newvarsinqp,
+                 learn_parm->svm_maxqpsize-choosenum)>=4)
+           && (kernel_parm->kernel_type != LINEAR)) {
+          /* select part of the working set from cache */
+          already_chosen=select_next_qp_subproblem_grad(
+                              label,unlabeled,a,lin,c,totdoc,
+                              (long)(minl(learn_parm->svm_maxqpsize-choosenum,
+                                          learn_parm->svm_newvarsinqp)/2),
+                              learn_parm,inconsistent,active2dnum,
+                              working2dnum,selcrit,selexam,kernel_cache,1,
+                              key,chosen);
+          choosenum+=already_chosen;
+        }
+        choosenum+=select_next_qp_subproblem_grad(
+                              label,unlabeled,a,lin,c,totdoc,
+                              minl(learn_parm->svm_maxqpsize-choosenum,
+                                   learn_parm->svm_newvarsinqp-already_chosen),
+                              learn_parm,inconsistent,active2dnum,
+                              working2dnum,selcrit,selexam,kernel_cache,0,key,
+                              chosen);
+      }
+      else {  /* once in a while, select a somewhat random working set
+                 to get unlocked of infinite loops due to numerical
+                 inaccuracies in the core qp-solver */
+        choosenum+=select_next_qp_subproblem_rand(
+                              label,unlabeled,a,lin,c,totdoc,
+                              minl(learn_parm->svm_maxqpsize-choosenum,
+                                   learn_parm->svm_newvarsinqp),
+                              learn_parm,inconsistent,active2dnum,
+                              working2dnum,selcrit,selexam,kernel_cache,key,
+                              chosen,iteration);
+      }
+    }
+
+    if(verbosity>=2) {
+      printf(" %ld vectors chosen\n",choosenum); fflush(stdout);
+    }
+
+    if(verbosity>=2) t1=get_runtime();
+
+    if(kernel_cache)
+      cache_multiple_kernel_rows(kernel_cache,docs,working2dnum,
+                                 choosenum,kernel_parm);
+
+    if(verbosity>=2) t2=get_runtime();
+    if(retrain != 2) {
+      optimize_svm(docs,label,unlabeled,inconsistent,0.0,chosen,active2dnum,
+                   model,totdoc,working2dnum,choosenum,a,lin,c,learn_parm,
+                   aicache,kernel_parm,&qp,&epsilon_crit_org);
+    }
+
+    if(verbosity>=2) t3=get_runtime();
+    update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc,
+                            totwords,kernel_parm,kernel_cache,lin,aicache,
+                            weights);
+
+    if(verbosity>=2) t4=get_runtime();
+    supvecnum=calculate_svm_model(docs,label,unlabeled,lin,a,a_old,c,
+                                  learn_parm,working2dnum,active2dnum,model);
+
+    if(verbosity>=2) t5=get_runtime();
+
+    /* The following computation of the objective function works only */
+    /* relative to the active variables */
+    if(verbosity>=3) {
+      criterion=compute_objective_function(a,lin,c,learn_parm->eps,label,
+                                           active2dnum);
+      printf("Objective function (over active variables): %.16f\n",criterion);
+      fflush(stdout);
+    }
+
+    for(jj=0;(i=working2dnum[jj])>=0;jj++) {
+      a_old[i]=a[i];
+    }
+
+    if(retrain == 2) {  /* reset inconsistent unlabeled examples */
+      for(i=0;(i<totdoc);i++) {
+        if(inconsistent[i] && unlabeled[i]) {
+          inconsistent[i]=0;
+          label[i]=0;
+        }
+      }
+    }
+
+    retrain=check_optimality(model,label,unlabeled,a,lin,c,totdoc,learn_parm,
+                             maxdiff,epsilon_crit_org,&misclassified,
+                             inconsistent,active2dnum,last_suboptimal_at,
+                             iteration,kernel_parm);
+
+    if(verbosity>=2) {
+      t6=get_runtime();
+      timing_profile->time_select+=t1-t0;
+      timing_profile->time_kernel+=t2-t1;
+      timing_profile->time_opti+=t3-t2;
+      timing_profile->time_update+=t4-t3;
+      timing_profile->time_model+=t5-t4;
+      timing_profile->time_check+=t6-t5;
+    }
+
+    /* checking whether optimizer got stuck */
+    if((*maxdiff) < bestmaxdiff) {
+      bestmaxdiff=(*maxdiff);
+      bestmaxdiffiter=iteration;
+    }
+    if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) {
+      /* long time no progress? */
+      terminate=1;
+      retrain=0;
+      if(verbosity>=1)
+        printf("\nWARNING: Relaxing KT-Conditions due to slow progress! Terminating!\n");
+    }
+
+    noshrink=0;
+    if((!retrain) && (inactivenum>0)
+       && ((!learn_parm->skip_final_opt_check)
+           || (kernel_parm->kernel_type == LINEAR))) {
+      if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
+         || (verbosity>=2)) {
+        if(verbosity==1) {
+          printf("\n");
+        }
+        printf(" Checking optimality of inactive variables...");
+        fflush(stdout);
+      }
+      t1=get_runtime();
+      reactivate_inactive_examples(label,unlabeled,a,shrink_state,lin,c,totdoc,
+                                   totwords,iteration,learn_parm,inconsistent,
+                                   docs,kernel_parm,kernel_cache,model,aicache,
+                                   weights,maxdiff);
+      /* Update to new active variables. */
+      activenum=compute_index(shrink_state->active,totdoc,active2dnum);
+      inactivenum=totdoc-activenum;
+      /* reset watchdog */
+      bestmaxdiff=(*maxdiff);
+      bestmaxdiffiter=iteration;
+      /* termination criterion */
+      noshrink=1;
+      retrain=0;
+      if((*maxdiff) > learn_parm->epsilon_crit)
+        retrain=1;
+      timing_profile->time_shrink+=get_runtime()-t1;
+      if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
+         || (verbosity>=2)) {
+        printf("done.\n"); fflush(stdout);
+        printf(" Number of inactive variables = %ld\n",inactivenum);
+      }
+    }
+
+    if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff)))
+      learn_parm->epsilon_crit=(*maxdiff);
+    if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) {
+      learn_parm->epsilon_crit/=2.0;
+      retrain=1;
+      noshrink=1;
+    }
+    if(learn_parm->epsilon_crit<epsilon_crit_org)
+      learn_parm->epsilon_crit=epsilon_crit_org;
+
+    if(verbosity>=2) {
+      printf(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n",
+             supvecnum,model->at_upper_bound,(*maxdiff));
+      fflush(stdout);
+    }
+    if(verbosity>=3) {
+      printf("\n");
+    }
+
+    if((!retrain) && (transduction)) {
+      for(i=0;(i<totdoc);i++) {
+        shrink_state->active[i]=1;
+      }
+      activenum=compute_index(shrink_state->active,totdoc,active2dnum);
+      inactivenum=0;
+      if(verbosity==1) printf("done\n");
+      retrain=incorporate_unlabeled_examples(model,label,inconsistent,
+                                             unlabeled,a,lin,totdoc,
+                                             selcrit,selexam,key,
+                                             transductcycle,kernel_parm,
+                                             learn_parm);
+      epsilon_crit_org=learn_parm->epsilon_crit;
+      if(kernel_parm->kernel_type == LINEAR)
+        learn_parm->epsilon_crit=1;
+      transductcycle++;
+      /* reset watchdog */
+      bestmaxdiff=(*maxdiff);
+      bestmaxdiffiter=iteration;
+    }
+    else if(((iteration % 10) == 0) && (!noshrink)) {
+      activenum=shrink_problem(docs,learn_parm,shrink_state,kernel_parm,
+                               active2dnum,last_suboptimal_at,iteration,totdoc,
+                               maxl((long)(activenum/10),
+                                    maxl((long)(totdoc/500),100)),
+                               a,inconsistent);
+      inactivenum=totdoc-activenum;
+      if((kernel_cache)
+         && (supvecnum>kernel_cache->max_elems)
+         && ((kernel_cache->activenum-activenum)>maxl((long)(activenum/10),500))) {
+        kernel_cache_shrink(kernel_cache,totdoc,
+                            minl((kernel_cache->activenum-activenum),
+                                 (kernel_cache->activenum-supvecnum)),
+                            shrink_state->active);
+      }
+    }
+
+    if((!retrain) && learn_parm->remove_inconsistent) {
+      if(verbosity>=1) {
+        printf(" Moving training errors to inconsistent examples...");
+        fflush(stdout);
+      }
+      if(learn_parm->remove_inconsistent == 1) {
+        retrain=identify_inconsistent(a,label,unlabeled,totdoc,learn_parm,
+                                      &inconsistentnum,inconsistent);
+      }
+      else if(learn_parm->remove_inconsistent == 2) {
+        retrain=identify_misclassified(lin,label,unlabeled,totdoc,
+                                       model,&inconsistentnum,inconsistent);
+      }
+      else if(learn_parm->remove_inconsistent == 3) {
+        retrain=identify_one_misclassified(lin,label,unlabeled,totdoc,
+                                           model,&inconsistentnum,inconsistent);
+      }
+      if(retrain) {
+        if(kernel_parm->kernel_type == LINEAR) { /* reinit shrinking */
+          learn_parm->epsilon_crit=2.0;
+        }
+      }
+      if(verbosity>=1) {
+        printf("done.\n");
+        if(retrain) {
+          printf(" Now %ld inconsistent examples.\n",inconsistentnum);
+        }
+      }
+    }
+  } /* end of loop */
+
+  free(chosen);
+  free(last_suboptimal_at);
+  free(key);
+  free(selcrit);
+  free(selexam);
+  free(a_old);
+  free(aicache);
+  free(working2dnum);
+  free(active2dnum);
+  free(qp.opt_ce);
+  free(qp.opt_ce0);
+  free(qp.opt_g);
+  free(qp.opt_g0);
+  free(qp.opt_xinit);
+  free(qp.opt_low);
+  free(qp.opt_up);
+  free(weights);
+
+  learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */
+  model->maxdiff=(*maxdiff);
+
+  return(iteration);
+}
+
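optimize_to_convergence is the decomposition core shared by all the entry points: each iteration picks at most svm_maxqpsize variables (steepest feasible descent, with every 101st iteration randomized to break numerical cycling), solves that QP subproblem via optimize_svm, and incrementally maintains lin[i], the linear component of the gradient, until the KT conditions hold within epsilon_crit. A tiny freestanding illustration of the quantity lin[] that the loop maintains, using a linear kernel and hypothetical numbers:

    #include <stdio.h>

    /* lin[i] = sum_j a_j y_j K(j,i), recomputed here from scratch; the
       real code updates it incrementally from a_old after each
       subproblem (update_linear_component) */
    int main()
    {
      double x[3]={1.0,2.0,-1.0};   /* 1-D points, K(j,i)=x_j*x_i */
      long   y[3]={+1,+1,-1};
      double a[3]={0.5,0.0,0.5},lin[3];
      long i,j;
      for(i=0;i<3;i++) {
        lin[i]=0;
        for(j=0;j<3;j++)
          lin[i]+=a[j]*y[j]*(x[j]*x[i]);
        printf("lin[%ld]=%.2f\n",i,lin[i]);  /* 1.00, 2.00, -1.00 */
      }
      /* with b=0, y_i*lin[i] is 1, 2, 1: examples 0 and 2 sit on the
         margin (support vectors), example 1 lies beyond it -- roughly
         the condition check_optimality tests against epsilon_crit */
      return 0;
    }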
model->maxdiff=(*maxdiff); + + return(iteration); +} + +long optimize_to_convergence_sharedslack(DOC **docs, long int *label, + long int totdoc, + long int totwords, LEARN_PARM *learn_parm, + KERNEL_PARM *kernel_parm, + KERNEL_CACHE *kernel_cache, + SHRINK_STATE *shrink_state, MODEL *model, + double *a, double *lin, double *c, + TIMING *timing_profile, double *maxdiff) + /* docs: Training vectors (x-part) */ + /* label: Training labels/value (y-part, zero if test example for + transduction) */ + /* totdoc: Number of examples in docs/label */ + /* totwords: Number of features (i.e. highest feature index) */ + /* learn_parm: Learning paramenters */ + /* kernel_parm: Kernel paramenters */ + /* kernel_cache: Initialized/partly filled Cache, if using a kernel. + NULL if linear. */ + /* shrink_state: State of active variables */ + /* model: Returns learning result */ + /* a: alphas */ + /* lin: linear component of gradient */ + /* c: right hand side of inequalities (margin) */ + /* maxdiff: returns maximum violation of KT-conditions */ +{ + long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink,*unlabeled; + long *inconsistent,choosenum,already_chosen=0,iteration; + long misclassified,supvecnum=0,*active2dnum,inactivenum; + long *working2dnum,*selexam,*ignore; + long activenum,retrain,maxslackid,slackset,jointstep; + double criterion,eq_target; + double *a_old,*alphaslack; + long t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */ + double epsilon_crit_org,maxsharedviol; + double bestmaxdiff; + long bestmaxdiffiter,terminate; + + double *selcrit; /* buffer for sorting */ + CFLOAT *aicache; /* buffer to keep one row of hessian */ + double *weights; /* buffer for weight vector in linear case */ + QP qp; /* buffer for one quadratic program */ + double *slack; /* vector of slack variables for optimization with + shared slacks */ + + epsilon_crit_org=learn_parm->epsilon_crit; /* save org */ + if(kernel_parm->kernel_type == LINEAR) { + learn_parm->epsilon_crit=2.0; + kernel_cache=NULL; /* caching makes no sense for linear kernel */ + } + learn_parm->epsilon_shrink=2; + (*maxdiff)=1; + + learn_parm->totwords=totwords; + + chosen = (long *)my_malloc(sizeof(long)*totdoc); + unlabeled = (long *)my_malloc(sizeof(long)*totdoc); + inconsistent = (long *)my_malloc(sizeof(long)*totdoc); + ignore = (long *)my_malloc(sizeof(long)*totdoc); + key = (long *)my_malloc(sizeof(long)*(totdoc+11)); + selcrit = (double *)my_malloc(sizeof(double)*totdoc); + selexam = (long *)my_malloc(sizeof(long)*totdoc); + a_old = (double *)my_malloc(sizeof(double)*totdoc); + aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc); + working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); + active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); + qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); + qp.opt_ce0 = (double *)my_malloc(sizeof(double)); + qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize + *learn_parm->svm_maxqpsize); + qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); + qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); + qp.opt_low=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); + qp.opt_up=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); + weights=(double *)my_malloc(sizeof(double)*(totwords+1)); + maxslackid=0; + for(i=0;i<totdoc;i++) { /* determine size of slack array */ + if(maxslackid<docs[i]->slackid) + maxslackid=docs[i]->slackid; + } + slack=(double 
*)my_malloc(sizeof(double)*(maxslackid+1)); + alphaslack=(double *)my_malloc(sizeof(double)*(maxslackid+1)); + last_suboptimal_at = (long *)my_malloc(sizeof(long)*(maxslackid+1)); + for(i=0;i<=maxslackid;i++) { /* init shared slacks */ + slack[i]=0; + alphaslack[i]=0; + last_suboptimal_at[i]=1; + } + + choosenum=0; + retrain=1; + iteration=1; + bestmaxdiffiter=1; + bestmaxdiff=999999999; + terminate=0; + + if(kernel_cache) { + kernel_cache->time=iteration; /* for lru cache */ + kernel_cache_reset_lru(kernel_cache); + } + + for(i=0;i<totdoc;i++) { /* various inits */ + chosen[i]=0; + unlabeled[i]=0; + inconsistent[i]=0; + ignore[i]=0; + a_old[i]=a[i]; + } + activenum=compute_index(shrink_state->active,totdoc,active2dnum); + inactivenum=totdoc-activenum; + clear_index(working2dnum); + + /* call to init slack and alphaslack */ + compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm, + slack,alphaslack); + + /* repeat this loop until we have convergence */ + for(;retrain && (!terminate);iteration++) { + + if(kernel_cache) + kernel_cache->time=iteration; /* for lru cache */ + if(verbosity>=2) { + printf( + "Iteration %ld: ",iteration); fflush(stdout); + } + else if(verbosity==1) { + printf("."); fflush(stdout); + } + + if(verbosity>=2) t0=get_runtime(); + if(verbosity>=3) { + printf("\nSelecting working set... "); fflush(stdout); + } + + if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize) + learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; + + /* select working set according to steepest gradient */ + jointstep=0; + eq_target=0; + if(iteration % 101) { + slackset=select_next_qp_slackset(docs,label,a,lin,slack,alphaslack,c, + learn_parm,active2dnum,&maxsharedviol); + if((iteration % 2) + || (!slackset) || (maxsharedviol<learn_parm->epsilon_crit)){ + /* do a step with examples from different slack sets */ + if(verbosity >= 2) { + printf("(i-step)"); fflush(stdout); + } + i=0; + for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear old part of working set */ + if((chosen[j]>=(learn_parm->svm_maxqpsize/ + minl(learn_parm->svm_maxqpsize, + learn_parm->svm_newvarsinqp)))) { + chosen[j]=0; + choosenum--; + } + else { + chosen[j]++; + working2dnum[i++]=j; + } + } + working2dnum[i]=-1; + + already_chosen=0; + if((minl(learn_parm->svm_newvarsinqp, + learn_parm->svm_maxqpsize-choosenum)>=4) + && (kernel_parm->kernel_type != LINEAR)) { + /* select part of the working set from cache */ + already_chosen=select_next_qp_subproblem_grad( + label,unlabeled,a,lin,c,totdoc, + (long)(minl(learn_parm->svm_maxqpsize-choosenum, + learn_parm->svm_newvarsinqp) + /2), + learn_parm,inconsistent,active2dnum, + working2dnum,selcrit,selexam,kernel_cache, + (long)1,key,chosen); + choosenum+=already_chosen; + } + choosenum+=select_next_qp_subproblem_grad( + label,unlabeled,a,lin,c,totdoc, + minl(learn_parm->svm_maxqpsize-choosenum, + learn_parm->svm_newvarsinqp-already_chosen), + learn_parm,inconsistent,active2dnum, + working2dnum,selcrit,selexam,kernel_cache, + (long)0,key,chosen); + } + else { /* do a step with all examples from same slack set */ + if(verbosity >= 2) { + printf("(j-step on %ld)",slackset); fflush(stdout); + } + jointstep=1; + for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */ + chosen[j]=0; + } + working2dnum[0]=-1; + eq_target=alphaslack[slackset]; + for(j=0;j<totdoc;j++) { /* mask all but slackset */ + /* for(jj=0;(j=active2dnum[jj])>=0;jj++) { */ + if(docs[j]->slackid != slackset) + ignore[j]=1; + else { + ignore[j]=0; + learn_parm->svm_cost[j]=learn_parm->svm_c; + /* 
printf("Inslackset(%ld,%ld)",j,shrink_state->active[j]); */ + } + } + learn_parm->biased_hyperplane=1; + choosenum=select_next_qp_subproblem_grad( + label,unlabeled,a,lin,c,totdoc, + learn_parm->svm_maxqpsize, + learn_parm,ignore,active2dnum, + working2dnum,selcrit,selexam,kernel_cache, + (long)0,key,chosen); + learn_parm->biased_hyperplane=0; + } + } + else { /* once in a while, select a somewhat random working set + to get unlocked of infinite loops due to numerical + inaccuracies in the core qp-solver */ + choosenum+=select_next_qp_subproblem_rand( + label,unlabeled,a,lin,c,totdoc, + minl(learn_parm->svm_maxqpsize-choosenum, + learn_parm->svm_newvarsinqp), + learn_parm,inconsistent,active2dnum, + working2dnum,selcrit,selexam,kernel_cache,key, + chosen,iteration); + } + + if(verbosity>=2) { + printf(" %ld vectors chosen\n",choosenum); fflush(stdout); + } + + if(verbosity>=2) t1=get_runtime(); + + if(kernel_cache) + cache_multiple_kernel_rows(kernel_cache,docs,working2dnum, + choosenum,kernel_parm); + + if(verbosity>=2) t2=get_runtime(); + if(jointstep) learn_parm->biased_hyperplane=1; + optimize_svm(docs,label,unlabeled,ignore,eq_target,chosen,active2dnum, + model,totdoc,working2dnum,choosenum,a,lin,c,learn_parm, + aicache,kernel_parm,&qp,&epsilon_crit_org); + learn_parm->biased_hyperplane=0; + + for(jj=0;(i=working2dnum[jj])>=0;jj++) /* recompute sums of alphas */ + alphaslack[docs[i]->slackid]+=(a[i]-a_old[i]); + for(jj=0;(i=working2dnum[jj])>=0;jj++) { /* reduce alpha to fulfill + constraints */ + if(alphaslack[docs[i]->slackid] > learn_parm->svm_c) { + if(a[i] < (alphaslack[docs[i]->slackid]-learn_parm->svm_c)) { + alphaslack[docs[i]->slackid]-=a[i]; + a[i]=0; + } + else { + a[i]-=(alphaslack[docs[i]->slackid]-learn_parm->svm_c); + alphaslack[docs[i]->slackid]=learn_parm->svm_c; + } + } + } + for(jj=0;(i=active2dnum[jj])>=0;jj++) + learn_parm->svm_cost[i]=a[i]+(learn_parm->svm_c + -alphaslack[docs[i]->slackid]); + + if(verbosity>=2) t3=get_runtime(); + update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc, + totwords,kernel_parm,kernel_cache,lin,aicache, + weights); + compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm, + slack,alphaslack); + + if(verbosity>=2) t4=get_runtime(); + supvecnum=calculate_svm_model(docs,label,unlabeled,lin,a,a_old,c, + learn_parm,working2dnum,active2dnum,model); + + if(verbosity>=2) t5=get_runtime(); + + /* The following computation of the objective function works only */ + /* relative to the active variables */ + if(verbosity>=3) { + criterion=compute_objective_function(a,lin,c,learn_parm->eps,label, + active2dnum); + printf("Objective function (over active variables): %.16f\n",criterion); + fflush(stdout); + } + + for(jj=0;(i=working2dnum[jj])>=0;jj++) { + a_old[i]=a[i]; + } + + retrain=check_optimality_sharedslack(docs,model,label,a,lin,c, + slack,alphaslack,totdoc,learn_parm, + maxdiff,epsilon_crit_org,&misclassified, + active2dnum,last_suboptimal_at, + iteration,kernel_parm); + + if(verbosity>=2) { + t6=get_runtime(); + timing_profile->time_select+=t1-t0; + timing_profile->time_kernel+=t2-t1; + timing_profile->time_opti+=t3-t2; + timing_profile->time_update+=t4-t3; + timing_profile->time_model+=t5-t4; + timing_profile->time_check+=t6-t5; + } + + /* checking whether optimizer got stuck */ + if((*maxdiff) < bestmaxdiff) { + bestmaxdiff=(*maxdiff); + bestmaxdiffiter=iteration; + } + if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) { + /* long time no progress? 
*/ + terminate=1; + retrain=0; + if(verbosity>=1) + printf("\nWARNING: Relaxing KT-Conditions due to slow progress! Terminating!\n"); + } + + noshrink=0; + + if((!retrain) && (inactivenum>0) + && ((!learn_parm->skip_final_opt_check) + || (kernel_parm->kernel_type == LINEAR))) { + if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR)) + || (verbosity>=2)) { + if(verbosity==1) { + printf("\n"); + } + printf(" Checking optimality of inactive variables..."); + fflush(stdout); + } + t1=get_runtime(); + reactivate_inactive_examples(label,unlabeled,a,shrink_state,lin,c,totdoc, + totwords,iteration,learn_parm,inconsistent, + docs,kernel_parm,kernel_cache,model,aicache, + weights,maxdiff); + /* Update to new active variables. */ + activenum=compute_index(shrink_state->active,totdoc,active2dnum); + inactivenum=totdoc-activenum; + /* check optimality, since check in reactivate does not work for + sharedslacks */ + retrain=check_optimality_sharedslack(docs,model,label,a,lin,c, + slack,alphaslack,totdoc,learn_parm, + maxdiff,epsilon_crit_org,&misclassified, + active2dnum,last_suboptimal_at, + iteration,kernel_parm); + + /* reset watchdog */ + bestmaxdiff=(*maxdiff); + bestmaxdiffiter=iteration; + /* termination criterion */ + noshrink=1; + retrain=0; + if((*maxdiff) > learn_parm->epsilon_crit) + retrain=1; + timing_profile->time_shrink+=get_runtime()-t1; + if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR)) + || (verbosity>=2)) { + printf("done.\n"); fflush(stdout); + printf(" Number of inactive variables = %ld\n",inactivenum); + } + } + + if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff))) + learn_parm->epsilon_crit=(*maxdiff); + if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) { + learn_parm->epsilon_crit/=2.0; + retrain=1; + noshrink=1; + } + if(learn_parm->epsilon_crit<epsilon_crit_org) + learn_parm->epsilon_crit=epsilon_crit_org; + + if(verbosity>=2) { + printf(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n", + supvecnum,model->at_upper_bound,(*maxdiff)); + fflush(stdout); + } + if(verbosity>=3) { + printf("\n"); + } + + if(((iteration % 10) == 0) && (!noshrink)) { + activenum=shrink_problem(docs,learn_parm,shrink_state, + kernel_parm,active2dnum, + last_suboptimal_at,iteration,totdoc, + maxl((long)(activenum/10), + maxl((long)(totdoc/500),100)), + a,inconsistent); + inactivenum=totdoc-activenum; + if((kernel_cache) + && (supvecnum>kernel_cache->max_elems) + && ((kernel_cache->activenum-activenum)>maxl((long)(activenum/10),500))) { + kernel_cache_shrink(kernel_cache,totdoc, + minl((kernel_cache->activenum-activenum), + (kernel_cache->activenum-supvecnum)), + shrink_state->active); + } + } + + } /* end of loop */ + + + free(alphaslack); + free(slack); + free(chosen); + free(unlabeled); + free(inconsistent); + free(ignore); + free(last_suboptimal_at); + free(key); + free(selcrit); + free(selexam); + free(a_old); + free(aicache); + free(working2dnum); + free(active2dnum); + free(qp.opt_ce); + free(qp.opt_ce0); + free(qp.opt_g); + free(qp.opt_g0); + free(qp.opt_xinit); + free(qp.opt_low); + free(qp.opt_up); + free(weights); + + learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */ + model->maxdiff=(*maxdiff); + + return(iteration); +} + + +double compute_objective_function(double *a, double *lin, double *c, + double eps, long int *label, + long int *active2dnum) + /* Return value of objective function. */ + /* Works only relative to the active variables! 
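+ Note: with lin[i]=sum_j a_j*y_j*K(x_i,x_j) this evaluates sum_i (eps-y_i*c_i)*a_i+0.5*a_i*y_i*lin[i]; in the classification setup (eps=-1, c_i=0) this is the negated standard dual objective, so smaller printed values mean a better dual solution. 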
*/ +{ + long i,ii; + double criterion; + /* calculate value of objective function */ + criterion=0; + for(ii=0;active2dnum[ii]>=0;ii++) { + i=active2dnum[ii]; + criterion=criterion+(eps-(double)label[i]*c[i])*a[i]+0.5*a[i]*label[i]*lin[i]; + } + return(criterion); +} + +void clear_index(long int *index) + /* initializes and empties index */ +{ + index[0]=-1; +} + +void add_to_index(long int *index, long int elem) + /* appends elem at the end of the index */ +{ + register long i; + for(i=0;index[i] != -1;i++); + index[i]=elem; + index[i+1]=-1; +} + +long compute_index(long int *binfeature, long int range, long int *index) + /* create an inverted index of binfeature */ +{ + register long i,ii; + + ii=0; + for(i=0;i<range;i++) { + if(binfeature[i]) { + index[ii]=i; + ii++; + } + } + for(i=0;i<4;i++) { + index[ii+i]=-1; + } + return(ii); +} + + +void optimize_svm(DOC **docs, long int *label, long int *unlabeled, + long int *exclude_from_eq_const, double eq_target, + long int *chosen, long int *active2dnum, MODEL *model, + long int totdoc, long int *working2dnum, long int varnum, + double *a, double *lin, double *c, LEARN_PARM *learn_parm, + CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp, + double *epsilon_crit_target) + /* Do optimization on the working set. */ +{ + long i; + double *a_v; + + compute_matrices_for_optimization(docs,label,unlabeled, + exclude_from_eq_const,eq_target,chosen, + active2dnum,working2dnum,model,a,lin,c, + varnum,totdoc,learn_parm,aicache, + kernel_parm,qp); + + if(verbosity>=3) { + printf("Running optimizer..."); fflush(stdout); + } + /* call the qp-subsolver */ + a_v=optimize_qp(qp,epsilon_crit_target, + learn_parm->svm_maxqpsize, + &(model->b), /* in case the optimizer gives us */ + /* the threshold for free. otherwise */ + /* b is calculated in calculate_model. 
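+ The array a_v returned below is copied into a[] immediately afterwards; it is assumed to be owned by the qp-solver and no longer valid after the next optimize_qp() call. 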
*/ + learn_parm); + if(verbosity>=3) { + printf("done\n"); + } + + for(i=0;i<varnum;i++) { + a[working2dnum[i]]=a_v[i]; + /* + if(a_v[i]<=(0+learn_parm->epsilon_a)) { + a[working2dnum[i]]=0; + } + else if(a_v[i]>=(learn_parm->svm_cost[working2dnum[i]]-learn_parm->epsilon_a)) { + a[working2dnum[i]]=learn_parm->svm_cost[working2dnum[i]]; + } + */ + } +} + +void compute_matrices_for_optimization(DOC **docs, long int *label, + long int *unlabeled, long *exclude_from_eq_const, double eq_target, + long int *chosen, long int *active2dnum, + long int *key, MODEL *model, double *a, double *lin, double *c, + long int varnum, long int totdoc, LEARN_PARM *learn_parm, + CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp) +{ + register long ki,kj,i,j; + register double kernel_temp; + + if(verbosity>=3) { + fprintf(stdout,"Computing qp-matrices (type %ld kernel [degree %ld, rbf_gamma %f, coef_lin %f, coef_const %f])...",kernel_parm->kernel_type,kernel_parm->poly_degree,kernel_parm->rbf_gamma,kernel_parm->coef_lin,kernel_parm->coef_const); + fflush(stdout); + } + + qp->opt_n=varnum; + qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */ + for(j=1;j<model->sv_num;j++) { /* start at 1 */ + if((!chosen[(model->supvec[j])->docnum]) + && (!exclude_from_eq_const[(model->supvec[j])->docnum])) { + qp->opt_ce0[0]+=model->alpha[j]; + } + } + if(learn_parm->biased_hyperplane) + qp->opt_m=1; + else + qp->opt_m=0; /* eq-constraint will be ignored */ + + /* init linear part of objective function */ + for(i=0;i<varnum;i++) { + qp->opt_g0[i]=lin[key[i]]; + } + + for(i=0;i<varnum;i++) { + ki=key[i]; + + /* Compute the matrix for equality constraints */ + qp->opt_ce[i]=label[ki]; + qp->opt_low[i]=0; + qp->opt_up[i]=learn_parm->svm_cost[ki]; + + kernel_temp=(double)kernel(kernel_parm,docs[ki],docs[ki]); + /* compute linear part of objective function */ + qp->opt_g0[i]-=(kernel_temp*a[ki]*(double)label[ki]); + /* compute quadratic part of objective function */ + qp->opt_g[varnum*i+i]=kernel_temp; + for(j=i+1;j<varnum;j++) { + kj=key[j]; + kernel_temp=(double)kernel(kernel_parm,docs[ki],docs[kj]); + /* compute linear part of objective function */ + qp->opt_g0[i]-=(kernel_temp*a[kj]*(double)label[kj]); + qp->opt_g0[j]-=(kernel_temp*a[ki]*(double)label[ki]); + /* compute quadratic part of objective function */ + qp->opt_g[varnum*i+j]=(double)label[ki]*(double)label[kj]*kernel_temp; + qp->opt_g[varnum*j+i]=(double)label[ki]*(double)label[kj]*kernel_temp; + } + + if(verbosity>=3) { + if(i % 20 == 0) { + fprintf(stdout,"%ld..",i); fflush(stdout); + } + } + } + + for(i=0;i<varnum;i++) { + /* assure starting at feasible point */ + qp->opt_xinit[i]=a[key[i]]; + /* set linear part of objective function */ + qp->opt_g0[i]=(learn_parm->eps-(double)label[key[i]]*c[key[i]])+qp->opt_g0[i]*(double)label[key[i]]; + } + + if(verbosity>=3) { + fprintf(stdout,"done\n"); + } +} + +long calculate_svm_model(DOC **docs, long int *label, long int *unlabeled, + double *lin, double *a, double *a_old, double *c, + LEARN_PARM *learn_parm, long int *working2dnum, + long int *active2dnum, MODEL *model) + /* Compute decision function based on current values */ + /* of alpha. 
*/ +{ + long i,ii,pos,b_calculated=0,first_low,first_high; + double ex_c,b_temp,b_low,b_high; + + if(verbosity>=3) { + printf("Calculating model..."); fflush(stdout); + } + + if(!learn_parm->biased_hyperplane) { + model->b=0; + b_calculated=1; + } + + for(ii=0;(i=working2dnum[ii])>=0;ii++) { + if((a_old[i]>0) && (a[i]==0)) { /* remove from model */ + pos=model->index[i]; + model->index[i]=-1; + (model->sv_num)--; + model->supvec[pos]=model->supvec[model->sv_num]; + model->alpha[pos]=model->alpha[model->sv_num]; + model->index[(model->supvec[pos])->docnum]=pos; + } + else if((a_old[i]==0) && (a[i]>0)) { /* add to model */ + model->supvec[model->sv_num]=docs[i]; + model->alpha[model->sv_num]=a[i]*(double)label[i]; + model->index[i]=model->sv_num; + (model->sv_num)++; + } + else if(a_old[i]==a[i]) { /* nothing to do */ + } + else { /* just update alpha */ + model->alpha[model->index[i]]=a[i]*(double)label[i]; + } + + ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; + if((a_old[i]>=ex_c) && (a[i]<ex_c)) { + (model->at_upper_bound)--; + } + else if((a_old[i]<ex_c) && (a[i]>=ex_c)) { + (model->at_upper_bound)++; + } + + if((!b_calculated) + && (a[i]>learn_parm->epsilon_a) && (a[i]<ex_c)) { /* calculate b */ + model->b=((double)label[i]*learn_parm->eps-c[i]+lin[i]); + /* model->b=(-(double)label[i]+lin[i]); */ + b_calculated=1; + } + } + + /* No alpha in the working set was strictly between the bounds, so b + was not calculated in the usual way. The following handles this + special case. */ + if(learn_parm->biased_hyperplane + && (!b_calculated) + && (model->sv_num-1 == model->at_upper_bound)) { + first_low=1; + first_high=1; + b_low=0; + b_high=0; + for(ii=0;(i=active2dnum[ii])>=0;ii++) { + ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; + if(a[i]<ex_c) { + if(label[i]>0) { + b_temp=-(learn_parm->eps-c[i]+lin[i]); + if((b_temp>b_low) || (first_low)) { + b_low=b_temp; + first_low=0; + } + } + else { + b_temp=-(-learn_parm->eps-c[i]+lin[i]); + if((b_temp<b_high) || (first_high)) { + b_high=b_temp; + first_high=0; + } + } + } + else { + if(label[i]<0) { + b_temp=-(-learn_parm->eps-c[i]+lin[i]); + if((b_temp>b_low) || (first_low)) { + b_low=b_temp; + first_low=0; + } + } + else { + b_temp=-(learn_parm->eps-c[i]+lin[i]); + if((b_temp<b_high) || (first_high)) { + b_high=b_temp; + first_high=0; + } + } + } + } + if(first_high) { + model->b=-b_low; + } + else if(first_low) { + model->b=-b_high; + } + else { + model->b=-(b_high+b_low)/2.0; /* select b as the middle of range */ + /* printf("\nb_low=%f, b_high=%f,b=%f\n",b_low,b_high,model->b); */ + } + } + + if(verbosity>=3) { + printf("done\n"); fflush(stdout); + } + + return(model->sv_num-1); /* have to subtract one, since element 0 is empty*/ +} + +long check_optimality(MODEL *model, long int *label, long int *unlabeled, + double *a, double *lin, double *c, long int totdoc, + LEARN_PARM *learn_parm, double *maxdiff, + double epsilon_crit_org, long int *misclassified, + long int *inconsistent, long int *active2dnum, + long int *last_suboptimal_at, + long int iteration, KERNEL_PARM *kernel_parm) + /* Check KT-conditions */ +{ + long i,ii,retrain; + double dist,ex_c,target; + + if(kernel_parm->kernel_type == LINEAR) { /* be optimistic */ + learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org; + } + else { /* be conservative */ + learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3; + } + retrain=0; + (*maxdiff)=0; + (*misclassified)=0; + for(ii=0;(i=active2dnum[ii])>=0;ii++) { + if((!inconsistent[i]) && label[i]) { + 
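/* KT conditions checked below: a[i]>0 requires dist<=target and a[i]<ex_c requires dist>=target (both up to epsilon_crit), which together force dist=target for unbounded support vectors; target is the required margin (1 in the standard classification setup). */ +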
dist=(lin[i]-model->b)*(double)label[i];/* 'distance' from + hyperplane*/ + target=-(learn_parm->eps-(double)label[i]*c[i]); + ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; + if(dist <= 0) { + (*misclassified)++; /* does not work due to deactivation of var */ + } + if((a[i]>learn_parm->epsilon_a) && (dist > target)) { + if((dist-target)>(*maxdiff)) /* largest violation */ + (*maxdiff)=dist-target; + } + else if((a[i]<ex_c) && (dist < target)) { + if((target-dist)>(*maxdiff)) /* largest violation */ + (*maxdiff)=target-dist; + } + /* Count how long a variable was at lower/upper bound (and optimal).*/ + /* Variables, which were at the bound and optimal for a long */ + /* time are unlikely to become support vectors. In case our */ + /* cache is filled up, those variables are excluded to save */ + /* kernel evaluations. (See chapter 'Shrinking').*/ + if((a[i]>(learn_parm->epsilon_a)) + && (a[i]<ex_c)) { + last_suboptimal_at[i]=iteration; /* not at bound */ + } + else if((a[i]<=(learn_parm->epsilon_a)) + && (dist < (target+learn_parm->epsilon_shrink))) { + last_suboptimal_at[i]=iteration; /* not likely optimal */ + } + else if((a[i]>=ex_c) + && (dist > (target-learn_parm->epsilon_shrink))) { + last_suboptimal_at[i]=iteration; /* not likely optimal */ + } + } + } + /* termination criterion */ + if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) { + retrain=1; + } + return(retrain); +} + +long check_optimality_sharedslack(DOC **docs, MODEL *model, long int *label, + double *a, double *lin, double *c, double *slack, + double *alphaslack, + long int totdoc, + LEARN_PARM *learn_parm, double *maxdiff, + double epsilon_crit_org, long int *misclassified, + long int *active2dnum, + long int *last_suboptimal_at, + long int iteration, KERNEL_PARM *kernel_parm) + /* Check KT-conditions */ +{ + long i,ii,retrain; + double dist,ex_c=0,target; + + if(kernel_parm->kernel_type == LINEAR) { /* be optimistic */ + learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org; + } + else { /* be conservative */ + learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3; + } + + retrain=0; + (*maxdiff)=0; + (*misclassified)=0; + for(ii=0;(i=active2dnum[ii])>=0;ii++) { + /* 'distance' from hyperplane*/ + dist=(lin[i]-model->b)*(double)label[i]+slack[docs[i]->slackid]; + target=-(learn_parm->eps-(double)label[i]*c[i]); + ex_c=learn_parm->svm_c-learn_parm->epsilon_a; + if((a[i]>learn_parm->epsilon_a) && (dist > target)) { + if((dist-target)>(*maxdiff)) { /* largest violation */ + (*maxdiff)=dist-target; + if(verbosity>=5) printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, alphaslack=%f\n",docs[i]->slackid,dist,target,slack[docs[i]->slackid],a[i],alphaslack[docs[i]->slackid]); + if(verbosity>=5) printf(" (single %f)\n",(*maxdiff)); + } + } + if((alphaslack[docs[i]->slackid]<ex_c) && (slack[docs[i]->slackid]>0)) { + if((slack[docs[i]->slackid])>(*maxdiff)) { /* largest violation */ + (*maxdiff)=slack[docs[i]->slackid]; + if(verbosity>=5) printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, alphaslack=%f\n",docs[i]->slackid,dist,target,slack[docs[i]->slackid],a[i],alphaslack[docs[i]->slackid]); + if(verbosity>=5) printf(" (joint %f)\n",(*maxdiff)); + } + } + /* Count how long a variable was at lower/upper bound (and optimal).*/ + /* Variables, which were at the bound and optimal for a long */ + /* time are unlikely to become support vectors. In case our */ + /* cache is filled up, those variables are excluded to save */ + /* kernel evaluations. 
(See chapter 'Shrinking').*/ + if((a[i]>(learn_parm->epsilon_a)) + && (a[i]<ex_c)) { + last_suboptimal_at[docs[i]->slackid]=iteration; /* not at bound */ + } + else if((a[i]<=(learn_parm->epsilon_a)) + && (dist < (target+learn_parm->epsilon_shrink))) { + last_suboptimal_at[docs[i]->slackid]=iteration; /* not likely optimal */ + } + else if((a[i]>=ex_c) + && (slack[docs[i]->slackid] < learn_parm->epsilon_shrink)) { + last_suboptimal_at[docs[i]->slackid]=iteration; /* not likely optimal */ + } + } + /* termination criterion */ + if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) { + retrain=1; + } + return(retrain); +} + +void compute_shared_slacks(DOC **docs, long int *label, + double *a, double *lin, + double *c, long int *active2dnum, + LEARN_PARM *learn_parm, + double *slack, double *alphaslack) + /* compute the value of shared slacks and the joint alphas */ +{ + long jj,i; + double dist,target; + + for(jj=0;(i=active2dnum[jj])>=0;jj++) { /* clear slack variables */ + slack[docs[i]->slackid]=0.0; + alphaslack[docs[i]->slackid]=0.0; + } + for(jj=0;(i=active2dnum[jj])>=0;jj++) { /* recompute slack variables */ + dist=(lin[i])*(double)label[i]; + target=-(learn_parm->eps-(double)label[i]*c[i]); + if((target-dist) > slack[docs[i]->slackid]) + slack[docs[i]->slackid]=target-dist; + alphaslack[docs[i]->slackid]+=a[i]; + } +} + + +long identify_inconsistent(double *a, long int *label, + long int *unlabeled, long int totdoc, + LEARN_PARM *learn_parm, + long int *inconsistentnum, long int *inconsistent) +{ + long i,retrain; + + /* Throw out examples with multipliers at upper bound. This */ + /* corresponds to the -i 1 option. */ + /* ATTENTION: this is just a heuristic for finding a close */ + /* to minimum number of examples to exclude to */ + /* make the problem separable with desired margin */ + retrain=0; + for(i=0;i<totdoc;i++) { + if((!inconsistent[i]) && (!unlabeled[i]) + && (a[i]>=(learn_parm->svm_cost[i]-learn_parm->epsilon_a))) { + (*inconsistentnum)++; + inconsistent[i]=1; /* never choose again */ + retrain=2; /* start over */ + if(verbosity>=3) { + printf("inconsistent(%ld)..",i); fflush(stdout); + } + } + } + return(retrain); +} + +long identify_misclassified(double *lin, long int *label, + long int *unlabeled, long int totdoc, + MODEL *model, long int *inconsistentnum, + long int *inconsistent) +{ + long i,retrain; + double dist; + + /* Throw out misclassified examples. This */ + /* corresponds to the -i 2 option. */ + /* ATTENTION: this is just a heuristic for finding a close */ + /* to minimum number of examples to exclude to */ + /* make the problem separable with desired margin */ + retrain=0; + for(i=0;i<totdoc;i++) { + dist=(lin[i]-model->b)*(double)label[i]; /* 'distance' from hyperplane*/ + if((!inconsistent[i]) && (!unlabeled[i]) && (dist <= 0)) { + (*inconsistentnum)++; + inconsistent[i]=1; /* never choose again */ + retrain=2; /* start over */ + if(verbosity>=3) { + printf("inconsistent(%ld)..",i); fflush(stdout); + } + } + } + return(retrain); +} + +long identify_one_misclassified(double *lin, long int *label, + long int *unlabeled, + long int totdoc, MODEL *model, + long int *inconsistentnum, + long int *inconsistent) +{ + long i,retrain,maxex=-1; + double dist,maxdist=0; + + /* Throw out the 'most misclassified' example. This */ + /* corresponds to the -i 3 option. 
*/ + /* ATTENTION: this is just a heuristic for finding a close */ + /* to minimum number of examples to exclude to */ + /* make the problem separable with desired margin */ + retrain=0; + for(i=0;i<totdoc;i++) { + if((!inconsistent[i]) && (!unlabeled[i])) { + dist=(lin[i]-model->b)*(double)label[i];/* 'distance' from hyperplane*/ + if(dist<maxdist) { + maxdist=dist; + maxex=i; + } + } + } + if(maxex>=0) { + (*inconsistentnum)++; + inconsistent[maxex]=1; /* never choose again */ + retrain=2; /* start over */ + if(verbosity>=3) { + printf("inconsistent(%ld)..",maxex); fflush(stdout); + } + } + return(retrain); +} + +void update_linear_component(DOC **docs, long int *label, + long int *active2dnum, double *a, + double *a_old, long int *working2dnum, + long int totdoc, long int totwords, + KERNEL_PARM *kernel_parm, + KERNEL_CACHE *kernel_cache, + double *lin, CFLOAT *aicache, double *weights) + /* keep track of the linear component */ + /* lin of the gradient etc. by updating */ + /* based on the change of the variables */ + /* in the current working set */ +{ + register long i,ii,j,jj; + register double tec; + SVECTOR *f; + + if(kernel_parm->kernel_type==0) { /* special linear case */ + clear_vector_n(weights,totwords); + for(ii=0;(i=working2dnum[ii])>=0;ii++) { + if(a[i] != a_old[i]) { + for(f=docs[i]->fvec;f;f=f->next) + add_vector_ns(weights,f, + f->factor*((a[i]-a_old[i])*(double)label[i])); + } + } + for(jj=0;(j=active2dnum[jj])>=0;jj++) { + for(f=docs[j]->fvec;f;f=f->next) + lin[j]+=f->factor*sprod_ns(weights,f); + } + } + else { /* general case */ + for(jj=0;(i=working2dnum[jj])>=0;jj++) { + if(a[i] != a_old[i]) { + get_kernel_row(kernel_cache,docs,i,totdoc,active2dnum,aicache, + kernel_parm); + for(ii=0;(j=active2dnum[ii])>=0;ii++) { + tec=aicache[j]; + lin[j]+=(((a[i]*tec)-(a_old[i]*tec))*(double)label[i]); + } + } + } + } +} + + +long incorporate_unlabeled_examples(MODEL *model, long int *label, + long int *inconsistent, + long int *unlabeled, + double *a, double *lin, + long int totdoc, double *selcrit, + long int *select, long int *key, + long int transductcycle, + KERNEL_PARM *kernel_parm, + LEARN_PARM *learn_parm) +{ + long i,j,k,j1,j2,j3,j4,unsupaddnum1=0,unsupaddnum2=0; + long pos,neg,upos,uneg,orgpos,orgneg,nolabel,newpos,newneg,allunlab; + double dist,model_length,posratio,negratio; + long check_every=2; + double loss; + static double switchsens=0.0,switchsensorg=0.0; + double umin,umax,sumalpha; + long imin=0,imax=0; + static long switchnum=0; + + switchsens/=1.2; + + /* assumes that lin[] is up to date -> no inactive vars */ + + orgpos=0; + orgneg=0; + newpos=0; + newneg=0; + nolabel=0; + allunlab=0; + for(i=0;i<totdoc;i++) { + if(!unlabeled[i]) { + if(label[i] > 0) { + orgpos++; + } + else { + orgneg++; + } + } + else { + allunlab++; + if(unlabeled[i]) { + if(label[i] > 0) { + newpos++; + } + else if(label[i] < 0) { + newneg++; + } + } + } + if(label[i]==0) { + nolabel++; + } + } + + if(learn_parm->transduction_posratio >= 0) { + posratio=learn_parm->transduction_posratio; + } + else { + posratio=(double)orgpos/(double)(orgpos+orgneg); /* use ratio of pos/neg */ + } /* in training data */ + negratio=1.0-posratio; + + learn_parm->svm_costratio=1.0; /* global */ + if(posratio>0) { + learn_parm->svm_costratio_unlab=negratio/posratio; + } + else { + learn_parm->svm_costratio_unlab=1.0; + } + + pos=0; + neg=0; + upos=0; + uneg=0; + for(i=0;i<totdoc;i++) { + dist=(lin[i]-model->b); /* 'distance' from hyperplane*/ + if(dist>0) { + pos++; + } + else { + neg++; + } + if(unlabeled[i]) 
{ + if(dist>0) { + upos++; + } + else { + uneg++; + } + } + if((!unlabeled[i]) && (a[i]>(learn_parm->svm_cost[i]-learn_parm->epsilon_a))) { + /* printf("Ubounded %ld (class %ld, unlabeled %ld)\n",i,label[i],unlabeled[i]); */ + } + } + if(verbosity>=2) { + printf("POS=%ld, ORGPOS=%ld, ORGNEG=%ld\n",pos,orgpos,orgneg); + printf("POS=%ld, NEWPOS=%ld, NEWNEG=%ld\n",pos,newpos,newneg); + printf("pos ratio = %f (%f).\n",(double)(upos)/(double)(allunlab),posratio); + fflush(stdout); + } + + if(transductcycle == 0) { + j1=0; + j2=0; + j4=0; + for(i=0;i<totdoc;i++) { + dist=(lin[i]-model->b); /* 'distance' from hyperplane*/ + if((label[i]==0) && (unlabeled[i])) { + selcrit[j4]=dist; + key[j4]=i; + j4++; + } + } + unsupaddnum1=0; + unsupaddnum2=0; + select_top_n(selcrit,j4,select,(long)(allunlab*posratio+0.5)); + for(k=0;(k<(long)(allunlab*posratio+0.5));k++) { + i=key[select[k]]; + label[i]=1; + unsupaddnum1++; + j1++; + } + for(i=0;i<totdoc;i++) { + if((label[i]==0) && (unlabeled[i])) { + label[i]=-1; + j2++; + unsupaddnum2++; + } + } + for(i=0;i<totdoc;i++) { /* set upper bounds on vars */ + if(unlabeled[i]) { + if(label[i] == 1) { + learn_parm->svm_cost[i]=learn_parm->svm_c* + learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound; + } + else if(label[i] == -1) { + learn_parm->svm_cost[i]=learn_parm->svm_c* + learn_parm->svm_unlabbound; + } + } + } + if(verbosity>=1) { + /* printf("costratio %f, costratio_unlab %f, unlabbound %f\n", + learn_parm->svm_costratio,learn_parm->svm_costratio_unlab, + learn_parm->svm_unlabbound); */ + printf("Classifying unlabeled data as %ld POS / %ld NEG.\n", + unsupaddnum1,unsupaddnum2); + fflush(stdout); + } + if(verbosity >= 1) + printf("Retraining."); + if(verbosity >= 2) printf("\n"); + return((long)3); + } + if((transductcycle % check_every) == 0) { + if(verbosity >= 1) + printf("Retraining."); + if(verbosity >= 2) printf("\n"); + j1=0; + j2=0; + unsupaddnum1=0; + unsupaddnum2=0; + for(i=0;i<totdoc;i++) { + if((unlabeled[i] == 2)) { + unlabeled[i]=1; + label[i]=1; + j1++; + unsupaddnum1++; + } + else if((unlabeled[i] == 3)) { + unlabeled[i]=1; + label[i]=-1; + j2++; + unsupaddnum2++; + } + } + for(i=0;i<totdoc;i++) { /* set upper bounds on vars */ + if(unlabeled[i]) { + if(label[i] == 1) { + learn_parm->svm_cost[i]=learn_parm->svm_c* + learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound; + } + else if(label[i] == -1) { + learn_parm->svm_cost[i]=learn_parm->svm_c* + learn_parm->svm_unlabbound; + } + } + } + + if(verbosity>=2) { + /* printf("costratio %f, costratio_unlab %f, unlabbound %f\n", + learn_parm->svm_costratio,learn_parm->svm_costratio_unlab, + learn_parm->svm_unlabbound); */ + printf("%ld positive -> Added %ld POS / %ld NEG unlabeled examples.\n", + upos,unsupaddnum1,unsupaddnum2); + fflush(stdout); + } + + if(learn_parm->svm_unlabbound == 1) { + learn_parm->epsilon_crit=0.001; /* do the last run right */ + } + else { + learn_parm->epsilon_crit=0.01; /* otherwise, no need to be so picky */ + } + + return((long)3); + } + else if(((transductcycle % check_every) < check_every)) { + model_length=0; + sumalpha=0; + loss=0; + for(i=0;i<totdoc;i++) { + model_length+=a[i]*label[i]*lin[i]; + sumalpha+=a[i]; + dist=(lin[i]-model->b); /* 'distance' from hyperplane*/ + if((label[i]*dist)<(1.0-learn_parm->epsilon_crit)) { + loss+=(1.0-(label[i]*dist))*learn_parm->svm_cost[i]; + } + } + model_length=sqrt(model_length); + if(verbosity>=2) { + printf("Model-length = %f (%f), loss = %f, objective = %f\n", + 
model_length,sumalpha,loss,loss+0.5*model_length*model_length); + fflush(stdout); + } + j1=0; + j2=0; + j3=0; + j4=0; + unsupaddnum1=0; + unsupaddnum2=0; + umin=99999; + umax=-99999; + j4=1; + while(j4) { + umin=99999; + umax=-99999; + for(i=0;(i<totdoc);i++) { + dist=(lin[i]-model->b); + if((label[i]>0) && (unlabeled[i]) && (!inconsistent[i]) + && (dist<umin)) { + umin=dist; + imin=i; + } + if((label[i]<0) && (unlabeled[i]) && (!inconsistent[i]) + && (dist>umax)) { + umax=dist; + imax=i; + } + } + if((umin < (umax+switchsens-1E-4))) { + j1++; + j2++; + unsupaddnum1++; + unlabeled[imin]=3; + inconsistent[imin]=1; + unsupaddnum2++; + unlabeled[imax]=2; + inconsistent[imax]=1; + } + else + j4=0; + j4=0; + } + for(j=0;(j<totdoc);j++) { + if(unlabeled[j] && (!inconsistent[j])) { + if(label[j]>0) { + unlabeled[j]=2; + } + else if(label[j]<0) { + unlabeled[j]=3; + } + /* inconsistent[j]=1; */ + j3++; + } + } + switchnum+=unsupaddnum1+unsupaddnum2; + + /* stop and print out current margin + printf("switchnum %ld %ld\n",switchnum,kernel_parm->poly_degree); + if(switchnum == 2*kernel_parm->poly_degree) { + learn_parm->svm_unlabbound=1; + } + */ + + if((!unsupaddnum1) && (!unsupaddnum2)) { + if((learn_parm->svm_unlabbound>=1) && ((newpos+newneg) == allunlab)) { + for(j=0;(j<totdoc);j++) { + inconsistent[j]=0; + if(unlabeled[j]) unlabeled[j]=1; + } + write_prediction(learn_parm->predfile,model,lin,a,unlabeled,label, + totdoc,learn_parm); + if(verbosity>=1) + printf("Number of switches: %ld\n",switchnum); + return((long)0); + } + switchsens=switchsensorg; + learn_parm->svm_unlabbound*=1.5; + if(learn_parm->svm_unlabbound>1) { + learn_parm->svm_unlabbound=1; + } + model->at_upper_bound=0; /* since upper bound increased */ + if(verbosity>=1) + printf("Increasing influence of unlabeled examples to %f%% .", + learn_parm->svm_unlabbound*100.0); + } + else if(verbosity>=1) { + printf("%ld positive -> Switching labels of %ld POS / %ld NEG unlabeled examples.", + upos,unsupaddnum1,unsupaddnum2); + fflush(stdout); + } + + if(verbosity >= 2) printf("\n"); + + learn_parm->epsilon_crit=0.5; /* don't need to be so picky */ + + for(i=0;i<totdoc;i++) { /* set upper bounds on vars */ + if(unlabeled[i]) { + if(label[i] == 1) { + learn_parm->svm_cost[i]=learn_parm->svm_c* + learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound; + } + else if(label[i] == -1) { + learn_parm->svm_cost[i]=learn_parm->svm_c* + learn_parm->svm_unlabbound; + } + } + } + + return((long)2); + } + + return((long)0); +} + +/*************************** Working set selection ***************************/ + +long select_next_qp_subproblem_grad(long int *label, + long int *unlabeled, + double *a, double *lin, + double *c, long int totdoc, + long int qp_size, + LEARN_PARM *learn_parm, + long int *inconsistent, + long int *active2dnum, + long int *working2dnum, + double *selcrit, + long int *select, + KERNEL_CACHE *kernel_cache, + long int cache_only, + long int *key, long int *chosen) + /* Use the feasible direction approach to select the next + qp-subproblem (see chapter 'Selecting a good working set'). If + 'cache_only' is true, then the variables are selected only among + those for which the kernel evaluations are cached. 
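+ Selection sketch: with g_i=(eps-y_i*c_i)+y_i*lin[i] the gradient of the dual objective with respect to a_i, the first pass below picks the qp_size/2 feasible examples with the largest y_i*g_i and the second pass those with the largest -y_i*g_i; feasible means a_i can still move in the tried direction without leaving its bounds. 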
*/ +{ + long choosenum,i,j,k,activedoc,inum,valid; + double s; + + for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */ + choosenum=0; + activedoc=0; + for(i=0;(j=active2dnum[i])>=0;i++) { + s=-label[j]; + if(kernel_cache && cache_only) + valid=(kernel_cache->index[j]>=0); + else + valid=1; + if(valid + && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0))) + && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) + && (s>0))) + && (!chosen[j]) + && (label[j]) + && (!inconsistent[j])) + { + selcrit[activedoc]=(double)label[j]*(learn_parm->eps-(double)label[j]*c[j]+(double)label[j]*lin[j]); + /* selcrit[activedoc]=(double)label[j]*(-1.0+(double)label[j]*lin[j]); */ + key[activedoc]=j; + activedoc++; + } + } + select_top_n(selcrit,activedoc,select,(long)(qp_size/2)); + for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) { + /* if(learn_parm->biased_hyperplane || (selcrit[select[k]] > 0)) { */ + i=key[select[k]]; + chosen[i]=1; + working2dnum[inum+choosenum]=i; + choosenum+=1; + if(kernel_cache) + kernel_cache_touch(kernel_cache,i); /* make sure it does not get + kicked out of cache */ + /* } */ + } + + activedoc=0; + for(i=0;(j=active2dnum[i])>=0;i++) { + s=label[j]; + if(kernel_cache && cache_only) + valid=(kernel_cache->index[j]>=0); + else + valid=1; + if(valid + && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0))) + && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) + && (s>0))) + && (!chosen[j]) + && (label[j]) + && (!inconsistent[j])) + { + selcrit[activedoc]=-(double)label[j]*(learn_parm->eps-(double)label[j]*c[j]+(double)label[j]*lin[j]); + /* selcrit[activedoc]=-(double)(label[j]*(-1.0+(double)label[j]*lin[j])); */ + key[activedoc]=j; + activedoc++; + } + } + select_top_n(selcrit,activedoc,select,(long)(qp_size/2)); + for(k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) { + /* if(learn_parm->biased_hyperplane || (selcrit[select[k]] > 0)) { */ + i=key[select[k]]; + chosen[i]=1; + working2dnum[inum+choosenum]=i; + choosenum+=1; + if(kernel_cache) + kernel_cache_touch(kernel_cache,i); /* make sure it does not get + kicked out of cache */ + /* } */ + } + working2dnum[inum+choosenum]=-1; /* complete index */ + return(choosenum); +} + +long select_next_qp_subproblem_rand(long int *label, + long int *unlabeled, + double *a, double *lin, + double *c, long int totdoc, + long int qp_size, + LEARN_PARM *learn_parm, + long int *inconsistent, + long int *active2dnum, + long int *working2dnum, + double *selcrit, + long int *select, + KERNEL_CACHE *kernel_cache, + long int *key, + long int *chosen, + long int iteration) +/* Use the feasible direction approach to select the next + qp-subproblem (see section 'Selecting a good working set'). Chooses + a feasible direction at (pseudo) random to help jump over numerical + problems. 
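+ The ranking used below is simply selcrit=(j+iteration)%totdoc, a rotating, iteration-dependent preference instead of a gradient-based one; feasibility of each candidate is checked exactly as in the gradient-based selection. 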
*/ +{ + long choosenum,i,j,k,activedoc,inum; + double s; + + for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */ + choosenum=0; + activedoc=0; + for(i=0;(j=active2dnum[i])>=0;i++) { + s=-label[j]; + if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0))) + && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) + && (s>0))) + && (!inconsistent[j]) + && (label[j]) + && (!chosen[j])) { + selcrit[activedoc]=(j+iteration) % totdoc; + key[activedoc]=j; + activedoc++; + } + } + select_top_n(selcrit,activedoc,select,(long)(qp_size/2)); + for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) { + i=key[select[k]]; + chosen[i]=1; + working2dnum[inum+choosenum]=i; + choosenum+=1; + kernel_cache_touch(kernel_cache,i); /* make sure it does not get kicked */ + /* out of cache */ + } + + activedoc=0; + for(i=0;(j=active2dnum[i])>=0;i++) { + s=label[j]; + if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0))) + && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) + && (s>0))) + && (!inconsistent[j]) + && (label[j]) + && (!chosen[j])) { + selcrit[activedoc]=(j+iteration) % totdoc; + key[activedoc]=j; + activedoc++; + } + } + select_top_n(selcrit,activedoc,select,(long)(qp_size/2)); + for(k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) { + i=key[select[k]]; + chosen[i]=1; + working2dnum[inum+choosenum]=i; + choosenum+=1; + kernel_cache_touch(kernel_cache,i); /* make sure it does not get kicked */ + /* out of cache */ + } + working2dnum[inum+choosenum]=-1; /* complete index */ + return(choosenum); +} + +long select_next_qp_slackset(DOC **docs, long int *label, + double *a, double *lin, + double *slack, double *alphaslack, + double *c, + LEARN_PARM *learn_parm, + long int *active2dnum, double *maxviol) + /* returns the slackset with the largest internal violation */ +{ + long i,ii,maxdiffid; + double dist,target,maxdiff,ex_c; + + maxdiff=0; + maxdiffid=0; + for(ii=0;(i=active2dnum[ii])>=0;ii++) { + ex_c=learn_parm->svm_c-learn_parm->epsilon_a; + if(alphaslack[docs[i]->slackid] >= ex_c) { + dist=(lin[i])*(double)label[i]+slack[docs[i]->slackid]; /* distance */ + target=-(learn_parm->eps-(double)label[i]*c[i]); /* rhs of constraint */ + if((a[i]>learn_parm->epsilon_a) && (dist > target)) { + if((dist-target)>maxdiff) { /* largest violation */ + maxdiff=dist-target; + maxdiffid=docs[i]->slackid; + } + } + } + } + (*maxviol)=maxdiff; + return(maxdiffid); +} + + +void select_top_n(double *selcrit, long int range, long int *select, + long int n) +{ + register long i,j; + + for(i=0;(i<n) && (i<range);i++) { /* Initialize with the first n elements */ + for(j=i;j>=0;j--) { + if((j>0) && (selcrit[select[j-1]]<selcrit[i])){ + select[j]=select[j-1]; + } + else { + select[j]=i; + j=-1; + } + } + } + if(n>0) { + for(i=n;i<range;i++) { + if(selcrit[i]>selcrit[select[n-1]]) { + for(j=n-1;j>=0;j--) { + if((j>0) && (selcrit[select[j-1]]<selcrit[i])) { + select[j]=select[j-1]; + } + else { + select[j]=i; + j=-1; + } + } + } + } + } +} + + +/******************************** Shrinking *********************************/ + +void init_shrink_state(SHRINK_STATE *shrink_state, long int totdoc, + long int maxhistory) +{ + long i; + + shrink_state->deactnum=0; + shrink_state->active = (long *)my_malloc(sizeof(long)*totdoc); + shrink_state->inactive_since = (long *)my_malloc(sizeof(long)*totdoc); + shrink_state->a_history = (double **)my_malloc(sizeof(double *)*maxhistory); + shrink_state->maxhistory=maxhistory; + shrink_state->last_lin = (double *)my_malloc(sizeof(double)*totdoc); 
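+ /* a_history[t] stores a snapshot of all alphas taken when shrinking + step t deactivated variables (kept for non-linear kernels only); + reactivate_inactive_examples() replays these snapshots to bring lin[] + up to date */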
+ shrink_state->last_a = (double *)my_malloc(sizeof(double)*totdoc); + + for(i=0;i<totdoc;i++) { + shrink_state->active[i]=1; + shrink_state->inactive_since[i]=0; + shrink_state->last_a[i]=0; + shrink_state->last_lin[i]=0; + } +} + +void shrink_state_cleanup(SHRINK_STATE *shrink_state) +{ + free(shrink_state->active); + free(shrink_state->inactive_since); + if(shrink_state->deactnum > 0) + free(shrink_state->a_history[shrink_state->deactnum-1]); + free(shrink_state->a_history); + free(shrink_state->last_a); + free(shrink_state->last_lin); +} + +long shrink_problem(DOC **docs, + LEARN_PARM *learn_parm, + SHRINK_STATE *shrink_state, + KERNEL_PARM *kernel_parm, + long int *active2dnum, + long int *last_suboptimal_at, + long int iteration, + long int totdoc, + long int minshrink, + double *a, + long int *inconsistent) + /* Shrink some variables away. Do the shrinking only if at least + minshrink variables can be removed. */ +{ + long i,ii,change,activenum,lastiter; + double *a_old; + + activenum=0; + change=0; + for(ii=0;active2dnum[ii]>=0;ii++) { + i=active2dnum[ii]; + activenum++; + if(learn_parm->sharedslack) + lastiter=last_suboptimal_at[docs[i]->slackid]; + else + lastiter=last_suboptimal_at[i]; + if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink) + || (inconsistent[i])) { + change++; + } + } + if((change>=minshrink) /* shrink only if sufficiently many candidates */ + && (shrink_state->deactnum<shrink_state->maxhistory)) { /* and enough memory */ + /* Shrink problem by removing those variables which are */ + /* optimal at a bound for a minimum number of iterations */ + if(verbosity>=2) { + printf(" Shrinking..."); fflush(stdout); + } + if(kernel_parm->kernel_type != LINEAR) { /* non-linear case save alphas */ + a_old=(double *)my_malloc(sizeof(double)*totdoc); + shrink_state->a_history[shrink_state->deactnum]=a_old; + for(i=0;i<totdoc;i++) { + a_old[i]=a[i]; + } + } + for(ii=0;active2dnum[ii]>=0;ii++) { + i=active2dnum[ii]; + if(learn_parm->sharedslack) + lastiter=last_suboptimal_at[docs[i]->slackid]; + else + lastiter=last_suboptimal_at[i]; + if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink) + || (inconsistent[i])) { + shrink_state->active[i]=0; + shrink_state->inactive_since[i]=shrink_state->deactnum; + } + } + activenum=compute_index(shrink_state->active,totdoc,active2dnum); + shrink_state->deactnum++; + if(kernel_parm->kernel_type == LINEAR) { + shrink_state->deactnum=0; + } + if(verbosity>=2) { + printf("done.\n"); fflush(stdout); + printf(" Number of inactive variables = %ld\n",totdoc-activenum); + } + } + return(activenum); +} + + +void reactivate_inactive_examples(long int *label, + long int *unlabeled, + double *a, + SHRINK_STATE *shrink_state, + double *lin, + double *c, + long int totdoc, + long int totwords, + long int iteration, + LEARN_PARM *learn_parm, + long int *inconsistent, + DOC **docs, + KERNEL_PARM *kernel_parm, + KERNEL_CACHE *kernel_cache, + MODEL *model, + CFLOAT *aicache, + double *weights, + double *maxdiff) + /* Make all variables active again which had been removed by + shrinking. */ + /* Computes lin for those variables from scratch. 
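+ Linear case: lin[] is updated from the weight-vector delta accumulated in last_a/last_lin since the last reactivation. Non-linear case: the alpha snapshots in a_history are replayed from the most recent deactivation epoch backwards, adding the kernel contributions the inactive examples missed. 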
*/ +{ + register long i,j,ii,jj,t,*changed2dnum,*inactive2dnum; + long *changed,*inactive; + register double kernel_val,*a_old,dist; + double ex_c,target; + SVECTOR *f; + + if(kernel_parm->kernel_type == LINEAR) { /* special linear case */ + a_old=shrink_state->last_a; + clear_vector_n(weights,totwords); + for(i=0;i<totdoc;i++) { + if(a[i] != a_old[i]) { + for(f=docs[i]->fvec;f;f=f->next) + add_vector_ns(weights,f, + f->factor*((a[i]-a_old[i])*(double)label[i])); + a_old[i]=a[i]; + } + } + for(i=0;i<totdoc;i++) { + if(!shrink_state->active[i]) { + for(f=docs[i]->fvec;f;f=f->next) + lin[i]=shrink_state->last_lin[i]+f->factor*sprod_ns(weights,f); + } + shrink_state->last_lin[i]=lin[i]; + } + } + else { + changed=(long *)my_malloc(sizeof(long)*totdoc); + changed2dnum=(long *)my_malloc(sizeof(long)*(totdoc+11)); + inactive=(long *)my_malloc(sizeof(long)*totdoc); + inactive2dnum=(long *)my_malloc(sizeof(long)*(totdoc+11)); + for(t=shrink_state->deactnum-1;(t>=0) && shrink_state->a_history[t];t--) { + if(verbosity>=2) { + printf("%ld..",t); fflush(stdout); + } + a_old=shrink_state->a_history[t]; + for(i=0;i<totdoc;i++) { + inactive[i]=((!shrink_state->active[i]) + && (shrink_state->inactive_since[i] == t)); + changed[i]= (a[i] != a_old[i]); + } + compute_index(inactive,totdoc,inactive2dnum); + compute_index(changed,totdoc,changed2dnum); + + for(ii=0;(i=changed2dnum[ii])>=0;ii++) { + get_kernel_row(kernel_cache,docs,i,totdoc,inactive2dnum,aicache, + kernel_parm); + for(jj=0;(j=inactive2dnum[jj])>=0;jj++) { + kernel_val=aicache[j]; + lin[j]+=(((a[i]*kernel_val)-(a_old[i]*kernel_val))*(double)label[i]); + } + } + } + free(changed); + free(changed2dnum); + free(inactive); + free(inactive2dnum); + } + (*maxdiff)=0; + for(i=0;i<totdoc;i++) { + shrink_state->inactive_since[i]=shrink_state->deactnum-1; + if(!inconsistent[i]) { + dist=(lin[i]-model->b)*(double)label[i]; + target=-(learn_parm->eps-(double)label[i]*c[i]); + ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; + if((a[i]>learn_parm->epsilon_a) && (dist > target)) { + if((dist-target)>(*maxdiff)) /* largest violation */ + (*maxdiff)=dist-target; + } + else if((a[i]<ex_c) && (dist < target)) { + if((target-dist)>(*maxdiff)) /* largest violation */ + (*maxdiff)=target-dist; + } + if((a[i]>(0+learn_parm->epsilon_a)) + && (a[i]<ex_c)) { + shrink_state->active[i]=1; /* not at bound */ + } + else if((a[i]<=(0+learn_parm->epsilon_a)) && (dist < (target+learn_parm->epsilon_shrink))) { + shrink_state->active[i]=1; + } + else if((a[i]>=ex_c) + && (dist > (target-learn_parm->epsilon_shrink))) { + shrink_state->active[i]=1; + } + else if(learn_parm->sharedslack) { /* make all active when sharedslack */ + shrink_state->active[i]=1; + } + } + } + if(kernel_parm->kernel_type != LINEAR) { /* update history for non-linear */ + for(i=0;i<totdoc;i++) { + (shrink_state->a_history[shrink_state->deactnum-1])[i]=a[i]; + } + for(t=shrink_state->deactnum-2;(t>=0) && shrink_state->a_history[t];t--) { + free(shrink_state->a_history[t]); + shrink_state->a_history[t]=0; + } + } +} + +/****************************** Cache handling *******************************/ + +void get_kernel_row(KERNEL_CACHE *kernel_cache, DOC **docs, + long int docnum, long int totdoc, + long int *active2dnum, CFLOAT *buffer, + KERNEL_PARM *kernel_parm) + /* Gets a row of the matrix of kernel values. This matrix has the + same form as the Hessian, just that the elements are not + multiplied by */ + /* y_i * y_j * a_i * a_j */ + /* Takes the values from the cache if available. 
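+ Cache layout assumed here: buffer holds up to max_elems rows of activenum CFLOATs each; index[docnum] gives the row slot of a document (-1 if uncached) and totdoc2active[] maps a document id to its column (-1 once the column was shrunk away, in which case the kernel value is recomputed on the fly). 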
*/ +{ + register long i,j,start; + DOC *ex; + + ex=docs[docnum]; + + if(kernel_cache->index[docnum] != -1) { /* row is cached? */ + kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */ + start=kernel_cache->activenum*kernel_cache->index[docnum]; + for(i=0;(j=active2dnum[i])>=0;i++) { + if(kernel_cache->totdoc2active[j] >= 0) { /* column is cached? */ + buffer[j]=kernel_cache->buffer[start+kernel_cache->totdoc2active[j]]; + } + else { + buffer[j]=(CFLOAT)kernel(kernel_parm,ex,docs[j]); + } + } + } + else { + for(i=0;(j=active2dnum[i])>=0;i++) { + buffer[j]=(CFLOAT)kernel(kernel_parm,ex,docs[j]); + } + } +} + + +void cache_kernel_row(KERNEL_CACHE *kernel_cache, DOC **docs, + long int m, KERNEL_PARM *kernel_parm) + /* Fills cache for the row m */ +{ + register DOC *ex; + register long j,k,l; + register CFLOAT *cache; + + if(!kernel_cache_check(kernel_cache,m)) { /* not cached yet*/ + cache = kernel_cache_clean_and_malloc(kernel_cache,m); + if(cache) { + l=kernel_cache->totdoc2active[m]; + ex=docs[m]; + for(j=0;j<kernel_cache->activenum;j++) { /* fill cache */ + k=kernel_cache->active2totdoc[j]; + if((kernel_cache->index[k] != -1) && (l != -1) && (k != m)) { + cache[j]=kernel_cache->buffer[kernel_cache->activenum + *kernel_cache->index[k]+l]; + } + else { + cache[j]=kernel(kernel_parm,ex,docs[k]); + } + } + } + else { + perror("Error: Kernel cache full! => increase cache size"); + } + } +} + + +void cache_multiple_kernel_rows(KERNEL_CACHE *kernel_cache, DOC **docs, + long int *key, long int varnum, + KERNEL_PARM *kernel_parm) + /* Fills cache for the rows in key */ +{ + register long i; + + for(i=0;i<varnum;i++) { /* fill up kernel cache */ + cache_kernel_row(kernel_cache,docs,key[i],kernel_parm); + } +} + + +void kernel_cache_shrink(KERNEL_CACHE *kernel_cache, long int totdoc, + long int numshrink, long int *after) + /* Remove numshrink columns in the cache which correspond to + examples marked 0 in after. 
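+ The surviving columns are compacted in place row by row; afterwards the active2totdoc/totdoc2active maps are rebuilt and max_elems grows, since the same buffer now holds shorter rows. 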
*/ +{ + register long i,j,jj,from=0,to=0,scount; + long *keep; + + if(verbosity>=2) { + printf(" Reorganizing cache..."); fflush(stdout); + } + + keep=(long *)my_malloc(sizeof(long)*totdoc); + for(j=0;j<totdoc;j++) { + keep[j]=1; + } + scount=0; + for(jj=0;(jj<kernel_cache->activenum) && (scount<numshrink);jj++) { + j=kernel_cache->active2totdoc[jj]; + if(!after[j]) { + scount++; + keep[j]=0; + } + } + + for(i=0;i<kernel_cache->max_elems;i++) { + for(jj=0;jj<kernel_cache->activenum;jj++) { + j=kernel_cache->active2totdoc[jj]; + if(!keep[j]) { + from++; + } + else { + kernel_cache->buffer[to]=kernel_cache->buffer[from]; + to++; + from++; + } + } + } + + kernel_cache->activenum=0; + for(j=0;j<totdoc;j++) { + if((keep[j]) && (kernel_cache->totdoc2active[j] != -1)) { + kernel_cache->active2totdoc[kernel_cache->activenum]=j; + kernel_cache->totdoc2active[j]=kernel_cache->activenum; + kernel_cache->activenum++; + } + else { + kernel_cache->totdoc2active[j]=-1; + } + } + + kernel_cache->max_elems=(long)(kernel_cache->buffsize/kernel_cache->activenum); + if(kernel_cache->max_elems>totdoc) { + kernel_cache->max_elems=totdoc; + } + + free(keep); + + if(verbosity>=2) { + printf("done.\n"); fflush(stdout); + printf(" Cache-size in rows = %ld\n",kernel_cache->max_elems); + } +} + +KERNEL_CACHE *kernel_cache_init(long int totdoc, long int buffsize) +{ + long i; + KERNEL_CACHE *kernel_cache; + + kernel_cache=(KERNEL_CACHE *)my_malloc(sizeof(KERNEL_CACHE)); + kernel_cache->index = (long *)my_malloc(sizeof(long)*totdoc); + kernel_cache->occu = (long *)my_malloc(sizeof(long)*totdoc); + kernel_cache->lru = (long *)my_malloc(sizeof(long)*totdoc); + kernel_cache->invindex = (long *)my_malloc(sizeof(long)*totdoc); + kernel_cache->active2totdoc = (long *)my_malloc(sizeof(long)*totdoc); + kernel_cache->totdoc2active = (long *)my_malloc(sizeof(long)*totdoc); + kernel_cache->buffer = (CFLOAT *)my_malloc((size_t)(buffsize)*1024*1024); + + kernel_cache->buffsize=(long)(buffsize/sizeof(CFLOAT)*1024*1024); + + kernel_cache->max_elems=(long)(kernel_cache->buffsize/totdoc); + if(kernel_cache->max_elems>totdoc) { + kernel_cache->max_elems=totdoc; + } + + if(verbosity>=2) { + printf(" Cache-size in rows = %ld\n",kernel_cache->max_elems); + printf(" Kernel evals so far: %ld\n",kernel_cache_statistic); + } + + kernel_cache->elems=0; /* initialize cache */ + for(i=0;i<totdoc;i++) { + kernel_cache->index[i]=-1; + kernel_cache->lru[i]=0; + } + for(i=0;i<totdoc;i++) { + kernel_cache->occu[i]=0; + kernel_cache->invindex[i]=-1; + } + + kernel_cache->activenum=totdoc; + for(i=0;i<totdoc;i++) { + kernel_cache->active2totdoc[i]=i; + kernel_cache->totdoc2active[i]=i; + } + + kernel_cache->time=0; + + return(kernel_cache); +} + +void kernel_cache_reset_lru(KERNEL_CACHE *kernel_cache) +{ + long maxlru=0,k; + + for(k=0;k<kernel_cache->max_elems;k++) { + if(maxlru < kernel_cache->lru[k]) + maxlru=kernel_cache->lru[k]; + } + for(k=0;k<kernel_cache->max_elems;k++) { + kernel_cache->lru[k]-=maxlru; + } +} + +void kernel_cache_cleanup(KERNEL_CACHE *kernel_cache) +{ + free(kernel_cache->index); + free(kernel_cache->occu); + free(kernel_cache->lru); + free(kernel_cache->invindex); + free(kernel_cache->active2totdoc); + free(kernel_cache->totdoc2active); + free(kernel_cache->buffer); + free(kernel_cache); +} + +long kernel_cache_malloc(KERNEL_CACHE *kernel_cache) +{ + long i; + + if(kernel_cache_space_available(kernel_cache)) { + for(i=0;i<kernel_cache->max_elems;i++) { + if(!kernel_cache->occu[i]) { + kernel_cache->occu[i]=1; + 
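/* slot i is now claimed; bump the element count and hand the index back */ +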
kernel_cache->elems++; + return(i); + } + } + return(-1); +} + +void kernel_cache_free(KERNEL_CACHE *kernel_cache, long int i) +{ + kernel_cache->occu[i]=0; + kernel_cache->elems--; +} + +long kernel_cache_free_lru(KERNEL_CACHE *kernel_cache) + /* remove least recently used cache element */ +{ + register long k,least_elem=-1,least_time; + + least_time=kernel_cache->time+1; + for(k=0;k<kernel_cache->max_elems;k++) { + if(kernel_cache->invindex[k] != -1) { + if(kernel_cache->lru[k]<least_time) { + least_time=kernel_cache->lru[k]; + least_elem=k; + } + } + } + if(least_elem != -1) { + kernel_cache_free(kernel_cache,least_elem); + kernel_cache->index[kernel_cache->invindex[least_elem]]=-1; + kernel_cache->invindex[least_elem]=-1; + return(1); + } + return(0); +} + + +CFLOAT *kernel_cache_clean_and_malloc(KERNEL_CACHE *kernel_cache, + long int docnum) + /* Get a free cache entry. In case cache is full, the lru element + is removed. */ +{ + long result; + if((result = kernel_cache_malloc(kernel_cache)) == -1) { + if(kernel_cache_free_lru(kernel_cache)) { + result = kernel_cache_malloc(kernel_cache); + } + } + kernel_cache->index[docnum]=result; + if(result == -1) { + return(0); + } + kernel_cache->invindex[result]=docnum; + kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */ + return((CFLOAT *)((long)kernel_cache->buffer + +(kernel_cache->activenum*sizeof(CFLOAT)* + kernel_cache->index[docnum]))); +} + +long kernel_cache_touch(KERNEL_CACHE *kernel_cache, long int docnum) + /* Update lru time to avoid removal from cache. */ +{ + if(kernel_cache && kernel_cache->index[docnum] != -1) { + kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */ + return(1); + } + return(0); +} + +long kernel_cache_check(KERNEL_CACHE *kernel_cache, long int docnum) + /* Is that row cached? */ +{ + return(kernel_cache->index[docnum] != -1); +} + +long kernel_cache_space_available(KERNEL_CACHE *kernel_cache) + /* Is there room for one more row? */ +{ + return(kernel_cache->elems < kernel_cache->max_elems); +} + +/************************** Compute estimates ******************************/ + +void compute_xa_estimates(MODEL *model, long int *label, + long int *unlabeled, long int totdoc, + DOC **docs, double *lin, double *a, + KERNEL_PARM *kernel_parm, + LEARN_PARM *learn_parm, double *error, + double *recall, double *precision) + /* Computes xa-estimate of error rate, recall, and precision. See + T. Joachims, Estimating the Generalization Performance of an SVM + Efficiently, ICML, 2000. 
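+ An example is counted as a leave-one-out error below iff rho*a[i]*r_delta_sq+xi>=1, where xi=max(0,1-y_i*(lin[i]-b)) is its training slack; with xa_depth>0 the bound is first tightened by greedily redistributing a[i] over the other support vectors. 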
*/ +{ + long i,looerror,looposerror,loonegerror; + long totex,totposex; + double xi,r_delta,r_delta_sq,sim=0; + long *sv2dnum=NULL,*sv=NULL,svnum; + + r_delta=estimate_r_delta(docs,totdoc,kernel_parm); + r_delta_sq=r_delta*r_delta; + + looerror=0; + looposerror=0; + loonegerror=0; + totex=0; + totposex=0; + svnum=0; + + if(learn_parm->xa_depth > 0) { + sv = (long *)my_malloc(sizeof(long)*(totdoc+11)); + for(i=0;i<totdoc;i++) + sv[i]=0; + for(i=1;i<model->sv_num;i++) + if(a[model->supvec[i]->docnum] + < (learn_parm->svm_cost[model->supvec[i]->docnum] + -learn_parm->epsilon_a)) { + sv[model->supvec[i]->docnum]=1; + svnum++; + } + sv2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); + clear_index(sv2dnum); + compute_index(sv,totdoc,sv2dnum); + } + + for(i=0;i<totdoc;i++) { + if(unlabeled[i]) { + /* ignore it */ + } + else { + xi=1.0-((lin[i]-model->b)*(double)label[i]); + if(xi<0) xi=0; + if(label[i]>0) { + totposex++; + } + if((learn_parm->rho*a[i]*r_delta_sq+xi) >= 1.0) { + if(learn_parm->xa_depth > 0) { /* makes assumptions */ + sim=distribute_alpha_t_greedily(sv2dnum,svnum,docs,a,i,label, + kernel_parm,learn_parm, + (double)((1.0-xi-a[i]*r_delta_sq)/(2.0*a[i]))); + } + if((learn_parm->xa_depth == 0) || + ((a[i]*kernel(kernel_parm,docs[i],docs[i])+a[i]*2.0*sim+xi) >= 1.0)) { + looerror++; + if(label[i]>0) { + looposerror++; + } + else { + loonegerror++; + } + } + } + totex++; + } + } + + (*error)=((double)looerror/(double)totex)*100.0; + (*recall)=(1.0-(double)looposerror/(double)totposex)*100.0; + (*precision)=(((double)totposex-(double)looposerror) + /((double)totposex-(double)looposerror+(double)loonegerror))*100.0; + + free(sv); + free(sv2dnum); +} + + +double distribute_alpha_t_greedily(long int *sv2dnum, long int svnum, + DOC **docs, double *a, + long int docnum, + long int *label, + KERNEL_PARM *kernel_parm, + LEARN_PARM *learn_parm, double thresh) + /* Experimental Code improving plain XiAlpha Estimates by + computing a better bound using a greedy optimization strategy. 
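+ Search sketch: at each depth d one more support vector is added to the chosen set, keeping the candidate that minimizes the score val; the loop exits early once the score falls below thresh or no candidate remains, and the best score over all depths is returned. 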
*/ +{ + long best_depth=0; + long i,j,k,d,skip,allskip; + double best,best_val[101],val,init_val_sq,init_val_lin; + long best_ex[101]; + CFLOAT *cache,*trow; + + cache=(CFLOAT *)my_malloc(sizeof(CFLOAT)*learn_parm->xa_depth*svnum); + trow = (CFLOAT *)my_malloc(sizeof(CFLOAT)*svnum); + + for(k=0;k<svnum;k++) { + trow[k]=kernel(kernel_parm,docs[docnum],docs[sv2dnum[k]]); + } + + init_val_sq=0; + init_val_lin=0; + best=0; + + for(d=0;d<learn_parm->xa_depth;d++) { + allskip=1; + if(d>=1) { + init_val_sq+=cache[best_ex[d-1]+svnum*(d-1)]; + for(k=0;k<d-1;k++) { + init_val_sq+=2.0*cache[best_ex[k]+svnum*(d-1)]; + } + init_val_lin+=trow[best_ex[d-1]]; + } + for(i=0;i<svnum;i++) { + skip=0; + if(sv2dnum[i] == docnum) skip=1; + for(j=0;j<d;j++) { + if(i == best_ex[j]) skip=1; + } + + if(!skip) { + val=init_val_sq; + if(kernel_parm->kernel_type == LINEAR) + val+=docs[sv2dnum[i]]->fvec->twonorm_sq; + else + val+=kernel(kernel_parm,docs[sv2dnum[i]],docs[sv2dnum[i]]); + for(j=0;j<d;j++) { + val+=2.0*cache[i+j*svnum]; + } + val*=(1.0/(2.0*(d+1.0)*(d+1.0))); + val-=((init_val_lin+trow[i])/(d+1.0)); + + if(allskip || (val < best_val[d])) { + best_val[d]=val; + best_ex[d]=i; + } + allskip=0; + if(val < thresh) { + i=svnum; + /* printf("EARLY"); */ + } + } + } + if(!allskip) { + for(k=0;k<svnum;k++) { + cache[d*svnum+k]=kernel(kernel_parm, + docs[sv2dnum[best_ex[d]]], + docs[sv2dnum[k]]); + } + } + if((!allskip) && ((best_val[d] < best) || (d == 0))) { + best=best_val[d]; + best_depth=d; + } + if(allskip || (best < thresh)) { + d=learn_parm->xa_depth; + } + } + + free(cache); + free(trow); + + /* printf("Distribute[%ld](%ld)=%f, ",docnum,best_depth,best); */ + return(best); +} + + +void estimate_transduction_quality(MODEL *model, long int *label, + long int *unlabeled, + long int totdoc, DOC **docs, double *lin) + /* Loo-bound based on observation that loo-errors must have an + equal distribution in both training and test examples, given + that the test examples are classified correctly. Compare + chapter "Constraints on the Transductive Hyperplane" in my + Dissertation. 
+
+void estimate_transduction_quality(MODEL *model, long int *label,
+                                   long int *unlabeled,
+                                   long int totdoc, DOC **docs, double *lin)
+     /* Loo-bound based on the observation that loo-errors must be
+        equally distributed over both training and test examples,
+        given that the test examples are classified correctly. Compare
+        the chapter "Constraints on the Transductive Hyperplane" in my
+        dissertation. */
+{
+  long i,j,l=0,ulab=0,lab=0,labpos=0,labneg=0,ulabpos=0,ulabneg=0,totulab=0;
+  double totlab=0,totlabpos=0,totlabneg=0,labsum=0,ulabsum=0;
+  double r_delta,r_delta_sq,xi,xisum=0,asum=0;
+
+  r_delta=estimate_r_delta(docs,totdoc,&(model->kernel_parm));
+  r_delta_sq=r_delta*r_delta;
+
+  for(j=0;j<totdoc;j++) {
+    if(unlabeled[j]) {
+      totulab++;
+    }
+    else {
+      totlab++;
+      if(label[j] > 0)
+        totlabpos++;
+      else
+        totlabneg++;
+    }
+  }
+  for(j=1;j<model->sv_num;j++) {
+    i=model->supvec[j]->docnum;
+    xi=1.0-((lin[i]-model->b)*(double)label[i]);
+    if(xi<0) xi=0;
+
+    xisum+=xi;
+    asum+=fabs(model->alpha[j]);
+    if(unlabeled[i]) {
+      ulabsum+=(fabs(model->alpha[j])*r_delta_sq+xi);
+    }
+    else {
+      labsum+=(fabs(model->alpha[j])*r_delta_sq+xi);
+    }
+    if((fabs(model->alpha[j])*r_delta_sq+xi) >= 1) {
+      l++;
+      if(unlabeled[model->supvec[j]->docnum]) {
+        ulab++;
+        if(model->alpha[j] > 0)
+          ulabpos++;
+        else
+          ulabneg++;
+      }
+      else {
+        lab++;
+        if(model->alpha[j] > 0)
+          labpos++;
+        else
+          labneg++;
+      }
+    }
+  }
+  printf("xacrit>=1: labeledpos=%.5f labeledneg=%.5f default=%.5f\n",(double)labpos/(double)totlab*100.0,(double)labneg/(double)totlab*100.0,(double)totlabpos/(double)(totlab)*100.0);
+  printf("xacrit>=1: unlabelpos=%.5f unlabelneg=%.5f\n",(double)ulabpos/(double)totulab*100.0,(double)ulabneg/(double)totulab*100.0);
+  printf("xacrit>=1: labeled=%.5f unlabeled=%.5f all=%.5f\n",(double)lab/(double)totlab*100.0,(double)ulab/(double)totulab*100.0,(double)l/(double)(totdoc)*100.0);
+  printf("xacritsum: labeled=%.5f unlabeled=%.5f all=%.5f\n",(double)labsum/(double)totlab*100.0,(double)ulabsum/(double)totulab*100.0,(double)(labsum+ulabsum)/(double)(totdoc)*100.0);
+  printf("r_delta_sq=%.5f xisum=%.5f asum=%.5f\n",r_delta_sq,xisum,asum);
+}
+
+double estimate_margin_vcdim(MODEL *model, double w, double R,
+                             KERNEL_PARM *kernel_parm)
+     /* w: length of the model vector in feature space; pass a
+        negative value to have it computed. */
+     /* R: radius of the ball containing the data; pass a negative
+        value to have it estimated. */
+{
+  double h;
+
+  /* follows chapter 5.6.4 in [Vapnik/95] */
+
+  if(w<0) {
+    w=model_length_s(model,kernel_parm);
+  }
+  if(R<0) {
+    R=estimate_sphere(model,kernel_parm);
+  }
+  h = w*w * R*R + 1;
+  return(h);
+}
+
+double estimate_sphere(MODEL *model, KERNEL_PARM *kernel_parm)
+     /* Approximates the radius of the ball containing the support
+        vectors by bounding it with the length of the longest support
+        vector. This is pretty good for text categorization, since all
+        documents have feature vectors of length 1. It assumes that
+        the center of the ball is at the origin of the space. */
+{
+  register long j;
+  double xlen,maxxlen=0;
+  DOC *nulldoc;
+  WORD nullword;
+
+  nullword.wnum=0;
+  nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
+
+  for(j=1;j<model->sv_num;j++) {
+    xlen=sqrt(kernel(kernel_parm,model->supvec[j],model->supvec[j])
+              -2*kernel(kernel_parm,model->supvec[j],nulldoc)
+              +kernel(kernel_parm,nulldoc,nulldoc));
+    if(xlen>maxxlen) {
+      maxxlen=xlen;
+    }
+  }
+
+  free_example(nulldoc,1);
+  return(maxxlen);
+}
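+
+/* ------------------------------------------------------------------ */
+/* Editor's note: estimate_margin_vcdim() above evaluates Vapnik's    */
+/* bound h <= R^2*|w|^2 + 1 on the VC dimension of margin             */
+/* hyperplanes. A self-contained check of the arithmetic,             */
+/* illustrative only and not part of the original source: with        */
+/* |w| = 2.0 and R = 1.5 the bound is 4.0 * 2.25 + 1 = 10.            */
+
+static double margin_vcdim_bound(double w, double R)
+{
+  return w*w * R*R + 1.0;   /* same formula as h above */
+}
+/* usage: margin_vcdim_bound(2.0, 1.5) returns 10.0 */
+/* ------------------------------------------------------------------ */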
+
+double estimate_r_delta(DOC **docs, long int totdoc, KERNEL_PARM *kernel_parm)
+     /* Assumes that the center of the ball is at the origin of the
+        space. */
+{
+  long i;
+  double maxxlen,xlen;
+  DOC *nulldoc;
+  WORD nullword;
+
+  nullword.wnum=0;
+  nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
+
+  maxxlen=0;
+  for(i=0;i<totdoc;i++) {
+    xlen=sqrt(kernel(kernel_parm,docs[i],docs[i])
+              -2*kernel(kernel_parm,docs[i],nulldoc)
+              +kernel(kernel_parm,nulldoc,nulldoc));
+    if(xlen>maxxlen) {
+      maxxlen=xlen;
+    }
+  }
+
+  free_example(nulldoc,1);
+  return(maxxlen);
+}
+
+double estimate_r_delta_average(DOC **docs, long int totdoc,
+                                KERNEL_PARM *kernel_parm)
+     /* Assumes that the center of the ball is at the origin of the
+        space. */
+{
+  long i;
+  double avgxlen;
+  DOC *nulldoc;
+  WORD nullword;
+
+  nullword.wnum=0;
+  nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
+
+  avgxlen=0;
+  for(i=0;i<totdoc;i++) {
+    avgxlen+=sqrt(kernel(kernel_parm,docs[i],docs[i])
+                  -2*kernel(kernel_parm,docs[i],nulldoc)
+                  +kernel(kernel_parm,nulldoc,nulldoc));
+  }
+
+  free_example(nulldoc,1);
+  return(avgxlen/totdoc);
+}
+
+double length_of_longest_document_vector(DOC **docs, long int totdoc,
+                                         KERNEL_PARM *kernel_parm)
+{
+  long i;
+  double maxxlen,xlen;
+
+  maxxlen=0;
+  for(i=0;i<totdoc;i++) {
+    xlen=sqrt(kernel(kernel_parm,docs[i],docs[i]));
+    if(xlen>maxxlen) {
+      maxxlen=xlen;
+    }
+  }
+
+  return(maxxlen);
+}
+
+/****************************** IO-handling **********************************/
+
+void write_prediction(char *predfile, MODEL *model, double *lin,
+                      double *a, long int *unlabeled,
+                      long int *label, long int totdoc,
+                      LEARN_PARM *learn_parm)
+{
+  FILE *predfl;
+  long i;
+  double dist,a_max;
+
+  if(verbosity>=1) {
+    printf("Writing prediction file..."); fflush(stdout);
+  }
+  if ((predfl = fopen (predfile, "w")) == NULL)
+  { perror (predfile); exit (1); }
+  a_max=learn_parm->epsilon_a;
+  for(i=0;i<totdoc;i++) {
+    if((unlabeled[i]) && (a[i]>a_max)) {
+      a_max=a[i];
+    }
+  }
+  for(i=0;i<totdoc;i++) {
+    if(unlabeled[i]) {
+      if((a[i]>(learn_parm->epsilon_a))) {
+        dist=(double)label[i]*(1.0-learn_parm->epsilon_crit-a[i]/(a_max*2.0));
+      }
+      else {
+        dist=(lin[i]-model->b);
+      }
+      if(dist>0) {
+        fprintf(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist);
+      }
+      else {
+        fprintf(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist);
+      }
+    }
+  }
+  fclose(predfl);
+  if(verbosity>=1) {
+    printf("done\n");
+  }
+}
+
+void write_alphas(char *alphafile, double *a,
+                  long int *label, long int totdoc)
+{
+  FILE *alphafl;
+  long i;
+
+  if(verbosity>=1) {
+    printf("Writing alpha file..."); fflush(stdout);
+  }
+  if ((alphafl = fopen (alphafile, "w")) == NULL)
+  { perror (alphafile); exit (1); }
+  for(i=0;i<totdoc;i++) {
+    fprintf(alphafl,"%.18g\n",a[i]*(double)label[i]);
+  }
+  fclose(alphafl);
+  if(verbosity>=1) {
+    printf("done\n");
+  }
+}
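+
+/* ------------------------------------------------------------------ */
+/* Editor's note: write_alphas() above emits one signed multiplier    */
+/* per training example, a[i]*label[i], one value per line. The       */
+/* sketch below reads such a file back into an array, e.g. to seed    */
+/* the optional alpha start values that svm_learn_classification()    */
+/* accepts. It is illustrative only and not part of the original      */
+/* source; error handling is minimal.                                 */
+
+#include <stdio.h>   /* harmless here; svm_common.h already pulls it in */
+
+static long read_alphas(const char *alphafile, double *a, long maxdoc)
+{
+  FILE *fl = fopen(alphafile, "r");
+  long n = 0;
+  if (fl == NULL) return -1;          /* caller decides how to report */
+  while (n < maxdoc && fscanf(fl, "%lf", &a[n]) == 1)
+    n++;                              /* one signed alpha per line */
+  fclose(fl);
+  return n;                           /* number of values read */
+}
+/* ------------------------------------------------------------------ */
+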