/***********************************************************************/
/*                                                                     */
/*   svm_learn.c                                                       */
/*                                                                     */
/*   Learning module of Support Vector Machine.                       */
/*                                                                     */
/*   Author: Thorsten Joachims                                         */
/*   Date: 02.07.02                                                    */
/*                                                                     */
/*   Copyright (c) 2002  Thorsten Joachims - All rights reserved       */
/*                                                                     */
/*   This software is available for non-commercial use only. It must   */
/*   not be modified and distributed without prior permission of the   */
/*   author. The author is not responsible for implications from the   */
/*   use of this software.                                             */
/*                                                                     */
/***********************************************************************/


# include "svm_common.h"
# include "svm_learn.h"


/* interface to QP-solver */
double *optimize_qp(QP *, double *, long, double *, LEARN_PARM *);

/*---------------------------------------------------------------------------*/

/* Learns an SVM classification model based on the training data in
   docs/label. The resulting model is returned in the structure
   model. */

void svm_learn_classification(DOC **docs, double *class, long int totdoc,
                              long int totwords,
                              LEARN_PARM *learn_parm,
                              KERNEL_PARM *kernel_parm,
                              KERNEL_CACHE *kernel_cache,
                              MODEL *model,
                              double *alpha)
     /* docs:        Training vectors (x-part) */
     /* class:       Training labels (y-part, zero if test example for
                     transduction) */
     /* totdoc:      Number of examples in docs/label */
     /* totwords:    Number of features (i.e. highest feature index) */
     /* learn_parm:  Learning parameters */
     /* kernel_parm: Kernel parameters */
     /* kernel_cache:Initialized cache of size totdoc, if using a kernel.
                     NULL if linear. */
     /* model:       Returns learning result (assumed empty before called) */
     /* alpha:       Start values for the alpha variables or NULL
                     pointer. The new alpha values are returned after
                     optimization if not NULL. Array must be of size
                     totdoc. */
{
  long *inconsistent,i,*label;
  long inconsistentnum;
  long misclassified,upsupvecnum;
  double loss,model_length,example_length;
  double maxdiff,*lin,*a,*c;
  long runtime_start,runtime_end;
  long iterations;
  long *unlabeled,transduction;
  long heldout;
  long loo_count=0,loo_count_pos=0,loo_count_neg=0,trainpos=0,trainneg=0;
  long loocomputed=0,runtime_start_loo=0,runtime_start_xa=0;
  double heldout_c=0,r_delta_sq=0,r_delta,r_delta_avg;
  long *index,*index2dnum;
  double *weights;
  CFLOAT *aicache;    /* buffer to keep one row of hessian */

  double *xi_fullset; /* buffer for storing xi on full sample in loo */
  double *a_fullset;  /* buffer for storing alpha on full sample in loo */
  TIMING timing_profile;
  SHRINK_STATE shrink_state;

  runtime_start=get_runtime();
  timing_profile.time_kernel=0;
  timing_profile.time_opti=0;
  timing_profile.time_shrink=0;
  timing_profile.time_update=0;
  timing_profile.time_model=0;
  timing_profile.time_check=0;
  timing_profile.time_select=0;
  kernel_cache_statistic=0;

  learn_parm->totwords=totwords;

  /* make sure -n value is reasonable */
  if((learn_parm->svm_newvarsinqp < 2)
     || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
    learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
  }

  init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);

  label = (long *)my_malloc(sizeof(long)*totdoc);
  inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
  unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
  c = (double *)my_malloc(sizeof(double)*totdoc);
  a = (double *)my_malloc(sizeof(double)*totdoc);
  a_fullset = (double *)my_malloc(sizeof(double)*totdoc);
  xi_fullset = (double *)my_malloc(sizeof(double)*totdoc);
  lin = (double *)my_malloc(sizeof(double)*totdoc);
  learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
  model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
  model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));

  model->at_upper_bound=0;
  model->b=0;
  model->supvec[0]=0;  /* element 0 reserved and empty for now */
  model->alpha[0]=0;
  model->lin_weights=NULL;
  model->totwords=totwords;
  model->totdoc=totdoc;
  model->kernel_parm=(*kernel_parm);
  model->sv_num=1;
  model->loo_error=-1;
  model->loo_recall=-1;
  model->loo_precision=-1;
  model->xa_error=-1;
  model->xa_recall=-1;
  model->xa_precision=-1;
  inconsistentnum=0;
  transduction=0;

  r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
  r_delta_sq=r_delta*r_delta;

  r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
  if(learn_parm->svm_c == 0.0) {  /* default value for C */
    learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
    if(verbosity>=1)
      printf("Setting default regularization parameter C=%.4f\n",
             learn_parm->svm_c);
  }
  learn_parm->eps=-1.0;   /* equivalent regression epsilon for
                             classification */

  for(i=0;i<totdoc;i++) {    /* various inits */
    docs[i]->docnum=i;
    inconsistent[i]=0;
    a[i]=0;
    lin[i]=0;
    c[i]=0.0;
    unlabeled[i]=0;
    if(class[i] == 0) {
      unlabeled[i]=1;
      label[i]=0;
      transduction=1;
    }
    if(class[i] > 0) {
      learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
        docs[i]->costfactor;
      label[i]=1;
      trainpos++;
    }
    else if(class[i] < 0) {
      learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor;
      label[i]=-1;
      trainneg++;
    }
    else {
      learn_parm->svm_cost[i]=0;
    }
  }
  if(verbosity>=2) {
    printf("%ld positive, %ld negative, and %ld unlabeled examples.\n",
           trainpos,trainneg,totdoc-trainpos-trainneg); fflush(stdout);
  }

  /* caching makes no sense for linear kernel */
  if(kernel_parm->kernel_type == LINEAR) {
    kernel_cache = NULL;
  }

  /* compute starting state for initial alpha values */
  if(alpha) {
    if(verbosity>=1) {
      printf("Computing starting state..."); fflush(stdout);
    }
    index = (long *)my_malloc(sizeof(long)*totdoc);
    index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
    weights=(double *)my_malloc(sizeof(double)*(totwords+1));
    aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
    for(i=0;i<totdoc;i++) {    /* create full index and clip alphas */
      index[i]=1;
      alpha[i]=fabs(alpha[i]);
      if(alpha[i]<0) alpha[i]=0;
      if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
    }
    if(kernel_parm->kernel_type != LINEAR) {
      for(i=0;i<totdoc;i++)    /* fill kernel cache with unbounded SV */
        if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
           && (kernel_cache_space_available(kernel_cache)))
          cache_kernel_row(kernel_cache,docs,i,kernel_parm);
      for(i=0;i<totdoc;i++)    /* fill rest of kernel cache with bounded SV */
        if((alpha[i]==learn_parm->svm_cost[i])
           && (kernel_cache_space_available(kernel_cache)))
          cache_kernel_row(kernel_cache,docs,i,kernel_parm);
    }
    (void)compute_index(index,totdoc,index2dnum);
    update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
                            totwords,kernel_parm,kernel_cache,lin,aicache,
                            weights);
    (void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c,
                              learn_parm,index2dnum,index2dnum,model);
    for(i=0;i<totdoc;i++) {    /* copy initial alphas */
      a[i]=alpha[i];
    }
    free(index);
    free(index2dnum);
    free(weights);
    free(aicache);
    if(verbosity>=1) {
      printf("done.\n"); fflush(stdout);
    }
  }

  if(transduction) {
    learn_parm->svm_iter_to_shrink=99999999;
    if(verbosity >= 1)
      printf("\nDeactivating Shrinking due to an incompatibility with the transductive \nlearner in the current version.\n\n");
  }

  if(transduction && learn_parm->compute_loo) {
    learn_parm->compute_loo=0;
    if(verbosity >= 1)
      printf("\nCannot compute leave-one-out estimates for transductive learner.\n\n");
  }

  if(learn_parm->remove_inconsistent && learn_parm->compute_loo) {
    learn_parm->compute_loo=0;
    printf("\nCannot compute leave-one-out estimates when removing inconsistent examples.\n\n");
  }

  if(learn_parm->compute_loo && ((trainpos == 1) || (trainneg == 1))) {
    learn_parm->compute_loo=0;
    printf("\nCannot compute leave-one-out with only one example in one class.\n\n");
  }


  if(verbosity==1) {
    printf("Optimizing"); fflush(stdout);
  }
  /* train the svm */
  iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
                                     kernel_parm,kernel_cache,&shrink_state,
                                     model,inconsistent,unlabeled,a,lin,
                                     c,&timing_profile,
                                     &maxdiff,(long)-1,
                                     (long)1);

  if(verbosity>=1) {
    if(verbosity==1) printf("done. (%ld iterations)\n",iterations);

    misclassified=0;
    for(i=0;(i<totdoc);i++) {  /* get final statistic */
      if((lin[i]-model->b)*(double)label[i] <= 0.0)
        misclassified++;
    }

    printf("Optimization finished (%ld misclassified, maxdiff=%.5f).\n",
           misclassified,maxdiff);

    runtime_end=get_runtime();
    if(verbosity>=2) {
      printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
        ((float)runtime_end-(float)runtime_start)/100.0,
        (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));
    }
    else {
      printf("Runtime in cpu-seconds: %.2f\n",
             (runtime_end-runtime_start)/100.0);
    }

    if(learn_parm->remove_inconsistent) {
      inconsistentnum=0;
      for(i=0;i<totdoc;i++)
        if(inconsistent[i])
          inconsistentnum++;
      printf("Number of SV: %ld (plus %ld inconsistent examples)\n",
             model->sv_num-1,inconsistentnum);
    }
    else {
      upsupvecnum=0;
      for(i=1;i<model->sv_num;i++) {
        if(fabs(model->alpha[i]) >=
           (learn_parm->svm_cost[(model->supvec[i])->docnum]-
            learn_parm->epsilon_a))
          upsupvecnum++;
      }
      printf("Number of SV: %ld (including %ld at upper bound)\n",
             model->sv_num-1,upsupvecnum);
    }

    if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
      loss=0;
      model_length=0;
      for(i=0;i<totdoc;i++) {
        if((lin[i]-model->b)*(double)label[i] < 1.0-learn_parm->epsilon_crit)
          loss+=1.0-(lin[i]-model->b)*(double)label[i];
        model_length+=a[i]*label[i]*lin[i];
      }
      model_length=sqrt(model_length);
      fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
      fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
      example_length=estimate_sphere(model,kernel_parm);
      fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
              length_of_longest_document_vector(docs,totdoc,kernel_parm));
      fprintf(stdout,"Estimated VCdim of classifier: VCdim<=%.5f\n",
              estimate_margin_vcdim(model,model_length,example_length,
                                    kernel_parm));
      if((!learn_parm->remove_inconsistent) && (!transduction)) {
        runtime_start_xa=get_runtime();
        if(verbosity>=1) {
          printf("Computing XiAlpha-estimates..."); fflush(stdout);
        }
        compute_xa_estimates(model,label,unlabeled,totdoc,docs,lin,a,
                             kernel_parm,learn_parm,&(model->xa_error),
                             &(model->xa_recall),&(model->xa_precision));
        if(verbosity>=1) {
          printf("done\n");
        }
printf("Runtime for XiAlpha-estimates in cpu-seconds: %.2f\n", wolffd@0: (get_runtime()-runtime_start_xa)/100.0); wolffd@0: wolffd@0: fprintf(stdout,"XiAlpha-estimate of the error: error<=%.2f%% (rho=%.2f,depth=%ld)\n", wolffd@0: model->xa_error,learn_parm->rho,learn_parm->xa_depth); wolffd@0: fprintf(stdout,"XiAlpha-estimate of the recall: recall=>%.2f%% (rho=%.2f,depth=%ld)\n", wolffd@0: model->xa_recall,learn_parm->rho,learn_parm->xa_depth); wolffd@0: fprintf(stdout,"XiAlpha-estimate of the precision: precision=>%.2f%% (rho=%.2f,depth=%ld)\n", wolffd@0: model->xa_precision,learn_parm->rho,learn_parm->xa_depth); wolffd@0: } wolffd@0: else if(!learn_parm->remove_inconsistent) { wolffd@0: estimate_transduction_quality(model,label,unlabeled,totdoc,docs,lin); wolffd@0: } wolffd@0: } wolffd@0: if(verbosity>=1) { wolffd@0: printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic); wolffd@0: } wolffd@0: } wolffd@0: wolffd@0: wolffd@0: /* leave-one-out testing starts now */ wolffd@0: if(learn_parm->compute_loo) { wolffd@0: /* save results of training on full dataset for leave-one-out */ wolffd@0: runtime_start_loo=get_runtime(); wolffd@0: for(i=0;ib)*(double)label[i]); wolffd@0: if(xi_fullset[i]<0) xi_fullset[i]=0; wolffd@0: a_fullset[i]=a[i]; wolffd@0: } wolffd@0: if(verbosity>=1) { wolffd@0: printf("Computing leave-one-out"); wolffd@0: } wolffd@0: wolffd@0: /* repeat this loop for every held-out example */ wolffd@0: for(heldout=0;(heldoutrho*a_fullset[heldout]*r_delta_sq+xi_fullset[heldout] wolffd@0: < 1.0) { wolffd@0: /* guaranteed to not produce a leave-one-out error */ wolffd@0: if(verbosity==1) { wolffd@0: printf("+"); fflush(stdout); wolffd@0: } wolffd@0: } wolffd@0: else if(xi_fullset[heldout] > 1.0) { wolffd@0: /* guaranteed to produce a leave-one-out error */ wolffd@0: loo_count++; wolffd@0: if(label[heldout] > 0) loo_count_pos++; else loo_count_neg++; wolffd@0: if(verbosity==1) { wolffd@0: printf("-"); fflush(stdout); wolffd@0: } wolffd@0: } wolffd@0: else { wolffd@0: loocomputed++; wolffd@0: heldout_c=learn_parm->svm_cost[heldout]; /* set upper bound to zero */ wolffd@0: learn_parm->svm_cost[heldout]=0; wolffd@0: /* make sure heldout example is not currently */ wolffd@0: /* shrunk away. Assumes that lin is up to date! 
        shrink_state.active[heldout]=1;
        if(verbosity>=2)
          printf("\nLeave-One-Out test on example %ld\n",heldout);
        if(verbosity>=1) {
          printf("(?[%ld]",heldout); fflush(stdout);
        }

        optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
                                kernel_parm,
                                kernel_cache,&shrink_state,model,
                                inconsistent,unlabeled,
                                a,lin,c,&timing_profile,
                                &maxdiff,heldout,(long)2);

        /* printf("%.20f\n",(lin[heldout]-model->b)*(double)label[heldout]); */

        if(((lin[heldout]-model->b)*(double)label[heldout]) <= 0.0) {
          loo_count++;                            /* there was a loo-error */
          if(label[heldout] > 0) loo_count_pos++; else loo_count_neg++;
          if(verbosity>=1) {
            printf("-)"); fflush(stdout);
          }
        }
        else {
          if(verbosity>=1) {
            printf("+)"); fflush(stdout);
          }
        }
        /* now we need to restore the original data set */
        learn_parm->svm_cost[heldout]=heldout_c; /* restore upper bound */
      }
    } /* end of leave-one-out loop */


    if(verbosity>=1) {
      printf("\nRetrain on full problem"); fflush(stdout);
    }
    optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
                            kernel_parm,
                            kernel_cache,&shrink_state,model,
                            inconsistent,unlabeled,
                            a,lin,c,&timing_profile,
                            &maxdiff,(long)-1,(long)1);
    if(verbosity >= 1)
      printf("done.\n");


    /* after all leave-one-out computed */
    model->loo_error=100.0*loo_count/(double)totdoc;
    model->loo_recall=(1.0-(double)loo_count_pos/(double)trainpos)*100.0;
    model->loo_precision=(trainpos-loo_count_pos)/
      (double)(trainpos-loo_count_pos+loo_count_neg)*100.0;
    if(verbosity >= 1) {
      fprintf(stdout,"Leave-one-out estimate of the error: error=%.2f%%\n",
              model->loo_error);
      fprintf(stdout,"Leave-one-out estimate of the recall: recall=%.2f%%\n",
              model->loo_recall);
      fprintf(stdout,"Leave-one-out estimate of the precision: precision=%.2f%%\n",
              model->loo_precision);
      fprintf(stdout,"Actual leave-one-outs computed: %ld (rho=%.2f)\n",
              loocomputed,learn_parm->rho);
      printf("Runtime for leave-one-out in cpu-seconds: %.2f\n",
             (double)(get_runtime()-runtime_start_loo)/100.0);
    }
  }

  if(learn_parm->alphafile[0])
    write_alphas(learn_parm->alphafile,a,label,totdoc);

  shrink_state_cleanup(&shrink_state);
  free(label);
  free(inconsistent);
  free(unlabeled);
  free(c);
  free(a);
  free(a_fullset);
  free(xi_fullset);
  free(lin);
  free(learn_parm->svm_cost);
}
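
/* --- Editor's illustrative sketch (not part of the original SVM-light
   source): one plausible way a caller could drive svm_learn_classification
   above with a linear kernel and a cold start. The field values below are
   assumptions for illustration and roughly mirror common defaults; real
   callers should initialize LEARN_PARM/KERNEL_PARM the way the SVM-light
   command-line front end does. The guard macro is invented here so the
   sketch never compiles into the library. --- */
#ifdef SVM_LEARN_USAGE_SKETCH
static MODEL *sketch_train_linear_classifier(DOC **docs, double *labels,
                                             long totdoc, long totwords)
{
  LEARN_PARM learn_parm;
  KERNEL_PARM kernel_parm;
  MODEL *model=(MODEL *)my_malloc(sizeof(MODEL));

  memset(&learn_parm,0,sizeof(learn_parm));   /* zero all flags/buffers */
  memset(&kernel_parm,0,sizeof(kernel_parm));
  learn_parm.svm_c=0.0;               /* 0.0 -> default C estimated above */
  learn_parm.svm_costratio=1.0;       /* symmetric cost for both classes */
  learn_parm.svm_maxqpsize=10;        /* size of QP subproblems (-q) */
  learn_parm.svm_newvarsinqp=0;       /* clipped to maxqpsize above (-n) */
  learn_parm.svm_iter_to_shrink=100;  /* shrinking heuristic (-h) */
  learn_parm.maxiter=100000;          /* watchdog limit on stalled progress */
  learn_parm.epsilon_crit=0.001;      /* KT termination threshold (-e) */
  learn_parm.epsilon_a=1E-15;
  learn_parm.biased_hyperplane=1;     /* learn a threshold b */
  kernel_parm.kernel_type=LINEAR;

  /* NULL cache: caching is pointless for the linear kernel anyway;
     NULL alpha: cold start without initial alpha values */
  svm_learn_classification(docs,labels,totdoc,totwords,&learn_parm,
                           &kernel_parm,NULL,model,NULL);
  return(model);
}
#endif /* SVM_LEARN_USAGE_SKETCH */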
/* Learns an SVM regression model based on the training data in
   docs/label. The resulting model is returned in the structure
   model. */

void svm_learn_regression(DOC **docs, double *value, long int totdoc,
                          long int totwords, LEARN_PARM *learn_parm,
                          KERNEL_PARM *kernel_parm,
                          KERNEL_CACHE **kernel_cache, MODEL *model)
     /* docs:        Training vectors (x-part) */
     /* value:       Training values (y-part) */
     /* totdoc:      Number of examples in docs/label */
     /* totwords:    Number of features (i.e. highest feature index) */
     /* learn_parm:  Learning parameters */
     /* kernel_parm: Kernel parameters */
     /* kernel_cache:Initialized cache, if using a kernel. NULL if
                     linear. Note that it will be free'd and reassigned */
     /* model:       Returns learning result (assumed empty before called) */
{
  long *inconsistent,i,j;
  long inconsistentnum;
  long upsupvecnum;
  double loss,model_length,example_length;
  double maxdiff,*lin,*a,*c;
  long runtime_start,runtime_end;
  long iterations,kernel_cache_size;
  long *unlabeled;
  double r_delta_sq=0,r_delta,r_delta_avg;
  double *xi_fullset; /* buffer for storing xi on full sample in loo */
  double *a_fullset;  /* buffer for storing alpha on full sample in loo */
  TIMING timing_profile;
  SHRINK_STATE shrink_state;
  DOC **docs_org;
  long *label;

  /* set up regression problem in standard form */
  docs_org=docs;
  docs = (DOC **)my_malloc(sizeof(DOC *)*2*totdoc); /* array of pointers */
  label = (long *)my_malloc(sizeof(long)*2*totdoc);
  c = (double *)my_malloc(sizeof(double)*2*totdoc);
  for(i=0;i<totdoc;i++) {
    j=2*totdoc-1-i;
    docs[i]=create_example(i,0,0,docs_org[i]->costfactor,docs_org[i]->fvec);
    label[i]=+1;
    c[i]=value[i];
    docs[j]=create_example(j,0,0,docs_org[i]->costfactor,docs_org[i]->fvec);
    label[j]=-1;
    c[j]=value[i];
  }
  totdoc*=2;

  /* need to get a bigger kernel cache */
  if(*kernel_cache) {
    kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024);
    kernel_cache_cleanup(*kernel_cache);
    (*kernel_cache)=kernel_cache_init(totdoc,kernel_cache_size);
  }

  runtime_start=get_runtime();
  timing_profile.time_kernel=0;
  timing_profile.time_opti=0;
  timing_profile.time_shrink=0;
  timing_profile.time_update=0;
  timing_profile.time_model=0;
  timing_profile.time_check=0;
  timing_profile.time_select=0;
  kernel_cache_statistic=0;

  learn_parm->totwords=totwords;

  /* make sure -n value is reasonable */
  if((learn_parm->svm_newvarsinqp < 2)
     || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
    learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
  }

  init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);

  inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
  unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
  a = (double *)my_malloc(sizeof(double)*totdoc);
  a_fullset = (double *)my_malloc(sizeof(double)*totdoc);
  xi_fullset = (double *)my_malloc(sizeof(double)*totdoc);
  lin = (double *)my_malloc(sizeof(double)*totdoc);
  learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
  model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
  model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));

  model->at_upper_bound=0;
  model->b=0;
  model->supvec[0]=0;  /* element 0 reserved and empty for now */
  model->alpha[0]=0;
  model->lin_weights=NULL;
  model->totwords=totwords;
  model->totdoc=totdoc;
  model->kernel_parm=(*kernel_parm);
  model->sv_num=1;
  model->loo_error=-1;
  model->loo_recall=-1;
  model->loo_precision=-1;
  model->xa_error=-1;
  model->xa_recall=-1;
  model->xa_precision=-1;
  inconsistentnum=0;

  r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
  r_delta_sq=r_delta*r_delta;

  r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
  if(learn_parm->svm_c == 0.0) {  /* default value for C */
    learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
    if(verbosity>=1)
      printf("Setting default regularization parameter C=%.4f\n",
             learn_parm->svm_c);
  }

  for(i=0;i<totdoc;i++) {    /* various inits */
    inconsistent[i]=0;
    a[i]=0;
    lin[i]=0;
    unlabeled[i]=0;
    if(label[i] > 0) {
      learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
        docs[i]->costfactor;
    }
    else if(label[i] < 0) {
      learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor;
    }
  }

  /* caching makes no sense for linear kernel */
  if((kernel_parm->kernel_type == LINEAR) && (*kernel_cache)) {
    printf("WARNING: Using a kernel cache for linear case will slow optimization down!\n");
  }

  if(verbosity==1) {
    printf("Optimizing"); fflush(stdout);
  }

  /* train the svm */
  iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm,
                                     kernel_parm,*kernel_cache,&shrink_state,
                                     model,inconsistent,unlabeled,a,lin,c,
                                     &timing_profile,&maxdiff,(long)-1,
                                     (long)1);

  if(verbosity>=1) {
    if(verbosity==1) printf("done. (%ld iterations)\n",iterations);

    printf("Optimization finished (maxdiff=%.5f).\n",maxdiff);

    runtime_end=get_runtime();
    if(verbosity>=2) {
      printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
        ((float)runtime_end-(float)runtime_start)/100.0,
        (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));
    }
    else {
      printf("Runtime in cpu-seconds: %.2f\n",
             (runtime_end-runtime_start)/100.0);
    }

    if(learn_parm->remove_inconsistent) {
      inconsistentnum=0;
      for(i=0;i<totdoc;i++)
        if(inconsistent[i])
          inconsistentnum++;
      printf("Number of SV: %ld (plus %ld inconsistent examples)\n",
             model->sv_num-1,inconsistentnum);
    }
    else {
      upsupvecnum=0;
      for(i=1;i<model->sv_num;i++) {
        if(fabs(model->alpha[i]) >=
           (learn_parm->svm_cost[(model->supvec[i])->docnum]-
            learn_parm->epsilon_a))
          upsupvecnum++;
      }
      printf("Number of SV: %ld (including %ld at upper bound)\n",
             model->sv_num-1,upsupvecnum);
    }

    if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
      loss=0;
      model_length=0;
      for(i=0;i<totdoc;i++) {
        if((lin[i]-model->b)*(double)label[i] < (-learn_parm->eps+(double)label[i]*c[i])-learn_parm->epsilon_crit)
          loss+=-learn_parm->eps+(double)label[i]*c[i]-(lin[i]-model->b)*(double)label[i];
        model_length+=a[i]*label[i]*lin[i];
      }
      model_length=sqrt(model_length);
      fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
      fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
      example_length=estimate_sphere(model,kernel_parm);
      fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
              length_of_longest_document_vector(docs,totdoc,kernel_parm));
    }
    if(verbosity>=1) {
      printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
    }
  }

  if(learn_parm->alphafile[0])
    write_alphas(learn_parm->alphafile,a,label,totdoc);

  /* this makes sure the model we return does not contain pointers to the
     temporary documents */
  for(i=1;i<model->sv_num;i++) {
    j=model->supvec[i]->docnum;
    if(j >= (totdoc/2)) {
      j=totdoc-j-1;
    }
    model->supvec[i]=docs_org[j];
  }

  shrink_state_cleanup(&shrink_state);
  for(i=0;i<totdoc;i++)
    free_example(docs[i],0);
  free(docs);
  free(label);
  free(inconsistent);
  free(unlabeled);
  free(c);
  free(a);
  free(a_fullset);
  free(xi_fullset);
  free(lin);
  free(learn_parm->svm_cost);
}
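
/* --- Editor's illustrative sketch (not original code): invoking the
   regression learner above. Note the KERNEL_CACHE** argument: the cache
   may be rebuilt internally, since the function doubles the problem by
   creating a +1 and a -1 constraint per training value. The field
   settings are assumptions for illustration; learn_parm.eps is the width
   of the insensitive tube. Guarded out of compilation. --- */
#ifdef SVM_LEARN_USAGE_SKETCH
static MODEL *sketch_train_regression(DOC **docs, double *values,
                                      long totdoc, long totwords)
{
  LEARN_PARM learn_parm;
  KERNEL_PARM kernel_parm;
  KERNEL_CACHE *kernel_cache=NULL;    /* linear kernel -> no cache */
  MODEL *model=(MODEL *)my_malloc(sizeof(MODEL));

  memset(&learn_parm,0,sizeof(learn_parm));
  memset(&kernel_parm,0,sizeof(kernel_parm));
  learn_parm.type=REGRESSION;
  learn_parm.svm_c=1.0;
  learn_parm.svm_costratio=1.0;
  learn_parm.svm_maxqpsize=10;
  learn_parm.svm_iter_to_shrink=100;
  learn_parm.maxiter=100000;
  learn_parm.epsilon_crit=0.001;
  learn_parm.epsilon_a=1E-15;
  learn_parm.eps=0.1;                 /* insensitive tube width (-w) */
  learn_parm.biased_hyperplane=1;
  kernel_parm.kernel_type=LINEAR;

  svm_learn_regression(docs,values,totdoc,totwords,&learn_parm,
                       &kernel_parm,&kernel_cache,model);
  return(model);
}
#endif /* SVM_LEARN_USAGE_SKETCH */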
void svm_learn_ranking(DOC **docs, double *rankvalue, long int totdoc,
                       long int totwords, LEARN_PARM *learn_parm,
                       KERNEL_PARM *kernel_parm, KERNEL_CACHE **kernel_cache,
                       MODEL *model)
     /* docs:        Training vectors (x-part) */
     /* rankvalue:   Training target values that determine the ranking */
     /* totdoc:      Number of examples in docs/label */
     /* totwords:    Number of features (i.e. highest feature index) */
     /* learn_parm:  Learning parameters */
     /* kernel_parm: Kernel parameters */
     /* kernel_cache:Initialized pointer to cache of size 1*totdoc, if
                     using a kernel. NULL if linear. NOTE: Cache is
                     getting reinitialized in this function */
     /* model:       Returns learning result (assumed empty before called) */
{
  DOC **docdiff;
  long i,j,k,totpair,kernel_cache_size;
  double *target,*alpha,cost;
  long *greater,*lesser;
  MODEL *pairmodel;
  SVECTOR *flow,*fhigh;

  totpair=0;
  for(i=0;i<totdoc;i++) {
    for(j=i+1;j<totdoc;j++) {
      if((docs[i]->queryid==docs[j]->queryid) && (rankvalue[i] != rankvalue[j])) {
        totpair++;
      }
    }
  }

  printf("Constructing %ld rank constraints...",totpair); fflush(stdout);
  docdiff=(DOC **)my_malloc(sizeof(DOC *)*totpair); /* array of pointers */
  target=(double *)my_malloc(sizeof(double)*totpair);
  greater=(long *)my_malloc(sizeof(long)*totpair);
  lesser=(long *)my_malloc(sizeof(long)*totpair);

  k=0;
  for(i=0;i<totdoc;i++) {
    for(j=i+1;j<totdoc;j++) {
      if(docs[i]->queryid == docs[j]->queryid) {
        cost=(docs[i]->costfactor+docs[j]->costfactor)/2.0;
        if(rankvalue[i] > rankvalue[j]) {
          if(kernel_parm->kernel_type == LINEAR)
            docdiff[k]=create_example(k,0,0,cost,
                                      sub_ss(docs[i]->fvec,docs[j]->fvec));
          else {
            flow=copy_svector(docs[j]->fvec);
            flow->factor=-1.0;
            flow->next=NULL;
            fhigh=copy_svector(docs[i]->fvec);
            fhigh->factor=1.0;
            fhigh->next=flow;
            docdiff[k]=create_example(k,0,0,cost,fhigh);
          }
          target[k]=1;
          greater[k]=i;
          lesser[k]=j;
          k++;
        }
        else if(rankvalue[i] < rankvalue[j]) {
          if(kernel_parm->kernel_type == LINEAR)
            docdiff[k]=create_example(k,0,0,cost,
                                      sub_ss(docs[i]->fvec,docs[j]->fvec));
          else {
            flow=copy_svector(docs[j]->fvec);
            flow->factor=-1.0;
            flow->next=NULL;
            fhigh=copy_svector(docs[i]->fvec);
            fhigh->factor=1.0;
            fhigh->next=flow;
            docdiff[k]=create_example(k,0,0,cost,fhigh);
          }
          target[k]=-1;
          greater[k]=i;
          lesser[k]=j;
          k++;
        }
      }
    }
  }
  printf("done.\n"); fflush(stdout);

  /* need to get a bigger kernel cache */
  if(*kernel_cache) {
    kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024);
    kernel_cache_cleanup(*kernel_cache);
    (*kernel_cache)=kernel_cache_init(totpair,kernel_cache_size);
  }

  /* must use unbiased hyperplane on difference vectors */
  learn_parm->biased_hyperplane=0;
  pairmodel=(MODEL *)my_malloc(sizeof(MODEL));
  svm_learn_classification(docdiff,target,totpair,totwords,learn_parm,
                           kernel_parm,(*kernel_cache),pairmodel,NULL);

  /* Transfer the result into a more compact model. If you would like
     to output the original model on pairs of documents, see below. */
  alpha=(double *)my_malloc(sizeof(double)*totdoc);
  for(i=0;i<totdoc;i++) {     /* initialize per-document alphas */
    alpha[i]=0;
  }
  for(i=1;i<pairmodel->sv_num;i++) {  /* fold pairwise alphas back onto docs */
    alpha[lesser[(pairmodel->supvec[i])->docnum]]-=pairmodel->alpha[i];
    alpha[greater[(pairmodel->supvec[i])->docnum]]+=pairmodel->alpha[i];
  }
  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
  model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
  model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));
  model->supvec[0]=0;  /* element 0 reserved and empty for now */
  model->alpha[0]=0;
  model->sv_num=1;
  for(i=0;i<totdoc;i++) {     /* add each document with non-zero alpha */
    if(alpha[i]) {
      model->supvec[model->sv_num]=docs[i];
      model->alpha[model->sv_num]=alpha[i];
      model->index[i]=model->sv_num;
      model->sv_num++;
    }
    else {
      model->index[i]=-1;
    }
  }
  model->at_upper_bound=0;
  model->b=0;
  model->lin_weights=NULL;
  model->totwords=totwords;
  model->totdoc=totdoc;
  model->kernel_parm=(*kernel_parm);
  model->loo_error=-1;
  model->loo_recall=-1;
  model->loo_precision=-1;
  model->xa_error=-1;
  model->xa_recall=-1;
  model->xa_precision=-1;

  free(alpha);
  free(greater);
  free(lesser);
  free(target);

  /* If you would like to output the original model on pairs of
     documents, replace the following lines with '(*model)=(*pairmodel);' */
  for(i=0;i<totpair;i++)
    free_example(docdiff[i],1);
  free(docdiff);
  free_model(pairmodel,0);
}
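
/* --- Editor's illustrative sketch (not original code): invoking the
   ranking learner above. Each DOC is expected to carry a queryid (set
   via create_example); constraints are generated for every pair of
   documents within the same query whose rank values differ, and the
   compact model built above scores a document like a classifier, with
   alphas accumulated from the pairwise model. Field values below are
   assumptions for illustration only. --- */
#ifdef SVM_LEARN_USAGE_SKETCH
static MODEL *sketch_train_ranker(DOC **docs, double *rankvalues,
                                  long totdoc, long totwords)
{
  LEARN_PARM learn_parm;
  KERNEL_PARM kernel_parm;
  KERNEL_CACHE *kernel_cache=NULL;
  MODEL *model=(MODEL *)my_malloc(sizeof(MODEL));

  memset(&learn_parm,0,sizeof(learn_parm));
  memset(&kernel_parm,0,sizeof(kernel_parm));
  learn_parm.type=RANKING;
  learn_parm.svm_c=1.0;
  learn_parm.svm_costratio=1.0;
  learn_parm.svm_maxqpsize=10;
  learn_parm.svm_iter_to_shrink=100;
  learn_parm.maxiter=100000;
  learn_parm.epsilon_crit=0.001;
  learn_parm.epsilon_a=1E-15;
  kernel_parm.kernel_type=LINEAR;

  /* svm_learn_ranking forces biased_hyperplane=0 itself; the cache
     pointer may be reinitialized to hold one row per pair */
  svm_learn_ranking(docs,rankvalues,totdoc,totwords,&learn_parm,
                    &kernel_parm,&kernel_cache,model);
  return(model);
}
#endif /* SVM_LEARN_USAGE_SKETCH */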
/* The following solves a freely defined and given set of
   inequalities. The optimization problem is of the following form:

   min 0.5 w*w + C sum_i C_i \xi_i
   s.t. x_i * w > rhs_i - \xi_i

   This corresponds to the -z o option. */

void svm_learn_optimization(DOC **docs, double *rhs, long int totdoc,
                            long int totwords,
                            LEARN_PARM *learn_parm,
                            KERNEL_PARM *kernel_parm,
                            KERNEL_CACHE *kernel_cache, MODEL *model,
                            double *alpha)
     /* docs:        Left-hand side of inequalities (x-part) */
     /* rhs:         Right-hand side of inequalities */
     /* totdoc:      Number of examples in docs/label */
     /* totwords:    Number of features (i.e. highest feature index) */
     /* learn_parm:  Learning parameters */
     /* kernel_parm: Kernel parameters */
     /* kernel_cache:Initialized cache of size 1*totdoc, if using a kernel.
                     NULL if linear. */
     /* model:       Returns solution as SV expansion (assumed empty before called) */
     /* alpha:       Start values for the alpha variables or NULL
                     pointer. The new alpha values are returned after
                     optimization if not NULL. Array must be of size
                     totdoc. */
{
  long i,*label;
  long misclassified,upsupvecnum;
  double loss,model_length,example_length;
  double maxdiff,*lin,*a,*c;
  long runtime_start,runtime_end;
  long iterations,maxslackid,svsetnum;
  long *unlabeled,*inconsistent;
  double r_delta_sq=0,r_delta,r_delta_avg;
  long *index,*index2dnum;
  double *weights,*slack,*alphaslack;
  CFLOAT *aicache;  /* buffer to keep one row of hessian */

  TIMING timing_profile;
  SHRINK_STATE shrink_state;

  runtime_start=get_runtime();
  timing_profile.time_kernel=0;
  timing_profile.time_opti=0;
  timing_profile.time_shrink=0;
  timing_profile.time_update=0;
  timing_profile.time_model=0;
  timing_profile.time_check=0;
  timing_profile.time_select=0;
  kernel_cache_statistic=0;

  learn_parm->totwords=totwords;

  /* make sure -n value is reasonable */
  if((learn_parm->svm_newvarsinqp < 2)
     || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
    learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
  }

  init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK);

  label = (long *)my_malloc(sizeof(long)*totdoc);
  unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
  inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
  c = (double *)my_malloc(sizeof(double)*totdoc);
  a = (double *)my_malloc(sizeof(double)*totdoc);
  lin = (double *)my_malloc(sizeof(double)*totdoc);
  learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc);
  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2));
  model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2));
  model->index = (long *)my_malloc(sizeof(long)*(totdoc+2));

  model->at_upper_bound=0;
  model->b=0;
  model->supvec[0]=0;  /* element 0 reserved and empty for now */
  model->alpha[0]=0;
  model->lin_weights=NULL;
  model->totwords=totwords;
  model->totdoc=totdoc;
  model->kernel_parm=(*kernel_parm);
  model->sv_num=1;
  model->loo_error=-1;
  model->loo_recall=-1;
  model->loo_precision=-1;
  model->xa_error=-1;
  model->xa_recall=-1;
  model->xa_precision=-1;

  r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
  r_delta_sq=r_delta*r_delta;

  r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm);
  if(learn_parm->svm_c == 0.0) {  /* default value for C */
    learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg);
    if(verbosity>=1)
      printf("Setting default regularization parameter C=%.4f\n",
             learn_parm->svm_c);
  }

  learn_parm->biased_hyperplane=0; /* learn an unbiased hyperplane */

  learn_parm->eps=0.0;  /* No margin, unless explicitly handcoded
                           in the right-hand side in the training
                           set. */
  for(i=0;i<totdoc;i++) {    /* various inits */
    docs[i]->docnum=i;
    a[i]=0;
    lin[i]=0;
    c[i]=rhs[i];             /* set right-hand side */
    unlabeled[i]=0;
    inconsistent[i]=0;
    learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
      docs[i]->costfactor;
    label[i]=1;
  }
  if(learn_parm->sharedslack)  /* if shared slacks are used, they must */
    for(i=0;i<totdoc;i++)      /* be used on every constraint */
      if(!docs[i]->slackid) {
        perror("Error: Missing shared slacks definitions in some of the examples.");
        exit(0);
      }

  /* compute starting state for initial alpha values */
  if(alpha) {
    if(verbosity>=1) {
      printf("Computing starting state..."); fflush(stdout);
    }
    index = (long *)my_malloc(sizeof(long)*totdoc);
    index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
    weights=(double *)my_malloc(sizeof(double)*(totwords+1));
    aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
    for(i=0;i<totdoc;i++) {    /* create full index and clip alphas */
      index[i]=1;
      alpha[i]=fabs(alpha[i]);
      if(alpha[i]<0) alpha[i]=0;
      if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
    }
    if(kernel_parm->kernel_type != LINEAR) {
      for(i=0;i<totdoc;i++)    /* fill kernel cache with unbounded SV */
        if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
           && (kernel_cache_space_available(kernel_cache)))
          cache_kernel_row(kernel_cache,docs,i,kernel_parm);
      for(i=0;i<totdoc;i++)    /* fill rest of kernel cache with bounded SV */
        if((alpha[i]==learn_parm->svm_cost[i])
           && (kernel_cache_space_available(kernel_cache)))
          cache_kernel_row(kernel_cache,docs,i,kernel_parm);
    }
    (void)compute_index(index,totdoc,index2dnum);
    update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
                            totwords,kernel_parm,kernel_cache,lin,aicache,
                            weights);
    (void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c,
                              learn_parm,index2dnum,index2dnum,model);
    for(i=0;i<totdoc;i++) {    /* copy initial alphas */
      a[i]=alpha[i];
    }
    free(index);
    free(index2dnum);
    free(weights);
    free(aicache);
    if(verbosity>=1) {
      printf("done.\n"); fflush(stdout);
    }
  }

  /* removing inconsistent does not work for general optimization problem */
  if(learn_parm->remove_inconsistent) {
    learn_parm->remove_inconsistent = 0;
    printf("'remove inconsistent' not available in this mode. Switching option off!"); fflush(stdout);
  }

  /* caching makes no sense for linear kernel */
  if(kernel_parm->kernel_type == LINEAR) {
    kernel_cache = NULL;
  }

  if(verbosity==1) {
    printf("Optimizing"); fflush(stdout);
  }

  /* train the svm */
  if(learn_parm->sharedslack)
    iterations=optimize_to_convergence_sharedslack(docs,label,totdoc,
                                     totwords,learn_parm,kernel_parm,
                                     kernel_cache,&shrink_state,model,
                                     a,lin,c,&timing_profile,
                                     &maxdiff);
  else
    iterations=optimize_to_convergence(docs,label,totdoc,
                                     totwords,learn_parm,kernel_parm,
                                     kernel_cache,&shrink_state,model,
                                     inconsistent,unlabeled,
                                     a,lin,c,&timing_profile,
                                     &maxdiff,(long)-1,(long)1);

  if(verbosity>=1) {
    if(verbosity==1) printf("done. (%ld iterations)\n",iterations);

    misclassified=0;
    for(i=0;(i<totdoc);i++) {  /* get final statistic */
      if((lin[i]-model->b)*(double)label[i] <= 0.0)
        misclassified++;
    }

    printf("Optimization finished (maxdiff=%.5f).\n",maxdiff);

    runtime_end=get_runtime();
    if(verbosity>=2) {
      printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
        ((float)runtime_end-(float)runtime_start)/100.0,
        (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start),
        (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start));
    }
    else {
      printf("Runtime in cpu-seconds: %.2f\n",
             (runtime_end-runtime_start)/100.0);
    }
  }
  if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
    loss=0;
    model_length=0;
    for(i=0;i<totdoc;i++) {
      if((lin[i]-model->b)*(double)label[i] < c[i]-learn_parm->epsilon_crit)
        loss+=c[i]-(lin[i]-model->b)*(double)label[i];
      model_length+=a[i]*label[i]*lin[i];
    }
    model_length=sqrt(model_length);
    fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
  }

  if(learn_parm->sharedslack) {
    index = (long *)my_malloc(sizeof(long)*totdoc);
    index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
    maxslackid=0;
    for(i=0;i<totdoc;i++) {    /* create full index */
      index[i]=1;
      if(maxslackid<docs[i]->slackid)
        maxslackid=docs[i]->slackid;
    }
    (void)compute_index(index,totdoc,index2dnum);
    slack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
    alphaslack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
    for(i=0;i<=maxslackid;i++) {  /* init shared slacks */
      slack[i]=0;
      alphaslack[i]=0;
    }
    compute_shared_slacks(docs,label,a,lin,c,index2dnum,learn_parm,
                          slack,alphaslack);
    loss=0;
    model->at_upper_bound=0;
    svsetnum=0;
    for(i=0;i<=maxslackid;i++) {  /* sum up loss and count SV sets */
      loss+=slack[i];
      if(alphaslack[i] > (learn_parm->svm_c - learn_parm->epsilon_a))
        model->at_upper_bound++;
      if(alphaslack[i] > learn_parm->epsilon_a)
        svsetnum++;
    }
    free(index);
    free(index2dnum);
    free(slack);
    free(alphaslack);
  }

  if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
    if(learn_parm->sharedslack) {
      printf("Number of SV: %ld\n",
             model->sv_num-1);
      printf("Number of non-zero slack variables: %ld (out of %ld)\n",
             model->at_upper_bound,svsetnum);
      fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
    }
    else {
      upsupvecnum=0;
      for(i=1;i<model->sv_num;i++) {
        if(fabs(model->alpha[i]) >=
           (learn_parm->svm_cost[(model->supvec[i])->docnum]-
            learn_parm->epsilon_a))
          upsupvecnum++;
      }
      printf("Number of SV: %ld (including %ld at upper bound)\n",
             model->sv_num-1,upsupvecnum);
bound)\n", wolffd@0: model->sv_num-1,upsupvecnum); wolffd@0: fprintf(stdout,"L1 loss: loss=%.5f\n",loss); wolffd@0: } wolffd@0: example_length=estimate_sphere(model,kernel_parm); wolffd@0: fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n", wolffd@0: length_of_longest_document_vector(docs,totdoc,kernel_parm)); wolffd@0: } wolffd@0: if(verbosity>=1) { wolffd@0: printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic); wolffd@0: } wolffd@0: wolffd@0: if(alpha) { wolffd@0: for(i=0;ialphafile[0]) wolffd@0: write_alphas(learn_parm->alphafile,a,label,totdoc); wolffd@0: wolffd@0: shrink_state_cleanup(&shrink_state); wolffd@0: free(label); wolffd@0: free(unlabeled); wolffd@0: free(inconsistent); wolffd@0: free(c); wolffd@0: free(a); wolffd@0: free(lin); wolffd@0: free(learn_parm->svm_cost); wolffd@0: } wolffd@0: wolffd@0: wolffd@0: long optimize_to_convergence(DOC **docs, long int *label, long int totdoc, wolffd@0: long int totwords, LEARN_PARM *learn_parm, wolffd@0: KERNEL_PARM *kernel_parm, wolffd@0: KERNEL_CACHE *kernel_cache, wolffd@0: SHRINK_STATE *shrink_state, MODEL *model, wolffd@0: long int *inconsistent, long int *unlabeled, wolffd@0: double *a, double *lin, double *c, wolffd@0: TIMING *timing_profile, double *maxdiff, wolffd@0: long int heldout, long int retrain) wolffd@0: /* docs: Training vectors (x-part) */ wolffd@0: /* label: Training labels/value (y-part, zero if test example for wolffd@0: transduction) */ wolffd@0: /* totdoc: Number of examples in docs/label */ wolffd@0: /* totwords: Number of features (i.e. highest feature index) */ wolffd@0: /* laern_parm: Learning paramenters */ wolffd@0: /* kernel_parm: Kernel paramenters */ wolffd@0: /* kernel_cache: Initialized/partly filled Cache, if using a kernel. wolffd@0: NULL if linear. 
long optimize_to_convergence(DOC **docs, long int *label, long int totdoc,
                             long int totwords, LEARN_PARM *learn_parm,
                             KERNEL_PARM *kernel_parm,
                             KERNEL_CACHE *kernel_cache,
                             SHRINK_STATE *shrink_state, MODEL *model,
                             long int *inconsistent, long int *unlabeled,
                             double *a, double *lin, double *c,
                             TIMING *timing_profile, double *maxdiff,
                             long int heldout, long int retrain)
     /* docs:         Training vectors (x-part) */
     /* label:        Training labels/value (y-part, zero if test example for
                      transduction) */
     /* totdoc:       Number of examples in docs/label */
     /* totwords:     Number of features (i.e. highest feature index) */
     /* learn_parm:   Learning parameters */
     /* kernel_parm:  Kernel parameters */
     /* kernel_cache: Initialized/partly filled cache, if using a kernel.
                      NULL if linear. */
     /* shrink_state: State of active variables */
     /* model:        Returns learning result */
     /* inconsistent: examples thrown out as inconsistent */
     /* unlabeled:    test examples for transduction */
     /* a:            alphas */
     /* lin:          linear component of gradient */
     /* c:            right hand side of inequalities (margin) */
     /* maxdiff:      returns maximum violation of KT-conditions */
     /* heldout:      marks held-out example for leave-one-out (or -1) */
     /* retrain:      selects training mode (1=regular / 2=holdout) */
{
  long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink;
  long inconsistentnum,choosenum,already_chosen=0,iteration;
  long misclassified,supvecnum=0,*active2dnum,inactivenum;
  long *working2dnum,*selexam;
  long activenum;
  double criterion,eq;
  double *a_old;
  long t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */
  long transductcycle;
  long transduction;
  double epsilon_crit_org;
  double bestmaxdiff;
  long bestmaxdiffiter,terminate;

  double *selcrit;  /* buffer for sorting */
  CFLOAT *aicache;  /* buffer to keep one row of hessian */
  double *weights;  /* buffer for weight vector in linear case */
  QP qp;            /* buffer for one quadratic program */

  epsilon_crit_org=learn_parm->epsilon_crit; /* save org */
  if(kernel_parm->kernel_type == LINEAR) {
    learn_parm->epsilon_crit=2.0;
    kernel_cache=NULL;   /* caching makes no sense for linear kernel */
  }
  learn_parm->epsilon_shrink=2;
  (*maxdiff)=1;

  learn_parm->totwords=totwords;

  chosen = (long *)my_malloc(sizeof(long)*totdoc);
  last_suboptimal_at = (long *)my_malloc(sizeof(long)*totdoc);
  key = (long *)my_malloc(sizeof(long)*(totdoc+11));
  selcrit = (double *)my_malloc(sizeof(double)*totdoc);
  selexam = (long *)my_malloc(sizeof(long)*totdoc);
  a_old = (double *)my_malloc(sizeof(double)*totdoc);
  aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
  working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
  active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
  qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
  qp.opt_ce0 = (double *)my_malloc(sizeof(double));
  qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize
                                 *learn_parm->svm_maxqpsize);
  qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
  qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
  qp.opt_low=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
  qp.opt_up=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
  weights=(double *)my_malloc(sizeof(double)*(totwords+1));

  choosenum=0;
  inconsistentnum=0;
  transductcycle=0;
  transduction=0;
  if(!retrain) retrain=1;
  iteration=1;
  bestmaxdiffiter=1;
  bestmaxdiff=999999999;
  terminate=0;

  if(kernel_cache) {
    kernel_cache->time=iteration;  /* for lru cache */
    kernel_cache_reset_lru(kernel_cache);
  }

  for(i=0;i<totdoc;i++) {    /* various inits */
    chosen[i]=0;
    a_old[i]=a[i];
    last_suboptimal_at[i]=1;
    if(inconsistent[i])
      inconsistentnum++;
    if(unlabeled[i]) {
      transduction=1;
    }
  }
  activenum=compute_index(shrink_state->active,totdoc,active2dnum);
  inactivenum=totdoc-activenum;
  clear_index(working2dnum);

  /* repeat this loop until we have convergence */
  for(;retrain && (!terminate);iteration++) {

    if(kernel_cache)
      kernel_cache->time=iteration;  /* for lru cache */
    if(verbosity>=2) {
      printf(
        "Iteration %ld: ",iteration); fflush(stdout);
    }
    else if(verbosity==1) {
      printf("."); fflush(stdout);
    }

    if(verbosity>=2) t0=get_runtime();
    if(verbosity>=3) {
      printf("\nSelecting working set... "); fflush(stdout);
    }

    if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize)
      learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;

    i=0;
    for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */
      if((chosen[j]>=(learn_parm->svm_maxqpsize/
                      minl(learn_parm->svm_maxqpsize,
                           learn_parm->svm_newvarsinqp)))
         || (inconsistent[j])
         || (j == heldout)) {
        chosen[j]=0;
        choosenum--;
      }
      else {
        chosen[j]++;
        working2dnum[i++]=j;
      }
    }
    working2dnum[i]=-1;

    if(retrain == 2) {
      choosenum=0;
      for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* fully clear working set */
        chosen[j]=0;
      }
      clear_index(working2dnum);
      for(i=0;i<totdoc;i++) { /* set inconsistent examples to zero (-i 1) */
        if((inconsistent[i] || (heldout==i)) && (a[i] != 0.0)) {
          chosen[i]=99999;
          choosenum++;
          a[i]=0;
        }
      }
      if(learn_parm->biased_hyperplane) {
        eq=0;
        for(i=0;i<totdoc;i++) { /* make sure we fulfill equality constraint */
          eq+=a[i]*label[i];
        }
        for(i=0;(i<totdoc) && (fabs(eq) > learn_parm->epsilon_a);i++) {
          if((eq*label[i] > 0) && (a[i] > 0)) {
            chosen[i]=88888;
            choosenum++;
            if((eq*label[i]) > a[i]) {
              eq-=(a[i]*label[i]);
              a[i]=0;
            }
            else {
              a[i]-=(eq*label[i]);
              eq=0;
            }
          }
        }
      }
      compute_index(chosen,totdoc,working2dnum);
    }
    else {   /* select working set according to steepest gradient */
      if(iteration % 101) {
        already_chosen=0;
        if((minl(learn_parm->svm_newvarsinqp,
                 learn_parm->svm_maxqpsize-choosenum)>=4)
           && (kernel_parm->kernel_type != LINEAR)) {
          /* select part of the working set from cache */
          already_chosen=select_next_qp_subproblem_grad(
                              label,unlabeled,a,lin,c,totdoc,
                              (long)(minl(learn_parm->svm_maxqpsize-choosenum,
                                          learn_parm->svm_newvarsinqp)
                                     /2),
                              learn_parm,inconsistent,active2dnum,
                              working2dnum,selcrit,selexam,kernel_cache,1,
                              key,chosen);
          choosenum+=already_chosen;
        }
        choosenum+=select_next_qp_subproblem_grad(
                              label,unlabeled,a,lin,c,totdoc,
                              minl(learn_parm->svm_maxqpsize-choosenum,
                                   learn_parm->svm_newvarsinqp-already_chosen),
                              learn_parm,inconsistent,active2dnum,
                              working2dnum,selcrit,selexam,kernel_cache,0,key,
                              chosen);
      }
      else { /* once in a while, select a somewhat random working set
                to get unlocked of infinite loops due to numerical
                inaccuracies in the core qp-solver */
        choosenum+=select_next_qp_subproblem_rand(
                              label,unlabeled,a,lin,c,totdoc,
                              minl(learn_parm->svm_maxqpsize-choosenum,
                                   learn_parm->svm_newvarsinqp),
                              learn_parm,inconsistent,active2dnum,
                              working2dnum,selcrit,selexam,kernel_cache,key,
                              chosen,iteration);
      }
    }
    if(verbosity>=2) {
      printf(" %ld vectors chosen\n",choosenum); fflush(stdout);
    }

    if(verbosity>=2) t1=get_runtime();

    if(kernel_cache)
      cache_multiple_kernel_rows(kernel_cache,docs,working2dnum,
                                 choosenum,kernel_parm);

    if(verbosity>=2) t2=get_runtime();
    if(retrain != 2) {
      optimize_svm(docs,label,unlabeled,inconsistent,0.0,chosen,active2dnum,
                   model,totdoc,working2dnum,choosenum,a,lin,c,learn_parm,
                   aicache,kernel_parm,&qp,&epsilon_crit_org);
    }

    if(verbosity>=2) t3=get_runtime();
    update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc,
                            totwords,kernel_parm,kernel_cache,lin,aicache,
                            weights);

    if(verbosity>=2) t4=get_runtime();
    supvecnum=calculate_svm_model(docs,label,unlabeled,lin,a,a_old,c,
                                  learn_parm,working2dnum,active2dnum,model);

    if(verbosity>=2) t5=get_runtime();

    /* The following computation of the objective function works only */
    /* relative to the active variables */
    if(verbosity>=3) {
      criterion=compute_objective_function(a,lin,c,learn_parm->eps,label,
                                           active2dnum);
      printf("Objective function (over active variables): %.16f\n",criterion);
      fflush(stdout);
    }

    for(jj=0;(i=working2dnum[jj])>=0;jj++) {
      a_old[i]=a[i];
    }

    if(retrain == 2) {  /* reset inconsistent unlabeled examples */
      for(i=0;(i<totdoc);i++) {
        if(inconsistent[i] && unlabeled[i]) {
          inconsistent[i]=0;
          label[i]=0;
        }
      }
    }

    retrain=check_optimality(model,label,unlabeled,a,lin,c,totdoc,learn_parm,
                             maxdiff,epsilon_crit_org,&misclassified,
                             inconsistent,active2dnum,last_suboptimal_at,
                             iteration,kernel_parm);

    if(verbosity>=2) {
      t6=get_runtime();
      timing_profile->time_select+=t1-t0;
      timing_profile->time_kernel+=t2-t1;
      timing_profile->time_opti+=t3-t2;
      timing_profile->time_update+=t4-t3;
      timing_profile->time_model+=t5-t4;
      timing_profile->time_check+=t6-t5;
    }

    /* checking whether optimizer got stuck */
    if((*maxdiff) < bestmaxdiff) {
      bestmaxdiff=(*maxdiff);
      bestmaxdiffiter=iteration;
    }
    if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) {
      /* long time no progress? */
      terminate=1;
      retrain=0;
      if(verbosity>=1)
        printf("\nWARNING: Relaxing KT-Conditions due to slow progress! Terminating!\n");
    }

    noshrink=0;
    if((!retrain) && (inactivenum>0)
       && ((!learn_parm->skip_final_opt_check)
           || (kernel_parm->kernel_type == LINEAR))) {
      if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
         || (verbosity>=2)) {
        if(verbosity==1) {
          printf("\n");
        }
        printf(" Checking optimality of inactive variables...");
        fflush(stdout);
      }
      t1=get_runtime();
      reactivate_inactive_examples(label,unlabeled,a,shrink_state,lin,c,totdoc,
                                   totwords,iteration,learn_parm,inconsistent,
                                   docs,kernel_parm,kernel_cache,model,aicache,
                                   weights,maxdiff);
      /* Update to new active variables. */
*/ wolffd@0: activenum=compute_index(shrink_state->active,totdoc,active2dnum); wolffd@0: inactivenum=totdoc-activenum; wolffd@0: /* reset watchdog */ wolffd@0: bestmaxdiff=(*maxdiff); wolffd@0: bestmaxdiffiter=iteration; wolffd@0: /* termination criterion */ wolffd@0: noshrink=1; wolffd@0: retrain=0; wolffd@0: if((*maxdiff) > learn_parm->epsilon_crit) wolffd@0: retrain=1; wolffd@0: timing_profile->time_shrink+=get_runtime()-t1; wolffd@0: if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR)) wolffd@0: || (verbosity>=2)) { wolffd@0: printf("done.\n"); fflush(stdout); wolffd@0: printf(" Number of inactive variables = %ld\n",inactivenum); wolffd@0: } wolffd@0: } wolffd@0: wolffd@0: if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff))) wolffd@0: learn_parm->epsilon_crit=(*maxdiff); wolffd@0: if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) { wolffd@0: learn_parm->epsilon_crit/=2.0; wolffd@0: retrain=1; wolffd@0: noshrink=1; wolffd@0: } wolffd@0: if(learn_parm->epsilon_critepsilon_crit=epsilon_crit_org; wolffd@0: wolffd@0: if(verbosity>=2) { wolffd@0: printf(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n", wolffd@0: supvecnum,model->at_upper_bound,(*maxdiff)); wolffd@0: fflush(stdout); wolffd@0: } wolffd@0: if(verbosity>=3) { wolffd@0: printf("\n"); wolffd@0: } wolffd@0: wolffd@0: if((!retrain) && (transduction)) { wolffd@0: for(i=0;(iactive[i]=1; wolffd@0: } wolffd@0: activenum=compute_index(shrink_state->active,totdoc,active2dnum); wolffd@0: inactivenum=0; wolffd@0: if(verbosity==1) printf("done\n"); wolffd@0: retrain=incorporate_unlabeled_examples(model,label,inconsistent, wolffd@0: unlabeled,a,lin,totdoc, wolffd@0: selcrit,selexam,key, wolffd@0: transductcycle,kernel_parm, wolffd@0: learn_parm); wolffd@0: epsilon_crit_org=learn_parm->epsilon_crit; wolffd@0: if(kernel_parm->kernel_type == LINEAR) wolffd@0: learn_parm->epsilon_crit=1; wolffd@0: transductcycle++; wolffd@0: /* reset watchdog */ wolffd@0: bestmaxdiff=(*maxdiff); wolffd@0: bestmaxdiffiter=iteration; wolffd@0: } wolffd@0: else if(((iteration % 10) == 0) && (!noshrink)) { wolffd@0: activenum=shrink_problem(docs,learn_parm,shrink_state,kernel_parm, wolffd@0: active2dnum,last_suboptimal_at,iteration,totdoc, wolffd@0: maxl((long)(activenum/10), wolffd@0: maxl((long)(totdoc/500),100)), wolffd@0: a,inconsistent); wolffd@0: inactivenum=totdoc-activenum; wolffd@0: if((kernel_cache) wolffd@0: && (supvecnum>kernel_cache->max_elems) wolffd@0: && ((kernel_cache->activenum-activenum)>maxl((long)(activenum/10),500))) { wolffd@0: kernel_cache_shrink(kernel_cache,totdoc, wolffd@0: minl((kernel_cache->activenum-activenum), wolffd@0: (kernel_cache->activenum-supvecnum)), wolffd@0: shrink_state->active); wolffd@0: } wolffd@0: } wolffd@0: wolffd@0: if((!retrain) && learn_parm->remove_inconsistent) { wolffd@0: if(verbosity>=1) { wolffd@0: printf(" Moving training errors to inconsistent examples..."); wolffd@0: fflush(stdout); wolffd@0: } wolffd@0: if(learn_parm->remove_inconsistent == 1) { wolffd@0: retrain=identify_inconsistent(a,label,unlabeled,totdoc,learn_parm, wolffd@0: &inconsistentnum,inconsistent); wolffd@0: } wolffd@0: else if(learn_parm->remove_inconsistent == 2) { wolffd@0: retrain=identify_misclassified(lin,label,unlabeled,totdoc, wolffd@0: model,&inconsistentnum,inconsistent); wolffd@0: } wolffd@0: else if(learn_parm->remove_inconsistent == 3) { wolffd@0: retrain=identify_one_misclassified(lin,label,unlabeled,totdoc, wolffd@0: model,&inconsistentnum,inconsistent); wolffd@0: } wolffd@0: if(retrain) { 
          if(kernel_parm->kernel_type == LINEAR) { /* reinit shrinking */
            learn_parm->epsilon_crit=2.0;
          }
        }
        if(verbosity>=1) {
          printf("done.\n");
          if(retrain) {
            printf(" Now %ld inconsistent examples.\n",inconsistentnum);
          }
        }
      }
  } /* end of loop */

  free(chosen);
  free(last_suboptimal_at);
  free(key);
  free(selcrit);
  free(selexam);
  free(a_old);
  free(aicache);
  free(working2dnum);
  free(active2dnum);
  free(qp.opt_ce);
  free(qp.opt_ce0);
  free(qp.opt_g);
  free(qp.opt_g0);
  free(qp.opt_xinit);
  free(qp.opt_low);
  free(qp.opt_up);
  free(weights);

  learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */
  model->maxdiff=(*maxdiff);

  return(iteration);
}

long optimize_to_convergence_sharedslack(DOC **docs, long int *label,
                             long int totdoc,
                             long int totwords, LEARN_PARM *learn_parm,
                             KERNEL_PARM *kernel_parm,
                             KERNEL_CACHE *kernel_cache,
                             SHRINK_STATE *shrink_state, MODEL *model,
                             double *a, double *lin, double *c,
                             TIMING *timing_profile, double *maxdiff)
     /* docs: Training vectors (x-part) */
     /* label: Training labels/value (y-part, zero if test example for
               transduction) */
     /* totdoc: Number of examples in docs/label */
     /* totwords: Number of features (i.e. highest feature index) */
     /* learn_parm: Learning parameters */
     /* kernel_parm: Kernel parameters */
     /* kernel_cache: Initialized/partly filled Cache, if using a kernel.
                      NULL if linear. */
     /* shrink_state: State of active variables */
     /* model: Returns learning result */
     /* a: alphas */
     /* lin: linear component of gradient */
     /* c: right hand side of inequalities (margin) */
     /* maxdiff: returns maximum violation of KT-conditions */
{
  long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink,*unlabeled;
  long *inconsistent,choosenum,already_chosen=0,iteration;
  long misclassified,supvecnum=0,*active2dnum,inactivenum;
  long *working2dnum,*selexam,*ignore;
  long activenum,retrain,maxslackid,slackset,jointstep;
  double criterion,eq_target;
  double *a_old,*alphaslack;
  long t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */
  double epsilon_crit_org,maxsharedviol;
  double bestmaxdiff;
  long bestmaxdiffiter,terminate;

  double *selcrit;  /* buffer for sorting */
  CFLOAT *aicache;  /* buffer to keep one row of hessian */
  double *weights;  /* buffer for weight vector in linear case */
  QP qp;            /* buffer for one quadratic program */
  double *slack;    /* vector of slack variables for optimization with
                       shared slacks */

  epsilon_crit_org=learn_parm->epsilon_crit; /* save org */
  if(kernel_parm->kernel_type == LINEAR) {
    learn_parm->epsilon_crit=2.0;
    kernel_cache=NULL;   /* caching makes no sense for linear kernel */
  }
  learn_parm->epsilon_shrink=2;
  (*maxdiff)=1;

  learn_parm->totwords=totwords;

  chosen = (long *)my_malloc(sizeof(long)*totdoc);
  unlabeled = (long *)my_malloc(sizeof(long)*totdoc);
  inconsistent = (long *)my_malloc(sizeof(long)*totdoc);
  ignore = (long *)my_malloc(sizeof(long)*totdoc);
  key = (long *)my_malloc(sizeof(long)*(totdoc+11));
  selcrit = (double *)my_malloc(sizeof(double)*totdoc);
  selexam = (long *)my_malloc(sizeof(long)*totdoc);
  a_old = (double *)my_malloc(sizeof(double)*totdoc);
  aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
  working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
  active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
  qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
  qp.opt_ce0 = (double *)my_malloc(sizeof(double));
  qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize
                                 *learn_parm->svm_maxqpsize);
  qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
  qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
  qp.opt_low=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
  qp.opt_up=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
  weights=(double *)my_malloc(sizeof(double)*(totwords+1));
  maxslackid=0;
  for(i=0;i<totdoc;i++) {    /* determine size of slack array */
    if(maxslackid<docs[i]->slackid)
      maxslackid=docs[i]->slackid;
  }
  slack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
  alphaslack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
  last_suboptimal_at = (long *)my_malloc(sizeof(long)*(maxslackid+1));
  for(i=0;i<=maxslackid;i++) {    /* init shared slacks */
    slack[i]=0;
    alphaslack[i]=0;
    last_suboptimal_at[i]=1;
  }
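  /* Shared-slack bookkeeping: all examples i with the same
     docs[i]->slackid share one slack variable. For each slack set,
     slack[] caches the current value of that shared slack and
     alphaslack[] the sum of the alphas of its members, which must stay
     below svm_c (see the clipping step after optimize_svm in the main
     loop). Both arrays are indexed by slackid, hence size maxslackid+1. */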
  choosenum=0;
  retrain=1;
  iteration=1;
  bestmaxdiffiter=1;
  bestmaxdiff=999999999;
  terminate=0;

  if(kernel_cache) {
    kernel_cache->time=iteration;  /* for lru cache */
    kernel_cache_reset_lru(kernel_cache);
  }

  for(i=0;i<totdoc;i++) {    /* various inits */
    chosen[i]=0;
    unlabeled[i]=0;
    inconsistent[i]=0;
    ignore[i]=0;
    a_old[i]=a[i];
  }
  activenum=compute_index(shrink_state->active,totdoc,active2dnum);
  inactivenum=totdoc-activenum;
  clear_index(working2dnum);

  /* call to init slack and alphaslack */
  compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm,
                        slack,alphaslack);

  /* repeat this loop until we have convergence */
  for(;retrain && (!terminate);iteration++) {

    if(kernel_cache)
      kernel_cache->time=iteration;  /* for lru cache */
    if(verbosity>=2) {
      printf("Iteration %ld: ",iteration); fflush(stdout);
    }
    else if(verbosity==1) {
      printf("."); fflush(stdout);
    }

    if(verbosity>=2) t0=get_runtime();
    if(verbosity>=3) {
      printf("\nSelecting working set... "); fflush(stdout);
    }

    if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize)
      learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;

    /* select working set according to steepest gradient */
    jointstep=0;
    eq_target=0;
    if(iteration % 101) {
      slackset=select_next_qp_slackset(docs,label,a,lin,slack,alphaslack,c,
                                       learn_parm,active2dnum,&maxsharedviol);
      if((iteration % 2)
         || (!slackset) || (maxsharedviol<learn_parm->epsilon_crit)){
        /* do a step with examples from different slack sets */
        if(verbosity >= 2) {
          printf("(i-step)"); fflush(stdout);
        }
        i=0;
        for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear old part of working set */
          if((chosen[j]>=(learn_parm->svm_maxqpsize/
                          minl(learn_parm->svm_maxqpsize,
                               learn_parm->svm_newvarsinqp)))) {
            chosen[j]=0;
            choosenum--;
          }
          else {
            chosen[j]++;
            working2dnum[i++]=j;
          }
        }
        working2dnum[i]=-1;

        already_chosen=0;
        if((minl(learn_parm->svm_newvarsinqp,
                 learn_parm->svm_maxqpsize-choosenum)>=4)
           && (kernel_parm->kernel_type != LINEAR)) {
          /* select part of the working set from cache */
          already_chosen=select_next_qp_subproblem_grad(
                              label,unlabeled,a,lin,c,totdoc,
                              (long)(minl(learn_parm->svm_maxqpsize-choosenum,
                                          learn_parm->svm_newvarsinqp)
                                     /2),
                              learn_parm,inconsistent,active2dnum,
                              working2dnum,selcrit,selexam,kernel_cache,
                              (long)1,key,chosen);
          choosenum+=already_chosen;
        }
        choosenum+=select_next_qp_subproblem_grad(
                              label,unlabeled,a,lin,c,totdoc,
                              minl(learn_parm->svm_maxqpsize-choosenum,
                                   learn_parm->svm_newvarsinqp-already_chosen),
                              learn_parm,inconsistent,active2dnum,
                              working2dnum,selcrit,selexam,kernel_cache,
                              (long)0,key,chosen);
      }
      else { /* do a step with all examples from same slack set */
        if(verbosity >= 2) {
          printf("(j-step on %ld)",slackset); fflush(stdout);
        }
        jointstep=1;
        for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */
          chosen[j]=0;
        }
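        /* j-step: rebuild the working set from the single slack set
           returned by select_next_qp_slackset. eq_target is loaded
           with the alpha mass already in that set and the bias is
           temporarily re-enabled, so the equality constraint of the
           subproblem in effect redistributes that mass among the
           members of the set (examples outside the set are passed via
           the ignore array and excluded). */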
        working2dnum[0]=-1;
        eq_target=alphaslack[slackset];
        for(j=0;j<totdoc;j++) {
          /* for(jj=0;(j=active2dnum[jj])>=0;jj++) { */
          if(docs[j]->slackid != slackset)
            ignore[j]=1;
          else {
            ignore[j]=0;
            learn_parm->svm_cost[j]=learn_parm->svm_c;
            /* printf("Inslackset(%ld,%ld)",j,shrink_state->active[j]); */
          }
        }
        learn_parm->biased_hyperplane=1;
        choosenum=select_next_qp_subproblem_grad(
                              label,unlabeled,a,lin,c,totdoc,
                              learn_parm->svm_maxqpsize,
                              learn_parm,ignore,active2dnum,
                              working2dnum,selcrit,selexam,kernel_cache,
                              (long)0,key,chosen);
        learn_parm->biased_hyperplane=0;
      }
    }
    else { /* once in a while, select a somewhat random working set
              to get unlocked of infinite loops due to numerical
              inaccuracies in the core qp-solver */
      choosenum+=select_next_qp_subproblem_rand(
                              label,unlabeled,a,lin,c,totdoc,
                              minl(learn_parm->svm_maxqpsize-choosenum,
                                   learn_parm->svm_newvarsinqp),
                              learn_parm,inconsistent,active2dnum,
                              working2dnum,selcrit,selexam,kernel_cache,key,
                              chosen,iteration);
    }

    if(verbosity>=2) {
      printf(" %ld vectors chosen\n",choosenum); fflush(stdout);
    }

    if(verbosity>=2) t1=get_runtime();

    if(kernel_cache)
      cache_multiple_kernel_rows(kernel_cache,docs,working2dnum,
                                 choosenum,kernel_parm);

    if(verbosity>=2) t2=get_runtime();
    if(jointstep) learn_parm->biased_hyperplane=1;
    optimize_svm(docs,label,unlabeled,ignore,eq_target,chosen,active2dnum,
                 model,totdoc,working2dnum,choosenum,a,lin,c,learn_parm,
                 aicache,kernel_parm,&qp,&epsilon_crit_org);
    learn_parm->biased_hyperplane=0;

    for(jj=0;(i=working2dnum[jj])>=0;jj++)  /* recompute sums of alphas */
      alphaslack[docs[i]->slackid]+=(a[i]-a_old[i]);
    for(jj=0;(i=working2dnum[jj])>=0;jj++) { /* reduce alpha to fulfill
                                                constraints */
      if(alphaslack[docs[i]->slackid] > learn_parm->svm_c) {
        if(a[i] < (alphaslack[docs[i]->slackid]-learn_parm->svm_c)) {
          alphaslack[docs[i]->slackid]-=a[i];
          a[i]=0;
        }
        else {
          a[i]-=(alphaslack[docs[i]->slackid]-learn_parm->svm_c);
          alphaslack[docs[i]->slackid]=learn_parm->svm_c;
        }
      }
    }
    for(jj=0;(i=active2dnum[jj])>=0;jj++)
      learn_parm->svm_cost[i]=a[i]+(learn_parm->svm_c
                                    -alphaslack[docs[i]->slackid]);

    if(verbosity>=2) t3=get_runtime();
    update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc,
                            totwords,kernel_parm,kernel_cache,lin,aicache,
                            weights);
    compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm,
                          slack,alphaslack);

    if(verbosity>=2) t4=get_runtime();
    supvecnum=calculate_svm_model(docs,label,unlabeled,lin,a,a_old,c,
                                  learn_parm,working2dnum,active2dnum,model);

    if(verbosity>=2) t5=get_runtime();

    /* The following computation of the objective function works only */
    /* relative to the active variables */
    if(verbosity>=3) {
      criterion=compute_objective_function(a,lin,c,learn_parm->eps,label,
                                           active2dnum);
      printf("Objective function (over active variables): %.16f\n",criterion);
      fflush(stdout);
    }
    for(jj=0;(i=working2dnum[jj])>=0;jj++) {
      a_old[i]=a[i];
    }

    retrain=check_optimality_sharedslack(docs,model,label,a,lin,c,
                                         slack,alphaslack,totdoc,learn_parm,
                                         maxdiff,epsilon_crit_org,&misclassified,
                                         active2dnum,last_suboptimal_at,
                                         iteration,kernel_parm);

    if(verbosity>=2) {
      t6=get_runtime();
      timing_profile->time_select+=t1-t0;
      timing_profile->time_kernel+=t2-t1;
      timing_profile->time_opti+=t3-t2;
      timing_profile->time_update+=t4-t3;
      timing_profile->time_model+=t5-t4;
      timing_profile->time_check+=t6-t5;
    }

    /* checking whether optimizer got stuck */
    if((*maxdiff) < bestmaxdiff) {
      bestmaxdiff=(*maxdiff);
      bestmaxdiffiter=iteration;
    }
    if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) {
      /* long time no progress? */
      terminate=1;
      retrain=0;
      if(verbosity>=1)
        printf("\nWARNING: Relaxing KT-Conditions due to slow progress! Terminating!\n");
    }

    noshrink=0;

    if((!retrain) && (inactivenum>0)
       && ((!learn_parm->skip_final_opt_check)
           || (kernel_parm->kernel_type == LINEAR))) {
      if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
         || (verbosity>=2)) {
        if(verbosity==1) {
          printf("\n");
        }
        printf(" Checking optimality of inactive variables...");
        fflush(stdout);
      }
      t1=get_runtime();
      reactivate_inactive_examples(label,unlabeled,a,shrink_state,lin,c,totdoc,
                                   totwords,iteration,learn_parm,inconsistent,
                                   docs,kernel_parm,kernel_cache,model,aicache,
                                   weights,maxdiff);
      /* Update to new active variables. */
      activenum=compute_index(shrink_state->active,totdoc,active2dnum);
      inactivenum=totdoc-activenum;
      /* check optimality, since check in reactivate does not work for
         sharedslacks */
      retrain=check_optimality_sharedslack(docs,model,label,a,lin,c,
                                           slack,alphaslack,totdoc,learn_parm,
                                           maxdiff,epsilon_crit_org,&misclassified,
                                           active2dnum,last_suboptimal_at,
                                           iteration,kernel_parm);

      /* reset watchdog */
      bestmaxdiff=(*maxdiff);
      bestmaxdiffiter=iteration;
      /* termination criterion */
      noshrink=1;
      retrain=0;
      if((*maxdiff) > learn_parm->epsilon_crit)
        retrain=1;
      timing_profile->time_shrink+=get_runtime()-t1;
      if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR))
         || (verbosity>=2)) {
        printf("done.\n"); fflush(stdout);
        printf(" Number of inactive variables = %ld\n",inactivenum);
      }
    }

    if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff)))
      learn_parm->epsilon_crit=(*maxdiff);
    if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) {
      learn_parm->epsilon_crit/=2.0;
      retrain=1;
      noshrink=1;
    }
    if(learn_parm->epsilon_crit<epsilon_crit_org)
      learn_parm->epsilon_crit=epsilon_crit_org;

    if(verbosity>=2) {
      printf(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n",
             supvecnum,model->at_upper_bound,(*maxdiff));
      fflush(stdout);
    }
    if(verbosity>=3) {
      printf("\n");
    }

    if(((iteration % 10) == 0) && (!noshrink)) {
      activenum=shrink_problem(docs,learn_parm,shrink_state,
                               kernel_parm,active2dnum,
                               last_suboptimal_at,iteration,totdoc,
                               maxl((long)(activenum/10),
                                    maxl((long)(totdoc/500),100)),
                               a,inconsistent);
      inactivenum=totdoc-activenum;
      if((kernel_cache)
         && (supvecnum>kernel_cache->max_elems)
         && ((kernel_cache->activenum-activenum)>maxl((long)(activenum/10),500))) {
        kernel_cache_shrink(kernel_cache,totdoc,
                            minl((kernel_cache->activenum-activenum),
                                 (kernel_cache->activenum-supvecnum)),
                            shrink_state->active);
      }
    }

  } /* end of loop */


  free(alphaslack);
  free(slack);
  free(chosen);
  free(unlabeled);
  free(inconsistent);
  free(ignore);
  free(last_suboptimal_at);
  free(key);
  free(selcrit);
  free(selexam);
  free(a_old);
  free(aicache);
  free(working2dnum);
  free(active2dnum);
  free(qp.opt_ce);
  free(qp.opt_ce0);
  free(qp.opt_g);
  free(qp.opt_g0);
  free(qp.opt_xinit);
  free(qp.opt_low);
  free(qp.opt_up);
  free(weights);

  learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */
  model->maxdiff=(*maxdiff);

  return(iteration);
}


double compute_objective_function(double *a, double *lin, double *c,
                                  double eps, long int *label,
                                  long int *active2dnum)
     /* Return value of objective function. */
     /* Works only relative to the active variables! */
{
  long i,ii;
  double criterion;
  /* calculate value of objective function */
  criterion=0;
  for(ii=0;active2dnum[ii]>=0;ii++) {
    i=active2dnum[ii];
    criterion=criterion+(eps-(double)label[i]*c[i])*a[i]+0.5*a[i]*label[i]*lin[i];
  }
  return(criterion);
}

void clear_index(long int *index)
     /* initializes and empties index */
{
  index[0]=-1;
}

void add_to_index(long int *index, long int elem)
     /* appends elem to the index */
{
  register long i;
  for(i=0;index[i] != -1;i++);
  index[i]=elem;
  index[i+1]=-1;
}

long compute_index(long int *binfeature, long int range, long int *index)
     /* create an inverted index of binfeature */
{
  register long i,ii;

  ii=0;
  for(i=0;i<range;i++) {
    if(binfeature[i]) {
      index[ii]=i;
      ii++;
    }
  }
  for(i=0;i<4;i++) {
    index[ii+i]=-1;
  }
  return(ii);
}

void optimize_svm(DOC **docs, long int *label, long int *unlabeled,
                  long int *exclude_from_eq_const, double eq_target,
                  long int *chosen, long int *active2dnum, MODEL *model,
                  long int totdoc, long int *working2dnum, long int varnum,
                  double *a, double *lin, double *c, LEARN_PARM *learn_parm,
                  CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp,
                  double *epsilon_crit_target)
     /* Do optimization on the working set. */
{
    long i;
    double *a_v;

    compute_matrices_for_optimization(docs,label,unlabeled,
                                      exclude_from_eq_const,eq_target,chosen,
                                      active2dnum,working2dnum,model,a,lin,c,
                                      varnum,totdoc,learn_parm,aicache,
                                      kernel_parm,qp);

    if(verbosity>=3) {
      printf("Running optimizer..."); fflush(stdout);
    }
    /* call the qp-subsolver */
    a_v=optimize_qp(qp,epsilon_crit_target,
                    learn_parm->svm_maxqpsize,
                    &(model->b),   /* in case the optimizer gives us */
                                   /* the threshold for free. otherwise */
                                   /* b is calculated in calculate_model. */
                    learn_parm);
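    /* The subproblem handed to the solver is the standard working-set
       QP over the chosen alphas x: minimize 0.5*x'*opt_g*x + opt_g0'*x
       subject to opt_low <= x <= opt_up and, when opt_m==1, the
       equality constraint opt_ce'*x + opt_ce0 = 0 that keeps the full
       problem's equality constraint satisfied. See
       compute_matrices_for_optimization below for how these fields are
       filled in. */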
    if(verbosity>=3) {
      printf("done\n");
    }

    for(i=0;i<varnum;i++) {
      a[working2dnum[i]]=a_v[i];
      /*
      if(a_v[i]<=(0+learn_parm->epsilon_a)) {
        a[working2dnum[i]]=0;
      }
      else if(a_v[i]>=(learn_parm->svm_cost[working2dnum[i]]-learn_parm->epsilon_a)) {
        a[working2dnum[i]]=learn_parm->svm_cost[working2dnum[i]];
      }
      */
    }
}

void compute_matrices_for_optimization(DOC **docs, long int *label,
          long int *unlabeled, long *exclude_from_eq_const, double eq_target,
          long int *chosen, long int *active2dnum,
          long int *key, MODEL *model, double *a, double *lin, double *c,
          long int varnum, long int totdoc, LEARN_PARM *learn_parm,
          CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp)
{
  register long ki,kj,i,j;
  register double kernel_temp;

  if(verbosity>=3) {
    fprintf(stdout,"Computing qp-matrices (type %ld kernel [degree %ld, rbf_gamma %f, coef_lin %f, coef_const %f])...",kernel_parm->kernel_type,kernel_parm->poly_degree,kernel_parm->rbf_gamma,kernel_parm->coef_lin,kernel_parm->coef_const);
    fflush(stdout);
  }

  qp->opt_n=varnum;
  qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */
  for(j=1;j<model->sv_num;j++) { /* start at 1 */
    if((!chosen[(model->supvec[j])->docnum])
       && (!exclude_from_eq_const[(model->supvec[j])->docnum])) {
      qp->opt_ce0[0]+=model->alpha[j];
    }
  }
  if(learn_parm->biased_hyperplane)
    qp->opt_m=1;
  else
    qp->opt_m=0;  /* eq-constraint will be ignored */

  /* init linear part of objective function */
  for(i=0;i<varnum;i++) {
    qp->opt_g0[i]=lin[key[i]];
  }

  for(i=0;i<varnum;i++) {
    ki=key[i];

    /* Compute the matrix for equality constraints */
    qp->opt_ce[i]=label[ki];
    qp->opt_low[i]=0;
    qp->opt_up[i]=learn_parm->svm_cost[ki];

    kernel_temp=(double)kernel(kernel_parm,docs[ki],docs[ki]);
    /* compute linear part of objective function */
    qp->opt_g0[i]-=(kernel_temp*a[ki]*(double)label[ki]);
    /* compute quadratic part of objective function */
    qp->opt_g[varnum*i+i]=kernel_temp;
    for(j=i+1;j<varnum;j++) {
      kj=key[j];
      kernel_temp=(double)kernel(kernel_parm,docs[ki],docs[kj]);
      /* compute linear part of objective function */
      qp->opt_g0[i]-=(kernel_temp*a[kj]*(double)label[kj]);
      qp->opt_g0[j]-=(kernel_temp*a[ki]*(double)label[ki]);
      /* compute quadratic part of objective function */
      qp->opt_g[varnum*i+j]=(double)label[ki]*(double)label[kj]*kernel_temp;
      qp->opt_g[varnum*j+i]=(double)label[ki]*(double)label[kj]*kernel_temp;
    }

    if(verbosity>=3) {
      if(i % 20 == 0) {
        fprintf(stdout,"%ld..",i); fflush(stdout);
      }
    }
  }

  for(i=0;i<varnum;i++) {
    /* assure starting at feasible point */
    qp->opt_xinit[i]=a[key[i]];
    /* set linear part of objective function */
    qp->opt_g0[i]=(learn_parm->eps-(double)label[key[i]]*c[key[i]])+qp->opt_g0[i]*(double)label[key[i]];
  }

  if(verbosity>=3) {
    fprintf(stdout,"done\n");
  }
}

long calculate_svm_model(DOC **docs, long int *label, long int *unlabeled,
                         double *lin, double *a, double *a_old, double *c,
                         LEARN_PARM *learn_parm, long int *working2dnum,
                         long int *active2dnum, MODEL *model)
     /* Compute decision function based on current values */
     /* of alpha. */
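     /* The model built up here represents the decision function
          f(x) = sum_j model->alpha[j]*K(model->supvec[j],x) - model->b
        where model->alpha[j] stores a[i]*label[i] for the
        corresponding support vector and element 0 of supvec/alpha
        stays unused. */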
*/ wolffd@0: { wolffd@0: long i,ii,pos,b_calculated=0,first_low,first_high; wolffd@0: double ex_c,b_temp,b_low,b_high; wolffd@0: wolffd@0: if(verbosity>=3) { wolffd@0: printf("Calculating model..."); fflush(stdout); wolffd@0: } wolffd@0: wolffd@0: if(!learn_parm->biased_hyperplane) { wolffd@0: model->b=0; wolffd@0: b_calculated=1; wolffd@0: } wolffd@0: wolffd@0: for(ii=0;(i=working2dnum[ii])>=0;ii++) { wolffd@0: if((a_old[i]>0) && (a[i]==0)) { /* remove from model */ wolffd@0: pos=model->index[i]; wolffd@0: model->index[i]=-1; wolffd@0: (model->sv_num)--; wolffd@0: model->supvec[pos]=model->supvec[model->sv_num]; wolffd@0: model->alpha[pos]=model->alpha[model->sv_num]; wolffd@0: model->index[(model->supvec[pos])->docnum]=pos; wolffd@0: } wolffd@0: else if((a_old[i]==0) && (a[i]>0)) { /* add to model */ wolffd@0: model->supvec[model->sv_num]=docs[i]; wolffd@0: model->alpha[model->sv_num]=a[i]*(double)label[i]; wolffd@0: model->index[i]=model->sv_num; wolffd@0: (model->sv_num)++; wolffd@0: } wolffd@0: else if(a_old[i]==a[i]) { /* nothing to do */ wolffd@0: } wolffd@0: else { /* just update alpha */ wolffd@0: model->alpha[model->index[i]]=a[i]*(double)label[i]; wolffd@0: } wolffd@0: wolffd@0: ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; wolffd@0: if((a_old[i]>=ex_c) && (a[i]at_upper_bound)--; wolffd@0: } wolffd@0: else if((a_old[i]=ex_c)) { wolffd@0: (model->at_upper_bound)++; wolffd@0: } wolffd@0: wolffd@0: if((!b_calculated) wolffd@0: && (a[i]>learn_parm->epsilon_a) && (a[i]b=((double)label[i]*learn_parm->eps-c[i]+lin[i]); wolffd@0: /* model->b=(-(double)label[i]+lin[i]); */ wolffd@0: b_calculated=1; wolffd@0: } wolffd@0: } wolffd@0: wolffd@0: /* No alpha in the working set not at bounds, so b was not wolffd@0: calculated in the usual way. The following handles this special wolffd@0: case. 
  if(learn_parm->biased_hyperplane
     && (!b_calculated)
     && (model->sv_num-1 == model->at_upper_bound)) {
    first_low=1;
    first_high=1;
    b_low=0;
    b_high=0;
    for(ii=0;(i=active2dnum[ii])>=0;ii++) {
      ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
      if(a[i]<ex_c) {
        if(label[i]>0)  {
          b_temp=-(learn_parm->eps-c[i]+lin[i]);
          if((b_temp>b_low) || (first_low)) {
            b_low=b_temp;
            first_low=0;
          }
        }
        else {
          b_temp=-(-learn_parm->eps-c[i]+lin[i]);
          if((b_temp<b_high) || (first_high)) {
            b_high=b_temp;
            first_high=0;
          }
        }
      }
      else {
        if(label[i]<0)  {
          b_temp=-(-learn_parm->eps-c[i]+lin[i]);
          if((b_temp>b_low) || (first_low)) {
            b_low=b_temp;
            first_low=0;
          }
        }
        else {
          b_temp=-(learn_parm->eps-c[i]+lin[i]);
          if((b_temp<b_high) || (first_high)) {
            b_high=b_temp;
            first_high=0;
          }
        }
      }
    }
    if(first_high) {
      model->b=-b_low;
    }
    else if(first_low) {
      model->b=-b_high;
    }
    else {
      model->b=-(b_high+b_low)/2.0;  /* select b as the middle of range */
      /* printf("\nb_low=%f, b_high=%f,b=%f\n",b_low,b_high,model->b); */
    }
  }

  if(verbosity>=3) {
    printf("done\n"); fflush(stdout);
  }

  return(model->sv_num-1); /* have to subtract one, since element 0 is empty */
}

long check_optimality(MODEL *model, long int *label, long int *unlabeled,
                      double *a, double *lin, double *c, long int totdoc,
                      LEARN_PARM *learn_parm, double *maxdiff,
                      double epsilon_crit_org, long int *misclassified,
                      long int *inconsistent, long int *active2dnum,
                      long int *last_suboptimal_at,
                      long int iteration, KERNEL_PARM *kernel_parm)
     /* Check KT-conditions */
{
  long i,ii,retrain;
  double dist,ex_c,target;

  if(kernel_parm->kernel_type == LINEAR) {  /* be optimistic */
    learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org;
  }
  else {  /* be conservative */
    learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3;
  }
  retrain=0;
  (*maxdiff)=0;
  (*misclassified)=0;
  for(ii=0;(i=active2dnum[ii])>=0;ii++) {
    if((!inconsistent[i]) && label[i]) {
      dist=(lin[i]-model->b)*(double)label[i];/* 'distance' from
                                                 hyperplane*/
      target=-(learn_parm->eps-(double)label[i]*c[i]);
      ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
      if(dist <= 0) {
        (*misclassified)++;  /* does not work due to deactivation of var */
      }
      if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
        if((dist-target)>(*maxdiff))  /* largest violation */
          (*maxdiff)=dist-target;
      }
      else if((a[i]<ex_c) && (dist < target)) {
        if((target-dist)>(*maxdiff))  /* largest violation */
          (*maxdiff)=target-dist;
      }
      /* Count how long a variable was at lower/upper bound (and optimal).*/
      /* Variables, which were at the bound and optimal for a long */
      /* time are unlikely to become support vectors. In case our */
      /* cache is filled up, those variables are excluded to save */
      /* kernel evaluations. (See chapter 'Shrinking').*/
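      /* last_suboptimal_at[i] holds the most recent iteration in which
         variable i was either free (not at a bound) or looked
         suboptimal at its bound within epsilon_shrink. A variable
         whose entry has not been refreshed for svm_iter_to_shrink
         iterations is a candidate for removal in shrink_problem. */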
      if((a[i]>(learn_parm->epsilon_a))
         && (a[i]<ex_c)) {
        last_suboptimal_at[i]=iteration;         /* not at bound */
      }
      else if((a[i]<=(learn_parm->epsilon_a))
              && (dist < (target+learn_parm->epsilon_shrink))) {
        last_suboptimal_at[i]=iteration;         /* not likely optimal */
      }
      else if((a[i]>=ex_c)
              && (dist > (target-learn_parm->epsilon_shrink)))  {
        last_suboptimal_at[i]=iteration;         /* not likely optimal */
      }
    }
  }
  /* termination criterion */
  if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) {
    retrain=1;
  }
  return(retrain);
}

long check_optimality_sharedslack(DOC **docs, MODEL *model, long int *label,
                      double *a, double *lin, double *c, double *slack,
                      double *alphaslack,
                      long int totdoc,
                      LEARN_PARM *learn_parm, double *maxdiff,
                      double epsilon_crit_org, long int *misclassified,
                      long int *active2dnum,
                      long int *last_suboptimal_at,
                      long int iteration, KERNEL_PARM *kernel_parm)
     /* Check KT-conditions */
{
  long i,ii,retrain;
  double dist,ex_c=0,target;

  if(kernel_parm->kernel_type == LINEAR) {  /* be optimistic */
    learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org;
  }
  else {  /* be conservative */
    learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3;
  }

  retrain=0;
  (*maxdiff)=0;
  (*misclassified)=0;
  for(ii=0;(i=active2dnum[ii])>=0;ii++) {
    /* 'distance' from hyperplane*/
    dist=(lin[i]-model->b)*(double)label[i]+slack[docs[i]->slackid];
    target=-(learn_parm->eps-(double)label[i]*c[i]);
    ex_c=learn_parm->svm_c-learn_parm->epsilon_a;
    if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
      if((dist-target)>(*maxdiff)) {  /* largest violation */
        (*maxdiff)=dist-target;
        if(verbosity>=5) printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, alphaslack=%f\n",docs[i]->slackid,dist,target,slack[docs[i]->slackid],a[i],alphaslack[docs[i]->slackid]);
        if(verbosity>=5) printf(" (single %f)\n",(*maxdiff));
      }
    }
    if((alphaslack[docs[i]->slackid]<learn_parm->svm_c) && (slack[docs[i]->slackid]>0)) {
      if((slack[docs[i]->slackid])>(*maxdiff)) { /* largest violation */
        (*maxdiff)=slack[docs[i]->slackid];
        if(verbosity>=5) printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, alphaslack=%f\n",docs[i]->slackid,dist,target,slack[docs[i]->slackid],a[i],alphaslack[docs[i]->slackid]);
        if(verbosity>=5) printf(" (joint %f)\n",(*maxdiff));
      }
    }
    /* Count how long a variable was at lower/upper bound (and optimal).*/
    /* Variables, which were at the bound and optimal for a long */
    /* time are unlikely to become support vectors. In case our */
    /* cache is filled up, those variables are excluded to save */
    /* kernel evaluations. (See chapter 'Shrinking').*/
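    /* Same bookkeeping as in check_optimality, except that the
       timestamp is kept per slack set (indexed by docs[i]->slackid)
       rather than per variable, matching the per-set lookup done in
       shrink_problem when learn_parm->sharedslack is set. */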
    if((a[i]>(learn_parm->epsilon_a))
       && (a[i]<ex_c)) {
      last_suboptimal_at[docs[i]->slackid]=iteration;    /* not at bound */
    }
    else if((a[i]<=(learn_parm->epsilon_a))
            && (dist < (target+learn_parm->epsilon_shrink))) {
      last_suboptimal_at[docs[i]->slackid]=iteration;    /* not likely optimal */
    }
    else if((a[i]>=ex_c)
            && (slack[docs[i]->slackid] < learn_parm->epsilon_shrink))  {
      last_suboptimal_at[docs[i]->slackid]=iteration;    /* not likely optimal */
    }
  }
  /* termination criterion */
  if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) {
    retrain=1;
  }
  return(retrain);
}

void compute_shared_slacks(DOC **docs, long int *label,
                           double *a, double *lin,
                           double *c, long int *active2dnum,
                           LEARN_PARM *learn_parm,
                           double *slack, double *alphaslack)
     /* compute the value of shared slacks and the joint alphas */
{
  long jj,i;
  double dist,target;

  for(jj=0;(i=active2dnum[jj])>=0;jj++) { /* clear slack variables */
    slack[docs[i]->slackid]=0.0;
    alphaslack[docs[i]->slackid]=0.0;
  }
  for(jj=0;(i=active2dnum[jj])>=0;jj++) { /* recompute slack variables */
    dist=(lin[i])*(double)label[i];
    target=-(learn_parm->eps-(double)label[i]*c[i]);
    if((target-dist) > slack[docs[i]->slackid])
      slack[docs[i]->slackid]=target-dist;
    alphaslack[docs[i]->slackid]+=a[i];
  }
}


long identify_inconsistent(double *a, long int *label,
                           long int *unlabeled, long int totdoc,
                           LEARN_PARM *learn_parm,
                           long int *inconsistentnum, long int *inconsistent)
{
  long i,retrain;

  /* Throw out examples with multipliers at upper bound. This */
  /* corresponds to the -i 1 option. */
  /* ATTENTION: this is just a heuristic for finding a close */
  /*            to minimum number of examples to exclude to */
  /*            make the problem separable with desired margin */
  retrain=0;
  for(i=0;i<totdoc;i++) {
    if((!inconsistent[i]) && (!unlabeled[i])
       && (a[i]>=(learn_parm->svm_cost[i]-learn_parm->epsilon_a))) {
      (*inconsistentnum)++;
      inconsistent[i]=1;  /* never choose again */
      retrain=2;          /* start over */
      if(verbosity>=3) {
        printf("inconsistent(%ld)..",i); fflush(stdout);
      }
    }
  }
  return(retrain);
}

long identify_misclassified(double *lin, long int *label,
                            long int *unlabeled, long int totdoc,
                            MODEL *model, long int *inconsistentnum,
                            long int *inconsistent)
{
  long i,retrain;
  double dist;

  /* Throw out misclassified examples. This */
  /* corresponds to the -i 2 option. */
  /* ATTENTION: this is just a heuristic for finding a close */
  /*            to minimum number of examples to exclude to */
  /*            make the problem separable with desired margin */
  retrain=0;
  for(i=0;i<totdoc;i++) {
    dist=(lin[i]-model->b)*(double)label[i]; /* 'distance' from hyperplane*/
    if((!inconsistent[i]) && (!unlabeled[i]) && (dist <= 0)) {
      (*inconsistentnum)++;
      inconsistent[i]=1;  /* never choose again */
      retrain=2;          /* start over */
      if(verbosity>=3) {
        printf("inconsistent(%ld)..",i); fflush(stdout);
      }
    }
  }
  return(retrain);
}

long identify_one_misclassified(double *lin, long int *label,
                                long int *unlabeled,
                                long int totdoc, MODEL *model,
                                long int *inconsistentnum,
                                long int *inconsistent)
{
  long i,retrain,maxex=-1;
  double dist,maxdist=0;

  /* Throw out the 'most misclassified' example. This */
  /* corresponds to the -i 3 option. */
  /* ATTENTION: this is just a heuristic for finding a close */
  /*            to minimum number of examples to exclude to */
  /*            make the problem separable with desired margin */
  retrain=0;
  for(i=0;i<totdoc;i++) {
    if((!inconsistent[i]) && (!unlabeled[i])) {
      dist=(lin[i]-model->b)*(double)label[i];/* 'distance' from hyperplane*/
      if(dist<maxdist) {
        maxdist=dist;
        maxex=i;
      }
    }
  }
  if(maxex>=0) {
    (*inconsistentnum)++;
    inconsistent[maxex]=1;  /* never choose again */
    retrain=2;          /* start over */
    if(verbosity>=3) {
      printf("inconsistent(%ld)..",i); fflush(stdout);
    }
  }
  return(retrain);
}

void update_linear_component(DOC **docs, long int *label,
                             long int *active2dnum, double *a,
                             double *a_old, long int *working2dnum,
                             long int totdoc, long int totwords,
                             KERNEL_PARM *kernel_parm,
                             KERNEL_CACHE *kernel_cache,
                             double *lin, CFLOAT *aicache, double *weights)
     /* keep track of the linear component */
     /* lin of the gradient etc. by updating */
     /* based on the change of the variables */
     /* in the current working set */
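     /* Update rule implemented below. Linear kernel: accumulate the
        weight delta
          w_delta = sum_{i in working set} (a[i]-a_old[i])*label[i]*x_i
        in weights[] and add w_delta*x_j to lin[j] for every active j.
        General kernel: apply the same delta through one (cached)
        kernel row per changed variable,
          lin[j] += (a[i]-a_old[i])*label[i]*K(x_i,x_j). */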
{
  register long i,ii,j,jj;
  register double tec;
  SVECTOR *f;

  if(kernel_parm->kernel_type==0) { /* special linear case */
    clear_vector_n(weights,totwords);
    for(ii=0;(i=working2dnum[ii])>=0;ii++) {
      if(a[i] != a_old[i]) {
        for(f=docs[i]->fvec;f;f=f->next)
          add_vector_ns(weights,f,
                        f->factor*((a[i]-a_old[i])*(double)label[i]));
      }
    }
    for(jj=0;(j=active2dnum[jj])>=0;jj++) {
      for(f=docs[j]->fvec;f;f=f->next)
        lin[j]+=f->factor*sprod_ns(weights,f);
    }
  }
  else {                            /* general case */
    for(jj=0;(i=working2dnum[jj])>=0;jj++) {
      if(a[i] != a_old[i]) {
        get_kernel_row(kernel_cache,docs,i,totdoc,active2dnum,aicache,
                       kernel_parm);
        for(ii=0;(j=active2dnum[ii])>=0;ii++) {
          tec=aicache[j];
          lin[j]+=(((a[i]*tec)-(a_old[i]*tec))*(double)label[i]);
        }
      }
    }
  }
}


long incorporate_unlabeled_examples(MODEL *model, long int *label,
                                    long int *inconsistent,
                                    long int *unlabeled,
                                    double *a, double *lin,
                                    long int totdoc, double *selcrit,
                                    long int *select, long int *key,
                                    long int transductcycle,
                                    KERNEL_PARM *kernel_parm,
                                    LEARN_PARM *learn_parm)
{
  long i,j,k,j1,j2,j3,j4,unsupaddnum1=0,unsupaddnum2=0;
  long pos,neg,upos,uneg,orgpos,orgneg,nolabel,newpos,newneg,allunlab;
  double dist,model_length,posratio,negratio;
  long check_every=2;
  double loss;
  static double switchsens=0.0,switchsensorg=0.0;
  double umin,umax,sumalpha;
  long imin=0,imax=0;
  static long switchnum=0;

  switchsens/=1.2;

  /* assumes that lin[] is up to date -> no inactive vars */

  orgpos=0;
  orgneg=0;
  newpos=0;
  newneg=0;
  nolabel=0;
  allunlab=0;
  for(i=0;i<totdoc;i++) {
    if(!unlabeled[i]) {
      if(label[i] > 0) {
        orgpos++;
      }
      else {
        orgneg++;
      }
    }
    else {
      allunlab++;
      if(unlabeled[i]) {
        if(label[i] > 0) {
          newpos++;
        }
        else if(label[i] < 0) {
          newneg++;
        }
      }
    }
    if(label[i]==0) {
      nolabel++;
    }
  }

  if(learn_parm->transduction_posratio >= 0) {
    posratio=learn_parm->transduction_posratio;
  }
  else {
    posratio=(double)orgpos/(double)(orgpos+orgneg); /* use ratio of pos/neg */
  }                                                  /* in training data */
  negratio=1.0-posratio;

  learn_parm->svm_costratio=1.0;                     /* global */
  if(posratio>0) {
    learn_parm->svm_costratio_unlab=negratio/posratio;
  }
  else {
    learn_parm->svm_costratio_unlab=1.0;
  }

  pos=0;
  neg=0;
  upos=0;
  uneg=0;
  for(i=0;i<totdoc;i++) {
    dist=(lin[i]-model->b);  /* 'distance' from hyperplane*/
    if(dist>0) {
      pos++;
    }
    else {
      neg++;
    }
    if(unlabeled[i]) {
      if(dist>0) {
        upos++;
      }
      else {
        uneg++;
      }
    }
    if((!unlabeled[i]) && (a[i]>(learn_parm->svm_cost[i]-learn_parm->epsilon_a))) {
      /* printf("Ubounded %ld (class %ld, unlabeled %ld)\n",i,label[i],unlabeled[i]); */
    }
  }
  if(verbosity>=2) {
    printf("POS=%ld, ORGPOS=%ld, ORGNEG=%ld\n",pos,orgpos,orgneg);
    printf("POS=%ld, NEWPOS=%ld, NEWNEG=%ld\n",pos,newpos,newneg);
    printf("pos ratio = %f (%f).\n",(double)(upos)/(double)(allunlab),posratio);
    fflush(stdout);
  }

  if(transductcycle == 0) {
    j1=0;
    j2=0;
    j4=0;
    for(i=0;i<totdoc;i++) {
      dist=(lin[i]-model->b);  /* 'distance' from hyperplane*/
      if((label[i]==0) && (unlabeled[i])) {
        selcrit[j4]=dist;
        key[j4]=i;
        j4++;
      }
    }
    unsupaddnum1=0;
    unsupaddnum2=0;
    select_top_n(selcrit,j4,select,(long)(allunlab*posratio+0.5));
    for(k=0;(k<(long)(allunlab*posratio+0.5));k++) {
      i=key[select[k]];
      label[i]=1;
      unsupaddnum1++;
      j1++;
    }
    for(i=0;i<totdoc;i++) {
      if((label[i]==0) && (unlabeled[i])) {
        label[i]=-1;
        j2++;
        unsupaddnum2++;
      }
    }
    for(i=0;i<totdoc;i++) {  /* set upper bounds on vars */
      if(unlabeled[i]) {
        if(label[i] == 1) {
          learn_parm->svm_cost[i]=learn_parm->svm_c*
            learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound;
        }
        else if(label[i] == -1) {
          learn_parm->svm_cost[i]=learn_parm->svm_c*
            learn_parm->svm_unlabbound;
        }
      }
    }
    if(verbosity>=1) {
      /* printf("costratio %f, costratio_unlab %f, unlabbound %f\n",
         learn_parm->svm_costratio,learn_parm->svm_costratio_unlab,
         learn_parm->svm_unlabbound); */
      printf("Classifying unlabeled data as %ld POS / %ld NEG.\n",
             unsupaddnum1,unsupaddnum2);
      fflush(stdout);
    }
    if(verbosity >= 1)
      printf("Retraining.");
    if(verbosity >= 2) printf("\n");
    return((long)3);
  }
  if((transductcycle % check_every) == 0) {
    if(verbosity >= 1)
      printf("Retraining.");
    if(verbosity >= 2) printf("\n");
    j1=0;
    j2=0;
    unsupaddnum1=0;
    unsupaddnum2=0;
    for(i=0;i<totdoc;i++) {
      if((unlabeled[i] == 2)) {
        unlabeled[i]=1;
        label[i]=1;
        j1++;
        unsupaddnum1++;
      }
      else if((unlabeled[i] == 3)) {
        unlabeled[i]=1;
        label[i]=-1;
        j2++;
        unsupaddnum2++;
      }
    }
    for(i=0;i<totdoc;i++) {  /* set upper bounds on vars */
      if(unlabeled[i]) {
        if(label[i] == 1) {
          learn_parm->svm_cost[i]=learn_parm->svm_c*
            learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound;
        }
        else if(label[i] == -1) {
          learn_parm->svm_cost[i]=learn_parm->svm_c*
            learn_parm->svm_unlabbound;
        }
      }
    }

    if(verbosity>=2) {
      /* printf("costratio %f, costratio_unlab %f, unlabbound %f\n",
         learn_parm->svm_costratio,learn_parm->svm_costratio_unlab,
         learn_parm->svm_unlabbound); */
      printf("%ld positive -> Added %ld POS / %ld NEG unlabeled examples.\n",
             upos,unsupaddnum1,unsupaddnum2);
      fflush(stdout);
    }

    if(learn_parm->svm_unlabbound == 1) {
      learn_parm->epsilon_crit=0.001; /* do the last run right */
    }
    else {
      learn_parm->epsilon_crit=0.01; /* otherwise, no need to be so picky */
    }

    return((long)3);
  }
  else if(((transductcycle % check_every) < check_every)) {
    model_length=0;
    sumalpha=0;
    loss=0;
    for(i=0;i<totdoc;i++) {
      model_length+=a[i]*label[i]*lin[i];
      sumalpha+=a[i];
      dist=(lin[i]-model->b);  /* 'distance' from hyperplane*/
      if((label[i]*dist)<(1.0-learn_parm->epsilon_crit)) {
        loss+=(1.0-(label[i]*dist))*learn_parm->svm_cost[i];
      }
    }
    model_length=sqrt(model_length);
    if(verbosity>=2) {
      printf("Model-length = %f (%f), loss = %f, objective = %f\n",
             model_length,sumalpha,loss,loss+0.5*model_length*model_length);
      fflush(stdout);
    }
    j1=0;
    j2=0;
    j3=0;
    j4=0;
    unsupaddnum1=0;
    unsupaddnum2=0;
    umin=99999;
    umax=-99999;
    j4=1;
    while(j4) {
      umin=99999;
      umax=-99999;
      for(i=0;(i<totdoc);i++) {
        dist=(lin[i]-model->b);
        if((label[i]>0) && (unlabeled[i]) && (!inconsistent[i])
           && (dist<umin)) {
          umin=dist;
          imin=i;
        }
        if((label[i]<0) && (unlabeled[i]) && (!inconsistent[i])
           && (dist>umax)) {
          umax=dist;
          imax=i;
        }
      }
      if((umin < (umax+switchsens-1E-4))) {
        j1++;
        j2++;
        unsupaddnum1++;
        unlabeled[imin]=3;
        inconsistent[imin]=1;
        unsupaddnum2++;
        unlabeled[imax]=2;
        inconsistent[imax]=1;
      }
      else
        j4=0;
      j4=0;
    }
    for(j=0;(j<totdoc);j++) {
      if(unlabeled[j] && (!inconsistent[j])) {
        if(label[j]>0) {
          unlabeled[j]=2;
        }
        else if(label[j]<0) {
          unlabeled[j]=3;
        }
        /* inconsistent[j]=1; */
        j3++;
      }
    }
    switchnum+=unsupaddnum1+unsupaddnum2;

    /* stop and print out current margin
       printf("switchnum %ld %ld\n",switchnum,kernel_parm->poly_degree);
       if(switchnum == 2*kernel_parm->poly_degree) {
         learn_parm->svm_unlabbound=1;
       }
       */

    if((!unsupaddnum1) && (!unsupaddnum2)) {
      if((learn_parm->svm_unlabbound>=1) && ((newpos+newneg) == allunlab)) {
        for(j=0;(j<totdoc);j++) {
          inconsistent[j]=0;
          if(unlabeled[j]) unlabeled[j]=1;
        }
        write_prediction(learn_parm->predfile,model,lin,a,unlabeled,label,
                         totdoc,learn_parm);
        if(verbosity>=1)
          printf("Number of switches: %ld\n",switchnum);
        return((long)0);
      }
      switchsens=switchsensorg;
      learn_parm->svm_unlabbound*=1.5;
      if(learn_parm->svm_unlabbound>1) {
        learn_parm->svm_unlabbound=1;
      }
      model->at_upper_bound=0; /* since upper bound increased */
      if(verbosity>=1)
        printf("Increasing influence of unlabeled examples to %f%% .",
               learn_parm->svm_unlabbound*100.0);
    }
    else if(verbosity>=1) {
      printf("%ld positive -> Switching labels of %ld POS / %ld NEG unlabeled examples.",
             upos,unsupaddnum1,unsupaddnum2);
      fflush(stdout);
    }

    if(verbosity >= 2) printf("\n");

    learn_parm->epsilon_crit=0.5; /* don't need to be so picky */

    for(i=0;i<totdoc;i++) {  /* set upper bounds on vars */
      if(unlabeled[i]) {
        if(label[i] == 1) {
          learn_parm->svm_cost[i]=learn_parm->svm_c*
            learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound;
        }
        else if(label[i] == -1) {
          learn_parm->svm_cost[i]=learn_parm->svm_c*
            learn_parm->svm_unlabbound;
        }
      }
    }

    return((long)2);
  }

  return((long)0);
}

/*************************** Working set selection ***************************/

long select_next_qp_subproblem_grad(long int *label,
                                    long int *unlabeled,
                                    double *a, double *lin,
                                    double *c, long int totdoc,
                                    long int qp_size,
                                    LEARN_PARM *learn_parm,
                                    long int *inconsistent,
                                    long int *active2dnum,
                                    long int *working2dnum,
                                    double *selcrit,
                                    long int *select,
                                    KERNEL_CACHE *kernel_cache,
                                    long int cache_only,
                                    long int *key, long int *chosen)
     /* Use the feasible direction approach to select the next
        qp-subproblem (see chapter 'Selecting a good working set'). If
        'cache_only' is true, then the variables are selected only among
        those for which the kernel evaluations are cached. */
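     /* The criterion computed below,
          selcrit = label[j]*(eps - label[j]*c[j] + label[j]*lin[j]),
        is the gradient of the objective along a[j] multiplied by the
        label. The two passes rank it with opposite signs and keep the
        top qp_size/2 variables each, skipping variables at a bound
        that cannot move in the direction s under consideration. */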
{
  long choosenum,i,j,k,activedoc,inum,valid;
  double s;

  for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
  choosenum=0;
  activedoc=0;
  for(i=0;(j=active2dnum[i])>=0;i++) {
    s=-label[j];
    if(kernel_cache && cache_only)
      valid=(kernel_cache->index[j]>=0);
    else
      valid=1;
    if(valid
       && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
       && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
             && (s>0)))
       && (!chosen[j])
       && (label[j])
       && (!inconsistent[j]))
      {
      selcrit[activedoc]=(double)label[j]*(learn_parm->eps-(double)label[j]*c[j]+(double)label[j]*lin[j]);
      /* selcrit[activedoc]=(double)label[j]*(-1.0+(double)label[j]*lin[j]); */
      key[activedoc]=j;
      activedoc++;
    }
  }
  select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
  for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
    /* if(learn_parm->biased_hyperplane || (selcrit[select[k]] > 0)) { */
      i=key[select[k]];
      chosen[i]=1;
      working2dnum[inum+choosenum]=i;
      choosenum+=1;
      if(kernel_cache)
        kernel_cache_touch(kernel_cache,i); /* make sure it does not get
                                               kicked out of cache */
    /* } */
  }

  activedoc=0;
  for(i=0;(j=active2dnum[i])>=0;i++) {
    s=label[j];
    if(kernel_cache && cache_only)
      valid=(kernel_cache->index[j]>=0);
    else
      valid=1;
    if(valid
       && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
       && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
             && (s>0)))
       && (!chosen[j])
       && (label[j])
       && (!inconsistent[j]))
      {
      selcrit[activedoc]=-(double)label[j]*(learn_parm->eps-(double)label[j]*c[j]+(double)label[j]*lin[j]);
      /* selcrit[activedoc]=-(double)(label[j]*(-1.0+(double)label[j]*lin[j])); */
      key[activedoc]=j;
      activedoc++;
    }
  }
  select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
  for(k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
    /* if(learn_parm->biased_hyperplane || (selcrit[select[k]] > 0)) { */
      i=key[select[k]];
      chosen[i]=1;
      working2dnum[inum+choosenum]=i;
      choosenum+=1;
      if(kernel_cache)
        kernel_cache_touch(kernel_cache,i); /* make sure it does not get
                                               kicked out of cache */
    /* } */
  }
  working2dnum[inum+choosenum]=-1; /* complete index */
  return(choosenum);
}

long select_next_qp_subproblem_rand(long int *label,
                                    long int *unlabeled,
                                    double *a, double *lin,
                                    double *c, long int totdoc,
                                    long int qp_size,
                                    LEARN_PARM *learn_parm,
                                    long int *inconsistent,
                                    long int *active2dnum,
                                    long int *working2dnum,
                                    double *selcrit,
                                    long int *select,
                                    KERNEL_CACHE *kernel_cache,
                                    long int *key,
                                    long int *chosen,
                                    long int iteration)
     /* Use the feasible direction approach to select the next
        qp-subproblem (see section 'Selecting a good working set').
        Chooses a feasible direction at (pseudo) random to help break
        out of infinite loops due to numerical inaccuracies in the core
        qp-solver. */
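     /* The ranking criterion here is simply (j+iteration) % totdoc, a
        cheap rotating pseudo-random order. The optimize_to_convergence
        loops call this selection every 101st iteration instead of the
        gradient-based one, so a working set that keeps cycling due to
        numerical noise gets replaced by a fresh one. */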
{
  long choosenum,i,j,k,activedoc,inum;
  double s;

  for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
  choosenum=0;
  activedoc=0;
  for(i=0;(j=active2dnum[i])>=0;i++) {
    s=-label[j];
    if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
       && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
             && (s>0)))
       && (!inconsistent[j])
       && (label[j])
       && (!chosen[j])) {
      selcrit[activedoc]=(j+iteration) % totdoc;
      key[activedoc]=j;
      activedoc++;
    }
  }
  select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
  for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
    i=key[select[k]];
    chosen[i]=1;
    working2dnum[inum+choosenum]=i;
    choosenum+=1;
    kernel_cache_touch(kernel_cache,i); /* make sure it does not get kicked */
                                        /* out of cache */
  }

  activedoc=0;
  for(i=0;(j=active2dnum[i])>=0;i++) {
    s=label[j];
    if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
       && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
             && (s>0)))
       && (!inconsistent[j])
       && (label[j])
       && (!chosen[j])) {
      selcrit[activedoc]=(j+iteration) % totdoc;
      key[activedoc]=j;
      activedoc++;
    }
  }
  select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
  for(k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
    i=key[select[k]];
    chosen[i]=1;
    working2dnum[inum+choosenum]=i;
    choosenum+=1;
    kernel_cache_touch(kernel_cache,i); /* make sure it does not get kicked */
                                        /* out of cache */
  }
  working2dnum[inum+choosenum]=-1; /* complete index */
  return(choosenum);
}

long select_next_qp_slackset(DOC **docs, long int *label,
                             double *a, double *lin,
                             double *slack, double *alphaslack,
                             double *c,
                             LEARN_PARM *learn_parm,
                             long int *active2dnum, double *maxviol)
     /* returns the slackset with the largest internal violation */
{
  long i,ii,maxdiffid;
  double dist,target,maxdiff,ex_c;

  maxdiff=0;
  maxdiffid=0;
  for(ii=0;(i=active2dnum[ii])>=0;ii++) {
    ex_c=learn_parm->svm_c-learn_parm->epsilon_a;
    if(alphaslack[docs[i]->slackid] >= ex_c) {
      dist=(lin[i])*(double)label[i]+slack[docs[i]->slackid]; /* distance */
      target=-(learn_parm->eps-(double)label[i]*c[i]); /* rhs of constraint */
      if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
        if((dist-target)>maxdiff) { /* largest violation */
          maxdiff=dist-target;
          maxdiffid=docs[i]->slackid;
        }
      }
    }
  }
  (*maxviol)=maxdiff;
  return(maxdiffid);
}


void select_top_n(double *selcrit, long int range, long int *select,
                  long int n)
{
  register long i,j;

  for(i=0;(i<n) && (i<range);i++) { /* Initialize with the first n elements */
    for(j=i;j>=0;j--) {
      if((j>0) && (selcrit[select[j-1]]<selcrit[i])){
        select[j]=select[j-1];
      }
      else {
        select[j]=i;
        j=-1;
      }
    }
  }
  if(n>0) {
    for(i=n;i<range;i++) {
      if(selcrit[i]>selcrit[select[n-1]]) {
        for(j=n-1;j>=0;j--) {
          if((j>0) && (selcrit[select[j-1]]<selcrit[i])) {
            select[j]=select[j-1];
          }
          else {
            select[j]=i;
            j=-1;
          }
        }
      }
    }
  }
}


/****************************** Shrinking ************************************/

void init_shrink_state(SHRINK_STATE *shrink_state, long int totdoc,
                       long int maxhistory)
{
  long i;

  shrink_state->deactnum=0;
  shrink_state->active = (long *)my_malloc(sizeof(long)*totdoc);
  shrink_state->inactive_since = (long *)my_malloc(sizeof(long)*totdoc);
  shrink_state->a_history = (double **)my_malloc(sizeof(double *)*maxhistory);
  shrink_state->maxhistory=maxhistory;
  shrink_state->last_lin = (double *)my_malloc(sizeof(double)*totdoc);
  shrink_state->last_a = (double *)my_malloc(sizeof(double)*totdoc);

  for(i=0;i<totdoc;i++) {
    shrink_state->active[i]=1;
    shrink_state->inactive_since[i]=0;
    shrink_state->last_a[i]=0;
    shrink_state->last_lin[i]=0;
  }
}

void shrink_state_cleanup(SHRINK_STATE *shrink_state)
{
  free(shrink_state->active);
  free(shrink_state->inactive_since);
  if(shrink_state->deactnum > 0)
    free(shrink_state->a_history[shrink_state->deactnum-1]);
  free(shrink_state->a_history);
  free(shrink_state->last_a);
  free(shrink_state->last_lin);
}
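/* Shrinking in a nutshell: variables that have been optimal at a bound
   for more than learn_parm->svm_iter_to_shrink iterations (tracked via
   last_suboptimal_at) are taken out of the active set. For non-linear
   kernels the alphas at each shrinking step are pushed onto
   shrink_state->a_history, so that reactivate_inactive_examples can
   later replay the alpha changes and bring lin[] of the shrunk
   variables up to date instead of recomputing everything from zero. */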
long shrink_problem(DOC **docs,
                    LEARN_PARM *learn_parm,
                    SHRINK_STATE *shrink_state,
                    KERNEL_PARM *kernel_parm,
                    long int *active2dnum,
                    long int *last_suboptimal_at,
                    long int iteration,
                    long int totdoc,
                    long int minshrink,
                    double *a,
                    long int *inconsistent)
     /* Shrink some variables away. Do the shrinking only if at least
        minshrink variables can be removed. */
{
  long i,ii,change,activenum,lastiter;
  double *a_old;

  activenum=0;
  change=0;
  for(ii=0;active2dnum[ii]>=0;ii++) {
    i=active2dnum[ii];
    activenum++;
    if(learn_parm->sharedslack)
      lastiter=last_suboptimal_at[docs[i]->slackid];
    else
      lastiter=last_suboptimal_at[i];
    if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
       || (inconsistent[i])) {
      change++;
    }
  }
  if((change>=minshrink) /* shrink only if sufficiently many candidates */
     && (shrink_state->deactnum<shrink_state->maxhistory)) { /* and enough memory */
    /* Shrink problem by removing those variables which are */
    /* optimal at a bound for a minimum number of iterations */
    if(verbosity>=2) {
      printf(" Shrinking..."); fflush(stdout);
    }
    if(kernel_parm->kernel_type != LINEAR) { /* non-linear case save alphas */
      a_old=(double *)my_malloc(sizeof(double)*totdoc);
      shrink_state->a_history[shrink_state->deactnum]=a_old;
      for(i=0;i<totdoc;i++) {
        a_old[i]=a[i];
      }
    }
    for(ii=0;active2dnum[ii]>=0;ii++) {
      i=active2dnum[ii];
      if(learn_parm->sharedslack)
        lastiter=last_suboptimal_at[docs[i]->slackid];
      else
        lastiter=last_suboptimal_at[i];
      if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
         || (inconsistent[i])) {
        shrink_state->active[i]=0;
        shrink_state->inactive_since[i]=shrink_state->deactnum;
      }
    }
    activenum=compute_index(shrink_state->active,totdoc,active2dnum);
    shrink_state->deactnum++;
    if(kernel_parm->kernel_type == LINEAR) {
      shrink_state->deactnum=0;
    }
    if(verbosity>=2) {
      printf("done.\n"); fflush(stdout);
      printf(" Number of inactive variables = %ld\n",totdoc-activenum);
    }
  }
  return(activenum);
}


void reactivate_inactive_examples(long int *label,
                                  long int *unlabeled,
                                  double *a,
                                  SHRINK_STATE *shrink_state,
                                  double *lin,
                                  double *c,
                                  long int totdoc,
                                  long int totwords,
                                  long int iteration,
                                  LEARN_PARM *learn_parm,
                                  long int *inconsistent,
                                  DOC **docs,
                                  KERNEL_PARM *kernel_parm,
                                  KERNEL_CACHE *kernel_cache,
                                  MODEL *model,
                                  CFLOAT *aicache,
                                  double *weights,
                                  double *maxdiff)
     /* Make all variables active again which had been removed by
        shrinking. */
     /* Computes lin for those variables from scratch. */
void reactivate_inactive_examples(long int *label,
                                  long int *unlabeled,
                                  double *a,
                                  SHRINK_STATE *shrink_state,
                                  double *lin,
                                  double *c,
                                  long int totdoc,
                                  long int totwords,
                                  long int iteration,
                                  LEARN_PARM *learn_parm,
                                  long int *inconsistent,
                                  DOC **docs,
                                  KERNEL_PARM *kernel_parm,
                                  KERNEL_CACHE *kernel_cache,
                                  MODEL *model,
                                  CFLOAT *aicache,
                                  double *weights,
                                  double *maxdiff)
     /* Make all variables active again which had been removed by
        shrinking. */
     /* Computes lin for those variables from scratch. */
{
  register long i,j,ii,jj,t,*changed2dnum,*inactive2dnum;
  long *changed,*inactive;
  register double kernel_val,*a_old,dist;
  double ex_c,target;
  SVECTOR *f;

  if(kernel_parm->kernel_type == LINEAR) { /* special linear case */
    a_old=shrink_state->last_a;
    clear_vector_n(weights,totwords);
    for(i=0;i<totdoc;i++) {
      if(a[i] != a_old[i]) {
        for(f=docs[i]->fvec;f;f=f->next)
          add_vector_ns(weights,f,
                        f->factor*((a[i]-a_old[i])*(double)label[i]));
        a_old[i]=a[i];
      }
    }
    for(i=0;i<totdoc;i++) {
      if(!shrink_state->active[i]) {
        for(f=docs[i]->fvec;f;f=f->next)
          lin[i]=shrink_state->last_lin[i]+f->factor*sprod_ns(weights,f);
      }
      shrink_state->last_lin[i]=lin[i];
    }
  }
  else {
    changed=(long *)my_malloc(sizeof(long)*totdoc);
    changed2dnum=(long *)my_malloc(sizeof(long)*(totdoc+11));
    inactive=(long *)my_malloc(sizeof(long)*totdoc);
    inactive2dnum=(long *)my_malloc(sizeof(long)*(totdoc+11));
    for(t=shrink_state->deactnum-1;(t>=0) && shrink_state->a_history[t];t--) {
      if(verbosity>=2) {
        printf("%ld..",t); fflush(stdout);
      }
      a_old=shrink_state->a_history[t];
      for(i=0;i<totdoc;i++) {
        inactive[i]=((!shrink_state->active[i])
                     && (shrink_state->inactive_since[i] == t));
        changed[i]= (a[i] != a_old[i]);
      }
      compute_index(inactive,totdoc,inactive2dnum);
      compute_index(changed,totdoc,changed2dnum);

      for(ii=0;(i=changed2dnum[ii])>=0;ii++) {
        get_kernel_row(kernel_cache,docs,i,totdoc,inactive2dnum,aicache,
                       kernel_parm);
        for(jj=0;(j=inactive2dnum[jj])>=0;jj++) {
          kernel_val=aicache[j];
          lin[j]+=(((a[i]*kernel_val)-(a_old[i]*kernel_val))*(double)label[i]);
        }
      }
    }
    free(changed);
    free(changed2dnum);
    free(inactive);
    free(inactive2dnum);
  }
  (*maxdiff)=0;
  for(i=0;i<totdoc;i++) {
    shrink_state->inactive_since[i]=shrink_state->deactnum-1;
    if(!inconsistent[i]) {
      dist=(lin[i]-model->b)*(double)label[i];
      target=-(learn_parm->eps-(double)label[i]*c[i]);
      ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
      if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
        if((dist-target)>(*maxdiff))  /* largest violation */
          (*maxdiff)=dist-target;
      }
      else if((a[i]<ex_c) && (dist < target)) {
        if((target-dist)>(*maxdiff))  /* largest violation */
          (*maxdiff)=target-dist;
      }
      if((a[i]>(0+learn_parm->epsilon_a))
         && (a[i]<ex_c)) {
        shrink_state->active[i]=1;                         /* not at bound */
      }
      else if((a[i]<=(0+learn_parm->epsilon_a))
              && (dist < (target+learn_parm->epsilon_shrink))) {
        shrink_state->active[i]=1;
      }
      else if((a[i]>=ex_c)
              && (dist > (target-learn_parm->epsilon_shrink))) {
        shrink_state->active[i]=1;
      }
      else if(learn_parm->sharedslack) { /* make all active when sharedslack */
        shrink_state->active[i]=1;
      }
    }
  }
  if(kernel_parm->kernel_type != LINEAR) { /* update history for non-linear */
    for(i=0;i<totdoc;i++) {
      (shrink_state->a_history[shrink_state->deactnum-1])[i]=a[i];
    }
    for(t=shrink_state->deactnum-2;(t>=0) && shrink_state->a_history[t];t--) {
      free(shrink_state->a_history[t]);
      shrink_state->a_history[t]=0;
    }
  }
}
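/* Sketch (illustrative, not in the original source) of the linear-case
   trick used above: instead of recomputing kernel rows, keep a dense
   weight vector w and fold in only the alphas that changed since the
   last snapshot, so lin[i]=w*x_i can be refreshed for reactivated
   examples with one sparse dot product. The function name is
   hypothetical. */

static void example_fold_in_alpha_delta(double *weights, DOC *doc,
                                        double a_new, double a_old,
                                        long label_i)
{
  SVECTOR *f;

  for(f=doc->fvec;f;f=f->next)   /* w += (a_new-a_old)*y_i*x_i */
    add_vector_ns(weights,f,f->factor*((a_new-a_old)*(double)label_i));
}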
/****************************** Cache handling *******************************/

void get_kernel_row(KERNEL_CACHE *kernel_cache, DOC **docs,
                    long int docnum, long int totdoc,
                    long int *active2dnum, CFLOAT *buffer,
                    KERNEL_PARM *kernel_parm)
     /* Gets a row of the matrix of kernel values. This matrix has the
        same form as the Hessian, just that the elements are not
        multiplied by */
     /* y_i * y_j * a_i * a_j */
     /* Takes the values from the cache if available. */
{
  register long i,j,start;
  DOC *ex;

  ex=docs[docnum];

  if(kernel_cache->index[docnum] != -1) { /* row is cached? */
    kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */
    start=kernel_cache->activenum*kernel_cache->index[docnum];
    for(i=0;(j=active2dnum[i])>=0;i++) {
      if(kernel_cache->totdoc2active[j] >= 0) { /* column is cached? */
        buffer[j]=kernel_cache->buffer[start+kernel_cache->totdoc2active[j]];
      }
      else {
        buffer[j]=(CFLOAT)kernel(kernel_parm,ex,docs[j]);
      }
    }
  }
  else {
    for(i=0;(j=active2dnum[i])>=0;i++) {
      buffer[j]=(CFLOAT)kernel(kernel_parm,ex,docs[j]);
    }
  }
}


void cache_kernel_row(KERNEL_CACHE *kernel_cache, DOC **docs,
                      long int m, KERNEL_PARM *kernel_parm)
     /* Fills cache for the row m */
{
  register DOC *ex;
  register long j,k,l;
  register CFLOAT *cache;

  if(!kernel_cache_check(kernel_cache,m)) { /* not cached yet */
    cache = kernel_cache_clean_and_malloc(kernel_cache,m);
    if(cache) {
      l=kernel_cache->totdoc2active[m];
      ex=docs[m];
      for(j=0;j<kernel_cache->activenum;j++) { /* fill cache */
        k=kernel_cache->active2totdoc[j];
        if((kernel_cache->index[k] != -1) && (l != -1) && (k != m)) {
          cache[j]=kernel_cache->buffer[kernel_cache->activenum
                                        *kernel_cache->index[k]+l];
        }
        else {
          cache[j]=kernel(kernel_parm,ex,docs[k]);
        }
      }
    }
    else {
      perror("Error: Kernel cache full! => increase cache size");
    }
  }
}
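/* Sketch (illustrative, not in the original source): the cache buffer
   is a dense row-major array with activenum columns, so cached row r
   starts at buffer[activenum*r], and K(i,j) for a cached row i sits at
   column totdoc2active[j]. The helper below assumes both lookups
   succeed (neither index is -1); the name is hypothetical. */

static CFLOAT example_cached_kernel_value(KERNEL_CACHE *kernel_cache,
                                          long i, long j)
{
  long row=kernel_cache->index[i];          /* -1 if row i not cached */
  long col=kernel_cache->totdoc2active[j];  /* -1 if column j inactive */

  return(kernel_cache->buffer[kernel_cache->activenum*row+col]);
}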
=> increase cache size"); wolffd@0: } wolffd@0: } wolffd@0: } wolffd@0: wolffd@0: wolffd@0: void cache_multiple_kernel_rows(KERNEL_CACHE *kernel_cache, DOC **docs, wolffd@0: long int *key, long int varnum, wolffd@0: KERNEL_PARM *kernel_parm) wolffd@0: /* Fills cache for the rows in key */ wolffd@0: { wolffd@0: register long i; wolffd@0: wolffd@0: for(i=0;i=2) { wolffd@0: printf(" Reorganizing cache..."); fflush(stdout); wolffd@0: } wolffd@0: wolffd@0: keep=(long *)my_malloc(sizeof(long)*totdoc); wolffd@0: for(j=0;jactivenum) && (scountactive2totdoc[jj]; wolffd@0: if(!after[j]) { wolffd@0: scount++; wolffd@0: keep[j]=0; wolffd@0: } wolffd@0: } wolffd@0: wolffd@0: for(i=0;imax_elems;i++) { wolffd@0: for(jj=0;jjactivenum;jj++) { wolffd@0: j=kernel_cache->active2totdoc[jj]; wolffd@0: if(!keep[j]) { wolffd@0: from++; wolffd@0: } wolffd@0: else { wolffd@0: kernel_cache->buffer[to]=kernel_cache->buffer[from]; wolffd@0: to++; wolffd@0: from++; wolffd@0: } wolffd@0: } wolffd@0: } wolffd@0: wolffd@0: kernel_cache->activenum=0; wolffd@0: for(j=0;jtotdoc2active[j] != -1)) { wolffd@0: kernel_cache->active2totdoc[kernel_cache->activenum]=j; wolffd@0: kernel_cache->totdoc2active[j]=kernel_cache->activenum; wolffd@0: kernel_cache->activenum++; wolffd@0: } wolffd@0: else { wolffd@0: kernel_cache->totdoc2active[j]=-1; wolffd@0: } wolffd@0: } wolffd@0: wolffd@0: kernel_cache->max_elems=(long)(kernel_cache->buffsize/kernel_cache->activenum); wolffd@0: if(kernel_cache->max_elems>totdoc) { wolffd@0: kernel_cache->max_elems=totdoc; wolffd@0: } wolffd@0: wolffd@0: free(keep); wolffd@0: wolffd@0: if(verbosity>=2) { wolffd@0: printf("done.\n"); fflush(stdout); wolffd@0: printf(" Cache-size in rows = %ld\n",kernel_cache->max_elems); wolffd@0: } wolffd@0: } wolffd@0: wolffd@0: KERNEL_CACHE *kernel_cache_init(long int totdoc, long int buffsize) wolffd@0: { wolffd@0: long i; wolffd@0: KERNEL_CACHE *kernel_cache; wolffd@0: wolffd@0: kernel_cache=(KERNEL_CACHE *)my_malloc(sizeof(KERNEL_CACHE)); wolffd@0: kernel_cache->index = (long *)my_malloc(sizeof(long)*totdoc); wolffd@0: kernel_cache->occu = (long *)my_malloc(sizeof(long)*totdoc); wolffd@0: kernel_cache->lru = (long *)my_malloc(sizeof(long)*totdoc); wolffd@0: kernel_cache->invindex = (long *)my_malloc(sizeof(long)*totdoc); wolffd@0: kernel_cache->active2totdoc = (long *)my_malloc(sizeof(long)*totdoc); wolffd@0: kernel_cache->totdoc2active = (long *)my_malloc(sizeof(long)*totdoc); wolffd@0: kernel_cache->buffer = (CFLOAT *)my_malloc((size_t)(buffsize)*1024*1024); wolffd@0: wolffd@0: kernel_cache->buffsize=(long)(buffsize/sizeof(CFLOAT)*1024*1024); wolffd@0: wolffd@0: kernel_cache->max_elems=(long)(kernel_cache->buffsize/totdoc); wolffd@0: if(kernel_cache->max_elems>totdoc) { wolffd@0: kernel_cache->max_elems=totdoc; wolffd@0: } wolffd@0: wolffd@0: if(verbosity>=2) { wolffd@0: printf(" Cache-size in rows = %ld\n",kernel_cache->max_elems); wolffd@0: printf(" Kernel evals so far: %ld\n",kernel_cache_statistic); wolffd@0: } wolffd@0: wolffd@0: kernel_cache->elems=0; /* initialize cache */ wolffd@0: for(i=0;iindex[i]=-1; wolffd@0: kernel_cache->lru[i]=0; wolffd@0: } wolffd@0: for(i=0;ioccu[i]=0; wolffd@0: kernel_cache->invindex[i]=-1; wolffd@0: } wolffd@0: wolffd@0: kernel_cache->activenum=totdoc;; wolffd@0: for(i=0;iactive2totdoc[i]=i; wolffd@0: kernel_cache->totdoc2active[i]=i; wolffd@0: } wolffd@0: wolffd@0: kernel_cache->time=0; wolffd@0: wolffd@0: return(kernel_cache); wolffd@0: } wolffd@0: wolffd@0: void kernel_cache_reset_lru(KERNEL_CACHE *kernel_cache) wolffd@0: { 
void kernel_cache_reset_lru(KERNEL_CACHE *kernel_cache)
{
  long maxlru=0,k;

  for(k=0;k<kernel_cache->max_elems;k++) {
    if(maxlru < kernel_cache->lru[k])
      maxlru=kernel_cache->lru[k];
  }
  for(k=0;k<kernel_cache->max_elems;k++) {
    kernel_cache->lru[k]-=maxlru;
  }
}

void kernel_cache_cleanup(KERNEL_CACHE *kernel_cache)
{
  free(kernel_cache->index);
  free(kernel_cache->occu);
  free(kernel_cache->lru);
  free(kernel_cache->invindex);
  free(kernel_cache->active2totdoc);
  free(kernel_cache->totdoc2active);
  free(kernel_cache->buffer);
  free(kernel_cache);
}

long kernel_cache_malloc(KERNEL_CACHE *kernel_cache)
{
  long i;

  if(kernel_cache_space_available(kernel_cache)) {
    for(i=0;i<kernel_cache->max_elems;i++) {
      if(!kernel_cache->occu[i]) {
        kernel_cache->occu[i]=1;
        kernel_cache->elems++;
        return(i);
      }
    }
  }
  return(-1);
}

void kernel_cache_free(KERNEL_CACHE *kernel_cache, long int i)
{
  kernel_cache->occu[i]=0;
  kernel_cache->elems--;
}

long kernel_cache_free_lru(KERNEL_CACHE *kernel_cache)
     /* remove least recently used cache element */
{
  register long k,least_elem=-1,least_time;

  least_time=kernel_cache->time+1;
  for(k=0;k<kernel_cache->max_elems;k++) {
    if(kernel_cache->invindex[k] != -1) {
      if(kernel_cache->lru[k]<least_time) {
        least_time=kernel_cache->lru[k];
        least_elem=k;
      }
    }
  }
  if(least_elem != -1) {
    kernel_cache_free(kernel_cache,least_elem);
    kernel_cache->index[kernel_cache->invindex[least_elem]]=-1;
    kernel_cache->invindex[least_elem]=-1;
    return(1);
  }
  return(0);
}


CFLOAT *kernel_cache_clean_and_malloc(KERNEL_CACHE *kernel_cache,
                                      long int docnum)
     /* Get a free cache entry. In case cache is full, the lru element
        is removed. */
{
  long result;
  if((result = kernel_cache_malloc(kernel_cache)) == -1) {
    if(kernel_cache_free_lru(kernel_cache)) {
      result = kernel_cache_malloc(kernel_cache);
    }
  }
  kernel_cache->index[docnum]=result;
  if(result == -1) {
    return(0);
  }
  kernel_cache->invindex[result]=docnum;
  kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */
  return((CFLOAT *)((long)kernel_cache->buffer
                    +(kernel_cache->activenum*sizeof(CFLOAT)*
                      kernel_cache->index[docnum])));
}

long kernel_cache_touch(KERNEL_CACHE *kernel_cache, long int docnum)
     /* Update lru time to avoid removal from cache. */
{
  if(kernel_cache && kernel_cache->index[docnum] != -1) {
    kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */
    return(1);
  }
  return(0);
}

long kernel_cache_check(KERNEL_CACHE *kernel_cache, long int docnum)
     /* Is that row cached? */
{
  return(kernel_cache->index[docnum] != -1);
}
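/* Sketch (illustrative, not in the original source): the typical access
   pattern for the cache API above. Check whether a row is resident,
   touch it to protect it from LRU eviction, and otherwise let
   cache_kernel_row fetch it (which evicts the least recently used row
   when the cache is full). The function name is hypothetical. */

static void example_cache_access(KERNEL_CACHE *kernel_cache, DOC **docs,
                                 long docnum, KERNEL_PARM *kernel_parm)
{
  if(kernel_cache_check(kernel_cache,docnum))    /* row already cached? */
    kernel_cache_touch(kernel_cache,docnum);     /* protect it from lru */
  else
    cache_kernel_row(kernel_cache,docs,docnum,kernel_parm); /* fetch it */
}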
long kernel_cache_space_available(KERNEL_CACHE *kernel_cache)
     /* Is there room for one more row? */
{
  return(kernel_cache->elems < kernel_cache->max_elems);
}

/************************** Compute estimates ******************************/

void compute_xa_estimates(MODEL *model, long int *label,
                          long int *unlabeled, long int totdoc,
                          DOC **docs, double *lin, double *a,
                          KERNEL_PARM *kernel_parm,
                          LEARN_PARM *learn_parm, double *error,
                          double *recall, double *precision)
     /* Computes xa-estimate of error rate, recall, and precision. See
        T. Joachims, Estimating the Generalization Performance of an SVM
        Efficiently, ICML, 2000. */
{
  long i,looerror,looposerror,loonegerror;
  long totex,totposex;
  double xi,r_delta,r_delta_sq,sim=0;
  long *sv2dnum=NULL,*sv=NULL,svnum;

  r_delta=estimate_r_delta(docs,totdoc,kernel_parm);
  r_delta_sq=r_delta*r_delta;

  looerror=0;
  looposerror=0;
  loonegerror=0;
  totex=0;
  totposex=0;
  svnum=0;

  if(learn_parm->xa_depth > 0) {
    sv = (long *)my_malloc(sizeof(long)*(totdoc+11));
    for(i=0;i<totdoc;i++)
      sv[i]=0;
    for(i=1;i<model->sv_num;i++)
      if(a[model->supvec[i]->docnum]
         < (learn_parm->svm_cost[model->supvec[i]->docnum]
            -learn_parm->epsilon_a)) {
        sv[model->supvec[i]->docnum]=1;
        svnum++;
      }
    sv2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
    clear_index(sv2dnum);
    compute_index(sv,totdoc,sv2dnum);
  }

  for(i=0;i<totdoc;i++) {
    if(unlabeled[i]) {
      /* ignore it */
    }
    else {
      xi=1.0-((lin[i]-model->b)*(double)label[i]);
      if(xi<0) xi=0;
      if(label[i]>0) {
        totposex++;
      }
      if((learn_parm->rho*a[i]*r_delta_sq+xi) >= 1.0) {
        if(learn_parm->xa_depth > 0) { /* makes assumptions */
          sim=distribute_alpha_t_greedily(sv2dnum,svnum,docs,a,i,label,
                                          kernel_parm,learn_parm,
                                 (double)((1.0-xi-a[i]*r_delta_sq)/(2.0*a[i])));
        }
        if((learn_parm->xa_depth == 0) ||
           ((a[i]*kernel(kernel_parm,docs[i],docs[i])+a[i]*2.0*sim+xi) >= 1.0)) {
          looerror++;
          if(label[i]>0) {
            looposerror++;
          }
          else {
            loonegerror++;
          }
        }
      }
      totex++;
    }
  }

  (*error)=((double)looerror/(double)totex)*100.0;
  (*recall)=(1.0-(double)looposerror/(double)totposex)*100.0;
  (*precision)=(((double)totposex-(double)looposerror)
                /((double)totposex-(double)looposerror+(double)loonegerror))*100.0;

  free(sv);
  free(sv2dnum);
}
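/* Sketch (illustrative, not in the original source): the xi-alpha
   criterion used above, stated in isolation. Example i is counted as a
   potential leave-one-out error iff rho*a_i*R_delta^2 + xi_i >= 1,
   where xi_i = max(0, 1 - y_i*(lin_i - b)) is its slack. */

static long example_xialpha_flags_error(double rho, double a_i,
                                        double r_delta_sq, double xi_i)
{
  return((rho*a_i*r_delta_sq+xi_i) >= 1.0);
}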
double distribute_alpha_t_greedily(long int *sv2dnum, long int svnum,
                                   DOC **docs, double *a,
                                   long int docnum,
                                   long int *label,
                                   KERNEL_PARM *kernel_parm,
                                   LEARN_PARM *learn_parm, double thresh)
     /* Experimental code improving plain XiAlpha estimates by
        computing a better bound using a greedy optimization strategy. */
{
  long best_depth=0;
  long i,j,k,d,skip,allskip;
  double best,best_val[101],val,init_val_sq,init_val_lin;
  long best_ex[101];
  CFLOAT *cache,*trow;

  cache=(CFLOAT *)my_malloc(sizeof(CFLOAT)*learn_parm->xa_depth*svnum);
  trow = (CFLOAT *)my_malloc(sizeof(CFLOAT)*svnum);

  for(k=0;k<svnum;k++) {
    trow[k]=kernel(kernel_parm,docs[docnum],docs[sv2dnum[k]]);
  }

  init_val_sq=0;
  init_val_lin=0;
  best=0;

  for(d=0;d<learn_parm->xa_depth;d++) {
    allskip=1;
    if(d>=1) {
      init_val_sq+=cache[best_ex[d-1]+svnum*(d-1)];
      for(k=0;k<d-1;k++) {
        init_val_sq+=2.0*cache[best_ex[k]+svnum*(d-1)];
      }
      init_val_lin+=trow[best_ex[d-1]];
    }
    for(i=0;i<svnum;i++) {
      skip=0;
      if(sv2dnum[i] == docnum) skip=1;
      for(j=0;j<d;j++) {
        if(i == best_ex[j]) skip=1;
      }

      if(!skip) {
        val=init_val_sq;
        if(kernel_parm->kernel_type == LINEAR)
          val+=docs[sv2dnum[i]]->fvec->twonorm_sq;
        else
          val+=kernel(kernel_parm,docs[sv2dnum[i]],docs[sv2dnum[i]]);
        for(j=0;j<d;j++) {
          val+=2.0*cache[i+j*svnum];
        }
        val*=(1.0/(2.0*(d+1)*(d+1)));
        val+=((init_val_lin+trow[i])/(d+1));

        if(allskip || (val < best_val[d])) {
          best_val[d]=val;
          best_ex[d]=i;
        }
        allskip=0;
        if(val < thresh) {
          i=svnum;
        }
      }
    }
    if(!allskip) {
      for(k=0;k<svnum;k++) {
        cache[d*svnum+k]=kernel(kernel_parm,
                                docs[sv2dnum[best_ex[d]]],
                                docs[sv2dnum[k]]);
      }
    }
    if((!allskip) && ((best_val[d] < best) || (d == 0))) {
      best=best_val[d];
      best_depth=d;
    }
    if(allskip || (best < thresh)) {
      d=learn_parm->xa_depth;
    }
  }

  free(cache);
  free(trow);

  /* printf("Distribute[%ld](%ld)=%f, ",docnum,best_depth,best); */
  return(best);
}


void estimate_transduction_quality(MODEL *model, long int *label,
                                   long int *unlabeled,
                                   long int totdoc, DOC **docs, double *lin)
     /* Loo-bound based on observation that loo-errors must have an
        equal distribution in both training and test examples, given
        that the test examples are classified correctly. Compare
        chapter "Constraints on the Transductive Hyperplane" in my
        Dissertation. */
{
  long i,j,l=0,ulab=0,lab=0,labpos=0,labneg=0,ulabpos=0,ulabneg=0,totulab=0;
  double totlab=0,totlabpos=0,totlabneg=0,labsum=0,ulabsum=0;
  double r_delta,r_delta_sq,xi,xisum=0,asum=0;

  r_delta=estimate_r_delta(docs,totdoc,&(model->kernel_parm));
  r_delta_sq=r_delta*r_delta;

  for(j=0;j<totdoc;j++) {
    if(unlabeled[j]) {
      totulab++;
    }
    else {
      totlab++;
      if(label[j] > 0)
        totlabpos++;
      else
        totlabneg++;
    }
  }
  for(j=1;j<model->sv_num;j++) {
    i=model->supvec[j]->docnum;
    xi=1.0-((lin[i]-model->b)*(double)label[i]);
    if(xi<0) xi=0;

    xisum+=xi;
    asum+=fabs(model->alpha[j]);
    if(unlabeled[i]) {
      ulabsum+=(fabs(model->alpha[j])*r_delta_sq+xi);
    }
    else {
      labsum+=(fabs(model->alpha[j])*r_delta_sq+xi);
    }
    if((fabs(model->alpha[j])*r_delta_sq+xi) >= 1) {
      l++;
      if(unlabeled[model->supvec[j]->docnum]) {
        ulab++;
        if(model->alpha[j] > 0)
          ulabpos++;
        else
          ulabneg++;
      }
      else {
        lab++;
        if(model->alpha[j] > 0)
          labpos++;
        else
          labneg++;
      }
    }
  }
  printf("xacrit>=1: labeledpos=%.5f labeledneg=%.5f default=%.5f\n",(double)labpos/(double)totlab*100.0,(double)labneg/(double)totlab*100.0,(double)totlabpos/(double)(totlab)*100.0);
  printf("xacrit>=1: unlabelpos=%.5f unlabelneg=%.5f\n",(double)ulabpos/(double)totulab*100.0,(double)ulabneg/(double)totulab*100.0);
  printf("xacrit>=1: labeled=%.5f unlabeled=%.5f all=%.5f\n",(double)lab/(double)totlab*100.0,(double)ulab/(double)totulab*100.0,(double)l/(double)(totdoc)*100.0);
  printf("xacritsum: labeled=%.5f unlabeled=%.5f all=%.5f\n",(double)labsum/(double)totlab*100.0,(double)ulabsum/(double)totulab*100.0,(double)(labsum+ulabsum)/(double)(totdoc)*100.0);
  printf("r_delta_sq=%.5f xisum=%.5f asum=%.5f\n",r_delta_sq,xisum,asum);
}
double estimate_margin_vcdim(MODEL *model, double w, double R,
                             KERNEL_PARM *kernel_parm)
     /* w: optional: length of model vector in feature space */
     /* R: optional: radius of ball containing the data */
{
  double h;

  /* follows chapter 5.6.4 in [Vapnik/95] */

  if(w<0) {
    w=model_length_s(model,kernel_parm);
  }
  if(R<0) {
    R=estimate_sphere(model,kernel_parm);
  }
  h = w*w * R*R +1;
  return(h);
}

double estimate_sphere(MODEL *model, KERNEL_PARM *kernel_parm)
                          /* Approximates the radius of the ball containing */
                          /* the support vectors by bounding it with the */
{                         /* length of the longest support vector. This is */
  register long j;        /* pretty good for text categorization, since all */
  double xlen,maxxlen=0;  /* documents have feature vectors of length 1. It */
  DOC *nulldoc;           /* assumes that the center of the ball is at the */
  WORD nullword;          /* origin of the space. */

  nullword.wnum=0;
  nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));

  for(j=1;j<model->sv_num;j++) {
    xlen=sqrt(kernel(kernel_parm,model->supvec[j],model->supvec[j])
              -2*kernel(kernel_parm,model->supvec[j],nulldoc)
              +kernel(kernel_parm,nulldoc,nulldoc));
    if(xlen>maxxlen) {
      maxxlen=xlen;
    }
  }

  free_example(nulldoc,1);
  return(maxxlen);
}

double estimate_r_delta(DOC **docs, long int totdoc, KERNEL_PARM *kernel_parm)
{
  long i;
  double maxxlen,xlen;
  DOC *nulldoc;           /* assumes that the center of the ball is at the */
  WORD nullword;          /* origin of the space. */

  nullword.wnum=0;
  nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));

  maxxlen=0;
  for(i=0;i<totdoc;i++) {
    xlen=sqrt(kernel(kernel_parm,docs[i],docs[i])
              -2*kernel(kernel_parm,docs[i],nulldoc)
              +kernel(kernel_parm,nulldoc,nulldoc));
    if(xlen>maxxlen) {
      maxxlen=xlen;
    }
  }

  free_example(nulldoc,1);
  return(maxxlen);
}
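/* Sketch (illustrative, not in the original source): the identity used
   by estimate_sphere, estimate_r_delta, and estimate_r_delta_average.
   The distance of phi(x) from the origin, represented by the empty
   document nulldoc, follows from
   ||phi(x)-phi(0)||^2 = K(x,x) - 2*K(x,0) + K(0,0). */

static double example_feature_space_dist_from_origin(KERNEL_PARM *kernel_parm,
                                                     DOC *x, DOC *nulldoc)
{
  return(sqrt(kernel(kernel_parm,x,x)
              -2*kernel(kernel_parm,x,nulldoc)
              +kernel(kernel_parm,nulldoc,nulldoc)));
}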
double estimate_r_delta_average(DOC **docs, long int totdoc,
                                KERNEL_PARM *kernel_parm)
{
  long i;
  double avgxlen;
  DOC *nulldoc;           /* assumes that the center of the ball is at the */
  WORD nullword;          /* origin of the space. */

  nullword.wnum=0;
  nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));

  avgxlen=0;
  for(i=0;i<totdoc;i++) {
    avgxlen+=sqrt(kernel(kernel_parm,docs[i],docs[i])
                  -2*kernel(kernel_parm,docs[i],nulldoc)
                  +kernel(kernel_parm,nulldoc,nulldoc));
  }

  free_example(nulldoc,1);
  return(avgxlen/totdoc);
}

double length_of_longest_document_vector(DOC **docs, long int totdoc,
                                         KERNEL_PARM *kernel_parm)
{
  long i;
  double maxxlen,xlen;

  maxxlen=0;
  for(i=0;i<totdoc;i++) {
    xlen=sqrt(kernel(kernel_parm,docs[i],docs[i]));
    if(xlen>maxxlen) {
      maxxlen=xlen;
    }
  }

  return(maxxlen);
}

/****************************** IO-handling **********************************/

void write_prediction(char *predfile, MODEL *model, double *lin,
                      double *a, long int *unlabeled,
                      long int *label, long int totdoc,
                      LEARN_PARM *learn_parm)
{
  FILE *predfl;
  long i;
  double dist,a_max;

  if(verbosity>=1) {
    printf("Writing prediction file..."); fflush(stdout);
  }
  if ((predfl = fopen (predfile, "w")) == NULL)
  { perror (predfile); exit (1); }
  a_max=learn_parm->epsilon_a;
  for(i=0;i<totdoc;i++) {
    if((unlabeled[i]) && (a[i]>a_max)) {
      a_max=a[i];
    }
  }
  for(i=0;i<totdoc;i++) {
    if(unlabeled[i]) {
      if((a[i]>(learn_parm->epsilon_a))) {
        dist=(double)label[i]*(1.0-learn_parm->epsilon_crit-a[i]/(a_max*2.0));
      }
      else {
        dist=(lin[i]-model->b);
      }
      if(dist>0) {
        fprintf(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist);
      }
      else {
        fprintf(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist);
      }
    }
  }
  fclose(predfl);
  if(verbosity>=1) {
    printf("done\n");
  }
}

void write_alphas(char *alphafile, double *a,
                  long int *label, long int totdoc)
{
  FILE *alphafl;
  long i;

  if(verbosity>=1) {
    printf("Writing alpha file..."); fflush(stdout);
  }
  if ((alphafl = fopen (alphafile, "w")) == NULL)
  { perror (alphafile); exit (1); }
  for(i=0;i<totdoc;i++) {
    fprintf(alphafl,"%.18g\n",a[i]*(double)label[i]);
  }
  fclose(alphafl);
  if(verbosity>=1) {
    printf("done\n");
  }
}
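/* Sketch (illustrative, not part of the SVM-light interface):
   write_alphas emits one signed alpha per line (alpha_i multiplied by
   the label y_i), so a matching reader is a single fscanf loop. The
   function below is a hypothetical counterpart for loading such a file
   back into an array of size totdoc. */

static long example_read_alphas(char *alphafile, double *a, long totdoc)
{
  FILE *alphafl;
  long i=0;

  if((alphafl = fopen(alphafile,"r")) == NULL)
    { perror(alphafile); return(0); }
  while((i<totdoc) && (fscanf(alphafl,"%lf",&a[i]) == 1))
    i++;
  fclose(alphafl);
  return(i); /* number of alphas read */
}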