annotate toolboxes/SVM-light/src/svm_classify.c @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
rev   line source
wolffd@0 1 /***********************************************************************/
wolffd@0 2 /* */
wolffd@0 3 /* svm_classify.c */
wolffd@0 4 /* */
wolffd@0 5 /* Classification module of Support Vector Machine. */
wolffd@0 6 /* */
wolffd@0 7 /* Author: Thorsten Joachims */
wolffd@0 8 /* Date: 02.07.02 */
wolffd@0 9 /* */
wolffd@0 10 /* Copyright (c) 2002 Thorsten Joachims - All rights reserved */
wolffd@0 11 /* */
wolffd@0 12 /* This software is available for non-commercial use only. It must */
wolffd@0 13 /* not be modified and distributed without prior permission of the */
wolffd@0 14 /* author. The author is not responsible for implications from the */
wolffd@0 15 /* use of this software. */
wolffd@0 16 /* */
wolffd@0 17 /************************************************************************/
wolffd@0 18
wolffd@0 19 # include "svm_common.h"
wolffd@0 20
/* File names taken from the command line; filled in by
   read_input_parameters() and used by main(). */
char docfile[200];          /* file with the examples to classify */
char modelfile[200];        /* file with the model to load */
char predictionsfile[200];  /* file the predictions are written to */

/* Forward declarations of the helpers defined below main(). */
void read_input_parameters(int argc, char **argv, char *docfile,
                           char *modelfile, char *predictionsfile,
                           long *verbosity, long *pred_format);
void print_help(void);
wolffd@0 28
wolffd@0 29
wolffd@0 30 int main (int argc, char* argv[])
wolffd@0 31 {
wolffd@0 32 DOC *doc; /* test example */
wolffd@0 33 WORD *words;
wolffd@0 34 long max_docs,max_words_doc,lld;
wolffd@0 35 long totdoc=0,queryid,slackid;
wolffd@0 36 long correct=0,incorrect=0,no_accuracy=0;
wolffd@0 37 long res_a=0,res_b=0,res_c=0,res_d=0,wnum,pred_format;
wolffd@0 38 long j;
wolffd@0 39 double t1,runtime=0;
wolffd@0 40 double dist,doc_label,costfactor;
wolffd@0 41 char *line,*comment;
wolffd@0 42 FILE *predfl,*docfl;
wolffd@0 43 MODEL *model;
wolffd@0 44
wolffd@0 45 read_input_parameters(argc,argv,docfile,modelfile,predictionsfile,
wolffd@0 46 &verbosity,&pred_format);
wolffd@0 47
wolffd@0 48 nol_ll(docfile,&max_docs,&max_words_doc,&lld); /* scan size of input file */
wolffd@0 49 max_words_doc+=2;
wolffd@0 50 lld+=2;
wolffd@0 51
wolffd@0 52 line = (char *)my_malloc(sizeof(char)*lld);
wolffd@0 53 words = (WORD *)my_malloc(sizeof(WORD)*(max_words_doc+10));
wolffd@0 54
wolffd@0 55 model=read_model(modelfile);
wolffd@0 56
wolffd@0 57 if(model->kernel_parm.kernel_type == 0) { /* linear kernel */
wolffd@0 58 /* compute weight vector */
wolffd@0 59 add_weight_vector_to_linear_model(model);
wolffd@0 60 }
wolffd@0 61
wolffd@0 62 if(verbosity>=2) {
wolffd@0 63 printf("Classifying test examples.."); fflush(stdout);
wolffd@0 64 }
wolffd@0 65
wolffd@0 66 if ((docfl = fopen (docfile, "r")) == NULL)
wolffd@0 67 { perror (docfile); exit (1); }
wolffd@0 68 if ((predfl = fopen (predictionsfile, "w")) == NULL)
wolffd@0 69 { perror (predictionsfile); exit (1); }
wolffd@0 70
wolffd@0 71 while((!feof(docfl)) && fgets(line,(int)lld,docfl)) {
wolffd@0 72 if(line[0] == '#') continue; /* line contains comments */
wolffd@0 73 parse_document(line,words,&doc_label,&queryid,&slackid,&costfactor,&wnum,
wolffd@0 74 max_words_doc,&comment);
wolffd@0 75 totdoc++;
wolffd@0 76 if(model->kernel_parm.kernel_type == 0) { /* linear kernel */
wolffd@0 77 for(j=0;(words[j]).wnum != 0;j++) { /* Check if feature numbers */
wolffd@0 78 if((words[j]).wnum>model->totwords) /* are not larger than in */
wolffd@0 79 (words[j]).wnum=0; /* model. Remove feature if */
wolffd@0 80 } /* necessary. */
wolffd@0 81 doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0));
wolffd@0 82 t1=get_runtime();
wolffd@0 83 dist=classify_example_linear(model,doc);
wolffd@0 84 runtime+=(get_runtime()-t1);
wolffd@0 85 free_example(doc,1);
wolffd@0 86 }
wolffd@0 87 else { /* non-linear kernel */
wolffd@0 88 doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0));
wolffd@0 89 t1=get_runtime();
wolffd@0 90 dist=classify_example(model,doc);
wolffd@0 91 runtime+=(get_runtime()-t1);
wolffd@0 92 free_example(doc,1);
wolffd@0 93 }
wolffd@0 94 if(dist>0) {
wolffd@0 95 if(pred_format==0) { /* old weired output format */
wolffd@0 96 fprintf(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist);
wolffd@0 97 }
wolffd@0 98 if(doc_label>0) correct++; else incorrect++;
wolffd@0 99 if(doc_label>0) res_a++; else res_b++;
wolffd@0 100 }
wolffd@0 101 else {
wolffd@0 102 if(pred_format==0) { /* old weired output format */
wolffd@0 103 fprintf(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist);
wolffd@0 104 }
wolffd@0 105 if(doc_label<0) correct++; else incorrect++;
wolffd@0 106 if(doc_label>0) res_c++; else res_d++;
wolffd@0 107 }
wolffd@0 108 if(pred_format==1) { /* output the value of decision function */
wolffd@0 109 fprintf(predfl,"%.8g\n",dist);
wolffd@0 110 }
wolffd@0 111 if((int)(0.01+(doc_label*doc_label)) != 1)
wolffd@0 112 { no_accuracy=1; } /* test data is not binary labeled */
wolffd@0 113 if(verbosity>=2) {
wolffd@0 114 if(totdoc % 100 == 0) {
wolffd@0 115 printf("%ld..",totdoc); fflush(stdout);
wolffd@0 116 }
wolffd@0 117 }
wolffd@0 118 }
wolffd@0 119 free(line);
wolffd@0 120 free(words);
wolffd@0 121 free_model(model,1);
wolffd@0 122
wolffd@0 123 if(verbosity>=2) {
wolffd@0 124 printf("done\n");
wolffd@0 125
wolffd@0 126 /* Note by Gary Boone Date: 29 April 2000 */
wolffd@0 127 /* o Timing is inaccurate. The timer has 0.01 second resolution. */
wolffd@0 128 /* Because classification of a single vector takes less than */
wolffd@0 129 /* 0.01 secs, the timer was underflowing. */
wolffd@0 130 printf("Runtime (without IO) in cpu-seconds: %.2f\n",
wolffd@0 131 (float)(runtime/100.0));
wolffd@0 132
wolffd@0 133 }
wolffd@0 134 if((!no_accuracy) && (verbosity>=1)) {
wolffd@0 135 printf("Accuracy on test set: %.2f%% (%ld correct, %ld incorrect, %ld total)\n",(float)(correct)*100.0/totdoc,correct,incorrect,totdoc);
wolffd@0 136 printf("Precision/recall on test set: %.2f%%/%.2f%%\n",(float)(res_a)*100.0/(res_a+res_b),(float)(res_a)*100.0/(res_a+res_c));
wolffd@0 137 }
wolffd@0 138
wolffd@0 139 return(0);
wolffd@0 140 }
wolffd@0 141
/* Capacity of the file name buffers handed to read_input_parameters().
   Must match the char[200] buffers (docfile, modelfile,
   predictionsfile) declared at the top of this file. */
enum { SVM_CLASSIFY_FILENAME_CAP = 200 };

/* Copies a file name into one of the fixed-size buffers. A name that
   does not fit (including the terminating NUL) ends the program with
   status 1 instead of overflowing the buffer. */
static void copy_filename_or_exit(char *dest, const char *src)
{
  if(strlen(src) >= (size_t)SVM_CLASSIFY_FILENAME_CAP) {
    printf("\nFile name too long (at most %d characters): %s\n\n",
           SVM_CLASSIFY_FILENAME_CAP-1,src);
    exit(1);
  }
  strcpy(dest,src);
}

/* Advances *i to the value that follows the option at argv[*i] and
   returns it. If the option is the last argument, so that there is no
   value, a message and the help text are printed and the program ends
   with status 1. */
static const char *option_value_or_exit(int argc, char **argv, long *i)
{
  const char *option=argv[*i];

  (*i)++;
  if((*i)>=argc) {
    printf("\nMissing value for option %s!\n\n",option);
    print_help();
    exit(1);
  }
  return(argv[*i]);
}

/* Parses the command line of svm_classify.

   argc, argv       command line as passed to main()
   docfile          out: name of the example file (required)
   modelfile        out: name of the model file (required)
   predictionsfile  out: name of the output file
                    (default "svm_predictions")
   verbosity        out: value of -v (default 2)
   pred_format      out: value of -f, must be 0 or 1 (default 1)

   The three file name buffers must each provide
   SVM_CLASSIFY_FILENAME_CAP bytes. On -h, an unknown option, missing
   file arguments or an invalid -f value the help text is printed and
   the program exits with status 0; those paths keep their original
   exit status. */
void read_input_parameters(int argc, char **argv, char *docfile,
                           char *modelfile, char *predictionsfile,
                           long int *verbosity, long int *pred_format)
{
  long i;

  /* set default */
  strcpy (modelfile, "svm_model");
  strcpy (predictionsfile, "svm_predictions");
  (*verbosity)=2;
  (*pred_format)=1;

  /* Options come first; the first argument that does not start with
     '-' begins the list of file names. */
  for(i=1;(i<argc) && ((argv[i])[0] == '-');i++) {
    switch ((argv[i])[1])
      {
      case 'h': print_help(); exit(0);
      case 'v': (*verbosity)=atol(option_value_or_exit(argc,argv,&i)); break;
      case 'f': (*pred_format)=atol(option_value_or_exit(argc,argv,&i)); break;
      default: printf("\nUnrecognized option %s!\n\n",argv[i]);
               print_help();
               exit(0);
      }
  }
  /* At least the example file and the model file must follow. */
  if((i+1)>=argc) {
    printf("\nNot enough input parameters!\n\n");
    print_help();
    exit(0);
  }
  copy_filename_or_exit(docfile,argv[i]);
  copy_filename_or_exit(modelfile,argv[i+1]);
  if((i+2)<argc) {
    copy_filename_or_exit(predictionsfile,argv[i+2]);
  }
  if(((*pred_format) != 0) && ((*pred_format) != 1)) {
    printf("\nOutput format can only take the values 0 or 1!\n\n");
    print_help();
    exit(0);
  }
}
wolffd@0 181
wolffd@0 182 void print_help(void)
wolffd@0 183 {
wolffd@0 184 printf("\nSVM-light %s: Support Vector Machine, classification module %s\n",VERSION,VERSION_DATE);
wolffd@0 185 copyright_notice();
wolffd@0 186 printf(" usage: svm_classify [options] example_file model_file output_file\n\n");
wolffd@0 187 printf("options: -h -> this help\n");
wolffd@0 188 printf(" -v [0..3] -> verbosity level (default 2)\n");
wolffd@0 189 printf(" -f [0,1] -> 0: old output format of V1.0\n");
wolffd@0 190 printf(" -> 1: output the value of decision function (default)\n\n");
wolffd@0 191 }
wolffd@0 192
wolffd@0 193
wolffd@0 194
wolffd@0 195