Daniel@0: /***********************************************************************/ Daniel@0: /* */ Daniel@0: /* svm_classify.c */ Daniel@0: /* */ Daniel@0: /* Classification module of Support Vector Machine. */ Daniel@0: /* */ Daniel@0: /* Author: Thorsten Joachims */ Daniel@0: /* Date: 02.07.02 */ Daniel@0: /* */ Daniel@0: /* Copyright (c) 2002 Thorsten Joachims - All rights reserved */ Daniel@0: /* */ Daniel@0: /* This software is available for non-commercial use only. It must */ Daniel@0: /* not be modified and distributed without prior permission of the */ Daniel@0: /* author. The author is not responsible for implications from the */ Daniel@0: /* use of this software. */ Daniel@0: /* */ Daniel@0: /************************************************************************/ Daniel@0: Daniel@0: # include "svm_common.h" Daniel@0: Daniel@0: char docfile[200]; Daniel@0: char modelfile[200]; Daniel@0: char predictionsfile[200]; Daniel@0: Daniel@0: void read_input_parameters(int, char **, char *, char *, char *, long *, Daniel@0: long *); Daniel@0: void print_help(void); Daniel@0: Daniel@0: Daniel@0: int main (int argc, char* argv[]) Daniel@0: { Daniel@0: DOC *doc; /* test example */ Daniel@0: WORD *words; Daniel@0: long max_docs,max_words_doc,lld; Daniel@0: long totdoc=0,queryid,slackid; Daniel@0: long correct=0,incorrect=0,no_accuracy=0; Daniel@0: long res_a=0,res_b=0,res_c=0,res_d=0,wnum,pred_format; Daniel@0: long j; Daniel@0: double t1,runtime=0; Daniel@0: double dist,doc_label,costfactor; Daniel@0: char *line,*comment; Daniel@0: FILE *predfl,*docfl; Daniel@0: MODEL *model; Daniel@0: Daniel@0: read_input_parameters(argc,argv,docfile,modelfile,predictionsfile, Daniel@0: &verbosity,&pred_format); Daniel@0: Daniel@0: nol_ll(docfile,&max_docs,&max_words_doc,&lld); /* scan size of input file */ Daniel@0: max_words_doc+=2; Daniel@0: lld+=2; Daniel@0: Daniel@0: line = (char *)my_malloc(sizeof(char)*lld); Daniel@0: words = (WORD *)my_malloc(sizeof(WORD)*(max_words_doc+10)); Daniel@0: Daniel@0: model=read_model(modelfile); Daniel@0: Daniel@0: if(model->kernel_parm.kernel_type == 0) { /* linear kernel */ Daniel@0: /* compute weight vector */ Daniel@0: add_weight_vector_to_linear_model(model); Daniel@0: } Daniel@0: Daniel@0: if(verbosity>=2) { Daniel@0: printf("Classifying test examples.."); fflush(stdout); Daniel@0: } Daniel@0: Daniel@0: if ((docfl = fopen (docfile, "r")) == NULL) Daniel@0: { perror (docfile); exit (1); } Daniel@0: if ((predfl = fopen (predictionsfile, "w")) == NULL) Daniel@0: { perror (predictionsfile); exit (1); } Daniel@0: Daniel@0: while((!feof(docfl)) && fgets(line,(int)lld,docfl)) { Daniel@0: if(line[0] == '#') continue; /* line contains comments */ Daniel@0: parse_document(line,words,&doc_label,&queryid,&slackid,&costfactor,&wnum, Daniel@0: max_words_doc,&comment); Daniel@0: totdoc++; Daniel@0: if(model->kernel_parm.kernel_type == 0) { /* linear kernel */ Daniel@0: for(j=0;(words[j]).wnum != 0;j++) { /* Check if feature numbers */ Daniel@0: if((words[j]).wnum>model->totwords) /* are not larger than in */ Daniel@0: (words[j]).wnum=0; /* model. Remove feature if */ Daniel@0: } /* necessary. */ Daniel@0: doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0)); Daniel@0: t1=get_runtime(); Daniel@0: dist=classify_example_linear(model,doc); Daniel@0: runtime+=(get_runtime()-t1); Daniel@0: free_example(doc,1); Daniel@0: } Daniel@0: else { /* non-linear kernel */ Daniel@0: doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0)); Daniel@0: t1=get_runtime(); Daniel@0: dist=classify_example(model,doc); Daniel@0: runtime+=(get_runtime()-t1); Daniel@0: free_example(doc,1); Daniel@0: } Daniel@0: if(dist>0) { Daniel@0: if(pred_format==0) { /* old weired output format */ Daniel@0: fprintf(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist); Daniel@0: } Daniel@0: if(doc_label>0) correct++; else incorrect++; Daniel@0: if(doc_label>0) res_a++; else res_b++; Daniel@0: } Daniel@0: else { Daniel@0: if(pred_format==0) { /* old weired output format */ Daniel@0: fprintf(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist); Daniel@0: } Daniel@0: if(doc_label<0) correct++; else incorrect++; Daniel@0: if(doc_label>0) res_c++; else res_d++; Daniel@0: } Daniel@0: if(pred_format==1) { /* output the value of decision function */ Daniel@0: fprintf(predfl,"%.8g\n",dist); Daniel@0: } Daniel@0: if((int)(0.01+(doc_label*doc_label)) != 1) Daniel@0: { no_accuracy=1; } /* test data is not binary labeled */ Daniel@0: if(verbosity>=2) { Daniel@0: if(totdoc % 100 == 0) { Daniel@0: printf("%ld..",totdoc); fflush(stdout); Daniel@0: } Daniel@0: } Daniel@0: } Daniel@0: free(line); Daniel@0: free(words); Daniel@0: free_model(model,1); Daniel@0: Daniel@0: if(verbosity>=2) { Daniel@0: printf("done\n"); Daniel@0: Daniel@0: /* Note by Gary Boone Date: 29 April 2000 */ Daniel@0: /* o Timing is inaccurate. The timer has 0.01 second resolution. */ Daniel@0: /* Because classification of a single vector takes less than */ Daniel@0: /* 0.01 secs, the timer was underflowing. */ Daniel@0: printf("Runtime (without IO) in cpu-seconds: %.2f\n", Daniel@0: (float)(runtime/100.0)); Daniel@0: Daniel@0: } Daniel@0: if((!no_accuracy) && (verbosity>=1)) { Daniel@0: printf("Accuracy on test set: %.2f%% (%ld correct, %ld incorrect, %ld total)\n",(float)(correct)*100.0/totdoc,correct,incorrect,totdoc); Daniel@0: printf("Precision/recall on test set: %.2f%%/%.2f%%\n",(float)(res_a)*100.0/(res_a+res_b),(float)(res_a)*100.0/(res_a+res_c)); Daniel@0: } Daniel@0: Daniel@0: return(0); Daniel@0: } Daniel@0: Daniel@0: void read_input_parameters(int argc, char **argv, char *docfile, Daniel@0: char *modelfile, char *predictionsfile, Daniel@0: long int *verbosity, long int *pred_format) Daniel@0: { Daniel@0: long i; Daniel@0: Daniel@0: /* set default */ Daniel@0: strcpy (modelfile, "svm_model"); Daniel@0: strcpy (predictionsfile, "svm_predictions"); Daniel@0: (*verbosity)=2; Daniel@0: (*pred_format)=1; Daniel@0: Daniel@0: for(i=1;(i=argc) { Daniel@0: printf("\nNot enough input parameters!\n\n"); Daniel@0: print_help(); Daniel@0: exit(0); Daniel@0: } Daniel@0: strcpy (docfile, argv[i]); Daniel@0: strcpy (modelfile, argv[i+1]); Daniel@0: if((i+2) this help\n"); Daniel@0: printf(" -v [0..3] -> verbosity level (default 2)\n"); Daniel@0: printf(" -f [0,1] -> 0: old output format of V1.0\n"); Daniel@0: printf(" -> 1: output the value of decision function (default)\n\n"); Daniel@0: } Daniel@0: Daniel@0: Daniel@0: Daniel@0: