wolffd@0: /***********************************************************************/ wolffd@0: /* */ wolffd@0: /* svm_classify.c */ wolffd@0: /* */ wolffd@0: /* Classification module of Support Vector Machine. */ wolffd@0: /* */ wolffd@0: /* Author: Thorsten Joachims */ wolffd@0: /* Date: 02.07.02 */ wolffd@0: /* */ wolffd@0: /* Copyright (c) 2002 Thorsten Joachims - All rights reserved */ wolffd@0: /* */ wolffd@0: /* This software is available for non-commercial use only. It must */ wolffd@0: /* not be modified and distributed without prior permission of the */ wolffd@0: /* author. The author is not responsible for implications from the */ wolffd@0: /* use of this software. */ wolffd@0: /* */ wolffd@0: /************************************************************************/ wolffd@0: wolffd@0: # include "svm_common.h" wolffd@0: wolffd@0: char docfile[200]; wolffd@0: char modelfile[200]; wolffd@0: char predictionsfile[200]; wolffd@0: wolffd@0: void read_input_parameters(int, char **, char *, char *, char *, long *, wolffd@0: long *); wolffd@0: void print_help(void); wolffd@0: wolffd@0: wolffd@0: int main (int argc, char* argv[]) wolffd@0: { wolffd@0: DOC *doc; /* test example */ wolffd@0: WORD *words; wolffd@0: long max_docs,max_words_doc,lld; wolffd@0: long totdoc=0,queryid,slackid; wolffd@0: long correct=0,incorrect=0,no_accuracy=0; wolffd@0: long res_a=0,res_b=0,res_c=0,res_d=0,wnum,pred_format; wolffd@0: long j; wolffd@0: double t1,runtime=0; wolffd@0: double dist,doc_label,costfactor; wolffd@0: char *line,*comment; wolffd@0: FILE *predfl,*docfl; wolffd@0: MODEL *model; wolffd@0: wolffd@0: read_input_parameters(argc,argv,docfile,modelfile,predictionsfile, wolffd@0: &verbosity,&pred_format); wolffd@0: wolffd@0: nol_ll(docfile,&max_docs,&max_words_doc,&lld); /* scan size of input file */ wolffd@0: max_words_doc+=2; wolffd@0: lld+=2; wolffd@0: wolffd@0: line = (char *)my_malloc(sizeof(char)*lld); wolffd@0: words = (WORD *)my_malloc(sizeof(WORD)*(max_words_doc+10)); wolffd@0: wolffd@0: model=read_model(modelfile); wolffd@0: wolffd@0: if(model->kernel_parm.kernel_type == 0) { /* linear kernel */ wolffd@0: /* compute weight vector */ wolffd@0: add_weight_vector_to_linear_model(model); wolffd@0: } wolffd@0: wolffd@0: if(verbosity>=2) { wolffd@0: printf("Classifying test examples.."); fflush(stdout); wolffd@0: } wolffd@0: wolffd@0: if ((docfl = fopen (docfile, "r")) == NULL) wolffd@0: { perror (docfile); exit (1); } wolffd@0: if ((predfl = fopen (predictionsfile, "w")) == NULL) wolffd@0: { perror (predictionsfile); exit (1); } wolffd@0: wolffd@0: while((!feof(docfl)) && fgets(line,(int)lld,docfl)) { wolffd@0: if(line[0] == '#') continue; /* line contains comments */ wolffd@0: parse_document(line,words,&doc_label,&queryid,&slackid,&costfactor,&wnum, wolffd@0: max_words_doc,&comment); wolffd@0: totdoc++; wolffd@0: if(model->kernel_parm.kernel_type == 0) { /* linear kernel */ wolffd@0: for(j=0;(words[j]).wnum != 0;j++) { /* Check if feature numbers */ wolffd@0: if((words[j]).wnum>model->totwords) /* are not larger than in */ wolffd@0: (words[j]).wnum=0; /* model. Remove feature if */ wolffd@0: } /* necessary. */ wolffd@0: doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0)); wolffd@0: t1=get_runtime(); wolffd@0: dist=classify_example_linear(model,doc); wolffd@0: runtime+=(get_runtime()-t1); wolffd@0: free_example(doc,1); wolffd@0: } wolffd@0: else { /* non-linear kernel */ wolffd@0: doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0)); wolffd@0: t1=get_runtime(); wolffd@0: dist=classify_example(model,doc); wolffd@0: runtime+=(get_runtime()-t1); wolffd@0: free_example(doc,1); wolffd@0: } wolffd@0: if(dist>0) { wolffd@0: if(pred_format==0) { /* old weired output format */ wolffd@0: fprintf(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist); wolffd@0: } wolffd@0: if(doc_label>0) correct++; else incorrect++; wolffd@0: if(doc_label>0) res_a++; else res_b++; wolffd@0: } wolffd@0: else { wolffd@0: if(pred_format==0) { /* old weired output format */ wolffd@0: fprintf(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist); wolffd@0: } wolffd@0: if(doc_label<0) correct++; else incorrect++; wolffd@0: if(doc_label>0) res_c++; else res_d++; wolffd@0: } wolffd@0: if(pred_format==1) { /* output the value of decision function */ wolffd@0: fprintf(predfl,"%.8g\n",dist); wolffd@0: } wolffd@0: if((int)(0.01+(doc_label*doc_label)) != 1) wolffd@0: { no_accuracy=1; } /* test data is not binary labeled */ wolffd@0: if(verbosity>=2) { wolffd@0: if(totdoc % 100 == 0) { wolffd@0: printf("%ld..",totdoc); fflush(stdout); wolffd@0: } wolffd@0: } wolffd@0: } wolffd@0: free(line); wolffd@0: free(words); wolffd@0: free_model(model,1); wolffd@0: wolffd@0: if(verbosity>=2) { wolffd@0: printf("done\n"); wolffd@0: wolffd@0: /* Note by Gary Boone Date: 29 April 2000 */ wolffd@0: /* o Timing is inaccurate. The timer has 0.01 second resolution. */ wolffd@0: /* Because classification of a single vector takes less than */ wolffd@0: /* 0.01 secs, the timer was underflowing. */ wolffd@0: printf("Runtime (without IO) in cpu-seconds: %.2f\n", wolffd@0: (float)(runtime/100.0)); wolffd@0: wolffd@0: } wolffd@0: if((!no_accuracy) && (verbosity>=1)) { wolffd@0: printf("Accuracy on test set: %.2f%% (%ld correct, %ld incorrect, %ld total)\n",(float)(correct)*100.0/totdoc,correct,incorrect,totdoc); wolffd@0: printf("Precision/recall on test set: %.2f%%/%.2f%%\n",(float)(res_a)*100.0/(res_a+res_b),(float)(res_a)*100.0/(res_a+res_c)); wolffd@0: } wolffd@0: wolffd@0: return(0); wolffd@0: } wolffd@0: wolffd@0: void read_input_parameters(int argc, char **argv, char *docfile, wolffd@0: char *modelfile, char *predictionsfile, wolffd@0: long int *verbosity, long int *pred_format) wolffd@0: { wolffd@0: long i; wolffd@0: wolffd@0: /* set default */ wolffd@0: strcpy (modelfile, "svm_model"); wolffd@0: strcpy (predictionsfile, "svm_predictions"); wolffd@0: (*verbosity)=2; wolffd@0: (*pred_format)=1; wolffd@0: wolffd@0: for(i=1;(i=argc) { wolffd@0: printf("\nNot enough input parameters!\n\n"); wolffd@0: print_help(); wolffd@0: exit(0); wolffd@0: } wolffd@0: strcpy (docfile, argv[i]); wolffd@0: strcpy (modelfile, argv[i+1]); wolffd@0: if((i+2) this help\n"); wolffd@0: printf(" -v [0..3] -> verbosity level (default 2)\n"); wolffd@0: printf(" -f [0,1] -> 0: old output format of V1.0\n"); wolffd@0: printf(" -> 1: output the value of decision function (default)\n\n"); wolffd@0: } wolffd@0: wolffd@0: wolffd@0: wolffd@0: