wolffd@0
|
1 /***********************************************************************/
|
wolffd@0
|
2 /* */
|
wolffd@0
|
3 /* svm_classify.c */
|
wolffd@0
|
4 /* */
|
wolffd@0
|
5 /* Classification module of Support Vector Machine. */
|
wolffd@0
|
6 /* */
|
wolffd@0
|
7 /* Author: Thorsten Joachims */
|
wolffd@0
|
8 /* Date: 02.07.02 */
|
wolffd@0
|
9 /* */
|
wolffd@0
|
10 /* Copyright (c) 2002 Thorsten Joachims - All rights reserved */
|
wolffd@0
|
11 /* */
|
wolffd@0
|
12 /* This software is available for non-commercial use only. It must */
|
wolffd@0
|
13 /* not be modified and distributed without prior permission of the */
|
wolffd@0
|
14 /* author. The author is not responsible for implications from the */
|
wolffd@0
|
15 /* use of this software. */
|
wolffd@0
|
16 /* */
|
wolffd@0
|
17 /************************************************************************/
|
wolffd@0
|
18
|
wolffd@0
|
19 # include "svm_common.h"
|
wolffd@0
|
20
|
wolffd@0
|
/* File-name buffers shared by main() and read_input_parameters(). */
char docfile[200];          /* examples to classify (opened for reading) */
char modelfile[200];        /* model handed to read_model() */
char predictionsfile[200];  /* predictions output (opened for writing) */

/* Forward declarations of the helpers defined below main(). */
void read_input_parameters(int argc, char **argv, char *docfile,
                           char *modelfile, char *predictionsfile,
                           long *verbosity, long *pred_format);
void print_help(void);
|
wolffd@0
|
28
|
wolffd@0
|
29
|
wolffd@0
|
30 int main (int argc, char* argv[])
|
wolffd@0
|
31 {
|
wolffd@0
|
32 DOC *doc; /* test example */
|
wolffd@0
|
33 WORD *words;
|
wolffd@0
|
34 long max_docs,max_words_doc,lld;
|
wolffd@0
|
35 long totdoc=0,queryid,slackid;
|
wolffd@0
|
36 long correct=0,incorrect=0,no_accuracy=0;
|
wolffd@0
|
37 long res_a=0,res_b=0,res_c=0,res_d=0,wnum,pred_format;
|
wolffd@0
|
38 long j;
|
wolffd@0
|
39 double t1,runtime=0;
|
wolffd@0
|
40 double dist,doc_label,costfactor;
|
wolffd@0
|
41 char *line,*comment;
|
wolffd@0
|
42 FILE *predfl,*docfl;
|
wolffd@0
|
43 MODEL *model;
|
wolffd@0
|
44
|
wolffd@0
|
45 read_input_parameters(argc,argv,docfile,modelfile,predictionsfile,
|
wolffd@0
|
46 &verbosity,&pred_format);
|
wolffd@0
|
47
|
wolffd@0
|
48 nol_ll(docfile,&max_docs,&max_words_doc,&lld); /* scan size of input file */
|
wolffd@0
|
49 max_words_doc+=2;
|
wolffd@0
|
50 lld+=2;
|
wolffd@0
|
51
|
wolffd@0
|
52 line = (char *)my_malloc(sizeof(char)*lld);
|
wolffd@0
|
53 words = (WORD *)my_malloc(sizeof(WORD)*(max_words_doc+10));
|
wolffd@0
|
54
|
wolffd@0
|
55 model=read_model(modelfile);
|
wolffd@0
|
56
|
wolffd@0
|
57 if(model->kernel_parm.kernel_type == 0) { /* linear kernel */
|
wolffd@0
|
58 /* compute weight vector */
|
wolffd@0
|
59 add_weight_vector_to_linear_model(model);
|
wolffd@0
|
60 }
|
wolffd@0
|
61
|
wolffd@0
|
62 if(verbosity>=2) {
|
wolffd@0
|
63 printf("Classifying test examples.."); fflush(stdout);
|
wolffd@0
|
64 }
|
wolffd@0
|
65
|
wolffd@0
|
66 if ((docfl = fopen (docfile, "r")) == NULL)
|
wolffd@0
|
67 { perror (docfile); exit (1); }
|
wolffd@0
|
68 if ((predfl = fopen (predictionsfile, "w")) == NULL)
|
wolffd@0
|
69 { perror (predictionsfile); exit (1); }
|
wolffd@0
|
70
|
wolffd@0
|
71 while((!feof(docfl)) && fgets(line,(int)lld,docfl)) {
|
wolffd@0
|
72 if(line[0] == '#') continue; /* line contains comments */
|
wolffd@0
|
73 parse_document(line,words,&doc_label,&queryid,&slackid,&costfactor,&wnum,
|
wolffd@0
|
74 max_words_doc,&comment);
|
wolffd@0
|
75 totdoc++;
|
wolffd@0
|
76 if(model->kernel_parm.kernel_type == 0) { /* linear kernel */
|
wolffd@0
|
77 for(j=0;(words[j]).wnum != 0;j++) { /* Check if feature numbers */
|
wolffd@0
|
78 if((words[j]).wnum>model->totwords) /* are not larger than in */
|
wolffd@0
|
79 (words[j]).wnum=0; /* model. Remove feature if */
|
wolffd@0
|
80 } /* necessary. */
|
wolffd@0
|
81 doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0));
|
wolffd@0
|
82 t1=get_runtime();
|
wolffd@0
|
83 dist=classify_example_linear(model,doc);
|
wolffd@0
|
84 runtime+=(get_runtime()-t1);
|
wolffd@0
|
85 free_example(doc,1);
|
wolffd@0
|
86 }
|
wolffd@0
|
87 else { /* non-linear kernel */
|
wolffd@0
|
88 doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0));
|
wolffd@0
|
89 t1=get_runtime();
|
wolffd@0
|
90 dist=classify_example(model,doc);
|
wolffd@0
|
91 runtime+=(get_runtime()-t1);
|
wolffd@0
|
92 free_example(doc,1);
|
wolffd@0
|
93 }
|
wolffd@0
|
94 if(dist>0) {
|
wolffd@0
|
95 if(pred_format==0) { /* old weired output format */
|
wolffd@0
|
96 fprintf(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist);
|
wolffd@0
|
97 }
|
wolffd@0
|
98 if(doc_label>0) correct++; else incorrect++;
|
wolffd@0
|
99 if(doc_label>0) res_a++; else res_b++;
|
wolffd@0
|
100 }
|
wolffd@0
|
101 else {
|
wolffd@0
|
102 if(pred_format==0) { /* old weired output format */
|
wolffd@0
|
103 fprintf(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist);
|
wolffd@0
|
104 }
|
wolffd@0
|
105 if(doc_label<0) correct++; else incorrect++;
|
wolffd@0
|
106 if(doc_label>0) res_c++; else res_d++;
|
wolffd@0
|
107 }
|
wolffd@0
|
108 if(pred_format==1) { /* output the value of decision function */
|
wolffd@0
|
109 fprintf(predfl,"%.8g\n",dist);
|
wolffd@0
|
110 }
|
wolffd@0
|
111 if((int)(0.01+(doc_label*doc_label)) != 1)
|
wolffd@0
|
112 { no_accuracy=1; } /* test data is not binary labeled */
|
wolffd@0
|
113 if(verbosity>=2) {
|
wolffd@0
|
114 if(totdoc % 100 == 0) {
|
wolffd@0
|
115 printf("%ld..",totdoc); fflush(stdout);
|
wolffd@0
|
116 }
|
wolffd@0
|
117 }
|
wolffd@0
|
118 }
|
wolffd@0
|
119 free(line);
|
wolffd@0
|
120 free(words);
|
wolffd@0
|
121 free_model(model,1);
|
wolffd@0
|
122
|
wolffd@0
|
123 if(verbosity>=2) {
|
wolffd@0
|
124 printf("done\n");
|
wolffd@0
|
125
|
wolffd@0
|
126 /* Note by Gary Boone Date: 29 April 2000 */
|
wolffd@0
|
127 /* o Timing is inaccurate. The timer has 0.01 second resolution. */
|
wolffd@0
|
128 /* Because classification of a single vector takes less than */
|
wolffd@0
|
129 /* 0.01 secs, the timer was underflowing. */
|
wolffd@0
|
130 printf("Runtime (without IO) in cpu-seconds: %.2f\n",
|
wolffd@0
|
131 (float)(runtime/100.0));
|
wolffd@0
|
132
|
wolffd@0
|
133 }
|
wolffd@0
|
134 if((!no_accuracy) && (verbosity>=1)) {
|
wolffd@0
|
135 printf("Accuracy on test set: %.2f%% (%ld correct, %ld incorrect, %ld total)\n",(float)(correct)*100.0/totdoc,correct,incorrect,totdoc);
|
wolffd@0
|
136 printf("Precision/recall on test set: %.2f%%/%.2f%%\n",(float)(res_a)*100.0/(res_a+res_b),(float)(res_a)*100.0/(res_a+res_c));
|
wolffd@0
|
137 }
|
wolffd@0
|
138
|
wolffd@0
|
139 return(0);
|
wolffd@0
|
140 }
|
wolffd@0
|
141
|
wolffd@0
|
/*
 * Copies a command-line file name into one of the caller's file-name
 * buffers, refusing names that would not fit.
 *
 * The buffers passed to read_input_parameters() are the char[200] arrays
 * declared at the top of this file; the function signature carries no size,
 * so the capacity is mirrored here and must be kept in sync with them.
 * On overflow an error is printed to stderr and the program exits with 1.
 */
static void copy_file_name(char *dest, const char *src)
{
  enum { FILE_NAME_CAPACITY = 200 };

  if(strlen(src) >= (size_t)FILE_NAME_CAPACITY) {
    fprintf(stderr,"\nFile name too long (at most %d characters): %s\n\n",
	    FILE_NAME_CAPACITY-1,src);
    exit(1);
  }
  strcpy (dest, src);
}

/*
 * Parses the command line of svm_classify.
 *
 *   argc, argv       - arguments as received by main()
 *   docfile          - out: name of the example file (1st positional arg)
 *   modelfile        - out: name of the model file (2nd positional arg)
 *   predictionsfile  - out: name of the output file (optional 3rd positional
 *                      arg, default "svm_predictions")
 *   verbosity        - out: value of -v (default 2)
 *   pred_format      - out: value of -f (default 1, must be 0 or 1)
 *
 * Leading arguments starting with '-' are treated as options (-h, -v, -f).
 * On -h, an unknown option, too few positional arguments or an invalid
 * output format, the help text is printed and the program exits with
 * status 0. A file name that does not fit the 200-byte buffers terminates
 * the program with status 1.
 */
void read_input_parameters(int argc, char **argv, char *docfile,
			   char *modelfile, char *predictionsfile,
			   long int *verbosity, long int *pred_format)
{
  long i;

  /* set default */
  strcpy (modelfile, "svm_model");
  strcpy (predictionsfile, "svm_predictions");
  (*verbosity)=2;
  (*pred_format)=1;

  for(i=1;(i<argc) && ((argv[i])[0] == '-');i++) {
    switch ((argv[i])[1])
      {
      case 'h': print_help(); exit(0);
      /* An option given as the very last argument has no value: argv[argc]
	 is a null pointer and must not be passed to atol(). Skipping the
	 conversion leaves i past argc, so the "not enough input parameters"
	 check below reports the problem. */
      case 'v': i++; if(i<argc) { (*verbosity)=atol(argv[i]); } break;
      case 'f': i++; if(i<argc) { (*pred_format)=atol(argv[i]); } break;
      default: printf("\nUnrecognized option %s!\n\n",argv[i]);
	       print_help();
	       exit(0);
      }
  }
  if((i+1)>=argc) {
    printf("\nNot enough input parameters!\n\n");
    print_help();
    exit(0);
  }
  copy_file_name (docfile, argv[i]);
  copy_file_name (modelfile, argv[i+1]);
  if((i+2)<argc) {
    copy_file_name (predictionsfile, argv[i+2]);
  }
  if(((*pred_format) != 0) && ((*pred_format) != 1)) {
    printf("\nOutput format can only take the values 0 or 1!\n\n");
    print_help();
    exit(0);
  }
}
|
wolffd@0
|
181
|
wolffd@0
|
182 void print_help(void)
|
wolffd@0
|
183 {
|
wolffd@0
|
184 printf("\nSVM-light %s: Support Vector Machine, classification module %s\n",VERSION,VERSION_DATE);
|
wolffd@0
|
185 copyright_notice();
|
wolffd@0
|
186 printf(" usage: svm_classify [options] example_file model_file output_file\n\n");
|
wolffd@0
|
187 printf("options: -h -> this help\n");
|
wolffd@0
|
188 printf(" -v [0..3] -> verbosity level (default 2)\n");
|
wolffd@0
|
189 printf(" -f [0,1] -> 0: old output format of V1.0\n");
|
wolffd@0
|
190 printf(" -> 1: output the value of decision function (default)\n\n");
|
wolffd@0
|
191 }
|
wolffd@0
|
192
|
wolffd@0
|
193
|
wolffd@0
|
194
|
wolffd@0
|
195
|