Mercurial > hg > camir-aes2014
comparison toolboxes/SVM-light/src/svm_classify.c @ 0:e9a9cd732c1e tip
first hg version after svn
author | wolffd |
---|---|
date | Tue, 10 Feb 2015 15:05:51 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e9a9cd732c1e |
---|---|
1 /***********************************************************************/ | |
2 /* */ | |
3 /* svm_classify.c */ | |
4 /* */ | |
5 /* Classification module of Support Vector Machine. */ | |
6 /* */ | |
7 /* Author: Thorsten Joachims */ | |
8 /* Date: 02.07.02 */ | |
9 /* */ | |
10 /* Copyright (c) 2002 Thorsten Joachims - All rights reserved */ | |
11 /* */ | |
12 /* This software is available for non-commercial use only. It must */ | |
13 /* not be modified and distributed without prior permission of the */ | |
14 /* author. The author is not responsible for implications from the */ | |
15 /* use of this software. */ | |
16 /* */ | |
17 /************************************************************************/ | |
18 | |
19 # include "svm_common.h" | |
20 | |
/* File names taken from the command line.  main() hands these buffers to
   read_input_parameters(), which fills them in.  Each one holds at most
   199 characters plus the terminating NUL. */
char docfile[200];
char modelfile[200];
char predictionsfile[200];

/* Command-line parsing and usage output; both are defined further down in
   this file. */
void read_input_parameters(int argc, char **argv, char *docfile,
                           char *modelfile, char *predictionsfile,
                           long *verbosity, long *pred_format);
void print_help(void);
28 | |
29 | |
30 int main (int argc, char* argv[]) | |
31 { | |
32 DOC *doc; /* test example */ | |
33 WORD *words; | |
34 long max_docs,max_words_doc,lld; | |
35 long totdoc=0,queryid,slackid; | |
36 long correct=0,incorrect=0,no_accuracy=0; | |
37 long res_a=0,res_b=0,res_c=0,res_d=0,wnum,pred_format; | |
38 long j; | |
39 double t1,runtime=0; | |
40 double dist,doc_label,costfactor; | |
41 char *line,*comment; | |
42 FILE *predfl,*docfl; | |
43 MODEL *model; | |
44 | |
45 read_input_parameters(argc,argv,docfile,modelfile,predictionsfile, | |
46 &verbosity,&pred_format); | |
47 | |
48 nol_ll(docfile,&max_docs,&max_words_doc,&lld); /* scan size of input file */ | |
49 max_words_doc+=2; | |
50 lld+=2; | |
51 | |
52 line = (char *)my_malloc(sizeof(char)*lld); | |
53 words = (WORD *)my_malloc(sizeof(WORD)*(max_words_doc+10)); | |
54 | |
55 model=read_model(modelfile); | |
56 | |
57 if(model->kernel_parm.kernel_type == 0) { /* linear kernel */ | |
58 /* compute weight vector */ | |
59 add_weight_vector_to_linear_model(model); | |
60 } | |
61 | |
62 if(verbosity>=2) { | |
63 printf("Classifying test examples.."); fflush(stdout); | |
64 } | |
65 | |
66 if ((docfl = fopen (docfile, "r")) == NULL) | |
67 { perror (docfile); exit (1); } | |
68 if ((predfl = fopen (predictionsfile, "w")) == NULL) | |
69 { perror (predictionsfile); exit (1); } | |
70 | |
71 while((!feof(docfl)) && fgets(line,(int)lld,docfl)) { | |
72 if(line[0] == '#') continue; /* line contains comments */ | |
73 parse_document(line,words,&doc_label,&queryid,&slackid,&costfactor,&wnum, | |
74 max_words_doc,&comment); | |
75 totdoc++; | |
76 if(model->kernel_parm.kernel_type == 0) { /* linear kernel */ | |
77 for(j=0;(words[j]).wnum != 0;j++) { /* Check if feature numbers */ | |
78 if((words[j]).wnum>model->totwords) /* are not larger than in */ | |
79 (words[j]).wnum=0; /* model. Remove feature if */ | |
80 } /* necessary. */ | |
81 doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0)); | |
82 t1=get_runtime(); | |
83 dist=classify_example_linear(model,doc); | |
84 runtime+=(get_runtime()-t1); | |
85 free_example(doc,1); | |
86 } | |
87 else { /* non-linear kernel */ | |
88 doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0)); | |
89 t1=get_runtime(); | |
90 dist=classify_example(model,doc); | |
91 runtime+=(get_runtime()-t1); | |
92 free_example(doc,1); | |
93 } | |
94 if(dist>0) { | |
95 if(pred_format==0) { /* old weired output format */ | |
96 fprintf(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist); | |
97 } | |
98 if(doc_label>0) correct++; else incorrect++; | |
99 if(doc_label>0) res_a++; else res_b++; | |
100 } | |
101 else { | |
102 if(pred_format==0) { /* old weired output format */ | |
103 fprintf(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist); | |
104 } | |
105 if(doc_label<0) correct++; else incorrect++; | |
106 if(doc_label>0) res_c++; else res_d++; | |
107 } | |
108 if(pred_format==1) { /* output the value of decision function */ | |
109 fprintf(predfl,"%.8g\n",dist); | |
110 } | |
111 if((int)(0.01+(doc_label*doc_label)) != 1) | |
112 { no_accuracy=1; } /* test data is not binary labeled */ | |
113 if(verbosity>=2) { | |
114 if(totdoc % 100 == 0) { | |
115 printf("%ld..",totdoc); fflush(stdout); | |
116 } | |
117 } | |
118 } | |
119 free(line); | |
120 free(words); | |
121 free_model(model,1); | |
122 | |
123 if(verbosity>=2) { | |
124 printf("done\n"); | |
125 | |
126 /* Note by Gary Boone Date: 29 April 2000 */ | |
127 /* o Timing is inaccurate. The timer has 0.01 second resolution. */ | |
128 /* Because classification of a single vector takes less than */ | |
129 /* 0.01 secs, the timer was underflowing. */ | |
130 printf("Runtime (without IO) in cpu-seconds: %.2f\n", | |
131 (float)(runtime/100.0)); | |
132 | |
133 } | |
134 if((!no_accuracy) && (verbosity>=1)) { | |
135 printf("Accuracy on test set: %.2f%% (%ld correct, %ld incorrect, %ld total)\n",(float)(correct)*100.0/totdoc,correct,incorrect,totdoc); | |
136 printf("Precision/recall on test set: %.2f%%/%.2f%%\n",(float)(res_a)*100.0/(res_a+res_b),(float)(res_a)*100.0/(res_a+res_c)); | |
137 } | |
138 | |
139 return(0); | |
140 } | |
141 | |
/*
 * Copies the command-line file name src into the caller's buffer dst.
 *
 * The buffers passed to read_input_parameters() in this file are the
 * 200-byte arrays docfile, modelfile and predictionsfile, so a longer name
 * is rejected with a message and exit status 1 rather than written past
 * the end of the buffer.
 */
static void copy_filename_arg(char *dst, const char *src)
{
  enum { FILENAME_BUFFER_SIZE = 200 };  /* must match the file-scope arrays */

  if(strlen(src) >= FILENAME_BUFFER_SIZE) {
    printf("\nFile name too long (at most %d characters): %s\n\n",
           FILENAME_BUFFER_SIZE-1,src);
    exit(1);
  }
  strcpy (dst, src);
}

/*
 * Parses the command line of svm_classify.
 *
 *   argc, argv       - arguments as received by main()
 *   docfile          - receives the example file name (first positional)
 *   modelfile        - receives the model file name (second positional)
 *   predictionsfile  - receives the output file name (optional third
 *                      positional, default "svm_predictions")
 *   verbosity        - receives the value of -v (default 2)
 *   pred_format      - receives the value of -f (default 1, must be 0 or 1)
 *
 * Options are read from argv[1] onwards for as long as an argument starts
 * with '-'.  On -h, an unrecognized option, too few positional arguments
 * or an invalid -f value, the help text is printed and the program exits
 * with status 0.  An option that lacks its value, or a file name that does
 * not fit its buffer, exits with status 1.
 */
void read_input_parameters(int argc, char **argv, char *docfile,
                           char *modelfile, char *predictionsfile,
                           long int *verbosity, long int *pred_format)
{
  long i;

  /* set default */
  strcpy (modelfile, "svm_model");
  strcpy (predictionsfile, "svm_predictions");
  (*verbosity)=2;
  (*pred_format)=1;

  for(i=1;(i<argc) && ((argv[i])[0] == '-');i++) {
    switch ((argv[i])[1])
      {
      case 'h': print_help(); exit(0);
      case 'v':
      case 'f':
        /* Both options consume the next argument; argv[argc] is a null
           pointer, so it must not be handed to atol(). */
        if((i+1)>=argc) {
          printf("\nOption %s requires a value!\n\n",argv[i]);
          print_help();
          exit(1);
        }
        if((argv[i])[1] == 'v')
          (*verbosity)=atol(argv[i+1]);
        else
          (*pred_format)=atol(argv[i+1]);
        i++;
        break;
      default: printf("\nUnrecognized option %s!\n\n",argv[i]);
               print_help();
               exit(0);
      }
  }
  if((i+1)>=argc) {
    printf("\nNot enough input parameters!\n\n");
    print_help();
    exit(0);
  }
  copy_filename_arg(docfile, argv[i]);
  copy_filename_arg(modelfile, argv[i+1]);
  if((i+2)<argc) {
    copy_filename_arg(predictionsfile, argv[i+2]);
  }
  if(((*pred_format) != 0) && ((*pred_format) != 1)) {
    printf("\nOutput format can only take the values 0 or 1!\n\n");
    print_help();
    exit(0);
  }
}
181 | |
182 void print_help(void) | |
183 { | |
184 printf("\nSVM-light %s: Support Vector Machine, classification module %s\n",VERSION,VERSION_DATE); | |
185 copyright_notice(); | |
186 printf(" usage: svm_classify [options] example_file model_file output_file\n\n"); | |
187 printf("options: -h -> this help\n"); | |
188 printf(" -v [0..3] -> verbosity level (default 2)\n"); | |
189 printf(" -f [0,1] -> 0: old output format of V1.0\n"); | |
190 printf(" -> 1: output the value of decision function (default)\n\n"); | |
191 } | |
192 | |
193 | |
194 | |
195 |