diff toolboxes/SVM-light/src/svm_classify.c @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/toolboxes/SVM-light/src/svm_classify.c	Tue Feb 10 15:05:51 2015 +0000
@@ -0,0 +1,195 @@
+/***********************************************************************/
+/*                                                                     */
+/*   svm_classify.c                                                    */
+/*                                                                     */
+/*   Classification module of Support Vector Machine.                  */
+/*                                                                     */
+/*   Author: Thorsten Joachims                                         */
+/*   Date: 02.07.02                                                    */
+/*                                                                     */
+/*   Copyright (c) 2002  Thorsten Joachims - All rights reserved       */
+/*                                                                     */
+/*   This software is available for non-commercial use only. It must   */
+/*   not be modified and distributed without prior permission of the   */
+/*   author. The author is not responsible for implications from the   */
+/*   use of this software.                                             */
+/*                                                                     */
+/************************************************************************/
+
+# include "svm_common.h"
+
+char docfile[200];
+char modelfile[200];
+char predictionsfile[200];
+
+void read_input_parameters(int, char **, char *, char *, char *, long *, 
+			   long *);
+void print_help(void);
+
+
+int main (int argc, char* argv[])
+{
+  DOC *doc;   /* test example */
+  WORD *words;
+  long max_docs,max_words_doc,lld;
+  long totdoc=0,queryid,slackid;
+  long correct=0,incorrect=0,no_accuracy=0;
+  long res_a=0,res_b=0,res_c=0,res_d=0,wnum,pred_format;
+  long j;
+  double t1,runtime=0;
+  double dist,doc_label,costfactor;
+  char *line,*comment; 
+  FILE *predfl,*docfl;
+  MODEL *model; 
+
+  read_input_parameters(argc,argv,docfile,modelfile,predictionsfile,
+			&verbosity,&pred_format);
+
+  nol_ll(docfile,&max_docs,&max_words_doc,&lld); /* scan size of input file */
+  max_words_doc+=2;
+  lld+=2;
+
+  line = (char *)my_malloc(sizeof(char)*lld);
+  words = (WORD *)my_malloc(sizeof(WORD)*(max_words_doc+10));
+
+  model=read_model(modelfile);
+
+  if(model->kernel_parm.kernel_type == 0) { /* linear kernel */
+    /* compute weight vector */
+    add_weight_vector_to_linear_model(model);
+  }
+  
+  if(verbosity>=2) {
+    printf("Classifying test examples.."); fflush(stdout);
+  }
+
+  if ((docfl = fopen (docfile, "r")) == NULL)
+  { perror (docfile); exit (1); }
+  if ((predfl = fopen (predictionsfile, "w")) == NULL)
+  { perror (predictionsfile); exit (1); }
+
+  while((!feof(docfl)) && fgets(line,(int)lld,docfl)) {
+    if(line[0] == '#') continue;  /* line contains comments */
+    parse_document(line,words,&doc_label,&queryid,&slackid,&costfactor,&wnum,
+		   max_words_doc,&comment);
+    totdoc++;
+    if(model->kernel_parm.kernel_type == 0) {   /* linear kernel */
+      for(j=0;(words[j]).wnum != 0;j++) {  /* Check if feature numbers   */
+	if((words[j]).wnum>model->totwords) /* are not larger than in     */
+	  (words[j]).wnum=0;               /* model. Remove feature if   */
+      }                                        /* necessary.                 */
+      doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0));
+      t1=get_runtime();
+      dist=classify_example_linear(model,doc);
+      runtime+=(get_runtime()-t1);
+      free_example(doc,1);
+    }
+    else {                             /* non-linear kernel */
+      doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0));
+      t1=get_runtime();
+      dist=classify_example(model,doc);
+      runtime+=(get_runtime()-t1);
+      free_example(doc,1);
+    }
+    if(dist>0) {
+      if(pred_format==0) { /* old weired output format */
+	fprintf(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist);
+      }
+      if(doc_label>0) correct++; else incorrect++;
+      if(doc_label>0) res_a++; else res_b++;
+    }
+    else {
+      if(pred_format==0) { /* old weired output format */
+	fprintf(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist);
+      }
+      if(doc_label<0) correct++; else incorrect++;
+      if(doc_label>0) res_c++; else res_d++;
+    }
+    if(pred_format==1) { /* output the value of decision function */
+      fprintf(predfl,"%.8g\n",dist);
+    }
+    if((int)(0.01+(doc_label*doc_label)) != 1) 
+      { no_accuracy=1; } /* test data is not binary labeled */
+    if(verbosity>=2) {
+      if(totdoc % 100 == 0) {
+	printf("%ld..",totdoc); fflush(stdout);
+      }
+    }
+  }  
+  free(line);
+  free(words);
+  free_model(model,1);
+
+  if(verbosity>=2) {
+    printf("done\n");
+
+/*   Note by Gary Boone                     Date: 29 April 2000        */
+/*      o Timing is inaccurate. The timer has 0.01 second resolution.  */
+/*        Because classification of a single vector takes less than    */
+/*        0.01 secs, the timer was underflowing.                       */
+    printf("Runtime (without IO) in cpu-seconds: %.2f\n",
+	   (float)(runtime/100.0));
+    
+  }
+  if((!no_accuracy) && (verbosity>=1)) {
+    printf("Accuracy on test set: %.2f%% (%ld correct, %ld incorrect, %ld total)\n",(float)(correct)*100.0/totdoc,correct,incorrect,totdoc);
+    printf("Precision/recall on test set: %.2f%%/%.2f%%\n",(float)(res_a)*100.0/(res_a+res_b),(float)(res_a)*100.0/(res_a+res_c));
+  }
+
+  return(0);
+}
+
+void read_input_parameters(int argc, char **argv, char *docfile, 
+			   char *modelfile, char *predictionsfile, 
+			   long int *verbosity, long int *pred_format)
+{
+  long i;
+  
+  /* set default */
+  strcpy (modelfile, "svm_model");
+  strcpy (predictionsfile, "svm_predictions"); 
+  (*verbosity)=2;
+  (*pred_format)=1;
+
+  for(i=1;(i<argc) && ((argv[i])[0] == '-');i++) {
+    switch ((argv[i])[1]) 
+      { 
+      case 'h': print_help(); exit(0);
+      case 'v': i++; (*verbosity)=atol(argv[i]); break;
+      case 'f': i++; (*pred_format)=atol(argv[i]); break;
+      default: printf("\nUnrecognized option %s!\n\n",argv[i]);
+	       print_help();
+	       exit(0);
+      }
+  }
+  if((i+1)>=argc) {
+    printf("\nNot enough input parameters!\n\n");
+    print_help();
+    exit(0);
+  }
+  strcpy (docfile, argv[i]);
+  strcpy (modelfile, argv[i+1]);
+  if((i+2)<argc) {
+    strcpy (predictionsfile, argv[i+2]);
+  }
+  if(((*pred_format) != 0) && ((*pred_format) != 1)) {
+    printf("\nOutput format can only take the values 0 or 1!\n\n");
+    print_help();
+    exit(0);
+  }
+}
+
+void print_help(void)
+{
+  printf("\nSVM-light %s: Support Vector Machine, classification module     %s\n",VERSION,VERSION_DATE);
+  copyright_notice();
+  printf("   usage: svm_classify [options] example_file model_file output_file\n\n");
+  printf("options: -h         -> this help\n");
+  printf("         -v [0..3]  -> verbosity level (default 2)\n");
+  printf("         -f [0,1]   -> 0: old output format of V1.0\n");
+  printf("                    -> 1: output the value of decision function (default)\n\n");
+}
+
+
+
+