/************************************************************************/
/*                                                                      */
/*   svm_common.h                                                       */
/*                                                                      */
/*   Definitions and functions used in both svm_learn and svm_classify.*/
/*                                                                      */
/*   Author: Thorsten Joachims                                          */
/*   Date: 02.07.02                                                     */
/*                                                                      */
/*   Copyright (c) 2002  Thorsten Joachims - All rights reserved        */
/*                                                                      */
/*   This software is available for non-commercial use only. It must   */
/*   not be modified and distributed without prior permission of the   */
/*   author. The author is not responsible for implications from the   */
/*   use of this software.                                              */
/*                                                                      */
/************************************************************************/

#ifndef SVM_COMMON
#define SVM_COMMON

# define MAXSHRINK     50000    /* maximum number of shrinking rounds */
# define MAXFEATNUM 99999999    /* maximum feature number (must be in
                                   valid range of long int type!) */

# include <stdio.h>
# include <ctype.h>
# include <math.h>
# include <string.h>
# include <stdlib.h>
# include <time.h>
# include <float.h>

# define VERSION       "V6.01"
# define VERSION_DATE  "01.09.04"

# define CFLOAT  float       /* the type of float to use for caching
                                kernel evaluations. Using float saves
                                us some memory, but you can use double, too */
# define FNUM    long        /* the type used for storing feature ids */
# define FVAL    float       /* the type used for storing feature values */

# define LINEAR  0           /* linear kernel type */
# define POLY    1           /* polynomial kernel type */
# define RBF     2           /* rbf kernel type */
# define SIGMOID 3           /* sigmoid kernel type */

# define CLASSIFICATION 1    /* train classification model */
# define REGRESSION     2    /* train regression model */
# define RANKING        3    /* train ranking model */
# define OPTIMIZATION   4    /* train on general set of constraints */

typedef struct word {
  FNUM    wnum;              /* word number */
  FVAL    weight;            /* word weight */
} WORD;

typedef struct svector {
  WORD    *words;            /* The features/values in the vector by
                                increasing feature number. Feature
                                numbers that are skipped are
                                interpreted as having value zero. */
  double  twonorm_sq;        /* The squared Euclidean length of the
                                vector. Used to speed up the RBF kernel. */
  char    *userdefined;      /* You can put additional information
                                here. This can be useful if you are
                                implementing your own kernel that
                                does not work with feature/value
                                representations (for example a
                                string kernel). By default,
                                svm-light will put here the string
                                after the # sign from each line of
                                the input file. */
  long    kernel_id;         /* Feature vectors with different
                                kernel_ids are orthogonal (i.e. the
                                feature numbers do not match). This
                                is used for computing component
                                kernels for linear constraints which
                                are a sum of several different
                                weight vectors. (currently not
                                implemented) */
  struct svector *next;      /* Lets you set up a list of SVECTORs
                                for linear constraints which are a
                                sum of multiple feature
                                vectors. The list is terminated by
                                NULL. */
  double  factor;            /* Factor by which this feature vector
                                is multiplied in the sum. */
} SVECTOR;
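/* Usage sketch (illustrative): building the sparse vector "1:0.5 3:2.0"
   with create_svector(), declared below. The WORD array is terminated by
   an entry with wnum 0, matching the convention produced by
   read_documents(); the second argument is the userdefined string, the
   third the factor.

     WORD words[3];
     words[0].wnum = 1;  words[0].weight = 0.5;
     words[1].wnum = 3;  words[1].weight = 2.0;
     words[2].wnum = 0;  words[2].weight = 0;       <- terminator
     {
       SVECTOR *vec = create_svector(words, "", 1.0);
       double norm_sq = sprod_ss(vec, vec);         <- same as vec->twonorm_sq
       free_svector(vec);                           <- frees the whole list
     }
*/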
typedef struct doc {
  long    docnum;            /* Document ID. This has to be the position of
                                the document in the training set array. */
  long    queryid;           /* for learning rankings, constraints are
                                generated for documents with the same
                                queryid. */
  double  costfactor;        /* Scales the cost of misclassifying this
                                document by this factor. The effect of this
                                value is that the upper bound on the alpha
                                for this example is scaled by this factor.
                                The factors are set by the feature
                                'cost:<value>' in the training data. */
  long    slackid;           /* Index of the slack variable
                                corresponding to this
                                constraint. All constraints with the
                                same slackid share the same slack
                                variable. This can only be used for
                                svm_learn_optimization. */
  SVECTOR *fvec;             /* Feature vector of the example. The
                                feature vector can actually be a
                                list of feature vectors. For
                                example, the list will have two
                                elements if this DOC is a
                                preference constraint. The vector
                                that should be ranked higher has a
                                factor of +1; the lower-ranked one
                                has a factor of -1. */
} DOC;
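/* Usage sketch (illustrative): encoding the ranking preference "document A
   should rank above document B" as a single DOC, as described for fvec
   above. The two vectors are chained via the next pointer with factors +1
   and -1; words_a and words_b are placeholder 0-terminated WORD arrays.

     SVECTOR *better = create_svector(words_a, "", 1.0);
     SVECTOR *worse  = create_svector(words_b, "", -1.0);
     better->next = worse;
     DOC *pref = create_example(0, 1, 0, 1.0, better);
                 (docnum, queryid, slackid, costfactor, fvec)
     free_example(pref, 1);   <- deep-frees the vector list as well
*/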
typedef struct learn_parm {
  long   type;                 /* selects between regression and
                                  classification */
  double svm_c;                /* upper bound C on alphas */
  double eps;                  /* regression epsilon (eps=1.0 for
                                  classification) */
  double svm_costratio;        /* factor to multiply C for positive examples */
  double transduction_posratio;/* fraction of unlabeled examples to be
                                  classified as positives */
  long   biased_hyperplane;    /* if nonzero, use hyperplane w*x+b=0,
                                  otherwise w*x=0 */
  long   sharedslack;          /* if nonzero, use the shared slack
                                  variable mode in
                                  svm_learn_optimization. Requires
                                  that the slackid is set for every
                                  training example. */
  long   svm_maxqpsize;        /* size q of working set */
  long   svm_newvarsinqp;      /* new variables to enter the working set
                                  in each iteration */
  long   kernel_cache_size;    /* size of kernel cache in megabytes */
  double epsilon_crit;         /* tolerable error for distances used
                                  in stopping criterion */
  double epsilon_shrink;       /* how much a multiplier should be above
                                  zero for shrinking */
  long   svm_iter_to_shrink;   /* iterations h after which an example can
                                  be removed by shrinking */
  long   maxiter;              /* number of iterations after which the
                                  optimizer terminates, if there was
                                  no progress in maxdiff */
  long   remove_inconsistent;  /* exclude examples with alpha at C and
                                  retrain */
  long   skip_final_opt_check; /* do not check KT conditions at the end of
                                  optimization for examples removed by
                                  shrinking. WARNING: This might lead to
                                  sub-optimal solutions! */
  long   compute_loo;          /* if nonzero, computes leave-one-out
                                  estimates */
  double rho;                  /* parameter in xi/alpha-estimates and for
                                  pruning leave-one-out range [1..2] */
  long   xa_depth;             /* parameter in xi/alpha-estimates upper
                                  bounding the number of SVs the current
                                  alpha_t is distributed over */
  char   predfile[200];        /* file for predictions on unlabeled examples
                                  in transduction */
  char   alphafile[200];       /* file to store optimal alphas in. Use an
                                  empty string if alphas should not be
                                  output. */

  /* you probably do not want to touch the following */
  double epsilon_const;        /* tolerable error on eq-constraint */
  double epsilon_a;            /* tolerable error on alphas at bounds */
  double opt_precision;        /* precision of solver, set to e.g. 1e-21
                                  if you get convergence problems */

  /* the following are only for internal use */
  long   svm_c_steps;          /* do so many steps for finding optimal C */
  double svm_c_factor;         /* increase C by this factor every step */
  double svm_costratio_unlab;
  double svm_unlabbound;
  double *svm_cost;            /* individual upper bounds for each var */
  long   totwords;             /* number of features */
} LEARN_PARM;

typedef struct kernel_parm {
  long   kernel_type;          /* 0=linear, 1=poly, 2=rbf, 3=sigmoid, 4=custom */
  long   poly_degree;
  double rbf_gamma;
  double coef_lin;
  double coef_const;
  char   custom[50];           /* for user-supplied kernel */
} KERNEL_PARM;

typedef struct model {
  long    sv_num;
  long    at_upper_bound;
  double  b;
  DOC     **supvec;
  double  *alpha;
  long    *index;              /* index from docnum to position in model */
  long    totwords;            /* number of features */
  long    totdoc;              /* number of training documents */
  KERNEL_PARM kernel_parm;     /* kernel */

  /* the following values are not written to file */
  double  loo_error,loo_recall,loo_precision; /* leave-one-out estimates */
  double  xa_error,xa_recall,xa_precision;    /* xi/alpha estimates */
  double  *lin_weights;        /* weights for linear case using
                                  folding */
  double  maxdiff;             /* precision, up to which this
                                  model is accurate */
} MODEL;

typedef struct quadratic_program {
  long   opt_n;                /* number of variables */
  long   opt_m;                /* number of linear equality constraints */
  double *opt_ce,*opt_ce0;     /* linear equality constraints */
  double *opt_g;               /* hessian of objective */
  double *opt_g0;              /* linear part of objective */
  double *opt_xinit;           /* initial value for variables */
  double *opt_low,*opt_up;     /* box constraints */
} QP;

typedef struct kernel_cache {
  long   *index;               /* cache some kernel evaluations */
  CFLOAT *buffer;              /* to improve speed */
  long   *invindex;
  long   *active2totdoc;
  long   *totdoc2active;
  long   *lru;
  long   *occu;
  long   elems;
  long   max_elems;
  long   time;
  long   activenum;
  long   buffsize;
} KERNEL_CACHE;
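/* Usage sketch (illustrative): filling in KERNEL_PARM for an RBF kernel,
   K(a,b) = exp(-rbf_gamma * ||a-b||^2), and evaluating it on two DOCs with
   kernel(), declared below. The numeric values are arbitrary example
   settings, not recommended defaults; doc_a and doc_b are placeholders.

     KERNEL_PARM kparm;
     kparm.kernel_type = RBF;         <- 2, see the defines above
     kparm.poly_degree = 3;           <- ignored by RBF
     kparm.rbf_gamma   = 0.1;
     kparm.coef_lin    = 1.0;         <- ignored by RBF
     kparm.coef_const  = 1.0;         <- ignored by RBF
     strcpy(kparm.custom, "empty");
     {
       CFLOAT k = kernel(&kparm, doc_a, doc_b);
     }
*/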
typedef struct timing_profile {
  long   time_kernel;
  long   time_opti;
  long   time_shrink;
  long   time_update;
  long   time_model;
  long   time_check;
  long   time_select;
} TIMING;

typedef struct shrink_state {
  long   *active;
  long   *inactive_since;
  long   deactnum;
  double **a_history;          /* for shrinking with non-linear kernel */
  long   maxhistory;
  double *last_a;              /* for shrinking with linear kernel */
  double *last_lin;            /* for shrinking with linear kernel */
} SHRINK_STATE;

double classify_example(MODEL *, DOC *);
double classify_example_linear(MODEL *, DOC *);
CFLOAT kernel(KERNEL_PARM *, DOC *, DOC *);
CFLOAT single_kernel(KERNEL_PARM *, SVECTOR *, SVECTOR *);
double custom_kernel(KERNEL_PARM *, SVECTOR *, SVECTOR *);
SVECTOR *create_svector(WORD *, char *, double);
SVECTOR *copy_svector(SVECTOR *);
void   free_svector(SVECTOR *);
double sprod_ss(SVECTOR *, SVECTOR *);
SVECTOR* sub_ss(SVECTOR *, SVECTOR *);
SVECTOR* add_ss(SVECTOR *, SVECTOR *);
SVECTOR* add_list_ss(SVECTOR *);
void   append_svector_list(SVECTOR *a, SVECTOR *b);
SVECTOR* smult_s(SVECTOR *, double);
int    featvec_eq(SVECTOR *, SVECTOR *);
double model_length_s(MODEL *, KERNEL_PARM *);
void   clear_vector_n(double *, long);
void   add_vector_ns(double *, SVECTOR *, double);
double sprod_ns(double *, SVECTOR *);
void   add_weight_vector_to_linear_model(MODEL *);
DOC    *create_example(long, long, long, double, SVECTOR *);
void   free_example(DOC *, long);
MODEL  *read_model(char *);
MODEL  *copy_model(MODEL *);
void   free_model(MODEL *, int);
void   read_documents(char *, DOC ***, double **, long *, long *);
int    parse_document(char *, WORD *, double *, long *, long *, double *, long *, long, char **);
double *read_alphas(char *, long);
void   nol_ll(char *, long *, long *, long *);
long   minl(long, long);
long   maxl(long, long);
long   get_runtime(void);
int    space_or_null(int);
void   *my_malloc(size_t);
void   copyright_notice(void);
# ifdef _MSC_VER
  int   isnan(double);
# endif

extern long   verbosity;              /* verbosity level (0-4) */
extern long   kernel_cache_statistic;

#endif
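/* Usage sketch (illustrative): the typical classification flow using the
   declarations above. "svm_model" and words are placeholders. For linear
   models, folding the support vectors into a single weight vector first
   enables the fast path via classify_example_linear().

     MODEL *model = read_model("svm_model");
     DOC   *doc;
     double dist;
     doc = create_example(-1, 0, 0, 0.0, create_svector(words, "", 1.0));
     if(model->kernel_parm.kernel_type == LINEAR) {
       add_weight_vector_to_linear_model(model);
       dist = classify_example_linear(model, doc);
     }
     else
       dist = classify_example(model, doc);
     (the sign of dist gives the predicted class)
     free_example(doc, 1);
     free_model(model, 1);
*/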