/************************************************************************/
/*                                                                      */
/*   svm_common.h                                                      */
/*                                                                      */
/*   Definitions and functions used in both svm_learn and svm_classify.*/
/*                                                                      */
/*   Author: Thorsten Joachims                                          */
/*   Date: 02.07.02                                                     */
/*                                                                      */
/*   Copyright (c) 2002  Thorsten Joachims - All rights reserved        */
/*                                                                      */
/*   This software is available for non-commercial use only. It must   */
/*   not be modified and distributed without prior permission of the   */
/*   author. The author is not responsible for implications from the   */
/*   use of this software.                                              */
/*                                                                      */
/************************************************************************/

#ifndef SVM_COMMON
#define SVM_COMMON

# define MAXSHRINK     50000    /* maximum number of shrinking rounds */
# define MAXFEATNUM 99999999    /* maximum feature number (must be in
                                   valid range of long int type!) */

/* NOTE(review): the original include targets were stripped by text
   mangling; restored to the standard SVM-light header set. */
# include <stdio.h>
# include <ctype.h>
# include <math.h>
# include <string.h>
# include <stdlib.h>
# include <time.h>
# include <float.h>

# define VERSION       "V6.01"
# define VERSION_DATE  "01.09.04"

# define CFLOAT  float          /* the type of float to use for caching */
                                /* kernel evaluations. Using float saves */
                                /* us some memory, but you can use double, too */
# define FNUM    long           /* the type used for storing feature ids */
# define FVAL    float          /* the type used for storing feature values */

# define LINEAR  0              /* linear kernel type */
# define POLY    1              /* polynomial kernel type */
# define RBF     2              /* rbf kernel type */
# define SIGMOID 3              /* sigmoid kernel type */

# define CLASSIFICATION 1      /* train classification model */
# define REGRESSION     2      /* train regression model */
# define RANKING        3      /* train ranking model */
# define OPTIMIZATION   4      /* train on general set of constraints */

/* A single feature/value pair within a sparse feature vector. */
typedef struct word {
  FNUM    wnum;                /* word number */
  FVAL    weight;              /* word weight */
} WORD;

/* Sparse feature vector; may be linked into a list for constraints
   that are sums of several vectors. */
typedef struct svector {
  WORD    *words;              /* The features/values in the vector by
                                  increasing feature-number. Feature
                                  numbers that are skipped are
                                  interpreted as having value zero. */
  double  twonorm_sq;          /* The squared euclidian length of the
                                  vector. Used to speed up the RBF kernel. */
  char    *userdefined;        /* You can put additional information
                                  here. This can be useful, if you are
                                  implementing your own kernel that
                                  does not work with feature/values
                                  representations (for example a
                                  string kernel). By default,
                                  svm-light will put here the string
                                  after the # sign from each line of
                                  the input file. */
  long    kernel_id;           /* Feature vectors with different
                                  kernel_id's are orthogonal (ie. the
                                  feature numbers do not match). This
                                  is used for computing component
                                  kernels for linear constraints which
                                  are a sum of several different
                                  weight vectors. (currently not
                                  implemented). */
  struct svector *next;        /* Lets you set up a list of SVECTOR's
                                  for linear constraints which are a
                                  sum of multiple feature
                                  vectors. List is terminated by
                                  NULL. */
  double  factor;              /* Factor by which this feature vector
                                  is multiplied in the sum. */
} SVECTOR;

/* One training/classification example (document). */
typedef struct doc {
  long    docnum;              /* Document ID. This has to be the position of
                                  the document in the training set array. */
  long    queryid;             /* for learning rankings, constraints are
                                  generated for documents with the same
                                  queryID. */
  double  costfactor;          /* Scales the cost of misclassifying this
                                  document by this factor. The effect of this
                                  value is, that the upper bound on the alpha
                                  for this example is scaled by this factor.
                                  The factors are set by the feature
                                  'cost:<value>' in the training data. */
  long    slackid;             /* Index of the slack variable
                                  corresponding to this
                                  constraint. All constraints with the
                                  same slackid share the same slack
                                  variable. This can only be used for
                                  svm_learn_optimization. */
  SVECTOR *fvec;               /* Feature vector of the example. The
                                  feature vector can actually be a
                                  list of feature vectors. For
                                  example, the list will have two
                                  elements, if this DOC is a
                                  preference constraint. The one
                                  vector that is supposed to be ranked
                                  higher, will have a factor of +1,
                                  the lower ranked one should have a
                                  factor of -1. */
} DOC;

/* All tunable parameters of the learning algorithm. */
typedef struct learn_parm {
  long   type;                 /* selects between regression and
                                  classification */
  double svm_c;                /* upper bound C on alphas */
  double eps;                  /* regression epsilon (eps=1.0 for
                                  classification */
  double svm_costratio;        /* factor to multiply C for positive examples */
  double transduction_posratio;/* fraction of unlabeled examples to be */
                               /* classified as positives */
  long   biased_hyperplane;    /* if nonzero, use hyperplane w*x+b=0
                                  otherwise w*x=0 */
  long   sharedslack;          /* if nonzero, it will use the shared
                                  slack variable mode in
                                  svm_learn_optimization. It requires
                                  that the slackid is set for every
                                  training example */
  long   svm_maxqpsize;        /* size q of working set */
  long   svm_newvarsinqp;      /* new variables to enter the working set
                                  in each iteration */
  long   kernel_cache_size;    /* size of kernel cache in megabytes */
  double epsilon_crit;         /* tolerable error for distances used
                                  in stopping criterion */
  double epsilon_shrink;       /* how much a multiplier should be above
                                  zero for shrinking */
  long   svm_iter_to_shrink;   /* iterations h after which an example can
                                  be removed by shrinking */
  long   maxiter;              /* number of iterations after which the
                                  optimizer terminates, if there was
                                  no progress in maxdiff */
  long   remove_inconsistent;  /* exclude examples with alpha at C and
                                  retrain */
  long   skip_final_opt_check; /* do not check KT-Conditions at the end of
                                  optimization for examples removed by
                                  shrinking. WARNING: This might lead to
                                  sub-optimal solutions! */
  long   compute_loo;          /* if nonzero, computes leave-one-out
                                  estimates */
  double rho;                  /* parameter in xi/alpha-estimates and for
                                  pruning leave-one-out range [1..2] */
  long   xa_depth;             /* parameter in xi/alpha-estimates upper
                                  bounding the number of SV the current
                                  alpha_t is distributed over */
  char predfile[200];          /* file for predictions on unlabeled examples
                                  in transduction */
  char alphafile[200];         /* file to store optimal alphas in. use
                                  empty string if alphas should not be
                                  output */

  /* you probably do not want to touch the following */
  double epsilon_const;        /* tolerable error on eq-constraint */
  double epsilon_a;            /* tolerable error on alphas at bounds */
  double opt_precision;        /* precision of solver, set to e.g. 1e-21
                                  if you get convergence problems */

  /* the following are only for internal use */
  long   svm_c_steps;          /* do so many steps for finding optimal C */
  double svm_c_factor;         /* increase C by this factor every step */
  double svm_costratio_unlab;
  double svm_unlabbound;
  double *svm_cost;            /* individual upper bounds for each var */
  long   totwords;             /* number of features */
} LEARN_PARM;

/* Kernel selection and its hyper-parameters. */
typedef struct kernel_parm {
  long    kernel_type;         /* 0=linear, 1=poly, 2=rbf, 3=sigmoid,
                                  4=custom */
  long    poly_degree;
  double  rbf_gamma;
  double  coef_lin;
  double  coef_const;
  char    custom[50];          /* for user supplied kernel */
} KERNEL_PARM;

/* A trained SVM model (support vectors, alphas, bias, etc.). */
typedef struct model {
  long    sv_num;
  long    at_upper_bound;
  double  b;
  DOC     **supvec;
  double  *alpha;
  long    *index;              /* index from docnum to position in model */
  long    totwords;            /* number of features */
  long    totdoc;              /* number of training documents */
  KERNEL_PARM kernel_parm;     /* kernel */

  /* the following values are not written to file */
  double  loo_error,loo_recall,loo_precision; /* leave-one-out estimates */
  double  xa_error,xa_recall,xa_precision;    /* xi/alpha estimates */
  double  *lin_weights;        /* weights for linear case using
                                  folding */
  double  maxdiff;             /* precision, up to which this
                                  model is accurate */
} MODEL;

/* A quadratic program in the standard form used by the optimizer. */
typedef struct quadratic_program {
  long   opt_n;                /* number of variables */
  long   opt_m;                /* number of linear equality constraints */
  double *opt_ce,*opt_ce0;     /* linear equality constraints */
  double *opt_g;               /* hessian of objective */
  double *opt_g0;              /* linear part of objective */
  double *opt_xinit;           /* initial value for variables */
  double *opt_low,*opt_up;     /* box constraints */
} QP;

/* LRU cache of kernel evaluations. */
typedef struct kernel_cache {
  long   *index;               /* cache some kernel evaluations */
  CFLOAT *buffer;              /* to improve speed */
  long   *invindex;
  long   *active2totdoc;
  long   *totdoc2active;
  long   *lru;
  long   *occu;
  long   elems;
  long   max_elems;
  long   time;
  long   activenum;
  long   buffsize;
} KERNEL_CACHE;


/* Accumulated wall-clock time spent in each phase of training. */
typedef struct timing_profile {
  long   time_kernel;
  long   time_opti;
  long   time_shrink;
  long   time_update;
  long   time_model;
  long   time_check;
  long   time_select;
} TIMING;

/* Bookkeeping for the variable-shrinking heuristic. */
typedef struct shrink_state {
  long   *active;
  long   *inactive_since;
  long   deactnum;
  double **a_history;          /* for shrinking with non-linear kernel */
  long   maxhistory;
  double *last_a;              /* for shrinking with linear kernel */
  double *last_lin;            /* for shrinking with linear kernel */
} SHRINK_STATE;

double classify_example(MODEL *, DOC *);
double classify_example_linear(MODEL *, DOC *);
CFLOAT kernel(KERNEL_PARM *, DOC *, DOC *);
CFLOAT single_kernel(KERNEL_PARM *, SVECTOR *, SVECTOR *);
double custom_kernel(KERNEL_PARM *, SVECTOR *, SVECTOR *);
SVECTOR *create_svector(WORD *, char *, double);
SVECTOR *copy_svector(SVECTOR *);
void   free_svector(SVECTOR *);
double sprod_ss(SVECTOR *, SVECTOR *);
SVECTOR* sub_ss(SVECTOR *, SVECTOR *);
SVECTOR* add_ss(SVECTOR *, SVECTOR *);
SVECTOR* add_list_ss(SVECTOR *);
void   append_svector_list(SVECTOR *a, SVECTOR *b);
SVECTOR* smult_s(SVECTOR *, double);
int    featvec_eq(SVECTOR *, SVECTOR *);
double model_length_s(MODEL *, KERNEL_PARM *);
void   clear_vector_n(double *, long);
void   add_vector_ns(double *, SVECTOR *, double);
double sprod_ns(double *, SVECTOR *);
void   add_weight_vector_to_linear_model(MODEL *);
DOC    *create_example(long, long, long, double, SVECTOR *);
void   free_example(DOC *, long);
MODEL  *read_model(char *);
MODEL  *copy_model(MODEL *);
void   free_model(MODEL *, int);
void   read_documents(char *, DOC ***, double **, long *, long *);
int    parse_document(char *, WORD *, double *, long *, long *, double *, long *, long, char **);
double *read_alphas(char *,long);
void   nol_ll(char *, long *, long *, long *);
long   minl(long, long);
long   maxl(long, long);
long   get_runtime(void);
int    space_or_null(int);
void   *my_malloc(size_t);
void   copyright_notice(void);
# ifdef _MSC_VER
   /* MSVC lacks a C89 isnan(); SVM-light supplies its own. */
   int isnan(double);
# endif

extern long   verbosity;              /* verbosity level (0-4) */
extern long   kernel_cache_statistic;

#endif