annotate toolboxes/SVM-light/src/svm_common.h @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
rev   line source
wolffd@0 1 /************************************************************************/
wolffd@0 2 /* */
wolffd@0 3 /* svm_common.h */
wolffd@0 4 /* */
wolffd@0 5 /* Definitions and functions used in both svm_learn and svm_classify. */
wolffd@0 6 /* */
wolffd@0 7 /* Author: Thorsten Joachims */
wolffd@0 8 /* Date: 02.07.02 */
wolffd@0 9 /* */
wolffd@0 10 /* Copyright (c) 2002 Thorsten Joachims - All rights reserved */
wolffd@0 11 /* */
wolffd@0 12 /* This software is available for non-commercial use only. It must */
wolffd@0 13 /* not be modified and distributed without prior permission of the */
wolffd@0 14 /* author. The author is not responsible for implications from the */
wolffd@0 15 /* use of this software. */
wolffd@0 16 /* */
wolffd@0 17 /************************************************************************/
wolffd@0 18
wolffd@0 19 #ifndef SVM_COMMON
wolffd@0 20 #define SVM_COMMON
wolffd@0 21
wolffd@0 22 # define MAXSHRINK 50000 /* maximum number of shrinking rounds */
wolffd@0 23 # define MAXFEATNUM 99999999 /* maximum feature number (must be in
wolffd@0 24 valid range of long int type!) */
wolffd@0 25
wolffd@0 26 # include <stdio.h>
wolffd@0 27 # include <ctype.h>
wolffd@0 28 # include <math.h>
wolffd@0 29 # include <string.h>
wolffd@0 30 # include <stdlib.h>
wolffd@0 31 # include <time.h>
wolffd@0 32 # include <float.h>
wolffd@0 33
wolffd@0 34 # define VERSION "V6.01"
wolffd@0 35 # define VERSION_DATE "01.09.04"
wolffd@0 36
wolffd@0 37 # define CFLOAT float /* the type of float to use for caching */
wolffd@0 38 /* kernel evaluations. Using float saves */
wolffd@0 39 /* us some memory, but you can use double, too */
wolffd@0 40 # define FNUM long /* the type used for storing feature ids */
wolffd@0 41 # define FVAL float /* the type used for storing feature values */
wolffd@0 42
wolffd@0 43 # define LINEAR 0 /* linear kernel type */
wolffd@0 44 # define POLY 1 /* polynoial kernel type */
wolffd@0 45 # define RBF 2 /* rbf kernel type */
wolffd@0 46 # define SIGMOID 3 /* sigmoid kernel type */
wolffd@0 47
wolffd@0 48 # define CLASSIFICATION 1 /* train classification model */
wolffd@0 49 # define REGRESSION 2 /* train regression model */
wolffd@0 50 # define RANKING 3 /* train ranking model */
wolffd@0 51 # define OPTIMIZATION 4 /* train on general set of constraints */
wolffd@0 52
wolffd@0 53 typedef struct word {
wolffd@0 54 FNUM wnum; /* word number */
wolffd@0 55 FVAL weight; /* word weight */
wolffd@0 56 } WORD;
wolffd@0 57
wolffd@0 58 typedef struct svector {
wolffd@0 59 WORD *words; /* The features/values in the vector by
wolffd@0 60 increasing feature-number. Feature
wolffd@0 61 numbers that are skipped are
wolffd@0 62 interpreted as having value zero. */
wolffd@0 63 double twonorm_sq; /* The squared euclidian length of the
wolffd@0 64 vector. Used to speed up the RBF kernel. */
wolffd@0 65 char *userdefined; /* You can put additional information
wolffd@0 66 here. This can be useful, if you are
wolffd@0 67 implementing your own kernel that
wolffd@0 68 does not work with feature/values
wolffd@0 69 representations (for example a
wolffd@0 70 string kernel). By default,
wolffd@0 71 svm-light will put here the string
wolffd@0 72 after the # sign from each line of
wolffd@0 73 the input file. */
wolffd@0 74 long kernel_id; /* Feature vectors with different
wolffd@0 75 kernel_id's are orthogonal (ie. the
wolffd@0 76 feature number do not match). This
wolffd@0 77 is used for computing component
wolffd@0 78 kernels for linear constraints which
wolffd@0 79 are a sum of several different
wolffd@0 80 weight vectors. (currently not
wolffd@0 81 implemented). */
wolffd@0 82 struct svector *next; /* Let's you set up a list of SVECTOR's
wolffd@0 83 for linear constraints which are a
wolffd@0 84 sum of multiple feature
wolffd@0 85 vectors. List is terminated by
wolffd@0 86 NULL. */
wolffd@0 87 double factor; /* Factor by which this feature vector
wolffd@0 88 is multiplied in the sum. */
wolffd@0 89 } SVECTOR;
wolffd@0 90
wolffd@0 91 typedef struct doc {
wolffd@0 92 long docnum; /* Document ID. This has to be the position of
wolffd@0 93 the document in the training set array. */
wolffd@0 94 long queryid; /* for learning rankings, constraints are
wolffd@0 95 generated for documents with the same
wolffd@0 96 queryID. */
wolffd@0 97 double costfactor; /* Scales the cost of misclassifying this
wolffd@0 98 document by this factor. The effect of this
wolffd@0 99 value is, that the upper bound on the alpha
wolffd@0 100 for this example is scaled by this factor.
wolffd@0 101 The factors are set by the feature
wolffd@0 102 'cost:<val>' in the training data. */
wolffd@0 103 long slackid; /* Index of the slack variable
wolffd@0 104 corresponding to this
wolffd@0 105 constraint. All constraints with the
wolffd@0 106 same slackid share the same slack
wolffd@0 107 variable. This can only be used for
wolffd@0 108 svm_learn_optimization. */
wolffd@0 109 SVECTOR *fvec; /* Feature vector of the example. The
wolffd@0 110 feature vector can actually be a
wolffd@0 111 list of feature vectors. For
wolffd@0 112 example, the list will have two
wolffd@0 113 elements, if this DOC is a
wolffd@0 114 preference constraint. The one
wolffd@0 115 vector that is supposed to be ranked
wolffd@0 116 higher, will have a factor of +1,
wolffd@0 117 the lower ranked one should have a
wolffd@0 118 factor of -1. */
wolffd@0 119 } DOC;
wolffd@0 120
wolffd@0 121 typedef struct learn_parm {
wolffd@0 122 long type; /* selects between regression and
wolffd@0 123 classification */
wolffd@0 124 double svm_c; /* upper bound C on alphas */
wolffd@0 125 double eps; /* regression epsilon (eps=1.0 for
wolffd@0 126 classification */
wolffd@0 127 double svm_costratio; /* factor to multiply C for positive examples */
wolffd@0 128 double transduction_posratio;/* fraction of unlabeled examples to be */
wolffd@0 129 /* classified as positives */
wolffd@0 130 long biased_hyperplane; /* if nonzero, use hyperplane w*x+b=0
wolffd@0 131 otherwise w*x=0 */
wolffd@0 132 long sharedslack; /* if nonzero, it will use the shared
wolffd@0 133 slack variable mode in
wolffd@0 134 svm_learn_optimization. It requires
wolffd@0 135 that the slackid is set for every
wolffd@0 136 training example */
wolffd@0 137 long svm_maxqpsize; /* size q of working set */
wolffd@0 138 long svm_newvarsinqp; /* new variables to enter the working set
wolffd@0 139 in each iteration */
wolffd@0 140 long kernel_cache_size; /* size of kernel cache in megabytes */
wolffd@0 141 double epsilon_crit; /* tolerable error for distances used
wolffd@0 142 in stopping criterion */
wolffd@0 143 double epsilon_shrink; /* how much a multiplier should be above
wolffd@0 144 zero for shrinking */
wolffd@0 145 long svm_iter_to_shrink; /* iterations h after which an example can
wolffd@0 146 be removed by shrinking */
wolffd@0 147 long maxiter; /* number of iterations after which the
wolffd@0 148 optimizer terminates, if there was
wolffd@0 149 no progress in maxdiff */
wolffd@0 150 long remove_inconsistent; /* exclude examples with alpha at C and
wolffd@0 151 retrain */
wolffd@0 152 long skip_final_opt_check; /* do not check KT-Conditions at the end of
wolffd@0 153 optimization for examples removed by
wolffd@0 154 shrinking. WARNING: This might lead to
wolffd@0 155 sub-optimal solutions! */
wolffd@0 156 long compute_loo; /* if nonzero, computes leave-one-out
wolffd@0 157 estimates */
wolffd@0 158 double rho; /* parameter in xi/alpha-estimates and for
wolffd@0 159 pruning leave-one-out range [1..2] */
wolffd@0 160 long xa_depth; /* parameter in xi/alpha-estimates upper
wolffd@0 161 bounding the number of SV the current
wolffd@0 162 alpha_t is distributed over */
wolffd@0 163 char predfile[200]; /* file for predicitions on unlabeled examples
wolffd@0 164 in transduction */
wolffd@0 165 char alphafile[200]; /* file to store optimal alphas in. use
wolffd@0 166 empty string if alphas should not be
wolffd@0 167 output */
wolffd@0 168
wolffd@0 169 /* you probably do not want to touch the following */
wolffd@0 170 double epsilon_const; /* tolerable error on eq-constraint */
wolffd@0 171 double epsilon_a; /* tolerable error on alphas at bounds */
wolffd@0 172 double opt_precision; /* precision of solver, set to e.g. 1e-21
wolffd@0 173 if you get convergence problems */
wolffd@0 174
wolffd@0 175 /* the following are only for internal use */
wolffd@0 176 long svm_c_steps; /* do so many steps for finding optimal C */
wolffd@0 177 double svm_c_factor; /* increase C by this factor every step */
wolffd@0 178 double svm_costratio_unlab;
wolffd@0 179 double svm_unlabbound;
wolffd@0 180 double *svm_cost; /* individual upper bounds for each var */
wolffd@0 181 long totwords; /* number of features */
wolffd@0 182 } LEARN_PARM;
wolffd@0 183
wolffd@0 184 typedef struct kernel_parm {
wolffd@0 185 long kernel_type; /* 0=linear, 1=poly, 2=rbf, 3=sigmoid, 4=custom */
wolffd@0 186 long poly_degree;
wolffd@0 187 double rbf_gamma;
wolffd@0 188 double coef_lin;
wolffd@0 189 double coef_const;
wolffd@0 190 char custom[50]; /* for user supplied kernel */
wolffd@0 191 } KERNEL_PARM;
wolffd@0 192
wolffd@0 193 typedef struct model {
wolffd@0 194 long sv_num;
wolffd@0 195 long at_upper_bound;
wolffd@0 196 double b;
wolffd@0 197 DOC **supvec;
wolffd@0 198 double *alpha;
wolffd@0 199 long *index; /* index from docnum to position in model */
wolffd@0 200 long totwords; /* number of features */
wolffd@0 201 long totdoc; /* number of training documents */
wolffd@0 202 KERNEL_PARM kernel_parm; /* kernel */
wolffd@0 203
wolffd@0 204 /* the following values are not written to file */
wolffd@0 205 double loo_error,loo_recall,loo_precision; /* leave-one-out estimates */
wolffd@0 206 double xa_error,xa_recall,xa_precision; /* xi/alpha estimates */
wolffd@0 207 double *lin_weights; /* weights for linear case using
wolffd@0 208 folding */
wolffd@0 209 double maxdiff; /* precision, up to which this
wolffd@0 210 model is accurate */
wolffd@0 211 } MODEL;
wolffd@0 212
wolffd@0 213 typedef struct quadratic_program {
wolffd@0 214 long opt_n; /* number of variables */
wolffd@0 215 long opt_m; /* number of linear equality constraints */
wolffd@0 216 double *opt_ce,*opt_ce0; /* linear equality constraints */
wolffd@0 217 double *opt_g; /* hessian of objective */
wolffd@0 218 double *opt_g0; /* linear part of objective */
wolffd@0 219 double *opt_xinit; /* initial value for variables */
wolffd@0 220 double *opt_low,*opt_up; /* box constraints */
wolffd@0 221 } QP;
wolffd@0 222
wolffd@0 223 typedef struct kernel_cache {
wolffd@0 224 long *index; /* cache some kernel evalutations */
wolffd@0 225 CFLOAT *buffer; /* to improve speed */
wolffd@0 226 long *invindex;
wolffd@0 227 long *active2totdoc;
wolffd@0 228 long *totdoc2active;
wolffd@0 229 long *lru;
wolffd@0 230 long *occu;
wolffd@0 231 long elems;
wolffd@0 232 long max_elems;
wolffd@0 233 long time;
wolffd@0 234 long activenum;
wolffd@0 235 long buffsize;
wolffd@0 236 } KERNEL_CACHE;
wolffd@0 237
wolffd@0 238
wolffd@0 239 typedef struct timing_profile {
wolffd@0 240 long time_kernel;
wolffd@0 241 long time_opti;
wolffd@0 242 long time_shrink;
wolffd@0 243 long time_update;
wolffd@0 244 long time_model;
wolffd@0 245 long time_check;
wolffd@0 246 long time_select;
wolffd@0 247 } TIMING;
wolffd@0 248
wolffd@0 249 typedef struct shrink_state {
wolffd@0 250 long *active;
wolffd@0 251 long *inactive_since;
wolffd@0 252 long deactnum;
wolffd@0 253 double **a_history; /* for shrinking with non-linear kernel */
wolffd@0 254 long maxhistory;
wolffd@0 255 double *last_a; /* for shrinking with linear kernel */
wolffd@0 256 double *last_lin; /* for shrinking with linear kernel */
wolffd@0 257 } SHRINK_STATE;
wolffd@0 258
wolffd@0 259 double classify_example(MODEL *, DOC *);
wolffd@0 260 double classify_example_linear(MODEL *, DOC *);
wolffd@0 261 CFLOAT kernel(KERNEL_PARM *, DOC *, DOC *);
wolffd@0 262 CFLOAT single_kernel(KERNEL_PARM *, SVECTOR *, SVECTOR *);
wolffd@0 263 double custom_kernel(KERNEL_PARM *, SVECTOR *, SVECTOR *);
wolffd@0 264 SVECTOR *create_svector(WORD *, char *, double);
wolffd@0 265 SVECTOR *copy_svector(SVECTOR *);
wolffd@0 266 void free_svector(SVECTOR *);
wolffd@0 267 double sprod_ss(SVECTOR *, SVECTOR *);
wolffd@0 268 SVECTOR* sub_ss(SVECTOR *, SVECTOR *);
wolffd@0 269 SVECTOR* add_ss(SVECTOR *, SVECTOR *);
wolffd@0 270 SVECTOR* add_list_ss(SVECTOR *);
wolffd@0 271 void append_svector_list(SVECTOR *a, SVECTOR *b);
wolffd@0 272 SVECTOR* smult_s(SVECTOR *, double);
wolffd@0 273 int featvec_eq(SVECTOR *, SVECTOR *);
wolffd@0 274 double model_length_s(MODEL *, KERNEL_PARM *);
wolffd@0 275 void clear_vector_n(double *, long);
wolffd@0 276 void add_vector_ns(double *, SVECTOR *, double);
wolffd@0 277 double sprod_ns(double *, SVECTOR *);
wolffd@0 278 void add_weight_vector_to_linear_model(MODEL *);
wolffd@0 279 DOC *create_example(long, long, long, double, SVECTOR *);
wolffd@0 280 void free_example(DOC *, long);
wolffd@0 281 MODEL *read_model(char *);
wolffd@0 282 MODEL *copy_model(MODEL *);
wolffd@0 283 void free_model(MODEL *, int);
wolffd@0 284 void read_documents(char *, DOC ***, double **, long *, long *);
wolffd@0 285 int parse_document(char *, WORD *, double *, long *, long *, double *, long *, long, char **);
wolffd@0 286 double *read_alphas(char *,long);
wolffd@0 287 void nol_ll(char *, long *, long *, long *);
wolffd@0 288 long minl(long, long);
wolffd@0 289 long maxl(long, long);
wolffd@0 290 long get_runtime(void);
wolffd@0 291 int space_or_null(int);
wolffd@0 292 void *my_malloc(size_t);
wolffd@0 293 void copyright_notice(void);
wolffd@0 294 # ifdef _MSC_VER
wolffd@0 295 int isnan(double);
wolffd@0 296 # endif
wolffd@0 297
wolffd@0 298 extern long verbosity; /* verbosity level (0-4) */
wolffd@0 299 extern long kernel_cache_statistic;
wolffd@0 300
wolffd@0 301 #endif