annotate toolboxes/SVM-light/src/svm_common.h @ 0:cc4b1211e677 tip

initial commit to HG from Changeset: 646 (e263d8a21543) added further path and more save "camirversion.m"
author Daniel Wolff
date Fri, 19 Aug 2016 13:07:06 +0200
parents
children
rev   line source
Daniel@0 1 /************************************************************************/
Daniel@0 2 /* */
Daniel@0 3 /* svm_common.h */
Daniel@0 4 /* */
Daniel@0 5 /* Definitions and functions used in both svm_learn and svm_classify. */
Daniel@0 6 /* */
Daniel@0 7 /* Author: Thorsten Joachims */
Daniel@0 8 /* Date: 02.07.02 */
Daniel@0 9 /* */
Daniel@0 10 /* Copyright (c) 2002 Thorsten Joachims - All rights reserved */
Daniel@0 11 /* */
Daniel@0 12 /* This software is available for non-commercial use only. It must */
Daniel@0 13 /* not be modified and distributed without prior permission of the */
Daniel@0 14 /* author. The author is not responsible for implications from the */
Daniel@0 15 /* use of this software. */
Daniel@0 16 /* */
Daniel@0 17 /************************************************************************/
Daniel@0 18
Daniel@0 19 #ifndef SVM_COMMON
Daniel@0 20 #define SVM_COMMON
Daniel@0 21
Daniel@0 22 # define MAXSHRINK 50000 /* maximum number of shrinking rounds */
Daniel@0 23 # define MAXFEATNUM 99999999 /* maximum feature number (must be in
Daniel@0 24 valid range of long int type!) */
Daniel@0 25
Daniel@0 26 # include <stdio.h>
Daniel@0 27 # include <ctype.h>
Daniel@0 28 # include <math.h>
Daniel@0 29 # include <string.h>
Daniel@0 30 # include <stdlib.h>
Daniel@0 31 # include <time.h>
Daniel@0 32 # include <float.h>
Daniel@0 33
Daniel@0 34 # define VERSION "V6.01"
Daniel@0 35 # define VERSION_DATE "01.09.04"
Daniel@0 36
Daniel@0 37 # define CFLOAT float /* the type of float to use for caching */
Daniel@0 38 /* kernel evaluations. Using float saves */
Daniel@0 39 /* us some memory, but you can use double, too */
Daniel@0 40 # define FNUM long /* the type used for storing feature ids */
Daniel@0 41 # define FVAL float /* the type used for storing feature values */
Daniel@0 42
Daniel@0 43 # define LINEAR 0 /* linear kernel type */
Daniel@0 44 # define POLY 1 /* polynomial kernel type */
Daniel@0 45 # define RBF 2 /* rbf kernel type */
Daniel@0 46 # define SIGMOID 3 /* sigmoid kernel type */
Daniel@0 47
Daniel@0 48 # define CLASSIFICATION 1 /* train classification model */
Daniel@0 49 # define REGRESSION 2 /* train regression model */
Daniel@0 50 # define RANKING 3 /* train ranking model */
Daniel@0 51 # define OPTIMIZATION 4 /* train on general set of constraints */
Daniel@0 52
Daniel@0 53 typedef struct word {
Daniel@0 54 FNUM wnum; /* word number (feature id) */
Daniel@0 55 FVAL weight; /* word weight (feature value) */
Daniel@0 56 } WORD;
Daniel@0 57
Daniel@0 58 typedef struct svector {
Daniel@0 59 WORD *words; /* The features/values in the vector by
Daniel@0 60 increasing feature-number. Feature
Daniel@0 61 numbers that are skipped are
Daniel@0 62 interpreted as having value zero. */
Daniel@0 63 double twonorm_sq; /* The squared Euclidean length of the
Daniel@0 64 vector. Used to speed up the RBF kernel. */
Daniel@0 65 char *userdefined; /* You can put additional information
Daniel@0 66 here. This can be useful, if you are
Daniel@0 67 implementing your own kernel that
Daniel@0 68 does not work with feature/values
Daniel@0 69 representations (for example a
Daniel@0 70 string kernel). By default,
Daniel@0 71 svm-light will put here the string
Daniel@0 72 after the # sign from each line of
Daniel@0 73 the input file. */
Daniel@0 74 long kernel_id; /* Feature vectors with different
Daniel@0 75 kernel_id's are orthogonal (i.e. the
Daniel@0 76 feature numbers do not match). This
Daniel@0 77 is used for computing component
Daniel@0 78 kernels for linear constraints which
Daniel@0 79 are a sum of several different
Daniel@0 80 weight vectors. (currently not
Daniel@0 81 implemented). */
Daniel@0 82 struct svector *next; /* Lets you set up a list of SVECTOR's
Daniel@0 83 for linear constraints which are a
Daniel@0 84 sum of multiple feature
Daniel@0 85 vectors. List is terminated by
Daniel@0 86 NULL. */
Daniel@0 87 double factor; /* Factor by which this feature vector
Daniel@0 88 is multiplied in the sum. */
Daniel@0 89 } SVECTOR;
Daniel@0 90
Daniel@0 91 typedef struct doc {
Daniel@0 92 long docnum; /* Document ID. This has to be the position of
Daniel@0 93 the document in the training set array. */
Daniel@0 94 long queryid; /* for learning rankings, constraints are
Daniel@0 95 generated for documents with the same
Daniel@0 96 queryid. */
Daniel@0 97 double costfactor; /* Scales the cost of misclassifying this
Daniel@0 98 document by this factor. The effect of this
Daniel@0 99 value is, that the upper bound on the alpha
Daniel@0 100 for this example is scaled by this factor.
Daniel@0 101 The factors are set by the feature
Daniel@0 102 'cost:<val>' in the training data. */
Daniel@0 103 long slackid; /* Index of the slack variable
Daniel@0 104 corresponding to this
Daniel@0 105 constraint. All constraints with the
Daniel@0 106 same slackid share the same slack
Daniel@0 107 variable. This can only be used for
Daniel@0 108 svm_learn_optimization. */
Daniel@0 109 SVECTOR *fvec; /* Feature vector of the example. The
Daniel@0 110 feature vector can actually be a
Daniel@0 111 list of feature vectors. For
Daniel@0 112 example, the list will have two
Daniel@0 113 elements, if this DOC is a
Daniel@0 114 preference constraint. The one
Daniel@0 115 vector that is supposed to be ranked
Daniel@0 116 higher, will have a factor of +1,
Daniel@0 117 the lower ranked one should have a
Daniel@0 118 factor of -1. */
Daniel@0 119 } DOC;
Daniel@0 120
Daniel@0 121 typedef struct learn_parm {
Daniel@0 122 long type; /* selects between regression and
Daniel@0 123 classification */
Daniel@0 124 double svm_c; /* upper bound C on alphas */
Daniel@0 125 double eps; /* regression epsilon (eps=1.0 for
Daniel@0 126 classification) */
Daniel@0 127 double svm_costratio; /* factor to multiply C for positive examples */
Daniel@0 128 double transduction_posratio;/* fraction of unlabeled examples to be */
Daniel@0 129 /* classified as positives */
Daniel@0 130 long biased_hyperplane; /* if nonzero, use hyperplane w*x+b=0
Daniel@0 131 otherwise w*x=0 */
Daniel@0 132 long sharedslack; /* if nonzero, it will use the shared
Daniel@0 133 slack variable mode in
Daniel@0 134 svm_learn_optimization. It requires
Daniel@0 135 that the slackid is set for every
Daniel@0 136 training example */
Daniel@0 137 long svm_maxqpsize; /* size q of working set */
Daniel@0 138 long svm_newvarsinqp; /* new variables to enter the working set
Daniel@0 139 in each iteration */
Daniel@0 140 long kernel_cache_size; /* size of kernel cache in megabytes */
Daniel@0 141 double epsilon_crit; /* tolerable error for distances used
Daniel@0 142 in stopping criterion */
Daniel@0 143 double epsilon_shrink; /* how much a multiplier should be above
Daniel@0 144 zero for shrinking */
Daniel@0 145 long svm_iter_to_shrink; /* iterations h after which an example can
Daniel@0 146 be removed by shrinking */
Daniel@0 147 long maxiter; /* number of iterations after which the
Daniel@0 148 optimizer terminates, if there was
Daniel@0 149 no progress in maxdiff */
Daniel@0 150 long remove_inconsistent; /* exclude examples with alpha at C and
Daniel@0 151 retrain */
Daniel@0 152 long skip_final_opt_check; /* do not check KT-Conditions at the end of
Daniel@0 153 optimization for examples removed by
Daniel@0 154 shrinking. WARNING: This might lead to
Daniel@0 155 sub-optimal solutions! */
Daniel@0 156 long compute_loo; /* if nonzero, computes leave-one-out
Daniel@0 157 estimates */
Daniel@0 158 double rho; /* parameter in xi/alpha-estimates and for
Daniel@0 159 pruning leave-one-out range [1..2] */
Daniel@0 160 long xa_depth; /* parameter in xi/alpha-estimates upper
Daniel@0 161 bounding the number of SV the current
Daniel@0 162 alpha_t is distributed over */
Daniel@0 163 char predfile[200]; /* file for predictions on unlabeled examples
Daniel@0 164 in transduction */
Daniel@0 165 char alphafile[200]; /* file to store optimal alphas in. use
Daniel@0 166 empty string if alphas should not be
Daniel@0 167 output */
Daniel@0 168
Daniel@0 169 /* you probably do not want to touch the following */
Daniel@0 170 double epsilon_const; /* tolerable error on eq-constraint */
Daniel@0 171 double epsilon_a; /* tolerable error on alphas at bounds */
Daniel@0 172 double opt_precision; /* precision of solver, set to e.g. 1e-21
Daniel@0 173 if you get convergence problems */
Daniel@0 174
Daniel@0 175 /* the following are only for internal use */
Daniel@0 176 long svm_c_steps; /* do so many steps for finding optimal C */
Daniel@0 177 double svm_c_factor; /* increase C by this factor every step */
Daniel@0 178 double svm_costratio_unlab;
Daniel@0 179 double svm_unlabbound;
Daniel@0 180 double *svm_cost; /* individual upper bounds for each var */
Daniel@0 181 long totwords; /* number of features */
Daniel@0 182 } LEARN_PARM;
Daniel@0 183
Daniel@0 184 typedef struct kernel_parm {
Daniel@0 185 long kernel_type; /* 0=linear, 1=poly, 2=rbf, 3=sigmoid, 4=custom */
Daniel@0 186 long poly_degree; /* degree d of the polynomial kernel */
Daniel@0 187 double rbf_gamma; /* gamma of the RBF kernel */
Daniel@0 188 double coef_lin; /* linear coefficient s in poly/sigmoid kernels
/* (per SVM-light docs: (s a*b+c)^d, tanh(s a*b+c)) */
Daniel@0 189 double coef_const; /* constant offset c in poly/sigmoid kernels */
Daniel@0 190 char custom[50]; /* for user supplied kernel */
Daniel@0 191 } KERNEL_PARM;
Daniel@0 192
Daniel@0 193 typedef struct model {
Daniel@0 194 long sv_num; /* number of support vectors (NOTE(review):
confirm whether counting starts at 1) */
Daniel@0 195 long at_upper_bound; /* number of alphas at the upper bound C --
confirm in svm_learn.c */
Daniel@0 196 double b; /* threshold b of the hyperplane w*x+b=0 */
Daniel@0 197 DOC **supvec; /* the support-vector documents */
Daniel@0 198 double *alpha; /* alpha values for the support vectors */
Daniel@0 199 long *index; /* index from docnum to position in model */
Daniel@0 200 long totwords; /* number of features */
Daniel@0 201 long totdoc; /* number of training documents */
Daniel@0 202 KERNEL_PARM kernel_parm; /* kernel */
Daniel@0 203
Daniel@0 204 /* the following values are not written to file */
Daniel@0 205 double loo_error,loo_recall,loo_precision; /* leave-one-out estimates */
Daniel@0 206 double xa_error,xa_recall,xa_precision; /* xi/alpha estimates */
Daniel@0 207 double *lin_weights; /* weights for linear case using
Daniel@0 208 folding */
Daniel@0 209 double maxdiff; /* precision, up to which this
Daniel@0 210 model is accurate */
Daniel@0 211 } MODEL;
Daniel@0 212
Daniel@0 213 typedef struct quadratic_program {
Daniel@0 214 long opt_n; /* number of variables */
Daniel@0 215 long opt_m; /* number of linear equality constraints */
Daniel@0 216 double *opt_ce,*opt_ce0; /* linear equality constraints */
Daniel@0 217 double *opt_g; /* Hessian of objective */
Daniel@0 218 double *opt_g0; /* linear part of objective */
Daniel@0 219 double *opt_xinit; /* initial value for variables */
Daniel@0 220 double *opt_low,*opt_up; /* box constraints */
Daniel@0 221 } QP;
Daniel@0 222
Daniel@0 223 typedef struct kernel_cache {
Daniel@0 224 long *index; /* cache some kernel evaluations */
Daniel@0 225 CFLOAT *buffer; /* to improve speed */
Daniel@0 226 long *invindex; /* inverse of index -- confirm in svm_learn.c */
Daniel@0 227 long *active2totdoc;
Daniel@0 228 long *totdoc2active;
Daniel@0 229 long *lru; /* presumably least-recently-used stamps for
eviction -- confirm in svm_learn.c */
Daniel@0 230 long *occu;
Daniel@0 231 long elems;
Daniel@0 232 long max_elems;
Daniel@0 233 long time;
Daniel@0 234 long activenum;
Daniel@0 235 long buffsize;
Daniel@0 236 } KERNEL_CACHE;
Daniel@0 237
Daniel@0 238
Daniel@0 239 typedef struct timing_profile { /* per-phase runtime counters (presumably
accumulated via get_runtime() -- confirm) */
Daniel@0 240 long time_kernel; /* time spent in kernel evaluations */
Daniel@0 241 long time_opti; /* time spent in the QP optimizer */
Daniel@0 242 long time_shrink; /* time spent shrinking */
Daniel@0 243 long time_update;
Daniel@0 244 long time_model;
Daniel@0 245 long time_check;
Daniel@0 246 long time_select;
Daniel@0 247 } TIMING;
Daniel@0 248
Daniel@0 249 typedef struct shrink_state {
Daniel@0 250 long *active; /* presumably nonzero per example while it is
still in the active (unshrunk) set -- confirm */
Daniel@0 251 long *inactive_since;
Daniel@0 252 long deactnum;
Daniel@0 253 double **a_history; /* for shrinking with non-linear kernel */
Daniel@0 254 long maxhistory;
Daniel@0 255 double *last_a; /* for shrinking with linear kernel */
Daniel@0 256 double *last_lin; /* for shrinking with linear kernel */
Daniel@0 257 } SHRINK_STATE;
Daniel@0 258
Daniel@0 259 double classify_example(MODEL *, DOC *);
Daniel@0 260 double classify_example_linear(MODEL *, DOC *);
Daniel@0 261 CFLOAT kernel(KERNEL_PARM *, DOC *, DOC *);
Daniel@0 262 CFLOAT single_kernel(KERNEL_PARM *, SVECTOR *, SVECTOR *);
Daniel@0 263 double custom_kernel(KERNEL_PARM *, SVECTOR *, SVECTOR *);
Daniel@0 264 SVECTOR *create_svector(WORD *, char *, double);
Daniel@0 265 SVECTOR *copy_svector(SVECTOR *);
Daniel@0 266 void free_svector(SVECTOR *);
Daniel@0 267 double sprod_ss(SVECTOR *, SVECTOR *);
Daniel@0 268 SVECTOR* sub_ss(SVECTOR *, SVECTOR *);
Daniel@0 269 SVECTOR* add_ss(SVECTOR *, SVECTOR *);
Daniel@0 270 SVECTOR* add_list_ss(SVECTOR *);
Daniel@0 271 void append_svector_list(SVECTOR *a, SVECTOR *b);
Daniel@0 272 SVECTOR* smult_s(SVECTOR *, double);
Daniel@0 273 int featvec_eq(SVECTOR *, SVECTOR *);
Daniel@0 274 double model_length_s(MODEL *, KERNEL_PARM *);
Daniel@0 275 void clear_vector_n(double *, long);
Daniel@0 276 void add_vector_ns(double *, SVECTOR *, double);
Daniel@0 277 double sprod_ns(double *, SVECTOR *);
Daniel@0 278 void add_weight_vector_to_linear_model(MODEL *);
Daniel@0 279 DOC *create_example(long, long, long, double, SVECTOR *);
Daniel@0 280 void free_example(DOC *, long);
Daniel@0 281 MODEL *read_model(char *);
Daniel@0 282 MODEL *copy_model(MODEL *);
Daniel@0 283 void free_model(MODEL *, int);
Daniel@0 284 void read_documents(char *, DOC ***, double **, long *, long *);
Daniel@0 285 int parse_document(char *, WORD *, double *, long *, long *, double *, long *, long, char **);
Daniel@0 286 double *read_alphas(char *,long);
Daniel@0 287 void nol_ll(char *, long *, long *, long *);
Daniel@0 288 long minl(long, long);
Daniel@0 289 long maxl(long, long);
Daniel@0 290 long get_runtime(void);
Daniel@0 291 int space_or_null(int);
Daniel@0 292 void *my_malloc(size_t); /* malloc wrapper -- presumably aborts on
failure; confirm in svm_common.c */
Daniel@0 293 void copyright_notice(void);
Daniel@0 294 # ifdef _MSC_VER /* older MSVC runtimes did not declare isnan() */
Daniel@0 295 int isnan(double);
Daniel@0 296 # endif
Daniel@0 297
Daniel@0 298 extern long verbosity; /* verbosity level (0-4) */
Daniel@0 299 extern long kernel_cache_statistic; /* NOTE(review): appears to count kernel
evaluations -- confirm in svm_common.c */
Daniel@0 300
Daniel@0 301 #endif