wolffd@0
|
1 /************************************************************************/
|
wolffd@0
|
2 /* */
|
wolffd@0
|
3 /* svm_common.h */
|
wolffd@0
|
4 /* */
|
wolffd@0
|
5 /* Definitions and functions used in both svm_learn and svm_classify. */
|
wolffd@0
|
6 /* */
|
wolffd@0
|
7 /* Author: Thorsten Joachims */
|
wolffd@0
|
8 /* Date: 02.07.02 */
|
wolffd@0
|
9 /* */
|
wolffd@0
|
10 /* Copyright (c) 2002 Thorsten Joachims - All rights reserved */
|
wolffd@0
|
11 /* */
|
wolffd@0
|
12 /* This software is available for non-commercial use only. It must */
|
wolffd@0
|
13 /* not be modified and distributed without prior permission of the */
|
wolffd@0
|
14 /* author. The author is not responsible for implications from the */
|
wolffd@0
|
15 /* use of this software. */
|
wolffd@0
|
16 /* */
|
wolffd@0
|
17 /************************************************************************/
|
wolffd@0
|
18
|
wolffd@0
|
19 #ifndef SVM_COMMON
|
wolffd@0
|
20 #define SVM_COMMON
|
wolffd@0
|
21
|
wolffd@0
|
22 # define MAXSHRINK 50000 /* maximum number of shrinking rounds */
|
wolffd@0
|
23 # define MAXFEATNUM 99999999 /* maximum feature number (must be in
|
wolffd@0
|
24 valid range of long int type!) */
|
wolffd@0
|
25
|
wolffd@0
|
26 # include <stdio.h>
|
wolffd@0
|
27 # include <ctype.h>
|
wolffd@0
|
28 # include <math.h>
|
wolffd@0
|
29 # include <string.h>
|
wolffd@0
|
30 # include <stdlib.h>
|
wolffd@0
|
31 # include <time.h>
|
wolffd@0
|
32 # include <float.h>
|
wolffd@0
|
33
|
wolffd@0
|
34 # define VERSION "V6.01"
|
wolffd@0
|
35 # define VERSION_DATE "01.09.04"
|
wolffd@0
|
36
|
wolffd@0
|
37 # define CFLOAT float /* the type of float to use for caching */
|
wolffd@0
|
38 /* kernel evaluations. Using float saves */
|
wolffd@0
|
39 /* us some memory, but you can use double, too */
|
wolffd@0
|
40 # define FNUM long /* the type used for storing feature ids */
|
wolffd@0
|
41 # define FVAL float /* the type used for storing feature values */
|
wolffd@0
|
42
|
wolffd@0
|
43 # define LINEAR 0 /* linear kernel type */
|
wolffd@0
|
44 # define POLY 1 /* polynoial kernel type */
|
wolffd@0
|
45 # define RBF 2 /* rbf kernel type */
|
wolffd@0
|
46 # define SIGMOID 3 /* sigmoid kernel type */
|
wolffd@0
|
47
|
wolffd@0
|
48 # define CLASSIFICATION 1 /* train classification model */
|
wolffd@0
|
49 # define REGRESSION 2 /* train regression model */
|
wolffd@0
|
50 # define RANKING 3 /* train ranking model */
|
wolffd@0
|
51 # define OPTIMIZATION 4 /* train on general set of constraints */
|
wolffd@0
|
52
|
wolffd@0
|
53 typedef struct word {
|
wolffd@0
|
54 FNUM wnum; /* word number */
|
wolffd@0
|
55 FVAL weight; /* word weight */
|
wolffd@0
|
56 } WORD;
|
wolffd@0
|
57
|
wolffd@0
|
58 typedef struct svector {
|
wolffd@0
|
59 WORD *words; /* The features/values in the vector by
|
wolffd@0
|
60 increasing feature-number. Feature
|
wolffd@0
|
61 numbers that are skipped are
|
wolffd@0
|
62 interpreted as having value zero. */
|
wolffd@0
|
63 double twonorm_sq; /* The squared euclidian length of the
|
wolffd@0
|
64 vector. Used to speed up the RBF kernel. */
|
wolffd@0
|
65 char *userdefined; /* You can put additional information
|
wolffd@0
|
66 here. This can be useful, if you are
|
wolffd@0
|
67 implementing your own kernel that
|
wolffd@0
|
68 does not work with feature/values
|
wolffd@0
|
69 representations (for example a
|
wolffd@0
|
70 string kernel). By default,
|
wolffd@0
|
71 svm-light will put here the string
|
wolffd@0
|
72 after the # sign from each line of
|
wolffd@0
|
73 the input file. */
|
wolffd@0
|
74 long kernel_id; /* Feature vectors with different
|
wolffd@0
|
75 kernel_id's are orthogonal (ie. the
|
wolffd@0
|
76 feature number do not match). This
|
wolffd@0
|
77 is used for computing component
|
wolffd@0
|
78 kernels for linear constraints which
|
wolffd@0
|
79 are a sum of several different
|
wolffd@0
|
80 weight vectors. (currently not
|
wolffd@0
|
81 implemented). */
|
wolffd@0
|
82 struct svector *next; /* Let's you set up a list of SVECTOR's
|
wolffd@0
|
83 for linear constraints which are a
|
wolffd@0
|
84 sum of multiple feature
|
wolffd@0
|
85 vectors. List is terminated by
|
wolffd@0
|
86 NULL. */
|
wolffd@0
|
87 double factor; /* Factor by which this feature vector
|
wolffd@0
|
88 is multiplied in the sum. */
|
wolffd@0
|
89 } SVECTOR;
|
wolffd@0
|
90
|
wolffd@0
|
91 typedef struct doc {
|
wolffd@0
|
92 long docnum; /* Document ID. This has to be the position of
|
wolffd@0
|
93 the document in the training set array. */
|
wolffd@0
|
94 long queryid; /* for learning rankings, constraints are
|
wolffd@0
|
95 generated for documents with the same
|
wolffd@0
|
96 queryID. */
|
wolffd@0
|
97 double costfactor; /* Scales the cost of misclassifying this
|
wolffd@0
|
98 document by this factor. The effect of this
|
wolffd@0
|
99 value is, that the upper bound on the alpha
|
wolffd@0
|
100 for this example is scaled by this factor.
|
wolffd@0
|
101 The factors are set by the feature
|
wolffd@0
|
102 'cost:<val>' in the training data. */
|
wolffd@0
|
103 long slackid; /* Index of the slack variable
|
wolffd@0
|
104 corresponding to this
|
wolffd@0
|
105 constraint. All constraints with the
|
wolffd@0
|
106 same slackid share the same slack
|
wolffd@0
|
107 variable. This can only be used for
|
wolffd@0
|
108 svm_learn_optimization. */
|
wolffd@0
|
109 SVECTOR *fvec; /* Feature vector of the example. The
|
wolffd@0
|
110 feature vector can actually be a
|
wolffd@0
|
111 list of feature vectors. For
|
wolffd@0
|
112 example, the list will have two
|
wolffd@0
|
113 elements, if this DOC is a
|
wolffd@0
|
114 preference constraint. The one
|
wolffd@0
|
115 vector that is supposed to be ranked
|
wolffd@0
|
116 higher, will have a factor of +1,
|
wolffd@0
|
117 the lower ranked one should have a
|
wolffd@0
|
118 factor of -1. */
|
wolffd@0
|
119 } DOC;
|
wolffd@0
|
120
|
wolffd@0
|
121 typedef struct learn_parm {
|
wolffd@0
|
122 long type; /* selects between regression and
|
wolffd@0
|
123 classification */
|
wolffd@0
|
124 double svm_c; /* upper bound C on alphas */
|
wolffd@0
|
125 double eps; /* regression epsilon (eps=1.0 for
|
wolffd@0
|
126 classification */
|
wolffd@0
|
127 double svm_costratio; /* factor to multiply C for positive examples */
|
wolffd@0
|
128 double transduction_posratio;/* fraction of unlabeled examples to be */
|
wolffd@0
|
129 /* classified as positives */
|
wolffd@0
|
130 long biased_hyperplane; /* if nonzero, use hyperplane w*x+b=0
|
wolffd@0
|
131 otherwise w*x=0 */
|
wolffd@0
|
132 long sharedslack; /* if nonzero, it will use the shared
|
wolffd@0
|
133 slack variable mode in
|
wolffd@0
|
134 svm_learn_optimization. It requires
|
wolffd@0
|
135 that the slackid is set for every
|
wolffd@0
|
136 training example */
|
wolffd@0
|
137 long svm_maxqpsize; /* size q of working set */
|
wolffd@0
|
138 long svm_newvarsinqp; /* new variables to enter the working set
|
wolffd@0
|
139 in each iteration */
|
wolffd@0
|
140 long kernel_cache_size; /* size of kernel cache in megabytes */
|
wolffd@0
|
141 double epsilon_crit; /* tolerable error for distances used
|
wolffd@0
|
142 in stopping criterion */
|
wolffd@0
|
143 double epsilon_shrink; /* how much a multiplier should be above
|
wolffd@0
|
144 zero for shrinking */
|
wolffd@0
|
145 long svm_iter_to_shrink; /* iterations h after which an example can
|
wolffd@0
|
146 be removed by shrinking */
|
wolffd@0
|
147 long maxiter; /* number of iterations after which the
|
wolffd@0
|
148 optimizer terminates, if there was
|
wolffd@0
|
149 no progress in maxdiff */
|
wolffd@0
|
150 long remove_inconsistent; /* exclude examples with alpha at C and
|
wolffd@0
|
151 retrain */
|
wolffd@0
|
152 long skip_final_opt_check; /* do not check KT-Conditions at the end of
|
wolffd@0
|
153 optimization for examples removed by
|
wolffd@0
|
154 shrinking. WARNING: This might lead to
|
wolffd@0
|
155 sub-optimal solutions! */
|
wolffd@0
|
156 long compute_loo; /* if nonzero, computes leave-one-out
|
wolffd@0
|
157 estimates */
|
wolffd@0
|
158 double rho; /* parameter in xi/alpha-estimates and for
|
wolffd@0
|
159 pruning leave-one-out range [1..2] */
|
wolffd@0
|
160 long xa_depth; /* parameter in xi/alpha-estimates upper
|
wolffd@0
|
161 bounding the number of SV the current
|
wolffd@0
|
162 alpha_t is distributed over */
|
wolffd@0
|
163 char predfile[200]; /* file for predicitions on unlabeled examples
|
wolffd@0
|
164 in transduction */
|
wolffd@0
|
165 char alphafile[200]; /* file to store optimal alphas in. use
|
wolffd@0
|
166 empty string if alphas should not be
|
wolffd@0
|
167 output */
|
wolffd@0
|
168
|
wolffd@0
|
169 /* you probably do not want to touch the following */
|
wolffd@0
|
170 double epsilon_const; /* tolerable error on eq-constraint */
|
wolffd@0
|
171 double epsilon_a; /* tolerable error on alphas at bounds */
|
wolffd@0
|
172 double opt_precision; /* precision of solver, set to e.g. 1e-21
|
wolffd@0
|
173 if you get convergence problems */
|
wolffd@0
|
174
|
wolffd@0
|
175 /* the following are only for internal use */
|
wolffd@0
|
176 long svm_c_steps; /* do so many steps for finding optimal C */
|
wolffd@0
|
177 double svm_c_factor; /* increase C by this factor every step */
|
wolffd@0
|
178 double svm_costratio_unlab;
|
wolffd@0
|
179 double svm_unlabbound;
|
wolffd@0
|
180 double *svm_cost; /* individual upper bounds for each var */
|
wolffd@0
|
181 long totwords; /* number of features */
|
wolffd@0
|
182 } LEARN_PARM;
|
wolffd@0
|
183
|
wolffd@0
|
184 typedef struct kernel_parm {
|
wolffd@0
|
185 long kernel_type; /* 0=linear, 1=poly, 2=rbf, 3=sigmoid, 4=custom */
|
wolffd@0
|
186 long poly_degree;
|
wolffd@0
|
187 double rbf_gamma;
|
wolffd@0
|
188 double coef_lin;
|
wolffd@0
|
189 double coef_const;
|
wolffd@0
|
190 char custom[50]; /* for user supplied kernel */
|
wolffd@0
|
191 } KERNEL_PARM;
|
wolffd@0
|
192
|
wolffd@0
|
193 typedef struct model {
|
wolffd@0
|
194 long sv_num;
|
wolffd@0
|
195 long at_upper_bound;
|
wolffd@0
|
196 double b;
|
wolffd@0
|
197 DOC **supvec;
|
wolffd@0
|
198 double *alpha;
|
wolffd@0
|
199 long *index; /* index from docnum to position in model */
|
wolffd@0
|
200 long totwords; /* number of features */
|
wolffd@0
|
201 long totdoc; /* number of training documents */
|
wolffd@0
|
202 KERNEL_PARM kernel_parm; /* kernel */
|
wolffd@0
|
203
|
wolffd@0
|
204 /* the following values are not written to file */
|
wolffd@0
|
205 double loo_error,loo_recall,loo_precision; /* leave-one-out estimates */
|
wolffd@0
|
206 double xa_error,xa_recall,xa_precision; /* xi/alpha estimates */
|
wolffd@0
|
207 double *lin_weights; /* weights for linear case using
|
wolffd@0
|
208 folding */
|
wolffd@0
|
209 double maxdiff; /* precision, up to which this
|
wolffd@0
|
210 model is accurate */
|
wolffd@0
|
211 } MODEL;
|
wolffd@0
|
212
|
wolffd@0
|
213 typedef struct quadratic_program {
|
wolffd@0
|
214 long opt_n; /* number of variables */
|
wolffd@0
|
215 long opt_m; /* number of linear equality constraints */
|
wolffd@0
|
216 double *opt_ce,*opt_ce0; /* linear equality constraints */
|
wolffd@0
|
217 double *opt_g; /* hessian of objective */
|
wolffd@0
|
218 double *opt_g0; /* linear part of objective */
|
wolffd@0
|
219 double *opt_xinit; /* initial value for variables */
|
wolffd@0
|
220 double *opt_low,*opt_up; /* box constraints */
|
wolffd@0
|
221 } QP;
|
wolffd@0
|
222
|
wolffd@0
|
223 typedef struct kernel_cache {
|
wolffd@0
|
224 long *index; /* cache some kernel evalutations */
|
wolffd@0
|
225 CFLOAT *buffer; /* to improve speed */
|
wolffd@0
|
226 long *invindex;
|
wolffd@0
|
227 long *active2totdoc;
|
wolffd@0
|
228 long *totdoc2active;
|
wolffd@0
|
229 long *lru;
|
wolffd@0
|
230 long *occu;
|
wolffd@0
|
231 long elems;
|
wolffd@0
|
232 long max_elems;
|
wolffd@0
|
233 long time;
|
wolffd@0
|
234 long activenum;
|
wolffd@0
|
235 long buffsize;
|
wolffd@0
|
236 } KERNEL_CACHE;
|
wolffd@0
|
237
|
wolffd@0
|
238
|
wolffd@0
|
239 typedef struct timing_profile {
|
wolffd@0
|
240 long time_kernel;
|
wolffd@0
|
241 long time_opti;
|
wolffd@0
|
242 long time_shrink;
|
wolffd@0
|
243 long time_update;
|
wolffd@0
|
244 long time_model;
|
wolffd@0
|
245 long time_check;
|
wolffd@0
|
246 long time_select;
|
wolffd@0
|
247 } TIMING;
|
wolffd@0
|
248
|
wolffd@0
|
249 typedef struct shrink_state {
|
wolffd@0
|
250 long *active;
|
wolffd@0
|
251 long *inactive_since;
|
wolffd@0
|
252 long deactnum;
|
wolffd@0
|
253 double **a_history; /* for shrinking with non-linear kernel */
|
wolffd@0
|
254 long maxhistory;
|
wolffd@0
|
255 double *last_a; /* for shrinking with linear kernel */
|
wolffd@0
|
256 double *last_lin; /* for shrinking with linear kernel */
|
wolffd@0
|
257 } SHRINK_STATE;
|
wolffd@0
|
258
|
wolffd@0
|
259 double classify_example(MODEL *, DOC *);
|
wolffd@0
|
260 double classify_example_linear(MODEL *, DOC *);
|
wolffd@0
|
261 CFLOAT kernel(KERNEL_PARM *, DOC *, DOC *);
|
wolffd@0
|
262 CFLOAT single_kernel(KERNEL_PARM *, SVECTOR *, SVECTOR *);
|
wolffd@0
|
263 double custom_kernel(KERNEL_PARM *, SVECTOR *, SVECTOR *);
|
wolffd@0
|
264 SVECTOR *create_svector(WORD *, char *, double);
|
wolffd@0
|
265 SVECTOR *copy_svector(SVECTOR *);
|
wolffd@0
|
266 void free_svector(SVECTOR *);
|
wolffd@0
|
267 double sprod_ss(SVECTOR *, SVECTOR *);
|
wolffd@0
|
268 SVECTOR* sub_ss(SVECTOR *, SVECTOR *);
|
wolffd@0
|
269 SVECTOR* add_ss(SVECTOR *, SVECTOR *);
|
wolffd@0
|
270 SVECTOR* add_list_ss(SVECTOR *);
|
wolffd@0
|
271 void append_svector_list(SVECTOR *a, SVECTOR *b);
|
wolffd@0
|
272 SVECTOR* smult_s(SVECTOR *, double);
|
wolffd@0
|
273 int featvec_eq(SVECTOR *, SVECTOR *);
|
wolffd@0
|
274 double model_length_s(MODEL *, KERNEL_PARM *);
|
wolffd@0
|
275 void clear_vector_n(double *, long);
|
wolffd@0
|
276 void add_vector_ns(double *, SVECTOR *, double);
|
wolffd@0
|
277 double sprod_ns(double *, SVECTOR *);
|
wolffd@0
|
278 void add_weight_vector_to_linear_model(MODEL *);
|
wolffd@0
|
279 DOC *create_example(long, long, long, double, SVECTOR *);
|
wolffd@0
|
280 void free_example(DOC *, long);
|
wolffd@0
|
281 MODEL *read_model(char *);
|
wolffd@0
|
282 MODEL *copy_model(MODEL *);
|
wolffd@0
|
283 void free_model(MODEL *, int);
|
wolffd@0
|
284 void read_documents(char *, DOC ***, double **, long *, long *);
|
wolffd@0
|
285 int parse_document(char *, WORD *, double *, long *, long *, double *, long *, long, char **);
|
wolffd@0
|
286 double *read_alphas(char *,long);
|
wolffd@0
|
287 void nol_ll(char *, long *, long *, long *);
|
wolffd@0
|
288 long minl(long, long);
|
wolffd@0
|
289 long maxl(long, long);
|
wolffd@0
|
290 long get_runtime(void);
|
wolffd@0
|
291 int space_or_null(int);
|
wolffd@0
|
292 void *my_malloc(size_t);
|
wolffd@0
|
293 void copyright_notice(void);
|
wolffd@0
|
294 # ifdef _MSC_VER
|
wolffd@0
|
295 int isnan(double);
|
wolffd@0
|
296 # endif
|
wolffd@0
|
297
|
wolffd@0
|
298 extern long verbosity; /* verbosity level (0-4) */
|
wolffd@0
|
299 extern long kernel_cache_statistic;
|
wolffd@0
|
300
|
wolffd@0
|
301 #endif
|