/************************************************************************/
/*                                                                      */
/*   svm_common.h                                                      */
/*                                                                      */
/*   Definitions and functions used in both svm_learn and svm_classify.*/
/*                                                                      */
/*   Author: Thorsten Joachims                                          */
/*   Date: 02.07.02                                                     */
/*                                                                      */
/*   Copyright (c) 2002  Thorsten Joachims - All rights reserved        */
/*                                                                      */
/*   This software is available for non-commercial use only. It must   */
/*   not be modified and distributed without prior permission of the   */
/*   author. The author is not responsible for implications from the   */
/*   use of this software.                                              */
/*                                                                      */
/************************************************************************/

#ifndef SVM_COMMON
#define SVM_COMMON

# define MAXSHRINK     50000    /* maximum number of shrinking rounds */
# define MAXFEATNUM 99999999    /* maximum feature number (must be in
                                   valid range of long int type!) */

/* NOTE(review): the original include targets were stripped by text
   mangling; restored to the standard SVM-light header set. */
# include <stdio.h>
# include <ctype.h>
# include <math.h>
# include <string.h>
# include <stdlib.h>
# include <time.h>
# include <float.h>

# define VERSION       "V6.01"
# define VERSION_DATE  "01.09.04"

# define CFLOAT  float          /* the type of float to use for caching */
                                /* kernel evaluations. Using float saves */
                                /* us some memory, but you can use double, too */
# define FNUM    long           /* the type used for storing feature ids */
# define FVAL    float          /* the type used for storing feature values */

# define LINEAR  0              /* linear kernel type */
# define POLY    1              /* polynomial kernel type */
# define RBF     2              /* rbf kernel type */
# define SIGMOID 3              /* sigmoid kernel type */

# define CLASSIFICATION 1      /* train classification model */
# define REGRESSION     2      /* train regression model */
# define RANKING        3      /* train ranking model */
# define OPTIMIZATION   4      /* train on general set of constraints */

/* A single feature/value pair within a sparse feature vector. */
typedef struct word {
  FNUM    wnum;                /* word number */
  FVAL    weight;              /* word weight */
} WORD;

/* Sparse feature vector; may be linked into a list for constraints
   that are sums of several vectors. */
typedef struct svector {
  WORD    *words;              /* The features/values in the vector by
                                  increasing feature-number. Feature
                                  numbers that are skipped are
                                  interpreted as having value zero. */
  double  twonorm_sq;          /* The squared euclidian length of the
                                  vector. Used to speed up the RBF kernel. */
  char    *userdefined;        /* You can put additional information
                                  here. This can be useful, if you are
                                  implementing your own kernel that
                                  does not work with feature/values
                                  representations (for example a
                                  string kernel). By default,
                                  svm-light will put here the string
                                  after the # sign from each line of
                                  the input file. */
  long    kernel_id;           /* Feature vectors with different
                                  kernel_id's are orthogonal (ie. the
                                  feature numbers do not match). This
                                  is used for computing component
                                  kernels for linear constraints which
                                  are a sum of several different
                                  weight vectors. (currently not
                                  implemented). */
  struct svector *next;        /* Lets you set up a list of SVECTOR's
                                  for linear constraints which are a
                                  sum of multiple feature
                                  vectors. List is terminated by
                                  NULL. */
  double  factor;              /* Factor by which this feature vector
                                  is multiplied in the sum. */
} SVECTOR;

/* One training/classification example (document). */
typedef struct doc {
  long    docnum;              /* Document ID. This has to be the position of
                                  the document in the training set array. */
  long    queryid;             /* for learning rankings, constraints are
                                  generated for documents with the same
                                  queryID. */
  double  costfactor;          /* Scales the cost of misclassifying this
                                  document by this factor. The effect of this
                                  value is, that the upper bound on the alpha
                                  for this example is scaled by this factor.
                                  The factors are set by the feature
                                  'cost:<value>' in the training data. */
  long    slackid;             /* Index of the slack variable
                                  corresponding to this
                                  constraint. All constraints with the
                                  same slackid share the same slack
                                  variable. This can only be used for
                                  svm_learn_optimization. */
  SVECTOR *fvec;               /* Feature vector of the example. The
                                  feature vector can actually be a
                                  list of feature vectors. For
                                  example, the list will have two
                                  elements, if this DOC is a
                                  preference constraint. The one
                                  vector that is supposed to be ranked
                                  higher, will have a factor of +1,
                                  the lower ranked one should have a
                                  factor of -1. */
} DOC;

/* All tunable parameters of the learning algorithm. */
typedef struct learn_parm {
  long   type;                 /* selects between regression and
                                  classification */
  double svm_c;                /* upper bound C on alphas */
  double eps;                  /* regression epsilon (eps=1.0 for
                                  classification */
  double svm_costratio;        /* factor to multiply C for positive examples */
  double transduction_posratio;/* fraction of unlabeled examples to be */
                               /* classified as positives */
  long   biased_hyperplane;    /* if nonzero, use hyperplane w*x+b=0
                                  otherwise w*x=0 */
  long   sharedslack;          /* if nonzero, it will use the shared
                                  slack variable mode in
                                  svm_learn_optimization. It requires
                                  that the slackid is set for every
                                  training example */
  long   svm_maxqpsize;        /* size q of working set */
  long   svm_newvarsinqp;      /* new variables to enter the working set
                                  in each iteration */
  long   kernel_cache_size;    /* size of kernel cache in megabytes */
  double epsilon_crit;         /* tolerable error for distances used
                                  in stopping criterion */
  double epsilon_shrink;       /* how much a multiplier should be above
                                  zero for shrinking */
  long   svm_iter_to_shrink;   /* iterations h after which an example can
                                  be removed by shrinking */
  long   maxiter;              /* number of iterations after which the
                                  optimizer terminates, if there was
                                  no progress in maxdiff */
  long   remove_inconsistent;  /* exclude examples with alpha at C and
                                  retrain */
  long   skip_final_opt_check; /* do not check KT-Conditions at the end of
                                  optimization for examples removed by
                                  shrinking. WARNING: This might lead to
                                  sub-optimal solutions! */
  long   compute_loo;          /* if nonzero, computes leave-one-out
                                  estimates */
  double rho;                  /* parameter in xi/alpha-estimates and for
                                  pruning leave-one-out range [1..2] */
  long   xa_depth;             /* parameter in xi/alpha-estimates upper
                                  bounding the number of SV the current
                                  alpha_t is distributed over */
  char predfile[200];          /* file for predictions on unlabeled examples
                                  in transduction */
  char alphafile[200];         /* file to store optimal alphas in. use
                                  empty string if alphas should not be
                                  output */

  /* you probably do not want to touch the following */
  double epsilon_const;        /* tolerable error on eq-constraint */
  double epsilon_a;            /* tolerable error on alphas at bounds */
  double opt_precision;        /* precision of solver, set to e.g. 1e-21
                                  if you get convergence problems */

  /* the following are only for internal use */
  long   svm_c_steps;          /* do so many steps for finding optimal C */
  double svm_c_factor;         /* increase C by this factor every step */
  double svm_costratio_unlab;
  double svm_unlabbound;
  double *svm_cost;            /* individual upper bounds for each var */
  long   totwords;             /* number of features */
} LEARN_PARM;

/* Kernel selection and its hyper-parameters. */
typedef struct kernel_parm {
  long    kernel_type;         /* 0=linear, 1=poly, 2=rbf, 3=sigmoid,
                                  4=custom */
  long    poly_degree;
  double  rbf_gamma;
  double  coef_lin;
  double  coef_const;
  char    custom[50];          /* for user supplied kernel */
} KERNEL_PARM;

/* A trained SVM model (support vectors, alphas, bias, etc.). */
typedef struct model {
  long    sv_num;
  long    at_upper_bound;
  double  b;
  DOC     **supvec;
  double  *alpha;
  long    *index;              /* index from docnum to position in model */
  long    totwords;            /* number of features */
  long    totdoc;              /* number of training documents */
  KERNEL_PARM kernel_parm;     /* kernel */

  /* the following values are not written to file */
  double  loo_error,loo_recall,loo_precision; /* leave-one-out estimates */
  double  xa_error,xa_recall,xa_precision;    /* xi/alpha estimates */
  double  *lin_weights;        /* weights for linear case using
                                  folding */
  double  maxdiff;             /* precision, up to which this
                                  model is accurate */
} MODEL;

/* A quadratic program in the standard form used by the optimizer. */
typedef struct quadratic_program {
  long   opt_n;                /* number of variables */
  long   opt_m;                /* number of linear equality constraints */
  double *opt_ce,*opt_ce0;     /* linear equality constraints */
  double *opt_g;               /* hessian of objective */
  double *opt_g0;              /* linear part of objective */
  double *opt_xinit;           /* initial value for variables */
  double *opt_low,*opt_up;     /* box constraints */
} QP;

/* LRU cache of kernel evaluations. */
typedef struct kernel_cache {
  long   *index;               /* cache some kernel evaluations */
  CFLOAT *buffer;              /* to improve speed */
  long   *invindex;
  long   *active2totdoc;
  long   *totdoc2active;
  long   *lru;
  long   *occu;
  long   elems;
  long   max_elems;
  long   time;
  long   activenum;
  long   buffsize;
} KERNEL_CACHE;


/* Accumulated wall-clock time spent in each phase of training. */
typedef struct timing_profile {
  long   time_kernel;
  long   time_opti;
  long   time_shrink;
  long   time_update;
  long   time_model;
  long   time_check;
  long   time_select;
} TIMING;

/* Bookkeeping for the variable-shrinking heuristic. */
typedef struct shrink_state {
  long   *active;
  long   *inactive_since;
  long   deactnum;
  double **a_history;          /* for shrinking with non-linear kernel */
  long   maxhistory;
  double *last_a;              /* for shrinking with linear kernel */
  double *last_lin;            /* for shrinking with linear kernel */
} SHRINK_STATE;

double classify_example(MODEL *, DOC *);
double classify_example_linear(MODEL *, DOC *);
CFLOAT kernel(KERNEL_PARM *, DOC *, DOC *);
CFLOAT single_kernel(KERNEL_PARM *, SVECTOR *, SVECTOR *);
double custom_kernel(KERNEL_PARM *, SVECTOR *, SVECTOR *);
SVECTOR *create_svector(WORD *, char *, double);
SVECTOR *copy_svector(SVECTOR *);
void   free_svector(SVECTOR *);
double sprod_ss(SVECTOR *, SVECTOR *);
SVECTOR* sub_ss(SVECTOR *, SVECTOR *);
SVECTOR* add_ss(SVECTOR *, SVECTOR *);
SVECTOR* add_list_ss(SVECTOR *);
void   append_svector_list(SVECTOR *a, SVECTOR *b);
SVECTOR* smult_s(SVECTOR *, double);
int    featvec_eq(SVECTOR *, SVECTOR *);
double model_length_s(MODEL *, KERNEL_PARM *);
void   clear_vector_n(double *, long);
void   add_vector_ns(double *, SVECTOR *, double);
double sprod_ns(double *, SVECTOR *);
void   add_weight_vector_to_linear_model(MODEL *);
DOC    *create_example(long, long, long, double, SVECTOR *);
void   free_example(DOC *, long);
MODEL  *read_model(char *);
MODEL  *copy_model(MODEL *);
void   free_model(MODEL *, int);
void   read_documents(char *, DOC ***, double **, long *, long *);
int    parse_document(char *, WORD *, double *, long *, long *, double *, long *, long, char **);
double *read_alphas(char *,long);
void   nol_ll(char *, long *, long *, long *);
long   minl(long, long);
long   maxl(long, long);
long   get_runtime(void);
int    space_or_null(int);
void   *my_malloc(size_t);
void   copyright_notice(void);
# ifdef _MSC_VER
   /* MSVC lacks a C89 isnan(); SVM-light supplies its own. */
   int isnan(double);
# endif

extern long   verbosity;              /* verbosity level (0-4) */
extern long   kernel_cache_statistic;

#endif