/************************************************************************/
/*                                                                      */
/*   svm_common.h                                                       */
/*                                                                      */
/*   Definitions and functions used in both svm_learn and svm_classify.*/
/*                                                                      */
/*   Author: Thorsten Joachims                                          */
/*   Date: 02.07.02                                                     */
/*                                                                      */
/*   Copyright (c) 2002  Thorsten Joachims - All rights reserved        */
/*                                                                      */
/*   This software is available for non-commercial use only. It must   */
/*   not be modified and distributed without prior permission of the   */
/*   author. The author is not responsible for implications from the   */
/*   use of this software.                                              */
/*                                                                      */
/************************************************************************/

#ifndef SVM_COMMON
#define SVM_COMMON

# define MAXSHRINK     50000    /* maximum number of shrinking rounds */
# define MAXFEATNUM 99999999    /* maximum feature number (must be in
                                   valid range of long int type!) */

# include <stdio.h>
# include <ctype.h>
# include <math.h>
# include <string.h>
# include <stdlib.h>
# include <time.h>
# include <float.h>

# define VERSION       "V6.01"
# define VERSION_DATE  "01.09.04"

# define CFLOAT  float       /* the type of float to use for caching
                                kernel evaluations. Using float saves
                                us some memory, but you can use double, too */
# define FNUM    long        /* the type used for storing feature ids */
# define FVAL    float       /* the type used for storing feature values */

# define LINEAR  0           /* linear kernel type */
# define POLY    1           /* polynomial kernel type */
# define RBF     2           /* rbf kernel type */
# define SIGMOID 3           /* sigmoid kernel type */

# define CLASSIFICATION 1    /* train classification model */
# define REGRESSION     2    /* train regression model */
# define RANKING        3    /* train ranking model */
# define OPTIMIZATION   4    /* train on general set of constraints */

typedef struct word {
  FNUM    wnum;              /* word number */
  FVAL    weight;            /* word weight */
} WORD;

typedef struct svector {
  WORD    *words;            /* The features/values in the vector by
                                increasing feature number. Feature
                                numbers that are skipped are
                                interpreted as having value zero. */
  double  twonorm_sq;        /* The squared Euclidean length of the
                                vector. Used to speed up the RBF kernel. */
  char    *userdefined;      /* You can put additional information
                                here. This can be useful if you are
                                implementing your own kernel that
                                does not work with feature/value
                                representations (for example a
                                string kernel). By default,
                                svm-light will put here the string
                                after the # sign from each line of
                                the input file. */
  long    kernel_id;         /* Feature vectors with different
                                kernel_ids are orthogonal (i.e. the
                                feature numbers do not match). This
                                is used for computing component
                                kernels for linear constraints which
                                are a sum of several different
                                weight vectors. (currently not
                                implemented) */
  struct svector *next;      /* Lets you set up a list of SVECTORs
                                for linear constraints which are a
                                sum of multiple feature
                                vectors. The list is terminated by
                                NULL. */
  double  factor;            /* Factor by which this feature vector
                                is multiplied in the sum. */
} SVECTOR;
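/* Usage sketch (illustrative): building the sparse vector "1:0.5 3:2.0"
   with create_svector(), declared below. The WORD array is terminated by
   an entry with wnum 0, matching the convention produced by
   read_documents(); the second argument is the userdefined string, the
   third the factor.

     WORD words[3];
     words[0].wnum = 1;  words[0].weight = 0.5;
     words[1].wnum = 3;  words[1].weight = 2.0;
     words[2].wnum = 0;  words[2].weight = 0;       <- terminator
     {
       SVECTOR *vec = create_svector(words, "", 1.0);
       double norm_sq = sprod_ss(vec, vec);         <- same as vec->twonorm_sq
       free_svector(vec);                           <- frees the whole list
     }
*/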
typedef struct doc {
  long    docnum;            /* Document ID. This has to be the position of
                                the document in the training set array. */
  long    queryid;           /* for learning rankings, constraints are
                                generated for documents with the same
                                queryid. */
  double  costfactor;        /* Scales the cost of misclassifying this
                                document by this factor. The effect of this
                                value is that the upper bound on the alpha
                                for this example is scaled by this factor.
                                The factors are set by the feature
                                'cost:<value>' in the training data. */
  long    slackid;           /* Index of the slack variable
                                corresponding to this
                                constraint. All constraints with the
                                same slackid share the same slack
                                variable. This can only be used for
                                svm_learn_optimization. */
  SVECTOR *fvec;             /* Feature vector of the example. The
                                feature vector can actually be a
                                list of feature vectors. For
                                example, the list will have two
                                elements if this DOC is a
                                preference constraint. The vector
                                that should be ranked higher has a
                                factor of +1; the lower-ranked one
                                has a factor of -1. */
} DOC;
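/* Usage sketch (illustrative): encoding the ranking preference "document A
   should rank above document B" as a single DOC, as described for fvec
   above. The two vectors are chained via the next pointer with factors +1
   and -1; words_a and words_b are placeholder 0-terminated WORD arrays.

     SVECTOR *better = create_svector(words_a, "", 1.0);
     SVECTOR *worse  = create_svector(words_b, "", -1.0);
     better->next = worse;
     DOC *pref = create_example(0, 1, 0, 1.0, better);
                 (docnum, queryid, slackid, costfactor, fvec)
     free_example(pref, 1);   <- deep-frees the vector list as well
*/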
typedef struct learn_parm {
  long   type;                 /* selects between regression and
                                  classification */
  double svm_c;                /* upper bound C on alphas */
  double eps;                  /* regression epsilon (eps=1.0 for
                                  classification) */
  double svm_costratio;        /* factor to multiply C for positive examples */
  double transduction_posratio;/* fraction of unlabeled examples to be
                                  classified as positives */
  long   biased_hyperplane;    /* if nonzero, use hyperplane w*x+b=0,
                                  otherwise w*x=0 */
  long   sharedslack;          /* if nonzero, use the shared slack
                                  variable mode in
                                  svm_learn_optimization. Requires
                                  that the slackid is set for every
                                  training example. */
  long   svm_maxqpsize;        /* size q of working set */
  long   svm_newvarsinqp;      /* new variables to enter the working set
                                  in each iteration */
  long   kernel_cache_size;    /* size of kernel cache in megabytes */
  double epsilon_crit;         /* tolerable error for distances used
                                  in stopping criterion */
  double epsilon_shrink;       /* how much a multiplier should be above
                                  zero for shrinking */
  long   svm_iter_to_shrink;   /* iterations h after which an example can
                                  be removed by shrinking */
  long   maxiter;              /* number of iterations after which the
                                  optimizer terminates, if there was
                                  no progress in maxdiff */
  long   remove_inconsistent;  /* exclude examples with alpha at C and
                                  retrain */
  long   skip_final_opt_check; /* do not check KT conditions at the end of
                                  optimization for examples removed by
                                  shrinking. WARNING: This might lead to
                                  sub-optimal solutions! */
  long   compute_loo;          /* if nonzero, computes leave-one-out
                                  estimates */
  double rho;                  /* parameter in xi/alpha-estimates and for
                                  pruning leave-one-out range [1..2] */
  long   xa_depth;             /* parameter in xi/alpha-estimates upper
                                  bounding the number of SVs the current
                                  alpha_t is distributed over */
  char   predfile[200];        /* file for predictions on unlabeled examples
                                  in transduction */
  char   alphafile[200];       /* file to store optimal alphas in. Use an
                                  empty string if alphas should not be
                                  output. */

  /* you probably do not want to touch the following */
  double epsilon_const;        /* tolerable error on eq-constraint */
  double epsilon_a;            /* tolerable error on alphas at bounds */
  double opt_precision;        /* precision of solver, set to e.g. 1e-21
                                  if you get convergence problems */

  /* the following are only for internal use */
  long   svm_c_steps;          /* do so many steps for finding optimal C */
  double svm_c_factor;         /* increase C by this factor every step */
  double svm_costratio_unlab;
  double svm_unlabbound;
  double *svm_cost;            /* individual upper bounds for each var */
  long   totwords;             /* number of features */
} LEARN_PARM;

typedef struct kernel_parm {
  long   kernel_type;          /* 0=linear, 1=poly, 2=rbf, 3=sigmoid, 4=custom */
  long   poly_degree;
  double rbf_gamma;
  double coef_lin;
  double coef_const;
  char   custom[50];           /* for user-supplied kernel */
} KERNEL_PARM;

typedef struct model {
  long    sv_num;
  long    at_upper_bound;
  double  b;
  DOC     **supvec;
  double  *alpha;
  long    *index;              /* index from docnum to position in model */
  long    totwords;            /* number of features */
  long    totdoc;              /* number of training documents */
  KERNEL_PARM kernel_parm;     /* kernel */

  /* the following values are not written to file */
  double  loo_error,loo_recall,loo_precision; /* leave-one-out estimates */
  double  xa_error,xa_recall,xa_precision;    /* xi/alpha estimates */
  double  *lin_weights;        /* weights for linear case using
                                  folding */
  double  maxdiff;             /* precision, up to which this
                                  model is accurate */
} MODEL;

typedef struct quadratic_program {
  long   opt_n;                /* number of variables */
  long   opt_m;                /* number of linear equality constraints */
  double *opt_ce,*opt_ce0;     /* linear equality constraints */
  double *opt_g;               /* hessian of objective */
  double *opt_g0;              /* linear part of objective */
  double *opt_xinit;           /* initial value for variables */
  double *opt_low,*opt_up;     /* box constraints */
} QP;

typedef struct kernel_cache {
  long   *index;               /* cache some kernel evaluations */
  CFLOAT *buffer;              /* to improve speed */
  long   *invindex;
  long   *active2totdoc;
  long   *totdoc2active;
  long   *lru;
  long   *occu;
  long   elems;
  long   max_elems;
  long   time;
  long   activenum;
  long   buffsize;
} KERNEL_CACHE;
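/* Usage sketch (illustrative): filling in KERNEL_PARM for an RBF kernel,
   K(a,b) = exp(-rbf_gamma * ||a-b||^2), and evaluating it on two DOCs with
   kernel(), declared below. The numeric values are arbitrary example
   settings, not recommended defaults; doc_a and doc_b are placeholders.

     KERNEL_PARM kparm;
     kparm.kernel_type = RBF;         <- 2, see the defines above
     kparm.poly_degree = 3;           <- ignored by RBF
     kparm.rbf_gamma   = 0.1;
     kparm.coef_lin    = 1.0;         <- ignored by RBF
     kparm.coef_const  = 1.0;         <- ignored by RBF
     strcpy(kparm.custom, "empty");
     {
       CFLOAT k = kernel(&kparm, doc_a, doc_b);
     }
*/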
typedef struct timing_profile {
  long   time_kernel;
  long   time_opti;
  long   time_shrink;
  long   time_update;
  long   time_model;
  long   time_check;
  long   time_select;
} TIMING;

typedef struct shrink_state {
  long   *active;
  long   *inactive_since;
  long   deactnum;
  double **a_history;          /* for shrinking with non-linear kernel */
  long   maxhistory;
  double *last_a;              /* for shrinking with linear kernel */
  double *last_lin;            /* for shrinking with linear kernel */
} SHRINK_STATE;

double classify_example(MODEL *, DOC *);
double classify_example_linear(MODEL *, DOC *);
CFLOAT kernel(KERNEL_PARM *, DOC *, DOC *);
CFLOAT single_kernel(KERNEL_PARM *, SVECTOR *, SVECTOR *);
double custom_kernel(KERNEL_PARM *, SVECTOR *, SVECTOR *);
SVECTOR *create_svector(WORD *, char *, double);
SVECTOR *copy_svector(SVECTOR *);
void   free_svector(SVECTOR *);
double sprod_ss(SVECTOR *, SVECTOR *);
SVECTOR* sub_ss(SVECTOR *, SVECTOR *);
SVECTOR* add_ss(SVECTOR *, SVECTOR *);
SVECTOR* add_list_ss(SVECTOR *);
void   append_svector_list(SVECTOR *a, SVECTOR *b);
SVECTOR* smult_s(SVECTOR *, double);
int    featvec_eq(SVECTOR *, SVECTOR *);
double model_length_s(MODEL *, KERNEL_PARM *);
void   clear_vector_n(double *, long);
void   add_vector_ns(double *, SVECTOR *, double);
double sprod_ns(double *, SVECTOR *);
void   add_weight_vector_to_linear_model(MODEL *);
DOC    *create_example(long, long, long, double, SVECTOR *);
void   free_example(DOC *, long);
MODEL  *read_model(char *);
MODEL  *copy_model(MODEL *);
void   free_model(MODEL *, int);
void   read_documents(char *, DOC ***, double **, long *, long *);
int    parse_document(char *, WORD *, double *, long *, long *, double *, long *, long, char **);
double *read_alphas(char *, long);
void   nol_ll(char *, long *, long *, long *);
long   minl(long, long);
long   maxl(long, long);
long   get_runtime(void);
int    space_or_null(int);
void   *my_malloc(size_t);
void   copyright_notice(void);
# ifdef _MSC_VER
  int   isnan(double);
# endif

extern long   verbosity;              /* verbosity level (0-4) */
extern long   kernel_cache_statistic;

#endif
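/* Usage sketch (illustrative): the typical classification flow using the
   declarations above. "svm_model" and words are placeholders. For linear
   models, folding the support vectors into a single weight vector first
   enables the fast path via classify_example_linear().

     MODEL *model = read_model("svm_model");
     DOC   *doc;
     double dist;
     doc = create_example(-1, 0, 0, 0.0, create_svector(words, "", 1.0));
     if(model->kernel_parm.kernel_type == LINEAR) {
       add_weight_vector_to_linear_model(model);
       dist = classify_example_linear(model, doc);
     }
     else
       dist = classify_example(model, doc);
     (the sign of dist gives the predicted class)
     free_example(doc, 1);
     free_model(model, 1);
*/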