wolffd@0: /***********************************************************************/ wolffd@0: /* */ wolffd@0: /* svm_learn.h */ wolffd@0: /* */ wolffd@0: /* Declarations for learning module of Support Vector Machine. */ wolffd@0: /* */ wolffd@0: /* Author: Thorsten Joachims */ wolffd@0: /* Date: 02.07.02 */ wolffd@0: /* */ wolffd@0: /* Copyright (c) 2002 Thorsten Joachims - All rights reserved */ wolffd@0: /* */ wolffd@0: /* This software is available for non-commercial use only. It must */ wolffd@0: /* not be modified and distributed without prior permission of the */ wolffd@0: /* author. The author is not responsible for implications from the */ wolffd@0: /* use of this software. */ wolffd@0: /* */ wolffd@0: /***********************************************************************/ wolffd@0: wolffd@0: #ifndef SVM_LEARN wolffd@0: #define SVM_LEARN wolffd@0: wolffd@0: void svm_learn_classification(DOC **, double *, long, long, LEARN_PARM *, wolffd@0: KERNEL_PARM *, KERNEL_CACHE *, MODEL *, wolffd@0: double *); wolffd@0: void svm_learn_regression(DOC **, double *, long, long, LEARN_PARM *, wolffd@0: KERNEL_PARM *, KERNEL_CACHE **, MODEL *); wolffd@0: void svm_learn_ranking(DOC **, double *, long, long, LEARN_PARM *, wolffd@0: KERNEL_PARM *, KERNEL_CACHE **, MODEL *); wolffd@0: void svm_learn_optimization(DOC **, double *, long, long, LEARN_PARM *, wolffd@0: KERNEL_PARM *, KERNEL_CACHE *, MODEL *, wolffd@0: double *); wolffd@0: long optimize_to_convergence(DOC **, long *, long, long, LEARN_PARM *, wolffd@0: KERNEL_PARM *, KERNEL_CACHE *, SHRINK_STATE *, wolffd@0: MODEL *, long *, long *, double *, wolffd@0: double *, double *, wolffd@0: TIMING *, double *, long, long); wolffd@0: long optimize_to_convergence_sharedslack(DOC **, long *, long, long, wolffd@0: LEARN_PARM *, wolffd@0: KERNEL_PARM *, KERNEL_CACHE *, SHRINK_STATE *, wolffd@0: MODEL *, double *, double *, double *, wolffd@0: TIMING *, double *); wolffd@0: double compute_objective_function(double *, double *, double *, double, wolffd@0: long *, long *); wolffd@0: void clear_index(long *); wolffd@0: void add_to_index(long *, long); wolffd@0: long compute_index(long *,long, long *); wolffd@0: void optimize_svm(DOC **, long *, long *, long *, double, long *, long *, wolffd@0: MODEL *, wolffd@0: long, long *, long, double *, double *, double *, wolffd@0: LEARN_PARM *, CFLOAT *, KERNEL_PARM *, QP *, double *); wolffd@0: void compute_matrices_for_optimization(DOC **, long *, long *, long *, double, wolffd@0: long *, wolffd@0: long *, long *, MODEL *, double *, wolffd@0: double *, double *, long, long, LEARN_PARM *, wolffd@0: CFLOAT *, KERNEL_PARM *, QP *); wolffd@0: long calculate_svm_model(DOC **, long *, long *, double *, double *, wolffd@0: double *, double *, LEARN_PARM *, long *, wolffd@0: long *, MODEL *); wolffd@0: long check_optimality(MODEL *, long *, long *, double *, double *, wolffd@0: double *, long, wolffd@0: LEARN_PARM *,double *, double, long *, long *, long *, wolffd@0: long *, long, KERNEL_PARM *); wolffd@0: long check_optimality_sharedslack(DOC **docs, MODEL *model, long int *label, wolffd@0: double *a, double *lin, double *c, double *slack, wolffd@0: double *alphaslack, long int totdoc, wolffd@0: LEARN_PARM *learn_parm, double *maxdiff, wolffd@0: double epsilon_crit_org, long int *misclassified, wolffd@0: long int *active2dnum, wolffd@0: long int *last_suboptimal_at, wolffd@0: long int iteration, KERNEL_PARM *kernel_parm); wolffd@0: void compute_shared_slacks(DOC **docs, long int *label, double *a, wolffd@0: double *lin, double *c, long int *active2dnum, wolffd@0: LEARN_PARM *learn_parm, wolffd@0: double *slack, double *alphaslack); wolffd@0: long identify_inconsistent(double *, long *, long *, long, LEARN_PARM *, wolffd@0: long *, long *); wolffd@0: long identify_misclassified(double *, long *, long *, long, wolffd@0: MODEL *, long *, long *); wolffd@0: long identify_one_misclassified(double *, long *, long *, long, wolffd@0: MODEL *, long *, long *); wolffd@0: long incorporate_unlabeled_examples(MODEL *, long *,long *, long *, wolffd@0: double *, double *, long, double *, wolffd@0: long *, long *, long, KERNEL_PARM *, wolffd@0: LEARN_PARM *); wolffd@0: void update_linear_component(DOC **, long *, long *, double *, double *, wolffd@0: long *, long, long, KERNEL_PARM *, wolffd@0: KERNEL_CACHE *, double *, wolffd@0: CFLOAT *, double *); wolffd@0: long select_next_qp_subproblem_grad(long *, long *, double *, wolffd@0: double *, double *, long, wolffd@0: long, LEARN_PARM *, long *, long *, wolffd@0: long *, double *, long *, KERNEL_CACHE *, wolffd@0: long, long *, long *); wolffd@0: long select_next_qp_subproblem_rand(long *, long *, double *, wolffd@0: double *, double *, long, wolffd@0: long, LEARN_PARM *, long *, long *, wolffd@0: long *, double *, long *, KERNEL_CACHE *, wolffd@0: long *, long *, long); wolffd@0: long select_next_qp_slackset(DOC **docs, long int *label, double *a, wolffd@0: double *lin, double *slack, double *alphaslack, wolffd@0: double *c, LEARN_PARM *learn_parm, wolffd@0: long int *active2dnum, double *maxviol); wolffd@0: void select_top_n(double *, long, long *, long); wolffd@0: void init_shrink_state(SHRINK_STATE *, long, long); wolffd@0: void shrink_state_cleanup(SHRINK_STATE *); wolffd@0: long shrink_problem(DOC **, LEARN_PARM *, SHRINK_STATE *, KERNEL_PARM *, wolffd@0: long *, long *, long, long, long, double *, long *); wolffd@0: void reactivate_inactive_examples(long *, long *, double *, SHRINK_STATE *, wolffd@0: double *, double*, long, long, long, LEARN_PARM *, wolffd@0: long *, DOC **, KERNEL_PARM *, wolffd@0: KERNEL_CACHE *, MODEL *, CFLOAT *, wolffd@0: double *, double *); wolffd@0: wolffd@0: /* cache kernel evalutations to improve speed */ wolffd@0: KERNEL_CACHE *kernel_cache_init(long, long); wolffd@0: void kernel_cache_cleanup(KERNEL_CACHE *); wolffd@0: void get_kernel_row(KERNEL_CACHE *,DOC **, long, long, long *, CFLOAT *, wolffd@0: KERNEL_PARM *); wolffd@0: void cache_kernel_row(KERNEL_CACHE *,DOC **, long, KERNEL_PARM *); wolffd@0: void cache_multiple_kernel_rows(KERNEL_CACHE *,DOC **, long *, long, wolffd@0: KERNEL_PARM *); wolffd@0: void kernel_cache_shrink(KERNEL_CACHE *,long, long, long *); wolffd@0: void kernel_cache_reset_lru(KERNEL_CACHE *); wolffd@0: long kernel_cache_malloc(KERNEL_CACHE *); wolffd@0: void kernel_cache_free(KERNEL_CACHE *,long); wolffd@0: long kernel_cache_free_lru(KERNEL_CACHE *); wolffd@0: CFLOAT *kernel_cache_clean_and_malloc(KERNEL_CACHE *,long); wolffd@0: long kernel_cache_touch(KERNEL_CACHE *,long); wolffd@0: long kernel_cache_check(KERNEL_CACHE *,long); wolffd@0: long kernel_cache_space_available(KERNEL_CACHE *); wolffd@0: wolffd@0: void compute_xa_estimates(MODEL *, long *, long *, long, DOC **, wolffd@0: double *, double *, KERNEL_PARM *, wolffd@0: LEARN_PARM *, double *, double *, double *); wolffd@0: double xa_estimate_error(MODEL *, long *, long *, long, DOC **, wolffd@0: double *, double *, KERNEL_PARM *, wolffd@0: LEARN_PARM *); wolffd@0: double xa_estimate_recall(MODEL *, long *, long *, long, DOC **, wolffd@0: double *, double *, KERNEL_PARM *, wolffd@0: LEARN_PARM *); wolffd@0: double xa_estimate_precision(MODEL *, long *, long *, long, DOC **, wolffd@0: double *, double *, KERNEL_PARM *, wolffd@0: LEARN_PARM *); wolffd@0: void avg_similarity_of_sv_of_one_class(MODEL *, DOC **, double *, long *, KERNEL_PARM *, double *, double *); wolffd@0: double most_similar_sv_of_same_class(MODEL *, DOC **, double *, long, long *, KERNEL_PARM *, LEARN_PARM *); wolffd@0: double distribute_alpha_t_greedily(long *, long, DOC **, double *, long, long *, KERNEL_PARM *, LEARN_PARM *, double); wolffd@0: double distribute_alpha_t_greedily_noindex(MODEL *, DOC **, double *, long, long *, KERNEL_PARM *, LEARN_PARM *, double); wolffd@0: void estimate_transduction_quality(MODEL *, long *, long *, long, DOC **, double *); wolffd@0: double estimate_margin_vcdim(MODEL *, double, double, KERNEL_PARM *); wolffd@0: double estimate_sphere(MODEL *, KERNEL_PARM *); wolffd@0: double estimate_r_delta_average(DOC **, long, KERNEL_PARM *); wolffd@0: double estimate_r_delta(DOC **, long, KERNEL_PARM *); wolffd@0: double length_of_longest_document_vector(DOC **, long, KERNEL_PARM *); wolffd@0: wolffd@0: void write_model(char *, MODEL *); wolffd@0: void write_prediction(char *, MODEL *, double *, double *, long *, long *, wolffd@0: long, LEARN_PARM *); wolffd@0: void write_alphas(char *, double *, long *, long); wolffd@0: wolffd@0: typedef struct cache_parm_s { wolffd@0: KERNEL_CACHE *kernel_cache; wolffd@0: CFLOAT *cache; wolffd@0: DOC **docs; wolffd@0: long m; wolffd@0: KERNEL_PARM *kernel_parm; wolffd@0: long offset,stepsize; wolffd@0: } cache_parm_t; wolffd@0: wolffd@0: #endif