annotate toolboxes/SVM-light/src/svm_common.c @ 0:cc4b1211e677 tip

initial commit to HG from Changeset: 646 (e263d8a21543) added further path and more save "camirversion.m"
author Daniel Wolff
date Fri, 19 Aug 2016 13:07:06 +0200
parents
children
rev   line source
Daniel@0 1 /************************************************************************/
Daniel@0 2 /* */
Daniel@0 3 /* svm_common.c */
Daniel@0 4 /* */
Daniel@0 5 /* Definitions and functions used in both svm_learn and svm_classify. */
Daniel@0 6 /* */
Daniel@0 7 /* Author: Thorsten Joachims */
Daniel@0 8 /* Date: 02.07.04 */
Daniel@0 9 /* */
Daniel@0 10 /* Copyright (c) 2004 Thorsten Joachims - All rights reserved */
Daniel@0 11 /* */
Daniel@0 12 /* This software is available for non-commercial use only. It must */
Daniel@0 13 /* not be modified and distributed without prior permission of the */
Daniel@0 14 /* author. The author is not responsible for implications from the */
Daniel@0 15 /* use of this software. */
Daniel@0 16 /* */
Daniel@0 17 /************************************************************************/
Daniel@0 18
Daniel@0 19 # include "ctype.h"
Daniel@0 20 # include "svm_common.h"
Daniel@0 21 # include "kernel.h" /* this contains a user supplied kernel */
Daniel@0 22
Daniel@0 23 long verbosity; /* verbosity level (0-4) */
Daniel@0 24 long kernel_cache_statistic;
Daniel@0 25
Daniel@0 26 double classify_example(MODEL *model, DOC *ex)
Daniel@0 27 /* classifies one example */
Daniel@0 28 {
Daniel@0 29 register long i;
Daniel@0 30 register double dist;
Daniel@0 31
Daniel@0 32 if((model->kernel_parm.kernel_type == LINEAR) && (model->lin_weights))
Daniel@0 33 return(classify_example_linear(model,ex));
Daniel@0 34
Daniel@0 35 dist=0;
Daniel@0 36 for(i=1;i<model->sv_num;i++) {
Daniel@0 37 dist+=kernel(&model->kernel_parm,model->supvec[i],ex)*model->alpha[i];
Daniel@0 38 }
Daniel@0 39 return(dist-model->b);
Daniel@0 40 }
Daniel@0 41
Daniel@0 42 double classify_example_linear(MODEL *model, DOC *ex)
Daniel@0 43 /* classifies example for linear kernel */
Daniel@0 44
Daniel@0 45 /* important: the model must have the linear weight vector computed */
Daniel@0 46 /* use: add_weight_vector_to_linear_model(&model); */
Daniel@0 47
Daniel@0 48
Daniel@0 49 /* important: the feature numbers in the example to classify must */
Daniel@0 50 /* not be larger than the weight vector! */
Daniel@0 51 {
Daniel@0 52 double sum=0;
Daniel@0 53 SVECTOR *f;
Daniel@0 54
Daniel@0 55 for(f=ex->fvec;f;f=f->next)
Daniel@0 56 sum+=f->factor*sprod_ns(model->lin_weights,f);
Daniel@0 57 return(sum-model->b);
Daniel@0 58 }
Daniel@0 59
Daniel@0 60
Daniel@0 61 CFLOAT kernel(KERNEL_PARM *kernel_parm, DOC *a, DOC *b)
Daniel@0 62 /* calculate the kernel function */
Daniel@0 63 {
Daniel@0 64 double sum=0;
Daniel@0 65 SVECTOR *fa,*fb;
Daniel@0 66
Daniel@0 67 /* in case the constraints are sums of feature vector as represented
Daniel@0 68 as a list of SVECTOR's with their coefficient factor in the sum,
Daniel@0 69 take the kernel between all pairs */
Daniel@0 70 for(fa=a->fvec;fa;fa=fa->next) {
Daniel@0 71 for(fb=b->fvec;fb;fb=fb->next) {
Daniel@0 72 if(fa->kernel_id == fb->kernel_id)
Daniel@0 73 sum+=fa->factor*fb->factor*single_kernel(kernel_parm,fa,fb);
Daniel@0 74 }
Daniel@0 75 }
Daniel@0 76 return(sum);
Daniel@0 77 }
Daniel@0 78
Daniel@0 79 CFLOAT single_kernel(KERNEL_PARM *kernel_parm, SVECTOR *a, SVECTOR *b)
Daniel@0 80 /* calculate the kernel function between two vectors */
Daniel@0 81 {
Daniel@0 82 kernel_cache_statistic++;
Daniel@0 83 switch(kernel_parm->kernel_type) {
Daniel@0 84 case 0: /* linear */
Daniel@0 85 return((CFLOAT)sprod_ss(a,b));
Daniel@0 86 case 1: /* polynomial */
Daniel@0 87 return((CFLOAT)pow(kernel_parm->coef_lin*sprod_ss(a,b)+kernel_parm->coef_const,(double)kernel_parm->poly_degree));
Daniel@0 88 case 2: /* radial basis function */
Daniel@0 89 return((CFLOAT)exp(-kernel_parm->rbf_gamma*(a->twonorm_sq-2*sprod_ss(a,b)+b->twonorm_sq)));
Daniel@0 90 case 3: /* sigmoid neural net */
Daniel@0 91 return((CFLOAT)tanh(kernel_parm->coef_lin*sprod_ss(a,b)+kernel_parm->coef_const));
Daniel@0 92 case 4: /* custom-kernel supplied in file kernel.h*/
Daniel@0 93 return((CFLOAT)custom_kernel(kernel_parm,a,b));
Daniel@0 94 default: printf("Error: Unknown kernel function\n"); exit(1);
Daniel@0 95 }
Daniel@0 96 }
Daniel@0 97
Daniel@0 98
Daniel@0 99 SVECTOR *create_svector(WORD *words,char *userdefined,double factor)
Daniel@0 100 {
Daniel@0 101 SVECTOR *vec;
Daniel@0 102 long fnum,i;
Daniel@0 103
Daniel@0 104 fnum=0;
Daniel@0 105 while(words[fnum].wnum) {
Daniel@0 106 fnum++;
Daniel@0 107 }
Daniel@0 108 fnum++;
Daniel@0 109 vec = (SVECTOR *)my_malloc(sizeof(SVECTOR));
Daniel@0 110 vec->words = (WORD *)my_malloc(sizeof(WORD)*(fnum));
Daniel@0 111 for(i=0;i<fnum;i++) {
Daniel@0 112 vec->words[i]=words[i];
Daniel@0 113 }
Daniel@0 114 vec->twonorm_sq=sprod_ss(vec,vec);
Daniel@0 115
Daniel@0 116 fnum=0;
Daniel@0 117 while(userdefined[fnum]) {
Daniel@0 118 fnum++;
Daniel@0 119 }
Daniel@0 120 fnum++;
Daniel@0 121 vec->userdefined = (char *)my_malloc(sizeof(char)*(fnum));
Daniel@0 122 for(i=0;i<fnum;i++) {
Daniel@0 123 vec->userdefined[i]=userdefined[i];
Daniel@0 124 }
Daniel@0 125 vec->kernel_id=0;
Daniel@0 126 vec->next=NULL;
Daniel@0 127 vec->factor=factor;
Daniel@0 128 return(vec);
Daniel@0 129 }
Daniel@0 130
Daniel@0 131 SVECTOR *copy_svector(SVECTOR *vec)
Daniel@0 132 {
Daniel@0 133 SVECTOR *newvec=NULL;
Daniel@0 134 if(vec) {
Daniel@0 135 newvec=create_svector(vec->words,vec->userdefined,vec->factor);
Daniel@0 136 newvec->next=copy_svector(vec->next);
Daniel@0 137 }
Daniel@0 138 return(newvec);
Daniel@0 139 }
Daniel@0 140
Daniel@0 141 void free_svector(SVECTOR *vec)
Daniel@0 142 {
Daniel@0 143 if(vec) {
Daniel@0 144 free(vec->words);
Daniel@0 145 if(vec->userdefined)
Daniel@0 146 free(vec->userdefined);
Daniel@0 147 free_svector(vec->next);
Daniel@0 148 free(vec);
Daniel@0 149 }
Daniel@0 150 }
Daniel@0 151
Daniel@0 152 double sprod_ss(SVECTOR *a, SVECTOR *b)
Daniel@0 153 /* compute the inner product of two sparse vectors */
Daniel@0 154 {
Daniel@0 155 register CFLOAT sum=0;
Daniel@0 156 register WORD *ai,*bj;
Daniel@0 157 ai=a->words;
Daniel@0 158 bj=b->words;
Daniel@0 159 while (ai->wnum && bj->wnum) {
Daniel@0 160 if(ai->wnum > bj->wnum) {
Daniel@0 161 bj++;
Daniel@0 162 }
Daniel@0 163 else if (ai->wnum < bj->wnum) {
Daniel@0 164 ai++;
Daniel@0 165 }
Daniel@0 166 else {
Daniel@0 167 sum+=(CFLOAT)(ai->weight) * (CFLOAT)(bj->weight);
Daniel@0 168 ai++;
Daniel@0 169 bj++;
Daniel@0 170 }
Daniel@0 171 }
Daniel@0 172 return((double)sum);
Daniel@0 173 }
Daniel@0 174
Daniel@0 175 SVECTOR* sub_ss(SVECTOR *a, SVECTOR *b)
Daniel@0 176 /* compute the difference a-b of two sparse vectors */
Daniel@0 177 /* Note: SVECTOR lists are not followed, but only the first
Daniel@0 178 SVECTOR is used */
Daniel@0 179 {
Daniel@0 180 SVECTOR *vec;
Daniel@0 181 register WORD *sum,*sumi;
Daniel@0 182 register WORD *ai,*bj;
Daniel@0 183 long veclength;
Daniel@0 184
Daniel@0 185 ai=a->words;
Daniel@0 186 bj=b->words;
Daniel@0 187 veclength=0;
Daniel@0 188 while (ai->wnum && bj->wnum) {
Daniel@0 189 if(ai->wnum > bj->wnum) {
Daniel@0 190 veclength++;
Daniel@0 191 bj++;
Daniel@0 192 }
Daniel@0 193 else if (ai->wnum < bj->wnum) {
Daniel@0 194 veclength++;
Daniel@0 195 ai++;
Daniel@0 196 }
Daniel@0 197 else {
Daniel@0 198 veclength++;
Daniel@0 199 ai++;
Daniel@0 200 bj++;
Daniel@0 201 }
Daniel@0 202 }
Daniel@0 203 while (bj->wnum) {
Daniel@0 204 veclength++;
Daniel@0 205 bj++;
Daniel@0 206 }
Daniel@0 207 while (ai->wnum) {
Daniel@0 208 veclength++;
Daniel@0 209 ai++;
Daniel@0 210 }
Daniel@0 211 veclength++;
Daniel@0 212
Daniel@0 213 sum=(WORD *)my_malloc(sizeof(WORD)*veclength);
Daniel@0 214 sumi=sum;
Daniel@0 215 ai=a->words;
Daniel@0 216 bj=b->words;
Daniel@0 217 while (ai->wnum && bj->wnum) {
Daniel@0 218 if(ai->wnum > bj->wnum) {
Daniel@0 219 (*sumi)=(*bj);
Daniel@0 220 sumi->weight*=(-1);
Daniel@0 221 sumi++;
Daniel@0 222 bj++;
Daniel@0 223 }
Daniel@0 224 else if (ai->wnum < bj->wnum) {
Daniel@0 225 (*sumi)=(*ai);
Daniel@0 226 sumi++;
Daniel@0 227 ai++;
Daniel@0 228 }
Daniel@0 229 else {
Daniel@0 230 (*sumi)=(*ai);
Daniel@0 231 sumi->weight-=bj->weight;
Daniel@0 232 if(sumi->weight != 0)
Daniel@0 233 sumi++;
Daniel@0 234 ai++;
Daniel@0 235 bj++;
Daniel@0 236 }
Daniel@0 237 }
Daniel@0 238 while (bj->wnum) {
Daniel@0 239 (*sumi)=(*bj);
Daniel@0 240 sumi->weight*=(-1);
Daniel@0 241 sumi++;
Daniel@0 242 bj++;
Daniel@0 243 }
Daniel@0 244 while (ai->wnum) {
Daniel@0 245 (*sumi)=(*ai);
Daniel@0 246 sumi++;
Daniel@0 247 ai++;
Daniel@0 248 }
Daniel@0 249 sumi->wnum=0;
Daniel@0 250
Daniel@0 251 vec=create_svector(sum,"",1.0);
Daniel@0 252 free(sum);
Daniel@0 253
Daniel@0 254 return(vec);
Daniel@0 255 }
Daniel@0 256
Daniel@0 257 SVECTOR* add_ss(SVECTOR *a, SVECTOR *b)
Daniel@0 258 /* compute the sum a+b of two sparse vectors */
Daniel@0 259 /* Note: SVECTOR lists are not followed, but only the first
Daniel@0 260 SVECTOR is used */
Daniel@0 261 {
Daniel@0 262 SVECTOR *vec;
Daniel@0 263 register WORD *sum,*sumi;
Daniel@0 264 register WORD *ai,*bj;
Daniel@0 265 long veclength;
Daniel@0 266
Daniel@0 267 ai=a->words;
Daniel@0 268 bj=b->words;
Daniel@0 269 veclength=0;
Daniel@0 270 while (ai->wnum && bj->wnum) {
Daniel@0 271 if(ai->wnum > bj->wnum) {
Daniel@0 272 veclength++;
Daniel@0 273 bj++;
Daniel@0 274 }
Daniel@0 275 else if (ai->wnum < bj->wnum) {
Daniel@0 276 veclength++;
Daniel@0 277 ai++;
Daniel@0 278 }
Daniel@0 279 else {
Daniel@0 280 veclength++;
Daniel@0 281 ai++;
Daniel@0 282 bj++;
Daniel@0 283 }
Daniel@0 284 }
Daniel@0 285 while (bj->wnum) {
Daniel@0 286 veclength++;
Daniel@0 287 bj++;
Daniel@0 288 }
Daniel@0 289 while (ai->wnum) {
Daniel@0 290 veclength++;
Daniel@0 291 ai++;
Daniel@0 292 }
Daniel@0 293 veclength++;
Daniel@0 294
Daniel@0 295 /*** is veclength=lengSequence(a)+lengthSequence(b)? ***/
Daniel@0 296
Daniel@0 297 sum=(WORD *)my_malloc(sizeof(WORD)*veclength);
Daniel@0 298 sumi=sum;
Daniel@0 299 ai=a->words;
Daniel@0 300 bj=b->words;
Daniel@0 301 while (ai->wnum && bj->wnum) {
Daniel@0 302 if(ai->wnum > bj->wnum) {
Daniel@0 303 (*sumi)=(*bj);
Daniel@0 304 sumi++;
Daniel@0 305 bj++;
Daniel@0 306 }
Daniel@0 307 else if (ai->wnum < bj->wnum) {
Daniel@0 308 (*sumi)=(*ai);
Daniel@0 309 sumi++;
Daniel@0 310 ai++;
Daniel@0 311 }
Daniel@0 312 else {
Daniel@0 313 (*sumi)=(*ai);
Daniel@0 314 sumi->weight+=bj->weight;
Daniel@0 315 if(sumi->weight != 0)
Daniel@0 316 sumi++;
Daniel@0 317 ai++;
Daniel@0 318 bj++;
Daniel@0 319 }
Daniel@0 320 }
Daniel@0 321 while (bj->wnum) {
Daniel@0 322 (*sumi)=(*bj);
Daniel@0 323 sumi++;
Daniel@0 324 bj++;
Daniel@0 325 }
Daniel@0 326 while (ai->wnum) {
Daniel@0 327 (*sumi)=(*ai);
Daniel@0 328 sumi++;
Daniel@0 329 ai++;
Daniel@0 330 }
Daniel@0 331 sumi->wnum=0;
Daniel@0 332
Daniel@0 333 vec=create_svector(sum,"",1.0);
Daniel@0 334 free(sum);
Daniel@0 335
Daniel@0 336 return(vec);
Daniel@0 337 }
Daniel@0 338
Daniel@0 339 SVECTOR* add_list_ss(SVECTOR *a)
Daniel@0 340 /* computes the linear combination of the SVECTOR list weighted
Daniel@0 341 by the factor of each SVECTOR */
Daniel@0 342 {
Daniel@0 343 SVECTOR *scaled,*oldsum,*sum,*f;
Daniel@0 344 WORD empty[2];
Daniel@0 345
Daniel@0 346 if(a){
Daniel@0 347 sum=smult_s(a,a->factor);
Daniel@0 348 for(f=a->next;f;f=f->next) {
Daniel@0 349 scaled=smult_s(f,f->factor);
Daniel@0 350 oldsum=sum;
Daniel@0 351 sum=add_ss(sum,scaled);
Daniel@0 352 free_svector(oldsum);
Daniel@0 353 free_svector(scaled);
Daniel@0 354 }
Daniel@0 355 sum->factor=1.0;
Daniel@0 356 }
Daniel@0 357 else {
Daniel@0 358 empty[0].wnum=0;
Daniel@0 359 sum=create_svector(empty,"",1.0);
Daniel@0 360 }
Daniel@0 361 return(sum);
Daniel@0 362 }
Daniel@0 363
Daniel@0 364 void append_svector_list(SVECTOR *a, SVECTOR *b)
Daniel@0 365 /* appends SVECTOR b to the end of SVECTOR a. */
Daniel@0 366 {
Daniel@0 367 SVECTOR *f;
Daniel@0 368
Daniel@0 369 for(f=a;f->next;f=f->next); /* find end of first vector list */
Daniel@0 370 f->next=b; /* append the two vector lists */
Daniel@0 371 }
Daniel@0 372
Daniel@0 373 SVECTOR* smult_s(SVECTOR *a, double factor)
Daniel@0 374 /* scale sparse vector a by factor */
Daniel@0 375 {
Daniel@0 376 SVECTOR *vec;
Daniel@0 377 register WORD *sum,*sumi;
Daniel@0 378 register WORD *ai;
Daniel@0 379 long veclength;
Daniel@0 380
Daniel@0 381 ai=a->words;
Daniel@0 382 veclength=0;
Daniel@0 383 while (ai->wnum) {
Daniel@0 384 veclength++;
Daniel@0 385 ai++;
Daniel@0 386 }
Daniel@0 387 veclength++;
Daniel@0 388
Daniel@0 389 sum=(WORD *)my_malloc(sizeof(WORD)*veclength);
Daniel@0 390 sumi=sum;
Daniel@0 391 ai=a->words;
Daniel@0 392 while (ai->wnum) {
Daniel@0 393 (*sumi)=(*ai);
Daniel@0 394 sumi->weight*=factor;
Daniel@0 395 if(sumi->weight != 0)
Daniel@0 396 sumi++;
Daniel@0 397 ai++;
Daniel@0 398 }
Daniel@0 399 sumi->wnum=0;
Daniel@0 400
Daniel@0 401 vec=create_svector(sum,a->userdefined,a->factor);
Daniel@0 402 free(sum);
Daniel@0 403
Daniel@0 404 return(vec);
Daniel@0 405 }
Daniel@0 406
Daniel@0 407 int featvec_eq(SVECTOR *a, SVECTOR *b)
Daniel@0 408 /* tests two sparse vectors for equality */
Daniel@0 409 {
Daniel@0 410 register WORD *ai,*bj;
Daniel@0 411 ai=a->words;
Daniel@0 412 bj=b->words;
Daniel@0 413 while (ai->wnum && bj->wnum) {
Daniel@0 414 if(ai->wnum > bj->wnum) {
Daniel@0 415 if((CFLOAT)(bj->weight) != 0)
Daniel@0 416 return(0);
Daniel@0 417 bj++;
Daniel@0 418 }
Daniel@0 419 else if (ai->wnum < bj->wnum) {
Daniel@0 420 if((CFLOAT)(ai->weight) != 0)
Daniel@0 421 return(0);
Daniel@0 422 ai++;
Daniel@0 423 }
Daniel@0 424 else {
Daniel@0 425 if((CFLOAT)(ai->weight) != (CFLOAT)(bj->weight))
Daniel@0 426 return(0);
Daniel@0 427 ai++;
Daniel@0 428 bj++;
Daniel@0 429 }
Daniel@0 430 }
Daniel@0 431 return(1);
Daniel@0 432 }
Daniel@0 433
Daniel@0 434 double model_length_s(MODEL *model, KERNEL_PARM *kernel_parm)
Daniel@0 435 /* compute length of weight vector */
Daniel@0 436 {
Daniel@0 437 register long i,j;
Daniel@0 438 register double sum=0,alphai;
Daniel@0 439 register DOC *supveci;
Daniel@0 440
Daniel@0 441 for(i=1;i<model->sv_num;i++) {
Daniel@0 442 alphai=model->alpha[i];
Daniel@0 443 supveci=model->supvec[i];
Daniel@0 444 for(j=1;j<model->sv_num;j++) {
Daniel@0 445 sum+=alphai*model->alpha[j]
Daniel@0 446 *kernel(kernel_parm,supveci,model->supvec[j]);
Daniel@0 447 }
Daniel@0 448 }
Daniel@0 449 return(sqrt(sum));
Daniel@0 450 }
Daniel@0 451
void clear_vector_n(double *vec, long int n)
/* Sets vec[0] .. vec[n] to zero. Note the inclusive upper bound:
   n+1 elements are written. Nothing is written when n is negative. */
{
  long k = n;

  while (k >= 0) {
    vec[k] = 0;
    k--;
  }
}
Daniel@0 457
Daniel@0 458 void add_vector_ns(double *vec_n, SVECTOR *vec_s, double faktor)
Daniel@0 459 {
Daniel@0 460 register WORD *ai;
Daniel@0 461 ai=vec_s->words;
Daniel@0 462 while (ai->wnum) {
Daniel@0 463 vec_n[ai->wnum]+=(faktor*ai->weight);
Daniel@0 464 ai++;
Daniel@0 465 }
Daniel@0 466 }
Daniel@0 467
Daniel@0 468 double sprod_ns(double *vec_n, SVECTOR *vec_s)
Daniel@0 469 {
Daniel@0 470 register double sum=0;
Daniel@0 471 register WORD *ai;
Daniel@0 472 ai=vec_s->words;
Daniel@0 473 while (ai->wnum) {
Daniel@0 474 sum+=(vec_n[ai->wnum]*ai->weight);
Daniel@0 475 ai++;
Daniel@0 476 }
Daniel@0 477 return(sum);
Daniel@0 478 }
Daniel@0 479
Daniel@0 480 void add_weight_vector_to_linear_model(MODEL *model)
Daniel@0 481 /* compute weight vector in linear case and add to model */
Daniel@0 482 {
Daniel@0 483 long i;
Daniel@0 484 SVECTOR *f;
Daniel@0 485
Daniel@0 486 model->lin_weights=(double *)my_malloc(sizeof(double)*(model->totwords+1));
Daniel@0 487 clear_vector_n(model->lin_weights,model->totwords);
Daniel@0 488 for(i=1;i<model->sv_num;i++) {
Daniel@0 489 for(f=(model->supvec[i])->fvec;f;f=f->next)
Daniel@0 490 add_vector_ns(model->lin_weights,f,f->factor*model->alpha[i]);
Daniel@0 491 }
Daniel@0 492 }
Daniel@0 493
Daniel@0 494
Daniel@0 495 DOC *create_example(long docnum, long queryid, long slackid,
Daniel@0 496 double costfactor, SVECTOR *fvec)
Daniel@0 497 {
Daniel@0 498 DOC *example;
Daniel@0 499 example = (DOC *)my_malloc(sizeof(DOC));
Daniel@0 500 example->docnum=docnum;
Daniel@0 501 example->queryid=queryid;
Daniel@0 502 example->slackid=slackid;
Daniel@0 503 example->costfactor=costfactor;
Daniel@0 504 example->fvec=fvec;
Daniel@0 505 return(example);
Daniel@0 506 }
Daniel@0 507
Daniel@0 508 void free_example(DOC *example, long deep)
Daniel@0 509 {
Daniel@0 510 if(example) {
Daniel@0 511 if(deep) {
Daniel@0 512 if(example->fvec)
Daniel@0 513 free_svector(example->fvec);
Daniel@0 514 }
Daniel@0 515 free(example);
Daniel@0 516 }
Daniel@0 517 }
Daniel@0 518
Daniel@0 519 void write_model(char *modelfile, MODEL *model)
Daniel@0 520 {
Daniel@0 521 FILE *modelfl;
Daniel@0 522 long j,i,sv_num;
Daniel@0 523 SVECTOR *v;
Daniel@0 524
Daniel@0 525 if(verbosity>=1) {
Daniel@0 526 printf("Writing model file..."); fflush(stdout);
Daniel@0 527 }
Daniel@0 528 if ((modelfl = fopen (modelfile, "w")) == NULL)
Daniel@0 529 { perror (modelfile); exit (1); }
Daniel@0 530 fprintf(modelfl,"SVM-light Version %s\n",VERSION);
Daniel@0 531 fprintf(modelfl,"%ld # kernel type\n",
Daniel@0 532 model->kernel_parm.kernel_type);
Daniel@0 533 fprintf(modelfl,"%ld # kernel parameter -d \n",
Daniel@0 534 model->kernel_parm.poly_degree);
Daniel@0 535 fprintf(modelfl,"%.8g # kernel parameter -g \n",
Daniel@0 536 model->kernel_parm.rbf_gamma);
Daniel@0 537 fprintf(modelfl,"%.8g # kernel parameter -s \n",
Daniel@0 538 model->kernel_parm.coef_lin);
Daniel@0 539 fprintf(modelfl,"%.8g # kernel parameter -r \n",
Daniel@0 540 model->kernel_parm.coef_const);
Daniel@0 541 fprintf(modelfl,"%s# kernel parameter -u \n",model->kernel_parm.custom);
Daniel@0 542 fprintf(modelfl,"%ld # highest feature index \n",model->totwords);
Daniel@0 543 fprintf(modelfl,"%ld # number of training documents \n",model->totdoc);
Daniel@0 544
Daniel@0 545 sv_num=1;
Daniel@0 546 for(i=1;i<model->sv_num;i++) {
Daniel@0 547 for(v=model->supvec[i]->fvec;v;v=v->next)
Daniel@0 548 sv_num++;
Daniel@0 549 }
Daniel@0 550 fprintf(modelfl,"%ld # number of support vectors plus 1 \n",sv_num);
Daniel@0 551 fprintf(modelfl,"%.8g # threshold b, each following line is a SV (starting with alpha*y)\n",model->b);
Daniel@0 552
Daniel@0 553 for(i=1;i<model->sv_num;i++) {
Daniel@0 554 for(v=model->supvec[i]->fvec;v;v=v->next) {
Daniel@0 555 fprintf(modelfl,"%.32g ",model->alpha[i]*v->factor);
Daniel@0 556 for (j=0; (v->words[j]).wnum; j++) {
Daniel@0 557 fprintf(modelfl,"%ld:%.8g ",
Daniel@0 558 (long)(v->words[j]).wnum,
Daniel@0 559 (double)(v->words[j]).weight);
Daniel@0 560 }
Daniel@0 561 fprintf(modelfl,"#%s\n",v->userdefined);
Daniel@0 562 /* NOTE: this could be made more efficient by summing the
Daniel@0 563 alpha's of identical vectors before writing them to the
Daniel@0 564 file. */
Daniel@0 565 }
Daniel@0 566 }
Daniel@0 567 fclose(modelfl);
Daniel@0 568 if(verbosity>=1) {
Daniel@0 569 printf("done\n");
Daniel@0 570 }
Daniel@0 571 }
Daniel@0 572
Daniel@0 573
Daniel@0 574 MODEL *read_model(char *modelfile)
Daniel@0 575 {
Daniel@0 576 FILE *modelfl;
Daniel@0 577 long i,queryid,slackid;
Daniel@0 578 double costfactor;
Daniel@0 579 long max_sv,max_words,ll,wpos;
Daniel@0 580 char *line,*comment;
Daniel@0 581 WORD *words;
Daniel@0 582 char version_buffer[100];
Daniel@0 583 MODEL *model;
Daniel@0 584
Daniel@0 585 if(verbosity>=1) {
Daniel@0 586 printf("Reading model..."); fflush(stdout);
Daniel@0 587 }
Daniel@0 588
Daniel@0 589 nol_ll(modelfile,&max_sv,&max_words,&ll); /* scan size of model file */
Daniel@0 590 max_words+=2;
Daniel@0 591 ll+=2;
Daniel@0 592
Daniel@0 593 words = (WORD *)my_malloc(sizeof(WORD)*(max_words+10));
Daniel@0 594 line = (char *)my_malloc(sizeof(char)*ll);
Daniel@0 595 model = (MODEL *)my_malloc(sizeof(MODEL));
Daniel@0 596
Daniel@0 597 if ((modelfl = fopen (modelfile, "r")) == NULL)
Daniel@0 598 { perror (modelfile); exit (1); }
Daniel@0 599
Daniel@0 600 fscanf(modelfl,"SVM-light Version %s\n",version_buffer);
Daniel@0 601 if(strcmp(version_buffer,VERSION)) {
Daniel@0 602 perror ("Version of model-file does not match version of svm_classify!");
Daniel@0 603 exit (1);
Daniel@0 604 }
Daniel@0 605 fscanf(modelfl,"%ld%*[^\n]\n", &model->kernel_parm.kernel_type);
Daniel@0 606 fscanf(modelfl,"%ld%*[^\n]\n", &model->kernel_parm.poly_degree);
Daniel@0 607 fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.rbf_gamma);
Daniel@0 608 fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.coef_lin);
Daniel@0 609 fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.coef_const);
Daniel@0 610 fscanf(modelfl,"%[^#]%*[^\n]\n", model->kernel_parm.custom);
Daniel@0 611
Daniel@0 612 fscanf(modelfl,"%ld%*[^\n]\n", &model->totwords);
Daniel@0 613 fscanf(modelfl,"%ld%*[^\n]\n", &model->totdoc);
Daniel@0 614 fscanf(modelfl,"%ld%*[^\n]\n", &model->sv_num);
Daniel@0 615 fscanf(modelfl,"%lf%*[^\n]\n", &model->b);
Daniel@0 616
Daniel@0 617 model->supvec = (DOC **)my_malloc(sizeof(DOC *)*model->sv_num);
Daniel@0 618 model->alpha = (double *)my_malloc(sizeof(double)*model->sv_num);
Daniel@0 619 model->index=NULL;
Daniel@0 620 model->lin_weights=NULL;
Daniel@0 621
Daniel@0 622 for(i=1;i<model->sv_num;i++) {
Daniel@0 623 fgets(line,(int)ll,modelfl);
Daniel@0 624 if(!parse_document(line,words,&(model->alpha[i]),&queryid,&slackid,
Daniel@0 625 &costfactor,&wpos,max_words,&comment)) {
Daniel@0 626 printf("\nParsing error while reading model file in SV %ld!\n%s",
Daniel@0 627 i,line);
Daniel@0 628 exit(1);
Daniel@0 629 }
Daniel@0 630 model->supvec[i] = create_example(-1,
Daniel@0 631 0,0,
Daniel@0 632 0.0,
Daniel@0 633 create_svector(words,comment,1.0));
Daniel@0 634 }
Daniel@0 635 fclose(modelfl);
Daniel@0 636 free(line);
Daniel@0 637 free(words);
Daniel@0 638 if(verbosity>=1) {
Daniel@0 639 fprintf(stdout, "OK. (%d support vectors read)\n",(int)(model->sv_num-1));
Daniel@0 640 }
Daniel@0 641 return(model);
Daniel@0 642 }
Daniel@0 643
Daniel@0 644 MODEL *copy_model(MODEL *model)
Daniel@0 645 {
Daniel@0 646 MODEL *newmodel;
Daniel@0 647 long i;
Daniel@0 648
Daniel@0 649 newmodel=(MODEL *)my_malloc(sizeof(MODEL));
Daniel@0 650 (*newmodel)=(*model);
Daniel@0 651 newmodel->supvec = (DOC **)my_malloc(sizeof(DOC *)*model->sv_num);
Daniel@0 652 newmodel->alpha = (double *)my_malloc(sizeof(double)*model->sv_num);
Daniel@0 653 newmodel->index = NULL; /* index is not copied */
Daniel@0 654 newmodel->supvec[0] = NULL;
Daniel@0 655 newmodel->alpha[0] = 0;
Daniel@0 656 for(i=1;i<model->sv_num;i++) {
Daniel@0 657 newmodel->alpha[i]=model->alpha[i];
Daniel@0 658 newmodel->supvec[i]=create_example(model->supvec[i]->docnum,
Daniel@0 659 model->supvec[i]->queryid,0,
Daniel@0 660 model->supvec[i]->costfactor,
Daniel@0 661 copy_svector(model->supvec[i]->fvec));
Daniel@0 662 }
Daniel@0 663 if(model->lin_weights) {
Daniel@0 664 newmodel->lin_weights = (double *)my_malloc(sizeof(double)*(model->totwords+1));
Daniel@0 665 for(i=0;i<model->totwords+1;i++)
Daniel@0 666 newmodel->lin_weights[i]=model->lin_weights[i];
Daniel@0 667 }
Daniel@0 668 return(newmodel);
Daniel@0 669 }
Daniel@0 670
Daniel@0 671 void free_model(MODEL *model, int deep)
Daniel@0 672 {
Daniel@0 673 long i;
Daniel@0 674
Daniel@0 675 if(model->supvec) {
Daniel@0 676 if(deep) {
Daniel@0 677 for(i=1;i<model->sv_num;i++) {
Daniel@0 678 free_example(model->supvec[i],1);
Daniel@0 679 }
Daniel@0 680 }
Daniel@0 681 free(model->supvec);
Daniel@0 682 }
Daniel@0 683 if(model->alpha) free(model->alpha);
Daniel@0 684 if(model->index) free(model->index);
Daniel@0 685 if(model->lin_weights) free(model->lin_weights);
Daniel@0 686 free(model);
Daniel@0 687 }
Daniel@0 688
Daniel@0 689
Daniel@0 690 void read_documents(char *docfile, DOC ***docs, double **label,
Daniel@0 691 long int *totwords, long int *totdoc)
Daniel@0 692 {
Daniel@0 693 char *line,*comment;
Daniel@0 694 WORD *words;
Daniel@0 695 long dnum=0,wpos,dpos=0,dneg=0,dunlab=0,queryid,slackid,max_docs;
Daniel@0 696 long max_words_doc, ll;
Daniel@0 697 double doc_label,costfactor;
Daniel@0 698 FILE *docfl;
Daniel@0 699
Daniel@0 700 if(verbosity>=1) {
Daniel@0 701 printf("Scanning examples..."); fflush(stdout);
Daniel@0 702 }
Daniel@0 703 nol_ll(docfile,&max_docs,&max_words_doc,&ll); /* scan size of input file */
Daniel@0 704 max_words_doc+=2;
Daniel@0 705 ll+=2;
Daniel@0 706 max_docs+=2;
Daniel@0 707 if(verbosity>=1) {
Daniel@0 708 printf("done\n"); fflush(stdout);
Daniel@0 709 }
Daniel@0 710
Daniel@0 711 (*docs) = (DOC **)my_malloc(sizeof(DOC *)*max_docs); /* feature vectors */
Daniel@0 712 (*label) = (double *)my_malloc(sizeof(double)*max_docs); /* target values */
Daniel@0 713 line = (char *)my_malloc(sizeof(char)*ll);
Daniel@0 714
Daniel@0 715 if ((docfl = fopen (docfile, "r")) == NULL)
Daniel@0 716 { perror (docfile); exit (1); }
Daniel@0 717
Daniel@0 718 words = (WORD *)my_malloc(sizeof(WORD)*(max_words_doc+10));
Daniel@0 719 if(verbosity>=1) {
Daniel@0 720 printf("Reading examples into memory..."); fflush(stdout);
Daniel@0 721 }
Daniel@0 722 dnum=0;
Daniel@0 723 (*totwords)=0;
Daniel@0 724 while((!feof(docfl)) && fgets(line,(int)ll,docfl)) {
Daniel@0 725 if(line[0] == '#') continue; /* line contains comments */
Daniel@0 726 if(!parse_document(line,words,&doc_label,&queryid,&slackid,&costfactor,
Daniel@0 727 &wpos,max_words_doc,&comment)) {
Daniel@0 728 printf("\nParsing error in line %ld!\n%s",dnum,line);
Daniel@0 729 exit(1);
Daniel@0 730 }
Daniel@0 731 (*label)[dnum]=doc_label;
Daniel@0 732 /* printf("docnum=%ld: Class=%f ",dnum,doc_label); */
Daniel@0 733 if(doc_label > 0) dpos++;
Daniel@0 734 if (doc_label < 0) dneg++;
Daniel@0 735 if (doc_label == 0) dunlab++;
Daniel@0 736 if((wpos>1) && ((words[wpos-2]).wnum>(*totwords)))
Daniel@0 737 (*totwords)=(words[wpos-2]).wnum;
Daniel@0 738 if((*totwords) > MAXFEATNUM) {
Daniel@0 739 printf("\nMaximum feature number exceeds limit defined in MAXFEATNUM!\n");
Daniel@0 740 printf("LINE: %s\n",line);
Daniel@0 741 exit(1);
Daniel@0 742 }
Daniel@0 743 (*docs)[dnum] = create_example(dnum,queryid,slackid,costfactor,
Daniel@0 744 create_svector(words,comment,1.0));
Daniel@0 745 /* printf("\nNorm=%f\n",((*docs)[dnum]->fvec)->twonorm_sq); */
Daniel@0 746 dnum++;
Daniel@0 747 if(verbosity>=1) {
Daniel@0 748 if((dnum % 100) == 0) {
Daniel@0 749 printf("%ld..",dnum); fflush(stdout);
Daniel@0 750 }
Daniel@0 751 }
Daniel@0 752 }
Daniel@0 753
Daniel@0 754 fclose(docfl);
Daniel@0 755 free(line);
Daniel@0 756 free(words);
Daniel@0 757 if(verbosity>=1) {
Daniel@0 758 fprintf(stdout, "OK. (%ld examples read)\n", dnum);
Daniel@0 759 }
Daniel@0 760 (*totdoc)=dnum;
Daniel@0 761 }
Daniel@0 762
Daniel@0 763 int parse_document(char *line, WORD *words, double *label,
Daniel@0 764 long *queryid, long *slackid, double *costfactor,
Daniel@0 765 long int *numwords, long int max_words_doc,
Daniel@0 766 char **comment)
Daniel@0 767 {
Daniel@0 768 register long wpos,pos;
Daniel@0 769 long wnum;
Daniel@0 770 double weight;
Daniel@0 771 int numread;
Daniel@0 772 char featurepair[1000],junk[1000];
Daniel@0 773
Daniel@0 774 (*queryid)=0;
Daniel@0 775 (*slackid)=0;
Daniel@0 776 (*costfactor)=1;
Daniel@0 777
Daniel@0 778 pos=0;
Daniel@0 779 (*comment)=NULL;
Daniel@0 780 while(line[pos] ) { /* cut off comments */
Daniel@0 781 if((line[pos] == '#') && (!(*comment))) {
Daniel@0 782 line[pos]=0;
Daniel@0 783 (*comment)=&(line[pos+1]);
Daniel@0 784 }
Daniel@0 785 if(line[pos] == '\n') { /* strip the CR */
Daniel@0 786 line[pos]=0;
Daniel@0 787 }
Daniel@0 788 pos++;
Daniel@0 789 }
Daniel@0 790 if(!(*comment)) (*comment)=&(line[pos]);
Daniel@0 791 /* printf("Comment: '%s'\n",(*comment)); */
Daniel@0 792
Daniel@0 793 wpos=0;
Daniel@0 794 /* check, that line starts with target value or zero, but not with
Daniel@0 795 feature pair */
Daniel@0 796 if(sscanf(line,"%s",featurepair) == EOF) return(0);
Daniel@0 797 pos=0;
Daniel@0 798 while((featurepair[pos] != ':') && featurepair[pos]) pos++;
Daniel@0 799 if(featurepair[pos] == ':') {
Daniel@0 800 perror ("Line must start with label or 0!!!\n");
Daniel@0 801 printf("LINE: %s\n",line);
Daniel@0 802 exit (1);
Daniel@0 803 }
Daniel@0 804 /* read the target value */
Daniel@0 805 if(sscanf(line,"%lf",label) == EOF) return(0);
Daniel@0 806 pos=0;
Daniel@0 807 while(space_or_null((int)line[pos])) pos++;
Daniel@0 808 while((!space_or_null((int)line[pos])) && line[pos]) pos++;
Daniel@0 809 while(((numread=sscanf(line+pos,"%s",featurepair)) != EOF) &&
Daniel@0 810 (numread > 0) &&
Daniel@0 811 (wpos<max_words_doc)) {
Daniel@0 812 /* printf("%s\n",featurepair); */
Daniel@0 813 while(space_or_null((int)line[pos])) pos++;
Daniel@0 814 while((!space_or_null((int)line[pos])) && line[pos]) pos++;
Daniel@0 815 if(sscanf(featurepair,"qid:%ld%s",&wnum,junk)==1) {
Daniel@0 816 /* it is the query id */
Daniel@0 817 (*queryid)=(long)wnum;
Daniel@0 818 }
Daniel@0 819 else if(sscanf(featurepair,"sid:%ld%s",&wnum,junk)==1) {
Daniel@0 820 /* it is the slack id */
Daniel@0 821 if(wnum > 0)
Daniel@0 822 (*slackid)=(long)wnum;
Daniel@0 823 else {
Daniel@0 824 perror ("Slack-id must be greater or equal to 1!!!\n");
Daniel@0 825 printf("LINE: %s\n",line);
Daniel@0 826 exit (1);
Daniel@0 827 }
Daniel@0 828 }
Daniel@0 829 else if(sscanf(featurepair,"cost:%lf%s",&weight,junk)==1) {
Daniel@0 830 /* it is the example-dependent cost factor */
Daniel@0 831 (*costfactor)=(double)weight;
Daniel@0 832 }
Daniel@0 833 else if(sscanf(featurepair,"%ld:%lf%s",&wnum,&weight,junk)==2) {
Daniel@0 834 /* it is a regular feature */
Daniel@0 835 if(wnum<=0) {
Daniel@0 836 perror ("Feature numbers must be larger or equal to 1!!!\n");
Daniel@0 837 printf("LINE: %s\n",line);
Daniel@0 838 exit (1);
Daniel@0 839 }
Daniel@0 840 if((wpos>0) && ((words[wpos-1]).wnum >= wnum)) {
Daniel@0 841 perror ("Features must be in increasing order!!!\n");
Daniel@0 842 printf("LINE: %s\n",line);
Daniel@0 843 exit (1);
Daniel@0 844 }
Daniel@0 845 (words[wpos]).wnum=wnum;
Daniel@0 846 (words[wpos]).weight=(FVAL)weight;
Daniel@0 847 wpos++;
Daniel@0 848 }
Daniel@0 849 else {
Daniel@0 850 perror ("Cannot parse feature/value pair!!!\n");
Daniel@0 851 printf("'%s' in LINE: %s\n",featurepair,line);
Daniel@0 852 exit (1);
Daniel@0 853 }
Daniel@0 854 }
Daniel@0 855 (words[wpos]).wnum=0;
Daniel@0 856 (*numwords)=wpos+1;
Daniel@0 857 return(1);
Daniel@0 858 }
Daniel@0 859
Daniel@0 860 double *read_alphas(char *alphafile,long totdoc)
Daniel@0 861 /* reads the alpha vector from a file as written by the
Daniel@0 862 write_alphas function */
Daniel@0 863 {
Daniel@0 864 FILE *fl;
Daniel@0 865 double *alpha;
Daniel@0 866 long dnum;
Daniel@0 867
Daniel@0 868 if ((fl = fopen (alphafile, "r")) == NULL)
Daniel@0 869 { perror (alphafile); exit (1); }
Daniel@0 870
Daniel@0 871 alpha = (double *)my_malloc(sizeof(double)*totdoc);
Daniel@0 872 if(verbosity>=1) {
Daniel@0 873 printf("Reading alphas..."); fflush(stdout);
Daniel@0 874 }
Daniel@0 875 dnum=0;
Daniel@0 876 while((!feof(fl)) && fscanf(fl,"%lf\n",&alpha[dnum]) && (dnum<totdoc)) {
Daniel@0 877 dnum++;
Daniel@0 878 }
Daniel@0 879 if(dnum != totdoc)
Daniel@0 880 { perror ("\nNot enough values in alpha file!"); exit (1); }
Daniel@0 881 fclose(fl);
Daniel@0 882
Daniel@0 883 if(verbosity>=1) {
Daniel@0 884 printf("done\n"); fflush(stdout);
Daniel@0 885 }
Daniel@0 886
Daniel@0 887 return(alpha);
Daniel@0 888 }
Daniel@0 889
void nol_ll(char *file, long int *nol, long int *wol, long int *ll)
/* Scans the file once and reports
     (*nol) number of newline characters plus 1,
     (*wol) largest number of whitespace/NUL characters on one line
            (the newline itself is counted),
     (*ll)  length of the longest line in characters (newline counted).
   Callers use these values to size their line and word buffers.
   Terminates the program if the file cannot be opened.

   A last line without a trailing newline now takes part in the maxima
   as well; before, it was ignored, so buffers could come out too
   small for it. The value returned by getc is handed to space_or_null
   unchanged, so bytes above 127 are not turned into negative
   numbers. */
{
  FILE *fl;
  int ic;
  long current_length,current_wol;

  if ((fl = fopen (file, "r")) == NULL)
  { perror (file); exit (1); }
  current_length=0;
  current_wol=0;
  (*ll)=0;
  (*nol)=1;
  (*wol)=0;
  while((ic=getc(fl)) != EOF) {
    current_length++;
    if(space_or_null(ic)) {
      current_wol++;
    }
    if(ic == '\n') {
      (*nol)++;
      if(current_length>(*ll)) {
        (*ll)=current_length;
      }
      if(current_wol>(*wol)) {
        (*wol)=current_wol;
      }
      current_length=0;
      current_wol=0;
    }
  }
  /* account for a final line that is not terminated by a newline */
  if(current_length>(*ll)) {
    (*ll)=current_length;
  }
  if(current_wol>(*wol)) {
    (*wol)=current_wol;
  }
  fclose(fl);
}
Daniel@0 926
long minl(long int a, long int b)
/* Returns the smaller of the two arguments. */
{
  return (a < b) ? a : b;
}
Daniel@0 934
long maxl(long int a, long int b)
/* Returns the larger of the two arguments. */
{
  return (a > b) ? a : b;
}
Daniel@0 942
long get_runtime(void)
/* Returns the value of clock() converted to hundredths of a second. */
{
  const double ticks = (double)clock();

  return (long)(ticks * 100.0 / (double)CLOCKS_PER_SEC);
}
Daniel@0 949
Daniel@0 950
#ifdef _MSC_VER

/* Only compiled under Microsoft's compiler: provides isnan() on top
   of the compiler's _isnan(). */
int isnan(double a)
{
  return _isnan(a);
}

#endif
Daniel@0 959
/* Returns non-zero if c is the NUL character or a whitespace character
   according to isspace(), zero otherwise.

   c is converted to unsigned char before the isspace() call: callers
   pass plain char values, which may be negative for bytes above 127,
   and isspace() is only defined for unsigned char values and EOF. */
int space_or_null(int c) {
  if (c==0)
    return 1;
  return isspace((unsigned char)c);
}
Daniel@0 965
void *my_malloc(size_t size)
/* malloc wrapper that never returns NULL: on allocation failure it
   prints an error and terminates the program.

   A request for 0 bytes is rounded up to 1 byte. malloc(0) is allowed
   to return NULL, which would otherwise be reported as out-of-memory
   (e.g. for an empty alpha or document array). */
{
  void *ptr;
  if(size == 0) {
    size=1;
  }
  ptr=(void *)malloc(size);
  if(!ptr) {
    perror ("Out of memory!\n");
    exit (1);
  }
  return(ptr);
}
Daniel@0 976
void copyright_notice(void)
/* Prints the copyright and licence notice to stdout. */
{
  static const char *const notice[] = {
    "\nCopyright: Thorsten Joachims, thorsten@joachims.org\n\n",
    "This software is available for non-commercial use only. It must not\n",
    "be modified and distributed without prior permission of the author.\n",
    "The author is not responsible for implications from the use of this\n",
    "software.\n\n"
  };
  size_t k;

  for (k = 0; k < sizeof notice / sizeof notice[0]; k++) {
    fputs(notice[k], stdout);
  }
}