diff --git a/thirdparty/svmlight/CMakeLists.txt b/thirdparty/svmlight/CMakeLists.txt new file mode 100644 index 000000000..c26ab1f3d --- /dev/null +++ b/thirdparty/svmlight/CMakeLists.txt @@ -0,0 +1,17 @@ +set (svm_learn_SRC + kernel.h + svm_common.c + svm_common.h + svm_hideo.c + svm_learn.c + svm_learn.h + svm_learn_main.c) + +add_executable (svm_learn + ${svm_learn_SRC}) + +set_target_properties (svm_learn + PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${TOOLS_DIR}/train/) + +target_link_libraries (svm_learn m) diff --git a/thirdparty/svmlight/LICENSE.txt b/thirdparty/svmlight/LICENSE.txt new file mode 100755 index 000000000..28d6db09f --- /dev/null +++ b/thirdparty/svmlight/LICENSE.txt @@ -0,0 +1,59 @@ +SVM-Light +--------- + +Available at http://svmlight.joachims.org/ + +Author: Thorsten Joachims + thorsten@joachims.org + + Cornell University + Department of Computer Science + 4153 Upson Hall + Ithaca, NY 14853 + USA + +LICENSING TERMS + +This program is granted free of charge for research and education +purposes. However you must obtain a license from the author to use it +for commercial purposes. + +Scientific results produced using the software provided shall +acknowledge the use of SVM-Light. Please cite as + + T. Joachims, Making large-Scale SVM Learning + Practical. Advances in Kernel Methods - Support Vector + Learning, B. Schölkopf and C. Burges and A. Smola (ed.), + MIT-Press, 1999. + http://www-ai.cs.uni-dortmund.de/DOKUMENTE/joachims_99a.pdf + +Moreover shall the author of SVM-Light be informed about the +publication. + +The software must not be modified and distributed without prior +permission of the author. + +By using SVM-Light you agree to the licensing terms. + + +NO WARRANTY + +BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT +WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER +PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, +EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + +IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF +THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO +LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY +OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED +OF THE POSSIBILITY OF SUCH DAMAGES. 
diff --git a/thirdparty/svmlight/Makefile b/thirdparty/svmlight/Makefile new file mode 100755 index 000000000..8b5e19acd --- /dev/null +++ b/thirdparty/svmlight/Makefile @@ -0,0 +1,105 @@ +# +# makefile for svm_light +# +# Thorsten Joachims, 2002 +# + +#Use the following to compile under unix or cygwin +CC = gcc +LD = gcc + +#Uncomment the following line to make CYGWIN produce stand-alone Windows executables +#SFLAGS= -mno-cygwin + +CFLAGS= $(SFLAGS) -O3 # release C-Compiler flags +LFLAGS= $(SFLAGS) -O3 # release linker flags +#CFLAGS= $(SFLAGS) -pg -Wall -pedantic # debugging C-Compiler flags +#LFLAGS= $(SFLAGS) -pg # debugging linker flags +LIBS=-L. -lm # used libraries + +all: svm_learn_hideo svm_classify + +tidy: + rm -f *.o + rm -f pr_loqo/*.o + +clean: tidy + rm -f svm_learn + rm -f svm_classify + rm -f libsvmlight.so + +help: info + +info: + @echo + @echo "make for SVM-light Thorsten Joachims, 1998" + @echo + @echo "Thanks to Ralf Herbrich for the initial version." + @echo + @echo "USAGE: make [svm_learn | svm_learn_loqo | svm_learn_hideo | " + @echo " libsvmlight_hideo | libsvmlight_loqo | " + @echo " svm_classify | all | clean | tidy]" + @echo + @echo " svm_learn builds the learning module (prefers HIDEO)" + @echo " svm_learn_hideo builds the learning module using HIDEO optimizer" + @echo " svm_learn_loqo builds the learning module using PR_LOQO optimizer" + @echo " svm_classify builds the classfication module" + @echo " libsvmlight_hideo builds shared object library that can be linked into" + @echo " other code using HIDEO" + @echo " libsvmlight_loqo builds shared object library that can be linked into" + @echo " other code using PR_LOQO" + @echo " all (default) builds svm_learn + svm_classify" + @echo " clean removes .o and target files" + @echo " tidy removes .o files" + @echo + +# Create executables svm_learn and svm_classify + +svm_learn_hideo: svm_learn_main.o svm_learn.o svm_common.o svm_hideo.o + $(LD) $(LFLAGS) svm_learn_main.o svm_learn.o svm_common.o svm_hideo.o -o svm_learn $(LIBS) + +#svm_learn_loqo: svm_learn_main.o svm_learn.o svm_common.o svm_loqo.o loqo +# $(LD) $(LFLAGS) svm_learn_main.o svm_learn.o svm_common.o svm_loqo.o pr_loqo/pr_loqo.o -o svm_learn $(LIBS) + +svm_classify: svm_classify.o svm_common.o + $(LD) $(LFLAGS) svm_classify.o svm_common.o -o svm_classify $(LIBS) + + +# Create library libsvmlight.so, so that external code can get access to the +# learning and classification functions of svm-light by linking this library. 
+ +svm_learn_hideo_noexe: svm_learn_main.o svm_learn.o svm_common.o svm_hideo.o + +libsvmlight_hideo: svm_learn_main.o svm_learn.o svm_common.o svm_hideo.o + $(LD) -shared svm_learn.o svm_common.o svm_hideo.o -o libsvmlight.so + +#svm_learn_loqo_noexe: svm_learn_main.o svm_learn.o svm_common.o svm_loqo.o loqo + +#libsvmlight_loqo: svm_learn_main.o svm_learn.o svm_common.o svm_loqo.o +# $(LD) -shared svm_learn.o svm_common.o svm_loqo.o pr_loqo/pr_loqo.o -o libsvmlight.so + +# Compile components + +svm_hideo.o: svm_hideo.c + $(CC) -c $(CFLAGS) svm_hideo.c -o svm_hideo.o + +#svm_loqo.o: svm_loqo.c +# $(CC) -c $(CFLAGS) svm_loqo.c -o svm_loqo.o + +svm_common.o: svm_common.c svm_common.h kernel.h + $(CC) -c $(CFLAGS) svm_common.c -o svm_common.o + +svm_learn.o: svm_learn.c svm_common.h + $(CC) -c $(CFLAGS) svm_learn.c -o svm_learn.o + +svm_learn_main.o: svm_learn_main.c svm_learn.h svm_common.h + $(CC) -c $(CFLAGS) svm_learn_main.c -o svm_learn_main.o + +svm_classify.o: svm_classify.c svm_common.h kernel.h + $(CC) -c $(CFLAGS) svm_classify.c -o svm_classify.o + +#loqo: pr_loqo/pr_loqo.o + +#pr_loqo/pr_loqo.o: pr_loqo/pr_loqo.c +# $(CC) -c $(CFLAGS) pr_loqo/pr_loqo.c -o pr_loqo/pr_loqo.o + diff --git a/thirdparty/svmlight/kernel.h b/thirdparty/svmlight/kernel.h new file mode 100755 index 000000000..0133b0066 --- /dev/null +++ b/thirdparty/svmlight/kernel.h @@ -0,0 +1,40 @@ +/************************************************************************/ +/* */ +/* kernel.h */ +/* */ +/* User defined kernel function. Feel free to plug in your own. */ +/* */ +/* Copyright: Thorsten Joachims */ +/* Date: 16.12.97 */ +/* */ +/************************************************************************/ + +/* KERNEL_PARM is defined in svm_common.h The field 'custom' is reserved for */ +/* parameters of the user defined kernel. You can also access and use */ +/* the parameters of the other kernels. Just replace the line + return((double)(1.0)); + with your own kernel. */ + + /* Example: The following computes the polynomial kernel. sprod_ss + computes the inner product between two sparse vectors. + + return((CFLOAT)pow(kernel_parm->coef_lin*sprod_ss(a->words,b->words) + +kernel_parm->coef_const,(double)kernel_parm->poly_degree)); + */ + +/* If you are implementing a kernel that is not based on a + feature/value representation, you might want to make use of the + field "userdefined" in SVECTOR. By default, this field will contain + whatever string you put behind a # sign in the example file. So, if + a line in your training file looks like + + -1 1:3 5:6 #abcdefg + + then the SVECTOR field "words" will contain the vector 1:3 5:6, and + "userdefined" will contain the string "abcdefg". */ + +double custom_kernel(KERNEL_PARM *kernel_parm, SVECTOR *a, SVECTOR *b) + /* plug in you favorite kernel */ +{ + return((double)(1.0)); +} diff --git a/thirdparty/svmlight/svm_classify.c b/thirdparty/svmlight/svm_classify.c new file mode 100755 index 000000000..0b0333b05 --- /dev/null +++ b/thirdparty/svmlight/svm_classify.c @@ -0,0 +1,197 @@ +/***********************************************************************/ +/* */ +/* svm_classify.c */ +/* */ +/* Classification module of Support Vector Machine. */ +/* */ +/* Author: Thorsten Joachims */ +/* Date: 02.07.02 */ +/* */ +/* Copyright (c) 2002 Thorsten Joachims - All rights reserved */ +/* */ +/* This software is available for non-commercial use only. It must */ +/* not be modified and distributed without prior permission of the */ +/* author. 
The author is not responsible for implications from the */ +/* use of this software. */ +/* */ +/************************************************************************/ + +# include "svm_common.h" + +char docfile[200]; +char modelfile[200]; +char predictionsfile[200]; + +void read_input_parameters(int, char **, char *, char *, char *, long *, + long *); +void print_help(void); + + +int main (int argc, char* argv[]) +{ + DOC *doc; /* test example */ + WORD *words; + long max_docs,max_words_doc,lld; + long totdoc=0,queryid,slackid; + long correct=0,incorrect=0,no_accuracy=0; + long res_a=0,res_b=0,res_c=0,res_d=0,wnum,pred_format; + long j; + double t1,runtime=0; + double dist,doc_label,costfactor; + char *line,*comment; + FILE *predfl,*docfl; + MODEL *model; + + read_input_parameters(argc,argv,docfile,modelfile,predictionsfile, + &verbosity,&pred_format); + + nol_ll(docfile,&max_docs,&max_words_doc,&lld); /* scan size of input file */ + max_words_doc+=2; + lld+=2; + + line = (char *)my_malloc(sizeof(char)*lld); + words = (WORD *)my_malloc(sizeof(WORD)*(max_words_doc+10)); + + model=read_model(modelfile); + + if(model->kernel_parm.kernel_type == 0) { /* linear kernel */ + /* compute weight vector */ + add_weight_vector_to_linear_model(model); + } + + if(verbosity>=2) { + printf("Classifying test examples.."); fflush(stdout); + } + + if ((docfl = fopen (docfile, "r")) == NULL) + { perror (docfile); exit (1); } + if ((predfl = fopen (predictionsfile, "w")) == NULL) + { perror (predictionsfile); exit (1); } + + while((!feof(docfl)) && fgets(line,(int)lld,docfl)) { + if(line[0] == '#') continue; /* line contains comments */ + parse_document(line,words,&doc_label,&queryid,&slackid,&costfactor,&wnum, + max_words_doc,&comment); + totdoc++; + if(model->kernel_parm.kernel_type == 0) { /* linear kernel */ + for(j=0;(words[j]).wnum != 0;j++) { /* Check if feature numbers */ + if((words[j]).wnum>model->totwords) /* are not larger than in */ + (words[j]).wnum=0; /* model. Remove feature if */ + } /* necessary. */ + doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0)); + t1=get_runtime(); + dist=classify_example_linear(model,doc); + runtime+=(get_runtime()-t1); + free_example(doc,1); + } + else { /* non-linear kernel */ + doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0)); + t1=get_runtime(); + dist=classify_example(model,doc); + runtime+=(get_runtime()-t1); + free_example(doc,1); + } + if(dist>0) { + if(pred_format==0) { /* old weired output format */ + fprintf(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist); + } + if(doc_label>0) correct++; else incorrect++; + if(doc_label>0) res_a++; else res_b++; + } + else { + if(pred_format==0) { /* old weired output format */ + fprintf(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist); + } + if(doc_label<0) correct++; else incorrect++; + if(doc_label>0) res_c++; else res_d++; + } + if(pred_format==1) { /* output the value of decision function */ + fprintf(predfl,"%.8g\n",dist); + } + if((int)(0.01+(doc_label*doc_label)) != 1) + { no_accuracy=1; } /* test data is not binary labeled */ + if(verbosity>=2) { + if(totdoc % 100 == 0) { + printf("%ld..",totdoc); fflush(stdout); + } + } + } + fclose(predfl); + fclose(docfl); + free(line); + free(words); + free_model(model,1); + + if(verbosity>=2) { + printf("done\n"); + +/* Note by Gary Boone Date: 29 April 2000 */ +/* o Timing is inaccurate. The timer has 0.01 second resolution. */ +/* Because classification of a single vector takes less than */ +/* 0.01 secs, the timer was underflowing. 
 */
+    printf("Runtime (without IO) in cpu-seconds: %.2f\n",
+	   (float)(runtime/100.0));
+
+  }
+  if((!no_accuracy) && (verbosity>=1)) {
+    printf("Accuracy on test set: %.2f%% (%ld correct, %ld incorrect, %ld total)\n",(float)(correct)*100.0/totdoc,correct,incorrect,totdoc);
+    printf("Precision/recall on test set: %.2f%%/%.2f%%\n",(float)(res_a)*100.0/(res_a+res_b),(float)(res_a)*100.0/(res_a+res_c));
+  }
+
+  return(0);
+}
+
+void read_input_parameters(int argc, char **argv, char *docfile,
+			   char *modelfile, char *predictionsfile,
+			   long int *verbosity, long int *pred_format)
+{
+  long i;
+
+  /* set default */
+  strcpy (modelfile, "svm_model");
+  strcpy (predictionsfile, "svm_predictions");
+  (*verbosity)=2;
+  (*pred_format)=1;
+
+  for(i=1;(i<argc) && ((argv[i])[0] == '-');i++) {
+    switch ((argv[i])[1])
+      {
+      case 'h': print_help(); exit(0);
+      case 'v': i++; (*verbosity)=atol(argv[i]); break;
+      case 'f': i++; (*pred_format)=atol(argv[i]); break;
+      default: printf("\nUnrecognized option %s!\n\n",argv[i]);
+	       print_help();
+	       exit(0);
+      }
+  }
+  if((i+1)>=argc) {
+    printf("\nNot enough input parameters!\n\n");
+    print_help();
+    exit(0);
+  }
+  strcpy (docfile, argv[i]);
+  strcpy (modelfile, argv[i+1]);
+  if((i+2)<argc) {
+    strcpy (predictionsfile, argv[i+2]);
+  }
+  if(((*pred_format) != 0) && ((*pred_format) != 1)) {
+    printf("\nOutput format can only be 0 or 1!\n\n");
+    print_help();
+    exit(0);
+  }
+}
+
+void print_help(void)
+{
+  printf("\nSVM-light %s: Support Vector Machine, classification module     %s\n",VERSION,VERSION_DATE);
+  copyright_notice();
+  printf("   usage: svm_classify [options] example_file model_file output_file\n\n");
+  printf("options: -h         -> this help\n");
+  printf("         -v [0..3]  -> verbosity level (default 2)\n");
+  printf("         -f [0,1]   -> 0: old output format of V1.0\n");
+  printf("                    -> 1: output the value of decision function (default)\n\n");
+}
+
+
+
diff --git a/thirdparty/svmlight/svm_common.c b/thirdparty/svmlight/svm_common.c
new file mode 100755
index 000000000..61e72800e
--- /dev/null
+++ b/thirdparty/svmlight/svm_common.c
@@ -0,0 +1,985 @@
+/************************************************************************/
+/*                                                                      */
+/*   svm_common.c                                                       */
+/*                                                                      */
+/*   Definitions and functions used in both svm_learn and svm_classify. */
+/*                                                                      */
+/*   Author: Thorsten Joachims                                          */
+/*   Date: 02.07.04                                                     */
+/*                                                                      */
+/*   Copyright (c) 2004  Thorsten Joachims - All rights reserved        */
+/*                                                                      */
+/*   This software is available for non-commercial use only. It must    */
+/*   not be modified and distributed without prior permission of the    */
+/*   author. The author is not responsible for implications from the    */
+/*   use of this software.                                              */
+/*                                                                      */
+/************************************************************************/
+
+# include "ctype.h"
+# include "svm_common.h"
+# include "kernel.h"           /* this contains a user supplied kernel */
+
+long   verbosity;              /* verbosity level (0-4) */
+long   kernel_cache_statistic;
+
+double classify_example(MODEL *model, DOC *ex)
+     /* classifies one example */
+{
+  register long i;
+  register double dist;
+
+  if((model->kernel_parm.kernel_type == LINEAR) && (model->lin_weights))
+    return(classify_example_linear(model,ex));
+
+  dist=0;
+  for(i=1;i<model->sv_num;i++) {
+    dist+=kernel(&model->kernel_parm,model->supvec[i],ex)*model->alpha[i];
+  }
+  return(dist-model->b);
+}
+
+double classify_example_linear(MODEL *model, DOC *ex)
+     /* classifies example for linear kernel */
+
+     /* important: the model must have the linear weight vector computed */
+     /* use: add_weight_vector_to_linear_model(&model); */
+
+
+     /* important: the feature numbers in the example to classify must */
+     /*            not be larger than the weight vector!
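+
+	Example (a minimal sketch, not part of the original SVM-Light
+	sources): build one sparse example and classify it, assuming a
+	MODEL *model from read_model() on which
+	add_weight_vector_to_linear_model(model) has already been
+	called, and feature numbers within model->totwords:
+
+	  WORD words[3];
+	  words[0].wnum=1; words[0].weight=0.5;
+	  words[1].wnum=3; words[1].weight=2.0;
+	  words[2].wnum=0;           (a zero wnum terminates the vector)
+	  DOC *doc=create_example(-1,0,0,0.0,create_svector(words,"",1.0));
+	  double dist=classify_example_linear(model,doc);
+	  free_example(doc,1);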
 */
+{
+  double sum=0;
+  SVECTOR *f;
+
+  for(f=ex->fvec;f;f=f->next)
+    sum+=f->factor*sprod_ns(model->lin_weights,f);
+  return(sum-model->b);
+}
+
+
+double kernel(KERNEL_PARM *kernel_parm, DOC *a, DOC *b)
+     /* calculate the kernel function */
+{
+  double sum=0;
+  SVECTOR *fa,*fb;
+
+  /* in case the constraints are sums of feature vector as represented
+     as a list of SVECTOR's with their coefficient factor in the sum,
+     take the kernel between all pairs */
+  for(fa=a->fvec;fa;fa=fa->next) {
+    for(fb=b->fvec;fb;fb=fb->next) {
+      if(fa->kernel_id == fb->kernel_id)
+	sum+=fa->factor*fb->factor*single_kernel(kernel_parm,fa,fb);
+    }
+  }
+  return(sum);
+}
+
+double single_kernel(KERNEL_PARM *kernel_parm, SVECTOR *a, SVECTOR *b)
+     /* calculate the kernel function between two vectors */
+{
+  kernel_cache_statistic++;
+  switch(kernel_parm->kernel_type) {
+    case 0: /* linear */
+            return(sprod_ss(a,b));
+    case 1: /* polynomial */
+            return(pow(kernel_parm->coef_lin*sprod_ss(a,b)+kernel_parm->coef_const,(double)kernel_parm->poly_degree));
+    case 2: /* radial basis function */
+            return(exp(-kernel_parm->rbf_gamma*(a->twonorm_sq-2*sprod_ss(a,b)+b->twonorm_sq)));
+    case 3: /* sigmoid neural net */
+            return(tanh(kernel_parm->coef_lin*sprod_ss(a,b)+kernel_parm->coef_const));
+    case 4: /* custom-kernel supplied in file kernel.h*/
+            return(custom_kernel(kernel_parm,a,b));
+    default: printf("Error: Unknown kernel function\n"); exit(1);
+  }
+}
+
+
+SVECTOR *create_svector(WORD *words,char *userdefined,double factor)
+{
+  SVECTOR *vec;
+  long    fnum,i;
+
+  fnum=0;
+  while(words[fnum].wnum) {
+    fnum++;
+  }
+  fnum++;
+  vec = (SVECTOR *)my_malloc(sizeof(SVECTOR));
+  vec->words = (WORD *)my_malloc(sizeof(WORD)*(fnum));
+  for(i=0;i<fnum;i++) {
+    vec->words[i]=words[i];
+  }
+  vec->twonorm_sq=sprod_ss(vec,vec);
+
+  fnum=0;
+  while(userdefined[fnum]) {
+    fnum++;
+  }
+  fnum++;
+  vec->userdefined = (char *)my_malloc(sizeof(char)*(fnum));
+  for(i=0;i<fnum;i++) {
+    vec->userdefined[i]=userdefined[i];
+  }
+  vec->kernel_id=0;
+  vec->next=NULL;
+  vec->factor=factor;
+  return(vec);
+}
+
+SVECTOR *copy_svector(SVECTOR *vec)
+{
+  SVECTOR *newvec=NULL;
+  if(vec) {
+    newvec=create_svector(vec->words,vec->userdefined,vec->factor);
+    newvec->next=copy_svector(vec->next);
+  }
+  return(newvec);
+}
+
+void free_svector(SVECTOR *vec)
+{
+  if(vec) {
+    free(vec->words);
+    if(vec->userdefined)
+      free(vec->userdefined);
+    free_svector(vec->next);
+    free(vec);
+  }
+}
+
+double sprod_ss(SVECTOR *a, SVECTOR *b)
+     /* compute the inner product of two sparse vectors */
+{
+    register double sum=0;
+    register WORD *ai,*bj;
+    ai=a->words;
+    bj=b->words;
+    while (ai->wnum && bj->wnum) {
+      if(ai->wnum > bj->wnum) {
+	bj++;
+      }
+      else if (ai->wnum < bj->wnum) {
+	ai++;
+      }
+      else {
+	sum+=(ai->weight) * (bj->weight);
+	ai++;
+	bj++;
+      }
+    }
+    return((double)sum);
+}
+
+SVECTOR* sub_ss(SVECTOR *a, SVECTOR *b)
+     /* compute the difference a-b of two sparse vectors */
+     /* Note: SVECTOR lists are not followed, but only the first
+	SVECTOR is used */
+{
+    SVECTOR *vec;
+    register WORD *sum,*sumi;
+    register WORD *ai,*bj;
+    long veclength;
+
+    ai=a->words;
+    bj=b->words;
+    veclength=0;
+    while (ai->wnum && bj->wnum) {
+      if(ai->wnum > bj->wnum) {
+	veclength++;
+	bj++;
+      }
+      else if (ai->wnum < bj->wnum) {
+	veclength++;
+	ai++;
+      }
+      else {
+	veclength++;
+	ai++;
+	bj++;
+      }
+    }
+    while (bj->wnum) {
+      veclength++;
+      bj++;
+    }
+    while (ai->wnum) {
+      veclength++;
+      ai++;
+    }
+    veclength++;
+
+    sum=(WORD *)my_malloc(sizeof(WORD)*veclength);
+    sumi=sum;
+    ai=a->words;
+    bj=b->words;
+    while (ai->wnum &&
bj->wnum) { + if(ai->wnum > bj->wnum) { + (*sumi)=(*bj); + sumi->weight*=(-1); + sumi++; + bj++; + } + else if (ai->wnum < bj->wnum) { + (*sumi)=(*ai); + sumi++; + ai++; + } + else { + (*sumi)=(*ai); + sumi->weight-=bj->weight; + if(sumi->weight != 0) + sumi++; + ai++; + bj++; + } + } + while (bj->wnum) { + (*sumi)=(*bj); + sumi->weight*=(-1); + sumi++; + bj++; + } + while (ai->wnum) { + (*sumi)=(*ai); + sumi++; + ai++; + } + sumi->wnum=0; + + vec=create_svector(sum,"",1.0); + free(sum); + + return(vec); +} + +SVECTOR* add_ss(SVECTOR *a, SVECTOR *b) + /* compute the sum a+b of two sparse vectors */ + /* Note: SVECTOR lists are not followed, but only the first + SVECTOR is used */ +{ + SVECTOR *vec; + register WORD *sum,*sumi; + register WORD *ai,*bj; + long veclength; + + ai=a->words; + bj=b->words; + veclength=0; + while (ai->wnum && bj->wnum) { + if(ai->wnum > bj->wnum) { + veclength++; + bj++; + } + else if (ai->wnum < bj->wnum) { + veclength++; + ai++; + } + else { + veclength++; + ai++; + bj++; + } + } + while (bj->wnum) { + veclength++; + bj++; + } + while (ai->wnum) { + veclength++; + ai++; + } + veclength++; + + /*** is veclength=lengSequence(a)+lengthSequence(b)? ***/ + + sum=(WORD *)my_malloc(sizeof(WORD)*veclength); + sumi=sum; + ai=a->words; + bj=b->words; + while (ai->wnum && bj->wnum) { + if(ai->wnum > bj->wnum) { + (*sumi)=(*bj); + sumi++; + bj++; + } + else if (ai->wnum < bj->wnum) { + (*sumi)=(*ai); + sumi++; + ai++; + } + else { + (*sumi)=(*ai); + sumi->weight+=bj->weight; + if(sumi->weight != 0) + sumi++; + ai++; + bj++; + } + } + while (bj->wnum) { + (*sumi)=(*bj); + sumi++; + bj++; + } + while (ai->wnum) { + (*sumi)=(*ai); + sumi++; + ai++; + } + sumi->wnum=0; + + vec=create_svector(sum,"",1.0); + free(sum); + + return(vec); +} + +SVECTOR* add_list_ss(SVECTOR *a) + /* computes the linear combination of the SVECTOR list weighted + by the factor of each SVECTOR */ +{ + SVECTOR *scaled,*oldsum,*sum,*f; + WORD empty[2]; + + if(a){ + sum=smult_s(a,a->factor); + for(f=a->next;f;f=f->next) { + scaled=smult_s(f,f->factor); + oldsum=sum; + sum=add_ss(sum,scaled); + free_svector(oldsum); + free_svector(scaled); + } + sum->factor=1.0; + } + else { + empty[0].wnum=0; + sum=create_svector(empty,"",1.0); + } + return(sum); +} + +void append_svector_list(SVECTOR *a, SVECTOR *b) + /* appends SVECTOR b to the end of SVECTOR a. 
 */
+{
+  SVECTOR *f;
+
+  for(f=a;f->next;f=f->next);  /* find end of first vector list */
+  f->next=b;                   /* append the two vector lists */
+}
+
+SVECTOR* smult_s(SVECTOR *a, double factor)
+     /* scale sparse vector a by factor */
+{
+    SVECTOR *vec;
+    register WORD *sum,*sumi;
+    register WORD *ai;
+    long veclength;
+
+    ai=a->words;
+    veclength=0;
+    while (ai->wnum) {
+      veclength++;
+      ai++;
+    }
+    veclength++;
+
+    sum=(WORD *)my_malloc(sizeof(WORD)*veclength);
+    sumi=sum;
+    ai=a->words;
+    while (ai->wnum) {
+      (*sumi)=(*ai);
+      sumi->weight*=factor;
+      if(sumi->weight != 0)
+	sumi++;
+      ai++;
+    }
+    sumi->wnum=0;
+
+    vec=create_svector(sum,a->userdefined,a->factor);
+    free(sum);
+
+    return(vec);
+}
+
+int featvec_eq(SVECTOR *a, SVECTOR *b)
+     /* tests two sparse vectors for equality */
+{
+    register WORD *ai,*bj;
+    ai=a->words;
+    bj=b->words;
+    while (ai->wnum && bj->wnum) {
+      if(ai->wnum > bj->wnum) {
+	if((bj->weight) != 0)
+	  return(0);
+	bj++;
+      }
+      else if (ai->wnum < bj->wnum) {
+	if((ai->weight) != 0)
+	  return(0);
+	ai++;
+      }
+      else {
+	if((ai->weight) != (bj->weight))
+	  return(0);
+	ai++;
+	bj++;
+      }
+    }
+    return(1);
+}
+
+double model_length_s(MODEL *model, KERNEL_PARM *kernel_parm)
+     /* compute length of weight vector */
+{
+  register long i,j;
+  register double sum=0,alphai;
+  register DOC *supveci;
+
+  for(i=1;i<model->sv_num;i++) {
+    alphai=model->alpha[i];
+    supveci=model->supvec[i];
+    for(j=1;j<model->sv_num;j++) {
+      sum+=alphai*model->alpha[j]
+	   *kernel(kernel_parm,supveci,model->supvec[j]);
+    }
+  }
+  return(sqrt(sum));
+}
+
+void clear_vector_n(double *vec, long int n)
+{
+  register long i;
+  for(i=0;i<=n;i++) vec[i]=0;
+}
+
+void add_vector_ns(double *vec_n, SVECTOR *vec_s, double faktor)
+{
+  register WORD *ai;
+  ai=vec_s->words;
+  while (ai->wnum) {
+    vec_n[ai->wnum]+=(faktor*ai->weight);
+    ai++;
+  }
+}
+
+double sprod_ns(double *vec_n, SVECTOR *vec_s)
+{
+  register double sum=0;
+  register WORD *ai;
+  ai=vec_s->words;
+  while (ai->wnum) {
+    sum+=(vec_n[ai->wnum]*ai->weight);
+    ai++;
+  }
+  return(sum);
+}
+
+void add_weight_vector_to_linear_model(MODEL *model)
+     /* compute weight vector in linear case and add to model */
+{
+  long i;
+  SVECTOR *f;
+
+  model->lin_weights=(double *)my_malloc(sizeof(double)*(model->totwords+1));
+  clear_vector_n(model->lin_weights,model->totwords);
+  for(i=1;i<model->sv_num;i++) {
+    for(f=(model->supvec[i])->fvec;f;f=f->next)
+      add_vector_ns(model->lin_weights,f,f->factor*model->alpha[i]);
+  }
+}
+
+
+DOC *create_example(long docnum, long queryid, long slackid,
+		    double costfactor, SVECTOR *fvec)
+{
+  DOC *example;
+  example = (DOC *)my_malloc(sizeof(DOC));
+  example->docnum=docnum;
+  example->queryid=queryid;
+  example->slackid=slackid;
+  example->costfactor=costfactor;
+  example->fvec=fvec;
+  return(example);
+}
+
+void free_example(DOC *example, long deep)
+{
+  if(example) {
+    if(deep) {
+      if(example->fvec)
+	free_svector(example->fvec);
+    }
+    free(example);
+  }
+}
+
+void write_model(char *modelfile, MODEL *model)
+{
+  FILE *modelfl;
+  long j,i,sv_num;
+  SVECTOR *v;
+
+  if(verbosity>=1) {
+    printf("Writing model file..."); fflush(stdout);
+  }
+  if ((modelfl = fopen (modelfile, "w")) == NULL)
+  { perror (modelfile); exit (1); }
+  fprintf(modelfl,"SVM-light Version %s\n",VERSION);
+  fprintf(modelfl,"%ld # kernel type\n",
+	  model->kernel_parm.kernel_type);
+  fprintf(modelfl,"%ld # kernel parameter -d \n",
+	  model->kernel_parm.poly_degree);
+  fprintf(modelfl,"%.8g # kernel parameter -g \n",
+	  model->kernel_parm.rbf_gamma);
+  fprintf(modelfl,"%.8g # kernel parameter -s \n",
+	  model->kernel_parm.coef_lin);
+  fprintf(modelfl,"%.8g # kernel parameter -r \n",
+	  model->kernel_parm.coef_const);
+  fprintf(modelfl,"%s# kernel parameter -u \n",model->kernel_parm.custom);
+  fprintf(modelfl,"%ld # highest feature index \n",model->totwords);
+  fprintf(modelfl,"%ld # number of training documents \n",model->totdoc);
+
+  sv_num=1;
+  for(i=1;i<model->sv_num;i++) {
+    for(v=model->supvec[i]->fvec;v;v=v->next)
+      sv_num++;
+  }
+  fprintf(modelfl,"%ld # number of support vectors plus 1 \n",sv_num);
+  fprintf(modelfl,"%.8g # threshold b, each following line is a SV (starting with alpha*y)\n",model->b);
+
+  for(i=1;i<model->sv_num;i++) {
+    for(v=model->supvec[i]->fvec;v;v=v->next) {
+      fprintf(modelfl,"%.32g ",model->alpha[i]*v->factor);
+      for (j=0; (v->words[j]).wnum; j++) {
+	fprintf(modelfl,"%ld:%.8g ",
+		(long)(v->words[j]).wnum,
+		(double)(v->words[j]).weight);
+      }
+      fprintf(modelfl,"#%s\n",v->userdefined);
+    /* NOTE: this could be made more efficient by summing the
+       alpha's of identical vectors before writing them to the
+       file. */
+    }
+  }
+  fclose(modelfl);
+  if(verbosity>=1) {
+    printf("done\n");
+  }
+}
+
+
+MODEL *read_model(char *modelfile)
+{
+  FILE *modelfl;
+  long i,queryid,slackid;
+  double costfactor;
+  long max_sv,max_words,ll,wpos;
+  char *line,*comment;
+  WORD *words;
+  char version_buffer[100];
+  MODEL *model;
+
+  if(verbosity>=1) {
+    printf("Reading model..."); fflush(stdout);
+  }
+
+  nol_ll(modelfile,&max_sv,&max_words,&ll); /* scan size of model file */
+  max_words+=2;
+  ll+=2;
+
+  words = (WORD *)my_malloc(sizeof(WORD)*(max_words+10));
+  line = (char *)my_malloc(sizeof(char)*ll);
+  model = (MODEL *)my_malloc(sizeof(MODEL));
+
+  if ((modelfl = fopen (modelfile, "r")) == NULL)
+  { perror (modelfile); exit (1); }
+
+  fscanf(modelfl,"SVM-light Version %s\n",version_buffer);
+  if(strcmp(version_buffer,VERSION)) {
+    perror ("Version of model-file does not match version of svm_classify!");
+    exit (1);
+  }
+  fscanf(modelfl,"%ld%*[^\n]\n", &model->kernel_parm.kernel_type);
+  fscanf(modelfl,"%ld%*[^\n]\n", &model->kernel_parm.poly_degree);
+  fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.rbf_gamma);
+  fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.coef_lin);
+  fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.coef_const);
+  fscanf(modelfl,"%[^#]%*[^\n]\n", model->kernel_parm.custom);
+
+  fscanf(modelfl,"%ld%*[^\n]\n", &model->totwords);
+  fscanf(modelfl,"%ld%*[^\n]\n", &model->totdoc);
+  fscanf(modelfl,"%ld%*[^\n]\n", &model->sv_num);
+  fscanf(modelfl,"%lf%*[^\n]\n", &model->b);
+
+  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*model->sv_num);
+  model->alpha = (double *)my_malloc(sizeof(double)*model->sv_num);
+  model->index=NULL;
+  model->lin_weights=NULL;
+
+  for(i=1;i<model->sv_num;i++) {
+    fgets(line,(int)ll,modelfl);
+    if(!parse_document(line,words,&(model->alpha[i]),&queryid,&slackid,
+		       &costfactor,&wpos,max_words,&comment)) {
+      printf("\nParsing error while reading model file in SV %ld!\n%s",
+	     i,line);
+      exit(1);
+    }
+    model->supvec[i] = create_example(-1,
+				      0,0,
+				      0.0,
+				      create_svector(words,comment,1.0));
+  }
+  fclose(modelfl);
+  free(line);
+  free(words);
+  if(verbosity>=1) {
+    fprintf(stdout, "OK. (%d support vectors read)\n",(int)(model->sv_num-1));
+  }
+  return(model);
+}
+
+MODEL *copy_model(MODEL *model)
+{
+  MODEL *newmodel;
+  long i;
+
+  newmodel=(MODEL *)my_malloc(sizeof(MODEL));
+  (*newmodel)=(*model);
+  newmodel->supvec = (DOC **)my_malloc(sizeof(DOC *)*model->sv_num);
+  newmodel->alpha = (double *)my_malloc(sizeof(double)*model->sv_num);
+  newmodel->index = NULL; /* index is not copied */
+  newmodel->supvec[0] = NULL;
+  newmodel->alpha[0] = 0;
+  for(i=1;i<model->sv_num;i++) {
+    newmodel->alpha[i]=model->alpha[i];
+    newmodel->supvec[i]=create_example(model->supvec[i]->docnum,
+				       model->supvec[i]->queryid,0,
+				       model->supvec[i]->costfactor,
+				       copy_svector(model->supvec[i]->fvec));
+  }
+  if(model->lin_weights) {
+    newmodel->lin_weights = (double *)my_malloc(sizeof(double)*(model->totwords+1));
+    for(i=0;i<model->totwords+1;i++)
+      newmodel->lin_weights[i]=model->lin_weights[i];
+  }
+  return(newmodel);
+}
+
+void free_model(MODEL *model, int deep)
+{
+  long i;
+
+  if(model->supvec) {
+    if(deep) {
+      for(i=1;i<model->sv_num;i++) {
+	free_example(model->supvec[i],1);
+      }
+    }
+    free(model->supvec);
+  }
+  if(model->alpha) free(model->alpha);
+  if(model->index) free(model->index);
+  if(model->lin_weights) free(model->lin_weights);
+  free(model);
+}
+
+
+void read_documents(char *docfile, DOC ***docs, double **label,
+		    long int *totwords, long int *totdoc)
+{
+  char *line,*comment;
+  WORD *words;
+  long dnum=0,wpos,dpos=0,dneg=0,dunlab=0,queryid,slackid,max_docs;
+  long max_words_doc, ll;
+  double doc_label,costfactor;
+  FILE *docfl;
+
+  if(verbosity>=1) {
+    printf("Scanning examples..."); fflush(stdout);
+  }
+  nol_ll(docfile,&max_docs,&max_words_doc,&ll); /* scan size of input file */
+  max_words_doc+=2;
+  ll+=2;
+  max_docs+=2;
+  if(verbosity>=1) {
+    printf("done\n"); fflush(stdout);
+  }
+
+  (*docs) = (DOC **)my_malloc(sizeof(DOC *)*max_docs);     /* feature vectors */
+  (*label) = (double *)my_malloc(sizeof(double)*max_docs); /* target values */
+  line = (char *)my_malloc(sizeof(char)*ll);
+
+  if ((docfl = fopen (docfile, "r")) == NULL)
+  { perror (docfile); exit (1); }
+
+  words = (WORD *)my_malloc(sizeof(WORD)*(max_words_doc+10));
+  if(verbosity>=1) {
+    printf("Reading examples into memory..."); fflush(stdout);
+  }
+  dnum=0;
+  (*totwords)=0;
+  while((!feof(docfl)) && fgets(line,(int)ll,docfl)) {
+    if(line[0] == '#') continue;  /* line contains comments */
+    if(!parse_document(line,words,&doc_label,&queryid,&slackid,&costfactor,
+		       &wpos,max_words_doc,&comment)) {
+      printf("\nParsing error in line %ld!\n%s",dnum,line);
+      exit(1);
+    }
+    (*label)[dnum]=doc_label;
+    /* printf("docnum=%ld: Class=%f ",dnum,doc_label); */
+    if(doc_label > 0) dpos++;
+    if (doc_label < 0) dneg++;
+    if (doc_label == 0) dunlab++;
+    if((wpos>1) && ((words[wpos-2]).wnum>(*totwords)))
+      (*totwords)=(words[wpos-2]).wnum;
+    if((*totwords) > MAXFEATNUM) {
+      printf("\nMaximum feature number exceeds limit defined in MAXFEATNUM!\n");
+      printf("LINE: %s\n",line);
+      exit(1);
+    }
+    (*docs)[dnum] = create_example(dnum,queryid,slackid,costfactor,
+				   create_svector(words,comment,1.0));
+    /* printf("\nNorm=%f\n",((*docs)[dnum]->fvec)->twonorm_sq); */
+    dnum++;
+    if(verbosity>=1) {
+      if((dnum % 100) == 0) {
+	printf("%ld..",dnum); fflush(stdout);
+      }
+    }
+  }
+
+  fclose(docfl);
+  free(line);
+  free(words);
+  if(verbosity>=1) {
+    fprintf(stdout, "OK. (%ld examples read)\n", dnum);
+  }
+  (*totdoc)=dnum;
+}
+
+int parse_document(char *line, WORD *words, double *label,
+		   long *queryid, long *slackid, double *costfactor,
+		   long int *numwords, long int max_words_doc,
+		   char **comment)
+{
+  register long wpos,pos;
+  long wnum;
+  double weight;
+  int numread;
+  char featurepair[1000],junk[1000];
+
+  (*queryid)=0;
+  (*slackid)=0;
+  (*costfactor)=1;
+
+  pos=0;
+  (*comment)=NULL;
+  while(line[pos] ) {      /* cut off comments */
+    if((line[pos] == '#') && (!(*comment))) {
+      line[pos]=0;
+      (*comment)=&(line[pos+1]);
+    }
+    if(line[pos] == '\n') { /* strip the CR */
+      line[pos]=0;
+    }
+    pos++;
+  }
+  if(!(*comment)) (*comment)=&(line[pos]);
+  /* printf("Comment: '%s'\n",(*comment)); */
+
+  wpos=0;
+  /* check, that line starts with target value or zero, but not with
+     feature pair */
+  if(sscanf(line,"%s",featurepair) == EOF) return(0);
+  pos=0;
+  while((featurepair[pos] != ':') && featurepair[pos]) pos++;
+  if(featurepair[pos] == ':') {
+	perror ("Line must start with label or 0!!!\n");
+	printf("LINE: %s\n",line);
+	exit (1);
+  }
+  /* read the target value */
+  if(sscanf(line,"%lf",label) == EOF) return(0);
+  pos=0;
+  while(space_or_null((int)line[pos])) pos++;
+  while((!space_or_null((int)line[pos])) && line[pos]) pos++;
+  while(((numread=sscanf(line+pos,"%s",featurepair)) != EOF) &&
+	(numread > 0) &&
+	(wpos<max_words_doc)) {
+    /* printf("%s\n",featurepair); */
+    while(space_or_null((int)line[pos])) pos++;
+    while((!space_or_null((int)line[pos])) && line[pos]) pos++;
+    if(sscanf(featurepair,"qid:%ld%s",&wnum,junk)==1) {
+      /* it is the query id */
+      (*queryid)=(long)wnum;
+    }
+    else if(sscanf(featurepair,"sid:%ld%s",&wnum,junk)==1) {
+      /* it is the slack id */
+      if(wnum > 0)
+	(*slackid)=(long)wnum;
+      else {
+	perror ("Slack-id must be greater or equal to 1!!!\n");
+	printf("LINE: %s\n",line);
+	exit (1);
+      }
+    }
+    else if(sscanf(featurepair,"cost:%lf%s",&weight,junk)==1) {
+      /* it is the example-dependent cost factor */
+      (*costfactor)=(double)weight;
+    }
+    else if(sscanf(featurepair,"%ld:%lf%s",&wnum,&weight,junk)==2) {
+      /* it is a regular feature */
+      if(wnum<=0) {
+	perror ("Feature numbers must be larger or equal to 1!!!\n");
+	printf("LINE: %s\n",line);
+	exit (1);
+      }
+      if((wpos>0) && ((words[wpos-1]).wnum >= wnum)) {
+	perror ("Features must be in increasing order!!!\n");
+	printf("LINE: %s\n",line);
+	exit (1);
+      }
+      (words[wpos]).wnum=wnum;
+      (words[wpos]).weight=(FVAL)weight;
+      wpos++;
+    }
+    else {
+      perror ("Cannot parse feature/value pair!!!\n");
+      printf("'%s' in LINE: %s\n",featurepair,line);
+      exit (1);
+    }
+  }
+  (words[wpos]).wnum=0;
+  (*numwords)=wpos+1;
+  return(1);
+}
+
+double *read_alphas(char *alphafile,long totdoc)
+     /* reads the alpha vector from a file as written by the
+        write_alphas function */
+{
+  FILE *fl;
+  double *alpha;
+  long dnum;
+
+  if ((fl = fopen (alphafile, "r")) == NULL)
+  { perror (alphafile); exit (1); }
+
+  alpha = (double *)my_malloc(sizeof(double)*totdoc);
+  if(verbosity>=1) {
+    printf("Reading alphas..."); fflush(stdout);
+  }
+  dnum=0;
+  while((!feof(fl)) && fscanf(fl,"%lf\n",&alpha[dnum]) && (dnum<totdoc)) {
+    dnum++;
+  }
+  fclose(fl);
+
+  if(verbosity>=1) {
+    printf("done\n"); fflush(stdout);
+  }
+
+  return(alpha);
+}
+
+void nol_ll(char *file, long int *nol, long int *wol, long int *ll)
+     /* Grep through file and count number of lines, maximum number of
+        spaces per line, and longest line.
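+
+	For reference, a typical line in the example files scanned
+	here, as accepted by parse_document() above (the numbers are
+	made up for illustration):
+
+	  +1 qid:3 cost:2.0 1:0.43 31:0.12 9284:0.2 # abcdef
+
+	The target value comes first, then the optional qid/sid/cost
+	special features, then feature:value pairs with strictly
+	increasing feature numbers, and an optional comment after the
+	# sign.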
 */
+{
+  FILE *fl;
+  int ic;
+  char c;
+  long current_length,current_wol;
+
+  if ((fl = fopen (file, "r")) == NULL)
+  { perror (file); exit (1); }
+  current_length=0;
+  current_wol=0;
+  (*ll)=0;
+  (*nol)=1;
+  (*wol)=0;
+  while((ic=getc(fl)) != EOF) {
+    c=(char)ic;
+    current_length++;
+    if(space_or_null((int)c)) {
+      current_wol++;
+    }
+    if(c == '\n') {
+      (*nol)++;
+      if(current_length>(*ll)) {
+	(*ll)=current_length;
+      }
+      if(current_wol>(*wol)) {
+	(*wol)=current_wol;
+      }
+      current_length=0;
+      current_wol=0;
+    }
+  }
+  fclose(fl);
+}
+
+long minl(long int a, long int b)
+{
+  if(a<b)
+    return(a);
+  else
+    return(b);
+}
+
+long maxl(long int a, long int b)
+{
+  if(a>b)
+    return(a);
+  else
+    return(b);
+}
+
+long get_runtime(void)
+{
+  clock_t start;
+  start = clock();
+  return((long)((double)start*100.0/(double)CLOCKS_PER_SEC));
+}
+
+
+# ifdef _MSC_VER
+
+int isnan(double a)
+{
+  return(_isnan(a));
+}
+
+# endif
+
+int space_or_null(int c) {
+  if (c==0)
+    return 1;
+  return isspace((unsigned char)c);
+}
+
+void *my_malloc(size_t size)
+{
+  void *ptr;
+  if(size<=0) size=1; /* for AIX compatibility */
+  ptr=(void *)malloc(size);
+  if(!ptr) {
+    perror ("Out of memory!\n");
+    exit (1);
+  }
+  return(ptr);
+}
+
+void copyright_notice(void)
+{
+  printf("\nCopyright: Thorsten Joachims, thorsten@joachims.org\n\n");
+  printf("This software is available for non-commercial use only. It must not\n");
+  printf("be modified and distributed without prior permission of the author.\n");
+  printf("The author is not responsible for implications from the use of this\n");
+  printf("software.\n\n");
+}
diff --git a/thirdparty/svmlight/svm_common.h b/thirdparty/svmlight/svm_common.h
new file mode 100755
index 000000000..6487fa1d1
--- /dev/null
+++ b/thirdparty/svmlight/svm_common.h
@@ -0,0 +1,301 @@
+/************************************************************************/
+/*                                                                      */
+/*   svm_common.h                                                       */
+/*                                                                      */
+/*   Definitions and functions used in both svm_learn and svm_classify. */
+/*                                                                      */
+/*   Author: Thorsten Joachims                                          */
+/*   Date: 02.07.02                                                     */
+/*                                                                      */
+/*   Copyright (c) 2002  Thorsten Joachims - All rights reserved        */
+/*                                                                      */
+/*   This software is available for non-commercial use only. It must    */
+/*   not be modified and distributed without prior permission of the    */
+/*   author. The author is not responsible for implications from the    */
+/*   use of this software.                                              */
+/*                                                                      */
+/************************************************************************/
+
+#ifndef SVM_COMMON
+#define SVM_COMMON
+
+# include <stdio.h>
+# include <ctype.h>
+# include <math.h>
+# include <string.h>
+# include <stdlib.h>
+# include <time.h>
+# include <float.h>
+
+# define VERSION       "V6.02"
+# define VERSION_DATE  "14.08.08"
+
+# define CFLOAT  float       /* the type of float to use for caching */
+                             /* kernel evaluations. Using float saves */
+                             /* us some memory, but you can use double, too */
+# define FNUM    long        /* the type used for storing feature ids */
+# define FVAL    float       /* the type used for storing feature values */
+# define MAXFEATNUM 99999999 /* maximum feature number (must be in
+			        valid range of FNUM type and long int!)
*/ + +# define LINEAR 0 /* linear kernel type */ +# define POLY 1 /* polynoial kernel type */ +# define RBF 2 /* rbf kernel type */ +# define SIGMOID 3 /* sigmoid kernel type */ + +# define CLASSIFICATION 1 /* train classification model */ +# define REGRESSION 2 /* train regression model */ +# define RANKING 3 /* train ranking model */ +# define OPTIMIZATION 4 /* train on general set of constraints */ + +# define MAXSHRINK 50000 /* maximum number of shrinking rounds */ + +typedef struct word { + FNUM wnum; /* word number */ + FVAL weight; /* word weight */ +} WORD; + +typedef struct svector { + WORD *words; /* The features/values in the vector by + increasing feature-number. Feature + numbers that are skipped are + interpreted as having value zero. */ + double twonorm_sq; /* The squared euclidian length of the + vector. Used to speed up the RBF kernel. */ + char *userdefined; /* You can put additional information + here. This can be useful, if you are + implementing your own kernel that + does not work with feature/values + representations (for example a + string kernel). By default, + svm-light will put here the string + after the # sign from each line of + the input file. */ + long kernel_id; /* Feature vectors with different + kernel_id's are orthogonal (ie. the + feature number do not match). This + is used for computing component + kernels for linear constraints which + are a sum of several different + weight vectors. (currently not + implemented). */ + struct svector *next; /* Let's you set up a list of SVECTOR's + for linear constraints which are a + sum of multiple feature + vectors. List is terminated by + NULL. */ + double factor; /* Factor by which this feature vector + is multiplied in the sum. */ +} SVECTOR; + +typedef struct doc { + long docnum; /* Document ID. This has to be the position of + the document in the training set array. */ + long queryid; /* for learning rankings, constraints are + generated for documents with the same + queryID. */ + double costfactor; /* Scales the cost of misclassifying this + document by this factor. The effect of this + value is, that the upper bound on the alpha + for this example is scaled by this factor. + The factors are set by the feature + 'cost:' in the training data. */ + long slackid; /* Index of the slack variable + corresponding to this + constraint. All constraints with the + same slackid share the same slack + variable. This can only be used for + svm_learn_optimization. */ + SVECTOR *fvec; /* Feature vector of the example. The + feature vector can actually be a + list of feature vectors. For + example, the list will have two + elements, if this DOC is a + preference constraint. The one + vector that is supposed to be ranked + higher, will have a factor of +1, + the lower ranked one should have a + factor of -1. */ +} DOC; + +typedef struct learn_parm { + long type; /* selects between regression and + classification */ + double svm_c; /* upper bound C on alphas */ + double eps; /* regression epsilon (eps=1.0 for + classification */ + double svm_costratio; /* factor to multiply C for positive examples */ + double transduction_posratio;/* fraction of unlabeled examples to be */ + /* classified as positives */ + long biased_hyperplane; /* if nonzero, use hyperplane w*x+b=0 + otherwise w*x=0 */ + long sharedslack; /* if nonzero, it will use the shared + slack variable mode in + svm_learn_optimization. 
It requires + that the slackid is set for every + training example */ + long svm_maxqpsize; /* size q of working set */ + long svm_newvarsinqp; /* new variables to enter the working set + in each iteration */ + long kernel_cache_size; /* size of kernel cache in megabytes */ + double epsilon_crit; /* tolerable error for distances used + in stopping criterion */ + double epsilon_shrink; /* how much a multiplier should be above + zero for shrinking */ + long svm_iter_to_shrink; /* iterations h after which an example can + be removed by shrinking */ + long maxiter; /* number of iterations after which the + optimizer terminates, if there was + no progress in maxdiff */ + long remove_inconsistent; /* exclude examples with alpha at C and + retrain */ + long skip_final_opt_check; /* do not check KT-Conditions at the end of + optimization for examples removed by + shrinking. WARNING: This might lead to + sub-optimal solutions! */ + long compute_loo; /* if nonzero, computes leave-one-out + estimates */ + double rho; /* parameter in xi/alpha-estimates and for + pruning leave-one-out range [1..2] */ + long xa_depth; /* parameter in xi/alpha-estimates upper + bounding the number of SV the current + alpha_t is distributed over */ + char predfile[200]; /* file for predicitions on unlabeled examples + in transduction */ + char alphafile[200]; /* file to store optimal alphas in. use + empty string if alphas should not be + output */ + + /* you probably do not want to touch the following */ + double epsilon_const; /* tolerable error on eq-constraint */ + double epsilon_a; /* tolerable error on alphas at bounds */ + double opt_precision; /* precision of solver, set to e.g. 1e-21 + if you get convergence problems */ + + /* the following are only for internal use */ + long svm_c_steps; /* do so many steps for finding optimal C */ + double svm_c_factor; /* increase C by this factor every step */ + double svm_costratio_unlab; + double svm_unlabbound; + double *svm_cost; /* individual upper bounds for each var */ + long totwords; /* number of features */ +} LEARN_PARM; + +typedef struct kernel_parm { + long kernel_type; /* 0=linear, 1=poly, 2=rbf, 3=sigmoid, 4=custom */ + long poly_degree; + double rbf_gamma; + double coef_lin; + double coef_const; + char custom[50]; /* for user supplied kernel */ +} KERNEL_PARM; + +typedef struct model { + long sv_num; + long at_upper_bound; + double b; + DOC **supvec; + double *alpha; + long *index; /* index from docnum to position in model */ + long totwords; /* number of features */ + long totdoc; /* number of training documents */ + KERNEL_PARM kernel_parm; /* kernel */ + + /* the following values are not written to file */ + double loo_error,loo_recall,loo_precision; /* leave-one-out estimates */ + double xa_error,xa_recall,xa_precision; /* xi/alpha estimates */ + double *lin_weights; /* weights for linear case using + folding */ + double maxdiff; /* precision, up to which this + model is accurate */ +} MODEL; + +typedef struct quadratic_program { + long opt_n; /* number of variables */ + long opt_m; /* number of linear equality constraints */ + double *opt_ce,*opt_ce0; /* linear equality constraints */ + double *opt_g; /* hessian of objective */ + double *opt_g0; /* linear part of objective */ + double *opt_xinit; /* initial value for variables */ + double *opt_low,*opt_up; /* box constraints */ +} QP; + +typedef struct kernel_cache { + long *index; /* cache some kernel evalutations */ + CFLOAT *buffer; /* to improve speed */ + long *invindex; + long *active2totdoc; + long 
*totdoc2active; + long *lru; + long *occu; + long elems; + long max_elems; + long time; + long activenum; + long buffsize; +} KERNEL_CACHE; + + +typedef struct timing_profile { + long time_kernel; + long time_opti; + long time_shrink; + long time_update; + long time_model; + long time_check; + long time_select; +} TIMING; + +typedef struct shrink_state { + long *active; + long *inactive_since; + long deactnum; + double **a_history; /* for shrinking with non-linear kernel */ + long maxhistory; + double *last_a; /* for shrinking with linear kernel */ + double *last_lin; /* for shrinking with linear kernel */ +} SHRINK_STATE; + +double classify_example(MODEL *, DOC *); +double classify_example_linear(MODEL *, DOC *); +double kernel(KERNEL_PARM *, DOC *, DOC *); +double single_kernel(KERNEL_PARM *, SVECTOR *, SVECTOR *); +double custom_kernel(KERNEL_PARM *, SVECTOR *, SVECTOR *); +SVECTOR *create_svector(WORD *, char *, double); +SVECTOR *copy_svector(SVECTOR *); +void free_svector(SVECTOR *); +double sprod_ss(SVECTOR *, SVECTOR *); +SVECTOR* sub_ss(SVECTOR *, SVECTOR *); +SVECTOR* add_ss(SVECTOR *, SVECTOR *); +SVECTOR* add_list_ss(SVECTOR *); +void append_svector_list(SVECTOR *a, SVECTOR *b); +SVECTOR* smult_s(SVECTOR *, double); +int featvec_eq(SVECTOR *, SVECTOR *); +double model_length_s(MODEL *, KERNEL_PARM *); +void clear_vector_n(double *, long); +void add_vector_ns(double *, SVECTOR *, double); +double sprod_ns(double *, SVECTOR *); +void add_weight_vector_to_linear_model(MODEL *); +DOC *create_example(long, long, long, double, SVECTOR *); +void free_example(DOC *, long); +MODEL *read_model(char *); +MODEL *copy_model(MODEL *); +void free_model(MODEL *, int); +void read_documents(char *, DOC ***, double **, long *, long *); +int parse_document(char *, WORD *, double *, long *, long *, double *, long *, long, char **); +double *read_alphas(char *,long); +void nol_ll(char *, long *, long *, long *); +long minl(long, long); +long maxl(long, long); +long get_runtime(void); +int space_or_null(int); +void *my_malloc(size_t); +void copyright_notice(void); +# ifdef _MSC_VER + int isnan(double); +# endif + +extern long verbosity; /* verbosity level (0-4) */ +extern long kernel_cache_statistic; + +#endif diff --git a/thirdparty/svmlight/svm_hideo.c b/thirdparty/svmlight/svm_hideo.c new file mode 100755 index 000000000..ffad2d3c1 --- /dev/null +++ b/thirdparty/svmlight/svm_hideo.c @@ -0,0 +1,1062 @@ +/***********************************************************************/ +/* */ +/* svm_hideo.c */ +/* */ +/* The Hildreth and D'Espo solver specialized for SVMs. */ +/* */ +/* Author: Thorsten Joachims */ +/* Date: 02.07.02 */ +/* */ +/* Copyright (c) 2002 Thorsten Joachims - All rights reserved */ +/* */ +/* This software is available for non-commercial use only. It must */ +/* not be modified and distributed without prior permission of the */ +/* author. The author is not responsible for implications from the */ +/* use of this software. 
 */
+/*                                                                     */
+/***********************************************************************/
+
+# include <math.h>
+# include "svm_common.h"
+
+/*
+ solve the quadratic programming problem
+
+ minimize   g0 * x + 1/2 x' * G * x
+ subject to ce*x = ce0
+            l <= x <= u
+
+ The linear constraint vector ce can only have -1/+1 as entries
+*/
+
+/* Common Block Declarations */
+
+long verbosity;
+
+# define PRIMAL_OPTIMAL      1
+# define DUAL_OPTIMAL        2
+# define MAXITER_EXCEEDED    3
+# define NAN_SOLUTION        4
+# define ONLY_ONE_VARIABLE   5
+
+# define LARGEROUND          0
+# define SMALLROUND          1
+
+/* /////////////////////////////////////////////////////////////// */
+
+# define DEF_PRECISION          1E-5
+# define DEF_MAX_ITERATIONS     200
+# define DEF_LINDEP_SENSITIVITY 1E-8
+# define EPSILON_HIDEO          1E-20
+# define EPSILON_EQ             1E-5
+
+double *optimize_qp(QP *, double *, long, double *, LEARN_PARM *);
+double *primal=0,*dual=0;
+long   precision_violations=0;
+double opt_precision=DEF_PRECISION;
+long   maxiter=DEF_MAX_ITERATIONS;
+double lindep_sensitivity=DEF_LINDEP_SENSITIVITY;
+double *buffer;
+long   *nonoptimal;
+
+long  smallroundcount=0;
+long  roundnumber=0;
+
+/* /////////////////////////////////////////////////////////////// */
+
+void *my_malloc();
+
+int optimize_hildreth_despo(long,long,double,double,double,long,long,long,double,double *,
+			    double *,double *,double *,double *,double *,
+			    double *,double *,double *,long *,double *,double *);
+int solve_dual(long,long,double,double,long,double *,double *,double *,
+	       double *,double *,double *,double *,double *,double *,
+	       double *,double *,double *,double *,long);
+
+void linvert_matrix(double *, long, double *, double, long *);
+void lprint_matrix(double *, long);
+void ladd_matrix(double *, long, double);
+void lcopy_matrix(double *, long, double *);
+void lswitch_rows_matrix(double *, long, long, long);
+void lswitchrk_matrix(double *, long, long, long);
+
+double calculate_qp_objective(long, double *, double *, double *);
+
+
+
+double *optimize_qp(qp,epsilon_crit,nx,threshold,learn_parm)
+QP *qp;
+double *epsilon_crit;
+long nx; /* Maximum number of variables in QP */
+double *threshold;
+LEARN_PARM *learn_parm;
+/* start the optimizer and return the optimal values */
+/* The HIDEO optimizer does not necessarily fully solve the problem. */
+/* Since it requires a strictly positive definite hessian, the solution */
+/* is restricted to a linear independent subset in case the matrix is */
+/* only semi-definite.
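+
+   (For orientation, an interpretive note: in the SVM case the
+   variables x are the alphas of the current working set, ce holds
+   the -1/+1 labels, the single equality constraint m=1 stems from a
+   biased hyperplane, and low/up are the box constraints on the
+   alphas; the difference of the two equality-constraint dual
+   variables is returned through *threshold as the bias b.)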
 */
+{
+  long i,j;
+  int result;
+  double eq,progress;
+
+  roundnumber++;
+
+  if(!primal) { /* allocate memory at first call */
+    primal=(double *)my_malloc(sizeof(double)*nx);
+    dual=(double *)my_malloc(sizeof(double)*((nx+1)*2));
+    nonoptimal=(long *)my_malloc(sizeof(long)*(nx));
+    buffer=(double *)my_malloc(sizeof(double)*((nx+1)*2*(nx+1)*2+
+					       nx*nx+2*(nx+1)*2+2*nx+1+2*nx+
+					       nx+nx+nx*nx));
+    (*threshold)=0;
+    for(i=0;i<nx;i++) {
+      primal[i]=0;
+    }
+  }
+
+  if(verbosity>=4) { /* really verbose */
+    printf("\n\n");
+    eq=qp->opt_ce0[0];
+    for(i=0;i<qp->opt_n;i++) {
+      eq+=qp->opt_xinit[i]*qp->opt_ce[i];
+      printf("%f: ",qp->opt_g0[i]);
+      for(j=0;j<qp->opt_n;j++) {
+	printf("%f ",qp->opt_g[i*qp->opt_n+j]);
+      }
+      printf(": a=%.10f < %f",qp->opt_xinit[i],qp->opt_up[i]);
+      printf(": y=%f\n",qp->opt_ce[i]);
+    }
+    if(qp->opt_m) {
+      printf("EQ: %f*x0",qp->opt_ce[0]);
+      for(i=1;i<qp->opt_n;i++) {
+	printf(" + %f*x%ld",qp->opt_ce[i],i);
+      }
+      printf(" = %f\n\n",-qp->opt_ce0[0]);
+    }
+  }
+
+  result=optimize_hildreth_despo(qp->opt_n,qp->opt_m,
+				 opt_precision,(*epsilon_crit),
+				 learn_parm->epsilon_a,maxiter,
+				 /* (long)PRIMAL_OPTIMAL, */
+				 (long)0, (long)0,
+				 lindep_sensitivity,
+				 qp->opt_g,qp->opt_g0,qp->opt_ce,qp->opt_ce0,
+				 qp->opt_low,qp->opt_up,primal,qp->opt_xinit,
+				 dual,nonoptimal,buffer,&progress);
+  if(verbosity>=3) {
+    printf("return(%d)...",result);
+  }
+
+  if(learn_parm->totwords < learn_parm->svm_maxqpsize) {
+    /* larger working sets will be linear dependent anyway */
+    learn_parm->svm_maxqpsize=maxl(learn_parm->totwords,(long)2);
+  }
+
+  if(result == NAN_SOLUTION) {
+    lindep_sensitivity*=2;  /* throw out linear dependent examples more */
+			    /* generously */
+    if(learn_parm->svm_maxqpsize>2) {
+      learn_parm->svm_maxqpsize--;  /* decrease size of qp-subproblems */
+    }
+    precision_violations++;
+  }
+
+  /* take one round of only two variable to get unstuck */
+  if((result != PRIMAL_OPTIMAL) || (!(roundnumber % 31)) || (progress <= 0)) {
+
+    smallroundcount++;
+
+    result=optimize_hildreth_despo(qp->opt_n,qp->opt_m,
+				   opt_precision,(*epsilon_crit),
+				   learn_parm->epsilon_a,(long)maxiter,
+				   (long)PRIMAL_OPTIMAL,(long)SMALLROUND,
+				   lindep_sensitivity,
+				   qp->opt_g,qp->opt_g0,qp->opt_ce,qp->opt_ce0,
+				   qp->opt_low,qp->opt_up,primal,qp->opt_xinit,
+				   dual,nonoptimal,buffer,&progress);
+    if(verbosity>=3) {
+      printf("return_srd(%d)...",result);
+    }
+
+    if(result != PRIMAL_OPTIMAL) {
+      if(result != ONLY_ONE_VARIABLE)
+	precision_violations++;
+      if(result == MAXITER_EXCEEDED)
+	maxiter+=100;
+      if(result == NAN_SOLUTION) {
+	lindep_sensitivity*=2;  /* throw out linear dependent examples more */
+				/* generously */
+	/* results not valid, so return inital values */
+	for(i=0;i<qp->opt_n;i++) {
+	  primal[i]=qp->opt_xinit[i];
+	}
+      }
+    }
+  }
+
+
+  if(precision_violations > 50) {
+    precision_violations=0;
+    (*epsilon_crit)*=10.0;
+    if(verbosity>=1) {
+      printf("\nWARNING: Relaxing epsilon on KT-Conditions (%f).\n",
+	     (*epsilon_crit));
+    }
+  }
+
+  if((qp->opt_m>0) && (result != NAN_SOLUTION) && (!isnan(dual[1]-dual[0])))
+    (*threshold)=dual[1]-dual[0];
+  else
+    (*threshold)=0;
+
+  if(verbosity>=4) { /* really verbose */
+    printf("\n\n");
+    eq=qp->opt_ce0[0];
+    for(i=0;i<qp->opt_n;i++) {
+      eq+=primal[i]*qp->opt_ce[i];
+      printf("%f: ",qp->opt_g0[i]);
+      for(j=0;j<qp->opt_n;j++) {
+	printf("%f ",qp->opt_g[i*qp->opt_n+j]);
+      }
+      printf(": a=%.30f",primal[i]);
+      printf(": nonopti=%ld",nonoptimal[i]);
+      printf(": y=%f\n",qp->opt_ce[i]);
+    }
+    printf("eq-constraint=%.30f\n",eq);
+    printf("b=%f\n",(*threshold));
+    printf(" smallroundcount=%ld ",smallroundcount);
+  }
+
+  return(primal);
+}
+
+
+int optimize_hildreth_despo(n,m,precision,epsilon_crit,epsilon_a,maxiter,goal,
+			    smallround,lindep_sensitivity,g,g0,ce,ce0,low,up,
+			    primal,init,dual,lin_dependent,buffer,progress)
+     long   n;                   /* number of variables */
+     long   m;                   /* number of linear equality constraints [0,1] */
+     double precision;           /* solve at least to this dual precision */
+     double epsilon_crit;        /* stop, if KT-Conditions approx fulfilled */
+     double epsilon_a;           /* precision of alphas at bounds */
+     long   maxiter;             /* stop after this many iterations */
+     long   goal;                /* keep going until goal fulfilled */
+     long   smallround;          /* use only two variables of steepest descent */
+     double lindep_sensitivity;  /* epsilon for detecting linear dependent ex */
+     double *g;                  /* hessian of objective */
+     double *g0;                 /* linear part of objective */
+     double *ce,*ce0;            /* linear equality constraints */
+     double *low,*up;            /* box constraints */
+     double *primal;             /* primal variables */
+     double *init;               /* initial values of primal */
+     double *dual;               /* dual variables */
+     long   *lin_dependent;
+     double *buffer;
+     double *progress;           /* delta in the objective function between
+				    before and after */
+{
+  long i,j,k,from,to,n_indep,changed;
+  double sum,bmin=0,bmax=0;
+  double *d,*d0,*ig,*dual_old,*temp,*start;
+  double *g0_new,*g_new,*ce_new,*ce0_new,*low_new,*up_new;
+  double add,t;
+  int result;
+  double obj_before,obj_after;
+  long b1,b2;
+  double g0_b1,g0_b2,ce0_b;
+
+  g0_new=&(buffer[0]);    /* claim regions of buffer */
+  d=&(buffer[n]);
+  d0=&(buffer[n+(n+m)*2*(n+m)*2]);
+  ce_new=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2]);
+  ce0_new=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n]);
+  ig=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m]);
+  dual_old=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m+n*n]);
+  low_new=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m+n*n+(n+m)*2]);
+  up_new=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m+n*n+(n+m)*2+n]);
+  start=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m+n*n+(n+m)*2+n+n]);
+  g_new=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m+n*n+(n+m)*2+n+n+n]);
+  temp=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m+n*n+(n+m)*2+n+n+n+n*n]);
+
+  b1=-1;
+  b2=-1;
+  for(i=0;i<n;i++) {   /* get variables with steepest descent */
+    sum=g0[i];
+    for(j=0;j<n;j++)
+      sum+=init[j]*g[i*n+j];
+    sum=sum*ce[i];
+    if(((b1==-1) || (sum<bmin))
+       && (!((init[i]<=(low[i]+epsilon_a)) && (ce[i]<0.0)))
+       && (!((init[i]>=( up[i]-epsilon_a)) && (ce[i]>0.0)))
+       ) {
+      bmin=sum;
+      b1=i;
+    }
+    if(((b2==-1) || (sum>=bmax))
+       && (!((init[i]<=(low[i]+epsilon_a)) && (ce[i]>0.0)))
+       && (!((init[i]>=( up[i]-epsilon_a)) && (ce[i]<0.0)))
+       ) {
+      bmax=sum;
+      b2=i;
+    }
+  }
+  /* in case of unbiased hyperplane, the previous projection on */
+  /* equality constraint can lead to b1 or b2 being -1.
*/ + if((b1 == -1) || (b2 == -1)) { + b1=maxl(b1,b2); + b2=maxl(b1,b2); + } + + for(i=0;i g0_b2) { /* set b2 to upper bound */ + /* printf("case +=>\n"); */ + changed=1; + t=up[b2]-init[b2]; + if((init[b1]-low[b1]) < t) { + t=init[b1]-low[b1]; + } + start[b1]=init[b1]-t; + start[b2]=init[b2]+t; + } + } + else if(((g[b1*n+b1]>0) || (g[b2*n+b2]>0))) { /* (ce[b1] != ce[b2]) */ + /* printf("case +!\n"); */ + t=((ce[b2]/ce[b1])*g0[b1]-g0[b2]+ce0[0]*(g[b1*n+b1]*ce[b2]/ce[b1]-g[b1*n+b2]/ce[b1]))/((ce[b2]*ce[b2]/(ce[b1]*ce[b1]))*g[b1*n+b1]+g[b2*n+b2]-2*(g[b1*n+b2]*ce[b2]/ce[b1]))-init[b2]; + changed=1; + if((up[b2]-init[b2]) < t) { + t=up[b2]-init[b2]; + } + if((init[b2]-low[b2]) < -t) { + t=-(init[b2]-low[b2]); + } + if((up[b1]-init[b1]) < t) { + t=(up[b1]-init[b1]); + } + if((init[b1]-low[b1]) < -t) { + t=-(init[b1]-low[b1]); + } + start[b1]=init[b1]+t; + start[b2]=init[b2]+t; + } + } + if((-g[b1*n+b2] == g[b1*n+b1]) && (-g[b1*n+b2] == g[b2*n+b2])) { + /* printf("diffeuqal\n"); */ + if(ce[b1] != ce[b2]) { + if((g0_b1+g0_b2) < 0) { /* set b1 and b2 to upper bound */ + /* printf("case -!<\n"); */ + changed=1; + t=up[b1]-init[b1]; + if((up[b2]-init[b2]) < t) { + t=up[b2]-init[b2]; + } + start[b1]=init[b1]+t; + start[b2]=init[b2]+t; + } + else if((g0_b1+g0_b2) >= 0) { /* set b1 and b2 to lower bound */ + /* printf("case -!>\n"); */ + changed=1; + t=init[b1]-low[b1]; + if((init[b2]-low[b2]) < t) { + t=init[b2]-low[b2]; + } + start[b1]=init[b1]-t; + start[b2]=init[b2]-t; + } + } + else if(((g[b1*n+b1]>0) || (g[b2*n+b2]>0))) { /* (ce[b1]==ce[b2]) */ + /* printf("case -=\n"); */ + t=((ce[b2]/ce[b1])*g0[b1]-g0[b2]+ce0[0]*(g[b1*n+b1]*ce[b2]/ce[b1]-g[b1*n+b2]/ce[b1]))/((ce[b2]*ce[b2]/(ce[b1]*ce[b1]))*g[b1*n+b1]+g[b2*n+b2]-2*(g[b1*n+b2]*ce[b2]/ce[b1]))-init[b2]; + changed=1; + if((up[b2]-init[b2]) < t) { + t=up[b2]-init[b2]; + } + if((init[b2]-low[b2]) < -t) { + t=-(init[b2]-low[b2]); + } + if((up[b1]-init[b1]) < -t) { + t=-(up[b1]-init[b1]); + } + if((init[b1]-low[b1]) < t) { + t=init[b1]-low[b1]; + } + start[b1]=init[b1]-t; + start[b2]=init[b2]+t; + } + } + } + /* if we have a biased hyperplane, then adding a constant to the */ + /* hessian does not change the solution. So that is done for examples */ + /* with zero diagonal entry, since HIDEO cannot handle them. 
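+
+   (Sketch of why this is safe: with the equality constraint ce*x
+   fixed to a constant on the feasible set, replacing G by
+   G + add*ce*ce' changes the objective g0*x + 1/2 x'Gx only by the
+   constant add/2*(ce*x)^2, so the constrained minimizer is
+   unchanged.)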
*/ + if((m>0) + && ((fabs(g[b1*n+b1]) < lindep_sensitivity) + || (fabs(g[b2*n+b2]) < lindep_sensitivity))) { + /* printf("Case 0\n"); */ + add+=0.093274; + } + /* in case both examples are linear dependent */ + else if((m>0) + && (g[b1*n+b2] != 0 && g[b2*n+b2] != 0) + && (fabs(g[b1*n+b1]/g[b1*n+b2] - g[b1*n+b2]/g[b2*n+b2]) + < lindep_sensitivity)) { + /* printf("Case lindep\n"); */ + add+=0.078274; + } + + /* special case for zero diagonal entry on unbiased hyperplane */ + if((m==0) && (b1>=0)) { + if(fabs(g[b1*n+b1]) < lindep_sensitivity) { + /* printf("Case 0b1\n"); */ + for(i=0;i=0) + start[b1]=low[b1]; + } + } + if((m==0) && (b2>=0)) { + if(fabs(g[b2*n+b2]) < lindep_sensitivity) { + /* printf("Case 0b2\n"); */ + for(i=0;i=0) + start[b2]=low[b2]; + } + } + + /* printf("b1=%ld,b2=%ld\n",b1,b2); */ + + lcopy_matrix(g,n,d); + if((m==1) && (add>0.0)) { + for(j=0;j2) { /* switch, so that variables are better mixed */ + lswitchrk_matrix(d,n,b1,(long)0); + if(b2 == 0) + lswitchrk_matrix(d,n,b1,(long)1); + else + lswitchrk_matrix(d,n,b2,(long)1); + } + if(smallround == SMALLROUND) { + for(i=2;i0) { /* for biased hyperplane, pick two variables */ + lin_dependent[0]=0; + lin_dependent[1]=0; + } + else { /* for unbiased hyperplane, pick only one variable */ + lin_dependent[0]=smallroundcount % 2; + lin_dependent[1]=(smallroundcount+1) % 2; + } + } + else { + for(i=0;i2) { /* now switch back */ + if(b2 == 0) { + lswitchrk_matrix(ig,n,b1,(long)1); + i=lin_dependent[1]; + lin_dependent[1]=lin_dependent[b1]; + lin_dependent[b1]=i; + } + else { + lswitchrk_matrix(ig,n,b2,(long)1); + i=lin_dependent[1]; + lin_dependent[1]=lin_dependent[b2]; + lin_dependent[b2]=i; + } + lswitchrk_matrix(ig,n,b1,(long)0); + i=lin_dependent[0]; + lin_dependent[0]=lin_dependent[b1]; + lin_dependent[b1]=i; + } + /* lprint_matrix(d,n); */ + /* lprint_matrix(ig,n); */ + + lcopy_matrix(g,n,g_new); /* restore g_new matrix */ + if(add>0) + for(j=0;j0) ce0_new[0]=-ce0[0]; + for(i=0;i0) ce0_new[0]-=(start[i]*ce[i]); + } + } + from=0; /* remove linear dependent vectors */ + to=0; + n_indep=0; + for(i=0;i=3) { + printf("real_qp_size(%ld)...",n_indep); + } + + /* cannot optimize with only one variable */ + if((n_indep<=1) && (m>0) && (!changed)) { + for(i=n-1;i>=0;i--) { + primal[i]=init[i]; + } + return((int)ONLY_ONE_VARIABLE); + } + + if((!changed) || (n_indep>1)) { + result=solve_dual(n_indep,m,precision,epsilon_crit,maxiter,g_new,g0_new, + ce_new,ce0_new,low_new,up_new,primal,d,d0,ig, + dual,dual_old,temp,goal); + } + else { + result=PRIMAL_OPTIMAL; + } + + j=n_indep; + for(i=n-1;i>=0;i--) { + if(!lin_dependent[i]) { + j--; + primal[i]=primal[j]; + } + else { + primal[i]=start[i]; /* leave as is */ + } + temp[i]=primal[i]; + } + + obj_before=calculate_qp_objective(n,g,g0,init); + obj_after=calculate_qp_objective(n,g,g0,primal); + (*progress)=obj_before-obj_after; + if(verbosity>=3) { + printf("before(%.30f)...after(%.30f)...result_sd(%d)...", + obj_before,obj_after,result); + } + + return((int)result); +} + + +int solve_dual(n,m,precision,epsilon_crit,maxiter,g,g0,ce,ce0,low,up,primal, + d,d0,ig,dual,dual_old,temp,goal) + /* Solves the dual using the method of Hildreth and D'Espo. */ + /* Can only handle problems with zero or exactly one */ + /* equality constraints. 
*/ + + long n; /* number of variables */ + long m; /* number of linear equality constraints */ + double precision; /* solve at least to this dual precision */ + double epsilon_crit; /* stop, if KT-Conditions approx fulfilled */ + long maxiter; /* stop after that many iterations */ + double *g; + double *g0; /* linear part of objective */ + double *ce,*ce0; /* linear equality constraints */ + double *low,*up; /* box constraints */ + double *primal; /* variables (with initial values) */ + double *d,*d0,*ig,*dual,*dual_old,*temp; /* buffer */ + long goal; +{ + long i,j,k,iter; + double sum,w,maxviol,viol,temp1,temp2,isnantest; + double model_b,dist; + long retrain,maxfaktor,primal_optimal=0,at_bound,scalemaxiter; + double epsilon_a=1E-15,epsilon_hideo; + double eq; + + if((m<0) || (m>1)) + perror("SOLVE DUAL: inappropriate number of eq-constrains!"); + + /* + printf("\n"); + for(i=0;i0) { + sum=0; /* dual hessian for eq constraints */ + for(j=0;j0) { + sum=0; /* dual linear component for eq constraints */ + for(j=0;j 0) && (iter < (scalemaxiter*maxfaktor))) { + iter++; + + while((maxviol > precision) && (iter < (scalemaxiter*maxfaktor))) { + iter++; + maxviol=0; + for(i=0;i<2*(n+m);i++) { + sum=d0[i]; + for(j=0;j<2*(n+m);j++) { + sum+=d[i*2*(n+m)+j]*dual_old[j]; + } + sum-=d[i*2*(n+m)+i]*dual_old[i]; + dual[i]=-sum/d[i*2*(n+m)+i]; + if(dual[i]<0) dual[i]=0; + + viol=fabs(dual[i]-dual_old[i]); + if(viol>maxviol) + maxviol=viol; + dual_old[i]=dual[i]; + } + /* + printf("%d) maxviol=%20f precision=%f\n",iter,maxviol,precision); + */ + } + + if(m>0) { + for(i=0;i=(up[i])) { + primal[i]=up[i]; + } + } + + if(m>0) + model_b=dual[n+n+1]-dual[n+n]; + else + model_b=0; + + epsilon_hideo=EPSILON_HIDEO; + for(i=0;i(low[i]+epsilon_hideo)) &&(dist>(1.0+epsilon_crit))) { + epsilon_hideo=(primal[i]-low[i])*2.0; + } + } + /* printf("\nEPSILON_HIDEO=%.30f\n",epsilon_hideo); */ + + for(i=0;i=(up[i]-epsilon_hideo)) { + primal[i]=up[i]; + } + } + + retrain=0; + primal_optimal=1; + at_bound=0; + for(i=0;(i(low[i]+epsilon_a)) && (dist > (1.0+epsilon_crit))) { + retrain=1; + primal_optimal=0; + } + if((primal[i]<=(low[i]+epsilon_a)) || (primal[i]>=(up[i]-epsilon_a))) { + at_bound++; + } + /* printf("HIDEOtemp: a[%ld]=%.30f, dist=%.6f, b=%f, at_bound=%ld\n",i,primal[i],dist,model_b,at_bound); */ + } + if(m>0) { + eq=-ce0[0]; /* check precision of eq-constraint */ + for(i=0;i=(up[i]-epsilon_a)) { + primal[i]=up[i]; + } + } + } + + isnantest=0; + for(i=0;i0) { + temp1=dual[n+n+1]; /* copy the dual variables for the eq */ + temp2=dual[n+n]; /* constraints to a handier location */ + for(i=n+n+1;i>=2;i--) { + dual[i]=dual[i-2]; + } + dual[0]=temp2; + dual[1]=temp1; + isnantest+=temp1+temp2; + } + + if(isnan(isnantest)) { + return((int)NAN_SOLUTION); + } + else if(primal_optimal) { + return((int)PRIMAL_OPTIMAL); + } + else if(maxviol == 0.0) { + return((int)DUAL_OPTIMAL); + } + else { + return((int)MAXITER_EXCEEDED); + } +} + + +void linvert_matrix(matrix,depth,inverse,lindep_sensitivity,lin_dependent) +double *matrix; +long depth; +double *inverse,lindep_sensitivity; +long *lin_dependent; /* indicates the active parts of matrix on + input and output*/ +{ + long i,j,k; + double factor; + + for(i=0;i=0;i--) { + if(!lin_dependent[i]) { + factor=1/matrix[i*depth+i]; + for(k=0;k=0;j--) { + factor=matrix[j*depth+i]; + matrix[j*depth+i]=0; + for(k=0;ktotwords=totwords; + + /* make sure -n value is reasonable */ + if((learn_parm->svm_newvarsinqp < 2) + || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) { + 
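+    /* -n (svm_newvarsinqp) must lie in [2,svm_maxqpsize]: each QP
+       subproblem needs at least a pair of new variables and can never
+       use more than the -q working set size, so out-of-range values
+       fall back to svm_maxqpsize here. */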
learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; + } + + init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK); + + label = (long *)my_malloc(sizeof(long)*totdoc); + inconsistent = (long *)my_malloc(sizeof(long)*totdoc); + unlabeled = (long *)my_malloc(sizeof(long)*totdoc); + c = (double *)my_malloc(sizeof(double)*totdoc); + a = (double *)my_malloc(sizeof(double)*totdoc); + a_fullset = (double *)my_malloc(sizeof(double)*totdoc); + xi_fullset = (double *)my_malloc(sizeof(double)*totdoc); + lin = (double *)my_malloc(sizeof(double)*totdoc); + learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc); + model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2)); + model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2)); + model->index = (long *)my_malloc(sizeof(long)*(totdoc+2)); + + model->at_upper_bound=0; + model->b=0; + model->supvec[0]=0; /* element 0 reserved and empty for now */ + model->alpha[0]=0; + model->lin_weights=NULL; + model->totwords=totwords; + model->totdoc=totdoc; + model->kernel_parm=(*kernel_parm); + model->sv_num=1; + model->loo_error=-1; + model->loo_recall=-1; + model->loo_precision=-1; + model->xa_error=-1; + model->xa_recall=-1; + model->xa_precision=-1; + inconsistentnum=0; + transduction=0; + + r_delta=estimate_r_delta(docs,totdoc,kernel_parm); + r_delta_sq=r_delta*r_delta; + + r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm); + if(learn_parm->svm_c == 0.0) { /* default value for C */ + learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg); + if(verbosity>=1) + printf("Setting default regularization parameter C=%.4f\n", + learn_parm->svm_c); + } + + learn_parm->eps=-1.0; /* equivalent regression epsilon for + classification */ + + for(i=0;idocnum=i; + inconsistent[i]=0; + a[i]=0; + lin[i]=0; + c[i]=0.0; + unlabeled[i]=0; + if(class[i] == 0) { + unlabeled[i]=1; + label[i]=0; + transduction=1; + } + if(class[i] > 0) { + learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio* + docs[i]->costfactor; + label[i]=1; + trainpos++; + } + else if(class[i] < 0) { + learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor; + label[i]=-1; + trainneg++; + } + else { + learn_parm->svm_cost[i]=0; + } + } + if(verbosity>=2) { + printf("%ld positive, %ld negative, and %ld unlabeled examples.\n",trainpos,trainneg,totdoc-trainpos-trainneg); fflush(stdout); + } + + /* caching makes no sense for linear kernel */ + if(kernel_parm->kernel_type == LINEAR) { + kernel_cache = NULL; + } + + /* compute starting state for initial alpha values */ + if(alpha) { + if(verbosity>=1) { + printf("Computing starting state..."); fflush(stdout); + } + index = (long *)my_malloc(sizeof(long)*totdoc); + index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); + weights=(double *)my_malloc(sizeof(double)*(totwords+1)); + aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc); + for(i=0;ilearn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i]; + } + if(kernel_parm->kernel_type != LINEAR) { + for(i=0;i0) && (alpha[i]svm_cost[i]) + && (kernel_cache_space_available(kernel_cache))) + cache_kernel_row(kernel_cache,docs,i,kernel_parm); + for(i=0;isvm_cost[i]) + && (kernel_cache_space_available(kernel_cache))) + cache_kernel_row(kernel_cache,docs,i,kernel_parm); + } + (void)compute_index(index,totdoc,index2dnum); + update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc, + totwords,kernel_parm,kernel_cache,lin,aicache, + weights); + (void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c, + learn_parm,index2dnum,index2dnum,model); + for(i=0;i=1) { 
+ printf("done.\n"); fflush(stdout); + } + } + + if(transduction) { + learn_parm->svm_iter_to_shrink=99999999; + if(verbosity >= 1) + printf("\nDeactivating Shrinking due to an incompatibility with the transductive \nlearner in the current version.\n\n"); + } + + if(transduction && learn_parm->compute_loo) { + learn_parm->compute_loo=0; + if(verbosity >= 1) + printf("\nCannot compute leave-one-out estimates for transductive learner.\n\n"); + } + + if(learn_parm->remove_inconsistent && learn_parm->compute_loo) { + learn_parm->compute_loo=0; + printf("\nCannot compute leave-one-out estimates when removing inconsistent examples.\n\n"); + } + + if(learn_parm->compute_loo && ((trainpos == 1) || (trainneg == 1))) { + learn_parm->compute_loo=0; + printf("\nCannot compute leave-one-out with only one example in one class.\n\n"); + } + + + if(verbosity==1) { + printf("Optimizing"); fflush(stdout); + } + + /* train the svm */ + iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm, + kernel_parm,kernel_cache,&shrink_state,model, + inconsistent,unlabeled,a,lin, + c,&timing_profile, + &maxdiff,(long)-1, + (long)1); + + if(verbosity>=1) { + if(verbosity==1) printf("done. (%ld iterations)\n",iterations); + + misclassified=0; + for(i=0;(ib)*(double)label[i] <= 0.0) + misclassified++; + } + + printf("Optimization finished (%ld misclassified, maxdiff=%.5f).\n", + misclassified,maxdiff); + + runtime_end=get_runtime(); + if(verbosity>=2) { + printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n", + ((float)runtime_end-(float)runtime_start)/100.0, + (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start)); + } + else { + printf("Runtime in cpu-seconds: %.2f\n", + (runtime_end-runtime_start)/100.0); + } + + if(learn_parm->remove_inconsistent) { + inconsistentnum=0; + for(i=0;isv_num-1,inconsistentnum); + } + else { + upsupvecnum=0; + for(i=1;isv_num;i++) { + if(fabs(model->alpha[i]) >= + (learn_parm->svm_cost[(model->supvec[i])->docnum]- + learn_parm->epsilon_a)) + upsupvecnum++; + } + printf("Number of SV: %ld (including %ld at upper bound)\n", + model->sv_num-1,upsupvecnum); + } + + if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) { + loss=0; + model_length=0; + for(i=0;ib)*(double)label[i] < 1.0-learn_parm->epsilon_crit) + loss+=1.0-(lin[i]-model->b)*(double)label[i]; + model_length+=a[i]*label[i]*lin[i]; + } + model_length=sqrt(model_length); + fprintf(stdout,"L1 loss: loss=%.5f\n",loss); + fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length); + example_length=estimate_sphere(model,kernel_parm); + fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n", + length_of_longest_document_vector(docs,totdoc,kernel_parm)); + fprintf(stdout,"Estimated VCdim of classifier: VCdim<=%.5f\n", + estimate_margin_vcdim(model,model_length,example_length, + kernel_parm)); + if((!learn_parm->remove_inconsistent) && (!transduction)) { + runtime_start_xa=get_runtime(); + if(verbosity>=1) { + printf("Computing XiAlpha-estimates..."); 
fflush(stdout);
+      }
+      compute_xa_estimates(model,label,unlabeled,totdoc,docs,lin,a,
+			   kernel_parm,learn_parm,&(model->xa_error),
+			   &(model->xa_recall),&(model->xa_precision));
+      if(verbosity>=1) {
+	printf("done\n");
+      }
+      printf("Runtime for XiAlpha-estimates in cpu-seconds: %.2f\n",
+	     (get_runtime()-runtime_start_xa)/100.0);
+
+      fprintf(stdout,"XiAlpha-estimate of the error: error<=%.2f%% (rho=%.2f,depth=%ld)\n",
+	      model->xa_error,learn_parm->rho,learn_parm->xa_depth);
+      fprintf(stdout,"XiAlpha-estimate of the recall: recall=>%.2f%% (rho=%.2f,depth=%ld)\n",
+	      model->xa_recall,learn_parm->rho,learn_parm->xa_depth);
+      fprintf(stdout,"XiAlpha-estimate of the precision: precision=>%.2f%% (rho=%.2f,depth=%ld)\n",
+	      model->xa_precision,learn_parm->rho,learn_parm->xa_depth);
+    }
+    else if(!learn_parm->remove_inconsistent) {
+      estimate_transduction_quality(model,label,unlabeled,totdoc,docs,lin);
+    }
+  }
+  if(verbosity>=1) {
+    printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
+  }
+  }
+
+
+  /* leave-one-out testing starts now */
+  if(learn_parm->compute_loo) {
+    /* save results of training on full dataset for leave-one-out */
+    runtime_start_loo=get_runtime();
+    for(i=0;i<totdoc;i++) {
+      xi_fullset[i]=1.0-((lin[i]-model->b)*(double)label[i]);
+      if(xi_fullset[i]<0) xi_fullset[i]=0;
+      a_fullset[i]=a[i];
+    }
+    if(verbosity>=1) {
+      printf("Computing leave-one-out");
+    }
+
+    /* repeat this loop for every held-out example */
+    for(heldout=0;(heldout<totdoc);heldout++) {
+      if(learn_parm->rho*a_fullset[heldout]*r_delta_sq+xi_fullset[heldout]
+	 < 1.0) {
+	/* guaranteed to not produce a leave-one-out error */
+	if(verbosity==1) {
+	  printf("+"); fflush(stdout);
+	}
+      }
+      else if(xi_fullset[heldout] > 1.0) {
+	/* guaranteed to produce a leave-one-out error */
+	loo_count++;
+	if(label[heldout] > 0) loo_count_pos++; else loo_count_neg++;
+	if(verbosity==1) {
+	  printf("-"); fflush(stdout);
+	}
+      }
+      else {
+	loocomputed++;
+	heldout_c=learn_parm->svm_cost[heldout]; /* set upper bound to zero */
+	learn_parm->svm_cost[heldout]=0;
+	/* make sure heldout example is not currently */
+	/* shrunk away. Assumes that lin is up to date!
*/ + shrink_state.active[heldout]=1; + if(verbosity>=2) + printf("\nLeave-One-Out test on example %ld\n",heldout); + if(verbosity>=1) { + printf("(?[%ld]",heldout); fflush(stdout); + } + + optimize_to_convergence(docs,label,totdoc,totwords,learn_parm, + kernel_parm, + kernel_cache,&shrink_state,model,inconsistent,unlabeled, + a,lin,c,&timing_profile, + &maxdiff,heldout,(long)2); + + /* printf("%.20f\n",(lin[heldout]-model->b)*(double)label[heldout]); */ + + if(((lin[heldout]-model->b)*(double)label[heldout]) <= 0.0) { + loo_count++; /* there was a loo-error */ + if(label[heldout] > 0) loo_count_pos++; else loo_count_neg++; + if(verbosity>=1) { + printf("-)"); fflush(stdout); + } + } + else { + if(verbosity>=1) { + printf("+)"); fflush(stdout); + } + } + /* now we need to restore the original data set*/ + learn_parm->svm_cost[heldout]=heldout_c; /* restore upper bound */ + } + } /* end of leave-one-out loop */ + + + if(verbosity>=1) { + printf("\nRetrain on full problem"); fflush(stdout); + } + optimize_to_convergence(docs,label,totdoc,totwords,learn_parm, + kernel_parm, + kernel_cache,&shrink_state,model,inconsistent,unlabeled, + a,lin,c,&timing_profile, + &maxdiff,(long)-1,(long)1); + if(verbosity >= 1) + printf("done.\n"); + + + /* after all leave-one-out computed */ + model->loo_error=100.0*loo_count/(double)totdoc; + model->loo_recall=(1.0-(double)loo_count_pos/(double)trainpos)*100.0; + model->loo_precision=(trainpos-loo_count_pos)/ + (double)(trainpos-loo_count_pos+loo_count_neg)*100.0; + if(verbosity >= 1) { + fprintf(stdout,"Leave-one-out estimate of the error: error=%.2f%%\n", + model->loo_error); + fprintf(stdout,"Leave-one-out estimate of the recall: recall=%.2f%%\n", + model->loo_recall); + fprintf(stdout,"Leave-one-out estimate of the precision: precision=%.2f%%\n", + model->loo_precision); + fprintf(stdout,"Actual leave-one-outs computed: %ld (rho=%.2f)\n", + loocomputed,learn_parm->rho); + printf("Runtime for leave-one-out in cpu-seconds: %.2f\n", + (double)(get_runtime()-runtime_start_loo)/100.0); + } + } + + if(learn_parm->alphafile[0]) + write_alphas(learn_parm->alphafile,a,label,totdoc); + + shrink_state_cleanup(&shrink_state); + free(label); + free(inconsistent); + free(unlabeled); + free(c); + free(a); + free(a_fullset); + free(xi_fullset); + free(lin); + free(learn_parm->svm_cost); +} + + +/* Learns an SVM regression model based on the training data in + docs/label. The resulting model is returned in the structure + model. */ + +void svm_learn_regression(DOC **docs, double *value, long int totdoc, + long int totwords, LEARN_PARM *learn_parm, + KERNEL_PARM *kernel_parm, + KERNEL_CACHE **kernel_cache, MODEL *model) + /* docs: Training vectors (x-part) */ + /* class: Training value (y-part) */ + /* totdoc: Number of examples in docs/label */ + /* totwords: Number of features (i.e. highest feature index) */ + /* learn_parm: Learning paramenters */ + /* kernel_parm: Kernel paramenters */ + /* kernel_cache:Initialized Cache, if using a kernel. NULL if + linear. 
Note that it will be free'd and reassigned */ + /* model: Returns learning result (assumed empty before called) */ +{ + long *inconsistent,i,j; + long inconsistentnum; + long upsupvecnum; + double loss,model_length,example_length; + double maxdiff,*lin,*a,*c; + long runtime_start,runtime_end; + long iterations,kernel_cache_size; + long *unlabeled; + double r_delta_sq=0,r_delta,r_delta_avg; + double *xi_fullset; /* buffer for storing xi on full sample in loo */ + double *a_fullset; /* buffer for storing alpha on full sample in loo */ + TIMING timing_profile; + SHRINK_STATE shrink_state; + DOC **docs_org; + long *label; + + /* set up regression problem in standard form */ + docs_org=docs; + docs = (DOC **)my_malloc(sizeof(DOC)*2*totdoc); + label = (long *)my_malloc(sizeof(long)*2*totdoc); + c = (double *)my_malloc(sizeof(double)*2*totdoc); + for(i=0;icostfactor,docs_org[i]->fvec); + label[i]=+1; + c[i]=value[i]; + docs[j]=create_example(j,0,0,docs_org[i]->costfactor,docs_org[i]->fvec); + label[j]=-1; + c[j]=value[i]; + } + totdoc*=2; + + /* need to get a bigger kernel cache */ + if(*kernel_cache) { + kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024); + kernel_cache_cleanup(*kernel_cache); + (*kernel_cache)=kernel_cache_init(totdoc,kernel_cache_size); + } + + runtime_start=get_runtime(); + timing_profile.time_kernel=0; + timing_profile.time_opti=0; + timing_profile.time_shrink=0; + timing_profile.time_update=0; + timing_profile.time_model=0; + timing_profile.time_check=0; + timing_profile.time_select=0; + kernel_cache_statistic=0; + + learn_parm->totwords=totwords; + + /* make sure -n value is reasonable */ + if((learn_parm->svm_newvarsinqp < 2) + || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) { + learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; + } + + init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK); + + inconsistent = (long *)my_malloc(sizeof(long)*totdoc); + unlabeled = (long *)my_malloc(sizeof(long)*totdoc); + a = (double *)my_malloc(sizeof(double)*totdoc); + a_fullset = (double *)my_malloc(sizeof(double)*totdoc); + xi_fullset = (double *)my_malloc(sizeof(double)*totdoc); + lin = (double *)my_malloc(sizeof(double)*totdoc); + learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc); + model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2)); + model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2)); + model->index = (long *)my_malloc(sizeof(long)*(totdoc+2)); + + model->at_upper_bound=0; + model->b=0; + model->supvec[0]=0; /* element 0 reserved and empty for now */ + model->alpha[0]=0; + model->lin_weights=NULL; + model->totwords=totwords; + model->totdoc=totdoc; + model->kernel_parm=(*kernel_parm); + model->sv_num=1; + model->loo_error=-1; + model->loo_recall=-1; + model->loo_precision=-1; + model->xa_error=-1; + model->xa_recall=-1; + model->xa_precision=-1; + inconsistentnum=0; + + r_delta=estimate_r_delta(docs,totdoc,kernel_parm); + r_delta_sq=r_delta*r_delta; + + r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm); + if(learn_parm->svm_c == 0.0) { /* default value for C */ + learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg); + if(verbosity>=1) + printf("Setting default regularization parameter C=%.4f\n", + learn_parm->svm_c); + } + + for(i=0;i 0) { + learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio* + docs[i]->costfactor; + } + else if(label[i] < 0) { + learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor; + } + } + + /* caching makes no sense for linear kernel */ + 
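+  /* For the linear kernel a dot product is cheaper than the cache
+     bookkeeping, so a supplied cache only adds overhead; unlike the
+     classification learner above, which silently drops the cache,
+     this code just warns. */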
if((kernel_parm->kernel_type == LINEAR) && (*kernel_cache)) { + printf("WARNING: Using a kernel cache for linear case will slow optimization down!\n"); + } + + if(verbosity==1) { + printf("Optimizing"); fflush(stdout); + } + + /* train the svm */ + iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm, + kernel_parm,*kernel_cache,&shrink_state, + model,inconsistent,unlabeled,a,lin,c, + &timing_profile,&maxdiff,(long)-1, + (long)1); + + if(verbosity>=1) { + if(verbosity==1) printf("done. (%ld iterations)\n",iterations); + + printf("Optimization finished (maxdiff=%.5f).\n",maxdiff); + + runtime_end=get_runtime(); + if(verbosity>=2) { + printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n", + ((float)runtime_end-(float)runtime_start)/100.0, + (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start)); + } + else { + printf("Runtime in cpu-seconds: %.2f\n", + (runtime_end-runtime_start)/100.0); + } + + if(learn_parm->remove_inconsistent) { + inconsistentnum=0; + for(i=0;isv_num-1,inconsistentnum); + } + else { + upsupvecnum=0; + for(i=1;isv_num;i++) { + if(fabs(model->alpha[i]) >= + (learn_parm->svm_cost[(model->supvec[i])->docnum]- + learn_parm->epsilon_a)) + upsupvecnum++; + } + printf("Number of SV: %ld (including %ld at upper bound)\n", + model->sv_num-1,upsupvecnum); + } + + if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) { + loss=0; + model_length=0; + for(i=0;ib)*(double)label[i] < (-learn_parm->eps+(double)label[i]*c[i])-learn_parm->epsilon_crit) + loss+=-learn_parm->eps+(double)label[i]*c[i]-(lin[i]-model->b)*(double)label[i]; + model_length+=a[i]*label[i]*lin[i]; + } + model_length=sqrt(model_length); + fprintf(stdout,"L1 loss: loss=%.5f\n",loss); + fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length); + example_length=estimate_sphere(model,kernel_parm); + fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n", + length_of_longest_document_vector(docs,totdoc,kernel_parm)); + } + if(verbosity>=1) { + printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic); + } + } + + if(learn_parm->alphafile[0]) + write_alphas(learn_parm->alphafile,a,label,totdoc); + + /* this makes sure the model we return does not contain pointers to the + temporary documents */ + for(i=1;isv_num;i++) { + j=model->supvec[i]->docnum; + if(j >= (totdoc/2)) { + j=totdoc-j-1; + } + model->supvec[i]=docs_org[j]; + } + + shrink_state_cleanup(&shrink_state); + for(i=0;isvm_cost); +} + +void svm_learn_ranking(DOC **docs, double *rankvalue, long int totdoc, + long int totwords, LEARN_PARM *learn_parm, + KERNEL_PARM *kernel_parm, KERNEL_CACHE **kernel_cache, + MODEL *model) + /* docs: Training vectors (x-part) */ + /* rankvalue: Training target values that determine the ranking */ + /* totdoc: Number of examples in docs/label */ + /* totwords: Number of features (i.e. 
highest feature index) */ + /* learn_parm: Learning paramenters */ + /* kernel_parm: Kernel paramenters */ + /* kernel_cache:Initialized pointer to Cache of size 1*totdoc, if + using a kernel. NULL if linear. NOTE: Cache is + getting reinitialized in this function */ + /* model: Returns learning result (assumed empty before called) */ +{ + DOC **docdiff; + long i,j,k,totpair,kernel_cache_size; + double *target,*alpha,cost; + long *greater,*lesser; + MODEL *pairmodel; + SVECTOR *flow,*fhigh; + + totpair=0; + for(i=0;iqueryid==docs[j]->queryid) && (rankvalue[i] != rankvalue[j])) { + totpair++; + } + } + } + + printf("Constructing %ld rank constraints...",totpair); fflush(stdout); + docdiff=(DOC **)my_malloc(sizeof(DOC)*totpair); + target=(double *)my_malloc(sizeof(double)*totpair); + greater=(long *)my_malloc(sizeof(long)*totpair); + lesser=(long *)my_malloc(sizeof(long)*totpair); + + k=0; + for(i=0;iqueryid == docs[j]->queryid) { + cost=(docs[i]->costfactor+docs[j]->costfactor)/2.0; + if(rankvalue[i] > rankvalue[j]) { + if(kernel_parm->kernel_type == LINEAR) + docdiff[k]=create_example(k,0,0,cost, + sub_ss(docs[i]->fvec,docs[j]->fvec)); + else { + flow=copy_svector(docs[j]->fvec); + flow->factor=-1.0; + flow->next=NULL; + fhigh=copy_svector(docs[i]->fvec); + fhigh->factor=1.0; + fhigh->next=flow; + docdiff[k]=create_example(k,0,0,cost,fhigh); + } + target[k]=1; + greater[k]=i; + lesser[k]=j; + k++; + } + else if(rankvalue[i] < rankvalue[j]) { + if(kernel_parm->kernel_type == LINEAR) + docdiff[k]=create_example(k,0,0,cost, + sub_ss(docs[i]->fvec,docs[j]->fvec)); + else { + flow=copy_svector(docs[j]->fvec); + flow->factor=-1.0; + flow->next=NULL; + fhigh=copy_svector(docs[i]->fvec); + fhigh->factor=1.0; + fhigh->next=flow; + docdiff[k]=create_example(k,0,0,cost,fhigh); + } + target[k]=-1; + greater[k]=i; + lesser[k]=j; + k++; + } + } + } + } + printf("done.\n"); fflush(stdout); + + /* need to get a bigger kernel cache */ + if(*kernel_cache) { + kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024); + kernel_cache_cleanup(*kernel_cache); + (*kernel_cache)=kernel_cache_init(totpair,kernel_cache_size); + } + + /* must use unbiased hyperplane on difference vectors */ + learn_parm->biased_hyperplane=0; + pairmodel=(MODEL *)my_malloc(sizeof(MODEL)); + svm_learn_classification(docdiff,target,totpair,totwords,learn_parm, + kernel_parm,(*kernel_cache),pairmodel,NULL); + + /* Transfer the result into a more compact model. If you would like + to output the original model on pairs of documents, see below. 
*/ + alpha=(double *)my_malloc(sizeof(double)*totdoc); + for(i=0;isv_num;i++) { + alpha[lesser[(pairmodel->supvec[i])->docnum]]-=pairmodel->alpha[i]; + alpha[greater[(pairmodel->supvec[i])->docnum]]+=pairmodel->alpha[i]; + } + model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2)); + model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2)); + model->index = (long *)my_malloc(sizeof(long)*(totdoc+2)); + model->supvec[0]=0; /* element 0 reserved and empty for now */ + model->alpha[0]=0; + model->sv_num=1; + for(i=0;isupvec[model->sv_num]=docs[i]; + model->alpha[model->sv_num]=alpha[i]; + model->index[i]=model->sv_num; + model->sv_num++; + } + else { + model->index[i]=-1; + } + } + model->at_upper_bound=0; + model->b=0; + model->lin_weights=NULL; + model->totwords=totwords; + model->totdoc=totdoc; + model->kernel_parm=(*kernel_parm); + model->loo_error=-1; + model->loo_recall=-1; + model->loo_precision=-1; + model->xa_error=-1; + model->xa_recall=-1; + model->xa_precision=-1; + + free(alpha); + free(greater); + free(lesser); + free(target); + + /* If you would like to output the original model on pairs of + document, replace the following lines with '(*model)=(*pairmodel);' */ + for(i=0;i rhs_i - \xi_i + + This corresponds to the -z o option. */ + +void svm_learn_optimization(DOC **docs, double *rhs, long int + totdoc, long int totwords, + LEARN_PARM *learn_parm, + KERNEL_PARM *kernel_parm, + KERNEL_CACHE *kernel_cache, MODEL *model, + double *alpha) + /* docs: Left-hand side of inequalities (x-part) */ + /* rhs: Right-hand side of inequalities */ + /* totdoc: Number of examples in docs/label */ + /* totwords: Number of features (i.e. highest feature index) */ + /* learn_parm: Learning paramenters */ + /* kernel_parm: Kernel paramenters */ + /* kernel_cache:Initialized Cache of size 1*totdoc, if using a kernel. + NULL if linear.*/ + /* model: Returns solution as SV expansion (assumed empty before called) */ + /* alpha: Start values for the alpha variables or NULL + pointer. The new alpha values are returned after + optimization if not NULL. Array must be of size totdoc. 
*/ +{ + long i,*label; + long misclassified,upsupvecnum; + double loss,model_length,example_length; + double maxdiff,*lin,*a,*c; + long runtime_start,runtime_end; + long iterations,maxslackid,svsetnum; + long *unlabeled,*inconsistent; + double r_delta_sq=0,r_delta,r_delta_avg; + long *index,*index2dnum; + double *weights,*slack,*alphaslack; + CFLOAT *aicache; /* buffer to keep one row of hessian */ + + TIMING timing_profile; + SHRINK_STATE shrink_state; + + runtime_start=get_runtime(); + timing_profile.time_kernel=0; + timing_profile.time_opti=0; + timing_profile.time_shrink=0; + timing_profile.time_update=0; + timing_profile.time_model=0; + timing_profile.time_check=0; + timing_profile.time_select=0; + kernel_cache_statistic=0; + + learn_parm->totwords=totwords; + + /* make sure -n value is reasonable */ + if((learn_parm->svm_newvarsinqp < 2) + || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) { + learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; + } + + init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK); + + label = (long *)my_malloc(sizeof(long)*totdoc); + unlabeled = (long *)my_malloc(sizeof(long)*totdoc); + inconsistent = (long *)my_malloc(sizeof(long)*totdoc); + c = (double *)my_malloc(sizeof(double)*totdoc); + a = (double *)my_malloc(sizeof(double)*totdoc); + lin = (double *)my_malloc(sizeof(double)*totdoc); + learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc); + model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2)); + model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2)); + model->index = (long *)my_malloc(sizeof(long)*(totdoc+2)); + + model->at_upper_bound=0; + model->b=0; + model->supvec[0]=0; /* element 0 reserved and empty for now */ + model->alpha[0]=0; + model->lin_weights=NULL; + model->totwords=totwords; + model->totdoc=totdoc; + model->kernel_parm=(*kernel_parm); + model->sv_num=1; + model->loo_error=-1; + model->loo_recall=-1; + model->loo_precision=-1; + model->xa_error=-1; + model->xa_recall=-1; + model->xa_precision=-1; + + r_delta=estimate_r_delta(docs,totdoc,kernel_parm); + r_delta_sq=r_delta*r_delta; + + r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm); + if(learn_parm->svm_c == 0.0) { /* default value for C */ + learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg); + if(verbosity>=1) + printf("Setting default regularization parameter C=%.4f\n", + learn_parm->svm_c); + } + + learn_parm->biased_hyperplane=0; /* learn an unbiased hyperplane */ + + learn_parm->eps=0.0; /* No margin, unless explicitly handcoded + in the right-hand side in the training + set. 
*/ + + for(i=0;idocnum=i; + a[i]=0; + lin[i]=0; + c[i]=rhs[i]; /* set right-hand side */ + unlabeled[i]=0; + inconsistent[i]=0; + learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio* + docs[i]->costfactor; + label[i]=1; + } + if(learn_parm->sharedslack) /* if shared slacks are used, they must */ + for(i=0;islackid) { + perror("Error: Missing shared slacks definitions in some of the examples."); + exit(0); + } + + /* compute starting state for initial alpha values */ + if(alpha) { + if(verbosity>=1) { + printf("Computing starting state..."); fflush(stdout); + } + index = (long *)my_malloc(sizeof(long)*totdoc); + index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); + weights=(double *)my_malloc(sizeof(double)*(totwords+1)); + aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc); + for(i=0;ilearn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i]; + } + if(kernel_parm->kernel_type != LINEAR) { + for(i=0;i0) && (alpha[i]svm_cost[i]) + && (kernel_cache_space_available(kernel_cache))) + cache_kernel_row(kernel_cache,docs,i,kernel_parm); + for(i=0;isvm_cost[i]) + && (kernel_cache_space_available(kernel_cache))) + cache_kernel_row(kernel_cache,docs,i,kernel_parm); + } + (void)compute_index(index,totdoc,index2dnum); + update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc, + totwords,kernel_parm,kernel_cache,lin,aicache, + weights); + (void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c, + learn_parm,index2dnum,index2dnum,model); + for(i=0;i=1) { + printf("done.\n"); fflush(stdout); + } + } + + /* removing inconsistent does not work for general optimization problem */ + if(learn_parm->remove_inconsistent) { + learn_parm->remove_inconsistent = 0; + printf("'remove inconsistent' not available in this mode. Switching option off!"); fflush(stdout); + } + + /* caching makes no sense for linear kernel */ + if(kernel_parm->kernel_type == LINEAR) { + kernel_cache = NULL; + } + + if(verbosity==1) { + printf("Optimizing"); fflush(stdout); + } + + /* train the svm */ + if(learn_parm->sharedslack) + iterations=optimize_to_convergence_sharedslack(docs,label,totdoc, + totwords,learn_parm,kernel_parm, + kernel_cache,&shrink_state,model, + a,lin,c,&timing_profile, + &maxdiff); + else + iterations=optimize_to_convergence(docs,label,totdoc, + totwords,learn_parm,kernel_parm, + kernel_cache,&shrink_state,model, + inconsistent,unlabeled, + a,lin,c,&timing_profile, + &maxdiff,(long)-1,(long)1); + + if(verbosity>=1) { + if(verbosity==1) printf("done. 
(%ld iterations)\n",iterations); + + misclassified=0; + for(i=0;(ib)*(double)label[i] <= 0.0) + misclassified++; + } + + printf("Optimization finished (maxdiff=%.5f).\n",maxdiff); + + runtime_end=get_runtime(); + if(verbosity>=2) { + printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n", + ((float)runtime_end-(float)runtime_start)/100.0, + (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start)); + } + else { + printf("Runtime in cpu-seconds: %.2f\n", + (runtime_end-runtime_start)/100.0); + } + } + if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) { + loss=0; + model_length=0; + for(i=0;ib)*(double)label[i] < c[i]-learn_parm->epsilon_crit) + loss+=c[i]-(lin[i]-model->b)*(double)label[i]; + model_length+=a[i]*label[i]*lin[i]; + } + model_length=sqrt(model_length); + fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length); + } + + if(learn_parm->sharedslack) { + index = (long *)my_malloc(sizeof(long)*totdoc); + index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); + maxslackid=0; + for(i=0;islackid) + maxslackid=docs[i]->slackid; + } + (void)compute_index(index,totdoc,index2dnum); + slack=(double *)my_malloc(sizeof(double)*(maxslackid+1)); + alphaslack=(double *)my_malloc(sizeof(double)*(maxslackid+1)); + for(i=0;i<=maxslackid;i++) { /* init shared slacks */ + slack[i]=0; + alphaslack[i]=0; + } + compute_shared_slacks(docs,label,a,lin,c,index2dnum,learn_parm, + slack,alphaslack); + loss=0; + model->at_upper_bound=0; + svsetnum=0; + for(i=0;i<=maxslackid;i++) { /* create full index */ + loss+=slack[i]; + if(alphaslack[i] > (learn_parm->svm_c - learn_parm->epsilon_a)) + model->at_upper_bound++; + if(alphaslack[i] > learn_parm->epsilon_a) + svsetnum++; + } + free(index); + free(index2dnum); + free(slack); + free(alphaslack); + } + + if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) { + if(learn_parm->sharedslack) { + printf("Number of SV: %ld\n", + model->sv_num-1); + printf("Number of non-zero slack variables: %ld (out of %ld)\n", + model->at_upper_bound,svsetnum); + fprintf(stdout,"L1 loss: loss=%.5f\n",loss); + } + else { + upsupvecnum=0; + for(i=1;isv_num;i++) { + if(fabs(model->alpha[i]) >= + (learn_parm->svm_cost[(model->supvec[i])->docnum]- + learn_parm->epsilon_a)) + upsupvecnum++; + } + printf("Number of SV: %ld (including %ld at upper bound)\n", + model->sv_num-1,upsupvecnum); + fprintf(stdout,"L1 loss: loss=%.5f\n",loss); + } + example_length=estimate_sphere(model,kernel_parm); + fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n", + length_of_longest_document_vector(docs,totdoc,kernel_parm)); + } + if(verbosity>=1) { + printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic); + } + + if(alpha) { + for(i=0;ialphafile[0]) + write_alphas(learn_parm->alphafile,a,label,totdoc); + + shrink_state_cleanup(&shrink_state); + free(label); + free(unlabeled); + free(inconsistent); + free(c); + free(a); + free(lin); + free(learn_parm->svm_cost); +} + + +long optimize_to_convergence(DOC **docs, 
long int *label, long int totdoc,
+			     long int totwords, LEARN_PARM *learn_parm,
+			     KERNEL_PARM *kernel_parm,
+			     KERNEL_CACHE *kernel_cache,
+			     SHRINK_STATE *shrink_state, MODEL *model,
+			     long int *inconsistent, long int *unlabeled,
+			     double *a, double *lin, double *c,
+			     TIMING *timing_profile, double *maxdiff,
+			     long int heldout, long int retrain)
+     /* docs: Training vectors (x-part) */
+     /* label: Training labels/value (y-part, zero if test example for
+	       transduction) */
+     /* totdoc: Number of examples in docs/label */
+     /* totwords: Number of features (i.e. highest feature index) */
+     /* learn_parm: Learning parameters */
+     /* kernel_parm: Kernel parameters */
+     /* kernel_cache: Initialized/partly filled Cache, if using a kernel.
+                      NULL if linear. */
+     /* shrink_state: State of active variables */
+     /* model: Returns learning result */
+     /* inconsistent: examples thrown out as inconsistent */
+     /* unlabeled: test examples for transduction */
+     /* a: alphas */
+     /* lin: linear component of gradient */
+     /* c: right hand side of inequalities (margin) */
+     /* maxdiff: returns maximum violation of KT-conditions */
+     /* heldout: marks held-out example for leave-one-out (or -1) */
+     /* retrain: selects training mode (1=regular / 2=holdout) */
+{
+  long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink;
+  long inconsistentnum,choosenum,already_chosen=0,iteration;
+  long misclassified,supvecnum=0,*active2dnum,inactivenum;
+  long *working2dnum,*selexam;
+  long activenum;
+  double criterion,eq;
+  double *a_old;
+  long t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */
+  long transductcycle;
+  long transduction;
+  double epsilon_crit_org;
+  double bestmaxdiff;
+  long bestmaxdiffiter,terminate;
+
+  double *selcrit;  /* buffer for sorting */
+  CFLOAT *aicache;  /* buffer to keep one row of hessian */
+  double *weights;  /* buffer for weight vector in linear case */
+  QP qp;            /* buffer for one quadratic program */
+
+  epsilon_crit_org=learn_parm->epsilon_crit; /* save org */
+  if(kernel_parm->kernel_type == LINEAR) {
+    learn_parm->epsilon_crit=2.0;
+    kernel_cache=NULL;   /* caching makes no sense for linear kernel */
+  }
+  learn_parm->epsilon_shrink=2;
+  (*maxdiff)=1;
+
+  learn_parm->totwords=totwords;
+
+  chosen = (long *)my_malloc(sizeof(long)*totdoc);
+  last_suboptimal_at = (long *)my_malloc(sizeof(long)*totdoc);
+  key = (long *)my_malloc(sizeof(long)*(totdoc+11));
+  selcrit = (double *)my_malloc(sizeof(double)*totdoc);
+  selexam = (long *)my_malloc(sizeof(long)*totdoc);
+  a_old = (double *)my_malloc(sizeof(double)*totdoc);
+  aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
+  working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
+  active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
+  qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
+  qp.opt_ce0 = (double *)my_malloc(sizeof(double));
+  qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize
+				 *learn_parm->svm_maxqpsize);
+  qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
+  qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
+  qp.opt_low=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
+  qp.opt_up=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
+  weights=(double *)my_malloc(sizeof(double)*(totwords+1));
+
+  choosenum=0;
+  inconsistentnum=0;
+  transductcycle=0;
+  transduction=0;
+  if(!retrain) retrain=1;
+  iteration=1;
+  bestmaxdiffiter=1;
+  bestmaxdiff=999999999;
+  terminate=0;
+
+  if(kernel_cache) {
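+    /* The kernel cache evicts least-recently-used rows; stamping it
+       with the iteration counter (here and at the top of every
+       iteration below) is what drives that LRU ordering. */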
kernel_cache->time=iteration; /* for lru cache */ + kernel_cache_reset_lru(kernel_cache); + } + + for(i=0;iactive,totdoc,active2dnum); + inactivenum=totdoc-activenum; + clear_index(working2dnum); + + /* repeat this loop until we have convergence */ + for(;retrain && (!terminate);iteration++) { + + if(kernel_cache) + kernel_cache->time=iteration; /* for lru cache */ + if(verbosity>=2) { + printf( + "Iteration %ld: ",iteration); fflush(stdout); + } + else if(verbosity==1) { + printf("."); fflush(stdout); + } + + if(verbosity>=2) t0=get_runtime(); + if(verbosity>=3) { + printf("\nSelecting working set... "); fflush(stdout); + } + + if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize) + learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; + + i=0; + for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */ + if((chosen[j]>=(learn_parm->svm_maxqpsize/ + minl(learn_parm->svm_maxqpsize, + learn_parm->svm_newvarsinqp))) + || (inconsistent[j]) + || (j == heldout)) { + chosen[j]=0; + choosenum--; + } + else { + chosen[j]++; + working2dnum[i++]=j; + } + } + working2dnum[i]=-1; + + if(retrain == 2) { + choosenum=0; + for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* fully clear working set */ + chosen[j]=0; + } + clear_index(working2dnum); + for(i=0;ibiased_hyperplane) { + eq=0; + for(i=0;i learn_parm->epsilon_a);i++) { + if((eq*label[i] > 0) && (a[i] > 0)) { + chosen[i]=88888; + choosenum++; + if((eq*label[i]) > a[i]) { + eq-=(a[i]*label[i]); + a[i]=0; + } + else { + a[i]-=(eq*label[i]); + eq=0; + } + } + } + } + compute_index(chosen,totdoc,working2dnum); + } + else { /* select working set according to steepest gradient */ + if(iteration % 101) { + already_chosen=0; + if((minl(learn_parm->svm_newvarsinqp, + learn_parm->svm_maxqpsize-choosenum)>=4) + && (kernel_parm->kernel_type != LINEAR)) { + /* select part of the working set from cache */ + already_chosen=select_next_qp_subproblem_grad( + label,unlabeled,a,lin,c,totdoc, + (long)(minl(learn_parm->svm_maxqpsize-choosenum, + learn_parm->svm_newvarsinqp) + /2), + learn_parm,inconsistent,active2dnum, + working2dnum,selcrit,selexam,kernel_cache,1, + key,chosen); + choosenum+=already_chosen; + } + choosenum+=select_next_qp_subproblem_grad( + label,unlabeled,a,lin,c,totdoc, + minl(learn_parm->svm_maxqpsize-choosenum, + learn_parm->svm_newvarsinqp-already_chosen), + learn_parm,inconsistent,active2dnum, + working2dnum,selcrit,selexam,kernel_cache,0,key, + chosen); + } + else { /* once in a while, select a somewhat random working set + to get unlocked of infinite loops due to numerical + inaccuracies in the core qp-solver */ + choosenum+=select_next_qp_subproblem_rand( + label,unlabeled,a,lin,c,totdoc, + minl(learn_parm->svm_maxqpsize-choosenum, + learn_parm->svm_newvarsinqp), + learn_parm,inconsistent,active2dnum, + working2dnum,selcrit,selexam,kernel_cache,key, + chosen,iteration); + } + } + + if(verbosity>=2) { + printf(" %ld vectors chosen\n",choosenum); fflush(stdout); + } + + if(verbosity>=2) t1=get_runtime(); + + if(kernel_cache) + cache_multiple_kernel_rows(kernel_cache,docs,working2dnum, + choosenum,kernel_parm); + + if(verbosity>=2) t2=get_runtime(); + if(retrain != 2) { + optimize_svm(docs,label,unlabeled,inconsistent,0.0,chosen,active2dnum, + model,totdoc,working2dnum,choosenum,a,lin,c,learn_parm, + aicache,kernel_parm,&qp,&epsilon_crit_org); + } + + if(verbosity>=2) t3=get_runtime(); + update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc, + totwords,kernel_parm,kernel_cache,lin,aicache, + weights); + + 
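+    /* lin[i] caches the gradient's linear component
+       sum_j a[j]*label[j]*K(x_i,x_j). Only the working-set alphas
+       changed in this QP step, so the call above folds in the deltas
+       (a[j]-a_old[j]) for j in working2dnum instead of recomputing
+       the full sums (in the linear case it updates the explicit
+       weight vector instead). */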
if(verbosity>=2) t4=get_runtime(); + supvecnum=calculate_svm_model(docs,label,unlabeled,lin,a,a_old,c, + learn_parm,working2dnum,active2dnum,model); + + if(verbosity>=2) t5=get_runtime(); + + /* The following computation of the objective function works only */ + /* relative to the active variables */ + if(verbosity>=3) { + criterion=compute_objective_function(a,lin,c,learn_parm->eps,label, + active2dnum); + printf("Objective function (over active variables): %.16f\n",criterion); + fflush(stdout); + } + + for(jj=0;(i=working2dnum[jj])>=0;jj++) { + a_old[i]=a[i]; + } + + if(retrain == 2) { /* reset inconsistent unlabeled examples */ + for(i=0;(i=2) { + t6=get_runtime(); + timing_profile->time_select+=t1-t0; + timing_profile->time_kernel+=t2-t1; + timing_profile->time_opti+=t3-t2; + timing_profile->time_update+=t4-t3; + timing_profile->time_model+=t5-t4; + timing_profile->time_check+=t6-t5; + } + + /* checking whether optimizer got stuck */ + if((*maxdiff) < bestmaxdiff) { + bestmaxdiff=(*maxdiff); + bestmaxdiffiter=iteration; + } + if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) { + /* long time no progress? */ + terminate=1; + retrain=0; + if(verbosity>=1) + printf("\nWARNING: Relaxing KT-Conditions due to slow progress! Terminating!\n"); + } + + noshrink=0; + if((!retrain) && (inactivenum>0) + && ((!learn_parm->skip_final_opt_check) + || (kernel_parm->kernel_type == LINEAR))) { + if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR)) + || (verbosity>=2)) { + if(verbosity==1) { + printf("\n"); + } + printf(" Checking optimality of inactive variables..."); + fflush(stdout); + } + t1=get_runtime(); + reactivate_inactive_examples(label,unlabeled,a,shrink_state,lin,c,totdoc, + totwords,iteration,learn_parm,inconsistent, + docs,kernel_parm,kernel_cache,model,aicache, + weights,maxdiff); + /* Update to new active variables. */ + activenum=compute_index(shrink_state->active,totdoc,active2dnum); + inactivenum=totdoc-activenum; + /* reset watchdog */ + bestmaxdiff=(*maxdiff); + bestmaxdiffiter=iteration; + /* termination criterion */ + noshrink=1; + retrain=0; + if((*maxdiff) > learn_parm->epsilon_crit) + retrain=1; + timing_profile->time_shrink+=get_runtime()-t1; + if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR)) + || (verbosity>=2)) { + printf("done.\n"); fflush(stdout); + printf(" Number of inactive variables = %ld\n",inactivenum); + } + } + + if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff))) + learn_parm->epsilon_crit=(*maxdiff); + if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) { + learn_parm->epsilon_crit/=2.0; + retrain=1; + noshrink=1; + } + if(learn_parm->epsilon_critepsilon_crit=epsilon_crit_org; + + if(verbosity>=2) { + printf(" => (%ld SV (incl. 
%ld SV at u-bound), max violation=%.5f)\n", + supvecnum,model->at_upper_bound,(*maxdiff)); + fflush(stdout); + } + if(verbosity>=3) { + printf("\n"); + } + + if((!retrain) && (transduction)) { + for(i=0;(iactive[i]=1; + } + activenum=compute_index(shrink_state->active,totdoc,active2dnum); + inactivenum=0; + if(verbosity==1) printf("done\n"); + retrain=incorporate_unlabeled_examples(model,label,inconsistent, + unlabeled,a,lin,totdoc, + selcrit,selexam,key, + transductcycle,kernel_parm, + learn_parm); + epsilon_crit_org=learn_parm->epsilon_crit; + if(kernel_parm->kernel_type == LINEAR) + learn_parm->epsilon_crit=1; + transductcycle++; + /* reset watchdog */ + bestmaxdiff=(*maxdiff); + bestmaxdiffiter=iteration; + } + else if(((iteration % 10) == 0) && (!noshrink)) { + activenum=shrink_problem(docs,learn_parm,shrink_state,kernel_parm, + active2dnum,last_suboptimal_at,iteration,totdoc, + maxl((long)(activenum/10), + maxl((long)(totdoc/500),100)), + a,inconsistent); + inactivenum=totdoc-activenum; + if((kernel_cache) + && (supvecnum>kernel_cache->max_elems) + && ((kernel_cache->activenum-activenum)>maxl((long)(activenum/10),500))) { + kernel_cache_shrink(kernel_cache,totdoc, + minl((kernel_cache->activenum-activenum), + (kernel_cache->activenum-supvecnum)), + shrink_state->active); + } + } + + if((!retrain) && learn_parm->remove_inconsistent) { + if(verbosity>=1) { + printf(" Moving training errors to inconsistent examples..."); + fflush(stdout); + } + if(learn_parm->remove_inconsistent == 1) { + retrain=identify_inconsistent(a,label,unlabeled,totdoc,learn_parm, + &inconsistentnum,inconsistent); + } + else if(learn_parm->remove_inconsistent == 2) { + retrain=identify_misclassified(lin,label,unlabeled,totdoc, + model,&inconsistentnum,inconsistent); + } + else if(learn_parm->remove_inconsistent == 3) { + retrain=identify_one_misclassified(lin,label,unlabeled,totdoc, + model,&inconsistentnum,inconsistent); + } + if(retrain) { + if(kernel_parm->kernel_type == LINEAR) { /* reinit shrinking */ + learn_parm->epsilon_crit=2.0; + } + } + if(verbosity>=1) { + printf("done.\n"); + if(retrain) { + printf(" Now %ld inconsistent examples.\n",inconsistentnum); + } + } + } + } /* end of loop */ + + free(chosen); + free(last_suboptimal_at); + free(key); + free(selcrit); + free(selexam); + free(a_old); + free(aicache); + free(working2dnum); + free(active2dnum); + free(qp.opt_ce); + free(qp.opt_ce0); + free(qp.opt_g); + free(qp.opt_g0); + free(qp.opt_xinit); + free(qp.opt_low); + free(qp.opt_up); + free(weights); + + learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */ + model->maxdiff=(*maxdiff); + + return(iteration); +} + +long optimize_to_convergence_sharedslack(DOC **docs, long int *label, + long int totdoc, + long int totwords, LEARN_PARM *learn_parm, + KERNEL_PARM *kernel_parm, + KERNEL_CACHE *kernel_cache, + SHRINK_STATE *shrink_state, MODEL *model, + double *a, double *lin, double *c, + TIMING *timing_profile, double *maxdiff) + /* docs: Training vectors (x-part) */ + /* label: Training labels/value (y-part, zero if test example for + transduction) */ + /* totdoc: Number of examples in docs/label */ + /* totwords: Number of features (i.e. highest feature index) */ + /* learn_parm: Learning paramenters */ + /* kernel_parm: Kernel paramenters */ + /* kernel_cache: Initialized/partly filled Cache, if using a kernel. + NULL if linear. 
*/ + /* shrink_state: State of active variables */ + /* model: Returns learning result */ + /* a: alphas */ + /* lin: linear component of gradient */ + /* c: right hand side of inequalities (margin) */ + /* maxdiff: returns maximum violation of KT-conditions */ +{ + long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink,*unlabeled; + long *inconsistent,choosenum,already_chosen=0,iteration; + long misclassified,supvecnum=0,*active2dnum,inactivenum; + long *working2dnum,*selexam,*ignore; + long activenum,retrain,maxslackid,slackset,jointstep; + double criterion,eq_target; + double *a_old,*alphaslack; + long t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */ + double epsilon_crit_org,maxsharedviol; + double bestmaxdiff; + long bestmaxdiffiter,terminate; + + double *selcrit; /* buffer for sorting */ + CFLOAT *aicache; /* buffer to keep one row of hessian */ + double *weights; /* buffer for weight vector in linear case */ + QP qp; /* buffer for one quadratic program */ + double *slack; /* vector of slack variables for optimization with + shared slacks */ + + epsilon_crit_org=learn_parm->epsilon_crit; /* save org */ + if(kernel_parm->kernel_type == LINEAR) { + learn_parm->epsilon_crit=2.0; + kernel_cache=NULL; /* caching makes no sense for linear kernel */ + } + learn_parm->epsilon_shrink=2; + (*maxdiff)=1; + + learn_parm->totwords=totwords; + + chosen = (long *)my_malloc(sizeof(long)*totdoc); + unlabeled = (long *)my_malloc(sizeof(long)*totdoc); + inconsistent = (long *)my_malloc(sizeof(long)*totdoc); + ignore = (long *)my_malloc(sizeof(long)*totdoc); + key = (long *)my_malloc(sizeof(long)*(totdoc+11)); + selcrit = (double *)my_malloc(sizeof(double)*totdoc); + selexam = (long *)my_malloc(sizeof(long)*totdoc); + a_old = (double *)my_malloc(sizeof(double)*totdoc); + aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc); + working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); + active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); + qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); + qp.opt_ce0 = (double *)my_malloc(sizeof(double)); + qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize + *learn_parm->svm_maxqpsize); + qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); + qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); + qp.opt_low=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); + qp.opt_up=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); + weights=(double *)my_malloc(sizeof(double)*(totwords+1)); + maxslackid=0; + for(i=0;islackid) + maxslackid=docs[i]->slackid; + } + slack=(double *)my_malloc(sizeof(double)*(maxslackid+1)); + alphaslack=(double *)my_malloc(sizeof(double)*(maxslackid+1)); + last_suboptimal_at = (long *)my_malloc(sizeof(long)*(maxslackid+1)); + for(i=0;i<=maxslackid;i++) { /* init shared slacks */ + slack[i]=0; + alphaslack[i]=0; + last_suboptimal_at[i]=1; + } + + choosenum=0; + retrain=1; + iteration=1; + bestmaxdiffiter=1; + bestmaxdiff=999999999; + terminate=0; + + if(kernel_cache) { + kernel_cache->time=iteration; /* for lru cache */ + kernel_cache_reset_lru(kernel_cache); + } + + for(i=0;iactive,totdoc,active2dnum); + inactivenum=totdoc-activenum; + clear_index(working2dnum); + + /* call to init slack and alphaslack */ + compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm, + slack,alphaslack); + + /* repeat this loop until we have convergence */ + for(;retrain && (!terminate);iteration++) { + + if(kernel_cache) + 
kernel_cache->time=iteration; /* for lru cache */ + if(verbosity>=2) { + printf( + "Iteration %ld: ",iteration); fflush(stdout); + } + else if(verbosity==1) { + printf("."); fflush(stdout); + } + + if(verbosity>=2) t0=get_runtime(); + if(verbosity>=3) { + printf("\nSelecting working set... "); fflush(stdout); + } + + if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize) + learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; + + /* select working set according to steepest gradient */ + jointstep=0; + eq_target=0; + if(iteration % 101) { + slackset=select_next_qp_slackset(docs,label,a,lin,slack,alphaslack,c, + learn_parm,active2dnum,&maxsharedviol); + if((iteration % 2) + || (!slackset) || (maxsharedviolepsilon_crit)){ + /* do a step with examples from different slack sets */ + if(verbosity >= 2) { + printf("(i-step)"); fflush(stdout); + } + i=0; + for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear old part of working set */ + if((chosen[j]>=(learn_parm->svm_maxqpsize/ + minl(learn_parm->svm_maxqpsize, + learn_parm->svm_newvarsinqp)))) { + chosen[j]=0; + choosenum--; + } + else { + chosen[j]++; + working2dnum[i++]=j; + } + } + working2dnum[i]=-1; + + already_chosen=0; + if((minl(learn_parm->svm_newvarsinqp, + learn_parm->svm_maxqpsize-choosenum)>=4) + && (kernel_parm->kernel_type != LINEAR)) { + /* select part of the working set from cache */ + already_chosen=select_next_qp_subproblem_grad( + label,unlabeled,a,lin,c,totdoc, + (long)(minl(learn_parm->svm_maxqpsize-choosenum, + learn_parm->svm_newvarsinqp) + /2), + learn_parm,inconsistent,active2dnum, + working2dnum,selcrit,selexam,kernel_cache, + (long)1,key,chosen); + choosenum+=already_chosen; + } + choosenum+=select_next_qp_subproblem_grad( + label,unlabeled,a,lin,c,totdoc, + minl(learn_parm->svm_maxqpsize-choosenum, + learn_parm->svm_newvarsinqp-already_chosen), + learn_parm,inconsistent,active2dnum, + working2dnum,selcrit,selexam,kernel_cache, + (long)0,key,chosen); + } + else { /* do a step with all examples from same slack set */ + if(verbosity >= 2) { + printf("(j-step on %ld)",slackset); fflush(stdout); + } + jointstep=1; + for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */ + chosen[j]=0; + } + working2dnum[0]=-1; + eq_target=alphaslack[slackset]; + for(j=0;j=0;jj++) { */ + if(docs[j]->slackid != slackset) + ignore[j]=1; + else { + ignore[j]=0; + learn_parm->svm_cost[j]=learn_parm->svm_c; + /* printf("Inslackset(%ld,%ld)",j,shrink_state->active[j]); */ + } + } + learn_parm->biased_hyperplane=1; + choosenum=select_next_qp_subproblem_grad( + label,unlabeled,a,lin,c,totdoc, + learn_parm->svm_maxqpsize, + learn_parm,ignore,active2dnum, + working2dnum,selcrit,selexam,kernel_cache, + (long)0,key,chosen); + learn_parm->biased_hyperplane=0; + } + } + else { /* once in a while, select a somewhat random working set + to get unlocked of infinite loops due to numerical + inaccuracies in the core qp-solver */ + choosenum+=select_next_qp_subproblem_rand( + label,unlabeled,a,lin,c,totdoc, + minl(learn_parm->svm_maxqpsize-choosenum, + learn_parm->svm_newvarsinqp), + learn_parm,inconsistent,active2dnum, + working2dnum,selcrit,selexam,kernel_cache,key, + chosen,iteration); + } + + if(verbosity>=2) { + printf(" %ld vectors chosen\n",choosenum); fflush(stdout); + } + + if(verbosity>=2) t1=get_runtime(); + + if(kernel_cache) + cache_multiple_kernel_rows(kernel_cache,docs,working2dnum, + choosenum,kernel_parm); + + if(verbosity>=2) t2=get_runtime(); + if(jointstep) learn_parm->biased_hyperplane=1; + 
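+      /* For a joint step all chosen examples share one slack: their
+         alphas must keep summing to eq_target (=alphaslack[slackset]),
+         which is exactly an equality constraint of the biased-hyperplane
+         QP. The flag is therefore switched on just for this call and
+         switched back off right below. */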
optimize_svm(docs,label,unlabeled,ignore,eq_target,chosen,active2dnum, + model,totdoc,working2dnum,choosenum,a,lin,c,learn_parm, + aicache,kernel_parm,&qp,&epsilon_crit_org); + learn_parm->biased_hyperplane=0; + + for(jj=0;(i=working2dnum[jj])>=0;jj++) /* recompute sums of alphas */ + alphaslack[docs[i]->slackid]+=(a[i]-a_old[i]); + for(jj=0;(i=working2dnum[jj])>=0;jj++) { /* reduce alpha to fulfill + constraints */ + if(alphaslack[docs[i]->slackid] > learn_parm->svm_c) { + if(a[i] < (alphaslack[docs[i]->slackid]-learn_parm->svm_c)) { + alphaslack[docs[i]->slackid]-=a[i]; + a[i]=0; + } + else { + a[i]-=(alphaslack[docs[i]->slackid]-learn_parm->svm_c); + alphaslack[docs[i]->slackid]=learn_parm->svm_c; + } + } + } + for(jj=0;(i=active2dnum[jj])>=0;jj++) + learn_parm->svm_cost[i]=a[i]+(learn_parm->svm_c + -alphaslack[docs[i]->slackid]); + + if(verbosity>=2) t3=get_runtime(); + update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc, + totwords,kernel_parm,kernel_cache,lin,aicache, + weights); + compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm, + slack,alphaslack); + + if(verbosity>=2) t4=get_runtime(); + supvecnum=calculate_svm_model(docs,label,unlabeled,lin,a,a_old,c, + learn_parm,working2dnum,active2dnum,model); + + if(verbosity>=2) t5=get_runtime(); + + /* The following computation of the objective function works only */ + /* relative to the active variables */ + if(verbosity>=3) { + criterion=compute_objective_function(a,lin,c,learn_parm->eps,label, + active2dnum); + printf("Objective function (over active variables): %.16f\n",criterion); + fflush(stdout); + } + + for(jj=0;(i=working2dnum[jj])>=0;jj++) { + a_old[i]=a[i]; + } + + retrain=check_optimality_sharedslack(docs,model,label,a,lin,c, + slack,alphaslack,totdoc,learn_parm, + maxdiff,epsilon_crit_org,&misclassified, + active2dnum,last_suboptimal_at, + iteration,kernel_parm); + + if(verbosity>=2) { + t6=get_runtime(); + timing_profile->time_select+=t1-t0; + timing_profile->time_kernel+=t2-t1; + timing_profile->time_opti+=t3-t2; + timing_profile->time_update+=t4-t3; + timing_profile->time_model+=t5-t4; + timing_profile->time_check+=t6-t5; + } + + /* checking whether optimizer got stuck */ + if((*maxdiff) < bestmaxdiff) { + bestmaxdiff=(*maxdiff); + bestmaxdiffiter=iteration; + } + if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) { + /* long time no progress? */ + terminate=1; + retrain=0; + if(verbosity>=1) + printf("\nWARNING: Relaxing KT-Conditions due to slow progress! Terminating!\n"); + } + + noshrink=0; + + if((!retrain) && (inactivenum>0) + && ((!learn_parm->skip_final_opt_check) + || (kernel_parm->kernel_type == LINEAR))) { + if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR)) + || (verbosity>=2)) { + if(verbosity==1) { + printf("\n"); + } + printf(" Checking optimality of inactive variables..."); + fflush(stdout); + } + t1=get_runtime(); + reactivate_inactive_examples(label,unlabeled,a,shrink_state,lin,c,totdoc, + totwords,iteration,learn_parm,inconsistent, + docs,kernel_parm,kernel_cache,model,aicache, + weights,maxdiff); + /* Update to new active variables. 
*/ + activenum=compute_index(shrink_state->active,totdoc,active2dnum); + inactivenum=totdoc-activenum; + /* check optimality, since check in reactivate does not work for + sharedslacks */ + retrain=check_optimality_sharedslack(docs,model,label,a,lin,c, + slack,alphaslack,totdoc,learn_parm, + maxdiff,epsilon_crit_org,&misclassified, + active2dnum,last_suboptimal_at, + iteration,kernel_parm); + + /* reset watchdog */ + bestmaxdiff=(*maxdiff); + bestmaxdiffiter=iteration; + /* termination criterion */ + noshrink=1; + retrain=0; + if((*maxdiff) > learn_parm->epsilon_crit) + retrain=1; + timing_profile->time_shrink+=get_runtime()-t1; + if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR)) + || (verbosity>=2)) { + printf("done.\n"); fflush(stdout); + printf(" Number of inactive variables = %ld\n",inactivenum); + } + } + + if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff))) + learn_parm->epsilon_crit=(*maxdiff); + if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) { + learn_parm->epsilon_crit/=2.0; + retrain=1; + noshrink=1; + } + if(learn_parm->epsilon_critepsilon_crit=epsilon_crit_org; + + if(verbosity>=2) { + printf(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n", + supvecnum,model->at_upper_bound,(*maxdiff)); + fflush(stdout); + } + if(verbosity>=3) { + printf("\n"); + } + + if(((iteration % 10) == 0) && (!noshrink)) { + activenum=shrink_problem(docs,learn_parm,shrink_state, + kernel_parm,active2dnum, + last_suboptimal_at,iteration,totdoc, + maxl((long)(activenum/10), + maxl((long)(totdoc/500),100)), + a,inconsistent); + inactivenum=totdoc-activenum; + if((kernel_cache) + && (supvecnum>kernel_cache->max_elems) + && ((kernel_cache->activenum-activenum)>maxl((long)(activenum/10),500))) { + kernel_cache_shrink(kernel_cache,totdoc, + minl((kernel_cache->activenum-activenum), + (kernel_cache->activenum-supvecnum)), + shrink_state->active); + } + } + + } /* end of loop */ + + + free(alphaslack); + free(slack); + free(chosen); + free(unlabeled); + free(inconsistent); + free(ignore); + free(last_suboptimal_at); + free(key); + free(selcrit); + free(selexam); + free(a_old); + free(aicache); + free(working2dnum); + free(active2dnum); + free(qp.opt_ce); + free(qp.opt_ce0); + free(qp.opt_g); + free(qp.opt_g0); + free(qp.opt_xinit); + free(qp.opt_low); + free(qp.opt_up); + free(weights); + + learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */ + model->maxdiff=(*maxdiff); + + return(iteration); +} + + +double compute_objective_function(double *a, double *lin, double *c, + double eps, long int *label, + long int *active2dnum) + /* Return value of objective function. */ + /* Works only relative to the active variables! 
*/
+{
+  long i,ii;
+  double criterion;
+
+  /* calculate value of objective function */
+  criterion=0;
+  for(ii=0;active2dnum[ii]>=0;ii++) {
+    i=active2dnum[ii];
+    criterion=criterion+(eps-(double)label[i]*c[i])*a[i]+0.5*a[i]*label[i]*lin[i];
+  }
+  return(criterion);
+}
+
+void clear_index(long int *index)
+     /* initializes and empties index */
+{
+  index[0]=-1;
+}
+
+void add_to_index(long int *index, long int elem)
+     /* appends an element to a -1 terminated index */
+{
+  register long i;
+  for(i=0;index[i] != -1;i++);
+  index[i]=elem;
+  index[i+1]=-1;
+}
+
+long compute_index(long int *binfeature, long int range, long int *index)
+     /* create an inverted index of binfeature */
+{
+  register long i,ii;
+
+  ii=0;
+  for(i=0;i<range;i++) {
+    if(binfeature[i]) {
+      index[ii]=i;
+      ii++;
+    }
+  }
+  for(i=0;i<4;i++) {
+    index[ii+i]=-1;
+  }
+  return(ii);
+}
+
+/***************************** Working set methods ***************************/
+
+void optimize_svm(DOC **docs, long int *label, long int *unlabeled,
+                  long int *exclude_from_eq_const, double eq_target,
+                  long int *chosen, long int *active2dnum, MODEL *model,
+                  long int totdoc, long int *working2dnum, long int varnum,
+                  double *a, double *lin, double *c, LEARN_PARM *learn_parm,
+                  CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp,
+                  double *epsilon_crit_target)
+     /* Do optimization on the working set. */
+{
+    long i;
+    double *a_v;
+
+    compute_matrices_for_optimization(docs,label,unlabeled,
+                                      exclude_from_eq_const,eq_target,chosen,
+                                      active2dnum,working2dnum,model,a,lin,c,
+                                      varnum,totdoc,learn_parm,aicache,
+                                      kernel_parm,qp);
+
+    if(verbosity>=3) {
+      printf("Running optimizer..."); fflush(stdout);
+    }
+    /* call the qp-subsolver */
+    a_v=optimize_qp(qp,epsilon_crit_target,
+                    learn_parm->svm_maxqpsize,
+                    &(model->b),   /* in case the optimizer gives us */
+                                   /* the threshold for free. otherwise */
+                                   /* b is calculated in calculate_model. */
+                    learn_parm);
+    if(verbosity>=3) {
+      printf("done\n");
+    }
+
+    for(i=0;i<varnum;i++) {
+      a[working2dnum[i]]=a_v[i];
+      /*
+      if(a_v[i]<=(0+learn_parm->epsilon_a)) {
+        a[working2dnum[i]]=0;
+      }
+      else if(a_v[i]>=(learn_parm->svm_cost[working2dnum[i]]-learn_parm->epsilon_a)) {
+        a[working2dnum[i]]=learn_parm->svm_cost[working2dnum[i]];
+      }
+      */
+    }
+}
+
+void compute_matrices_for_optimization(DOC **docs, long int *label,
+          long int *unlabeled, long *exclude_from_eq_const, double eq_target,
+          long int *chosen, long int *active2dnum,
+          long int *key, MODEL *model, double *a, double *lin, double *c,
+          long int varnum, long int totdoc, LEARN_PARM *learn_parm,
+          CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp)
+{
+  register long ki,kj,i,j;
+  register double kernel_temp;
+
+  if(verbosity>=3) {
+    fprintf(stdout,"Computing qp-matrices (type %ld kernel [degree %ld, rbf_gamma %f, coef_lin %f, coef_const %f])...",kernel_parm->kernel_type,kernel_parm->poly_degree,kernel_parm->rbf_gamma,kernel_parm->coef_lin,kernel_parm->coef_const);
+    fflush(stdout);
+  }
+
+  qp->opt_n=varnum;
+  qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */
+  for(j=1;j<model->sv_num;j++) { /* start at 1 */
+    if((!chosen[(model->supvec[j])->docnum])
+       && (!exclude_from_eq_const[(model->supvec[j])->docnum])) {
+      qp->opt_ce0[0]+=model->alpha[j];
+    }
+  }
+  if(learn_parm->biased_hyperplane)
+    qp->opt_m=1;
+  else
+    qp->opt_m=0;  /* eq-constraint will be ignored */
+
+  /* init linear part of objective function */
+  for(i=0;i<varnum;i++) {
+    qp->opt_g0[i]=lin[key[i]];
+  }
+
+  for(i=0;i<varnum;i++) {
+    ki=key[i];
+
+    /* Compute the matrix for equality constraints */
+    qp->opt_ce[i]=label[ki];
+    qp->opt_low[i]=0;
+    qp->opt_up[i]=learn_parm->svm_cost[ki];
+
+    kernel_temp=(double)kernel(kernel_parm,docs[ki],docs[ki]);
+    /* compute linear part of objective function */
+    qp->opt_g0[i]-=(kernel_temp*a[ki]*(double)label[ki]);
+    /* compute quadratic part of objective function */
+    qp->opt_g[varnum*i+i]=kernel_temp;
+    for(j=i+1;j<varnum;j++) {
+      kj=key[j];
+      kernel_temp=(double)kernel(kernel_parm,docs[ki],docs[kj]);
+      /* compute linear part of objective function */
+      qp->opt_g0[i]-=(kernel_temp*a[kj]*(double)label[kj]);
+      qp->opt_g0[j]-=(kernel_temp*a[ki]*(double)label[ki]);
+      /* compute quadratic part of objective function */
+      qp->opt_g[varnum*i+j]=(double)label[ki]*(double)label[kj]*kernel_temp;
+      qp->opt_g[varnum*j+i]=(double)label[ki]*(double)label[kj]*kernel_temp;
+    }
+
+    if(verbosity>=3) {
+      if(i % 20 == 0) {
+        fprintf(stdout,"%ld..",i); fflush(stdout);
+      }
+    }
+  }
+
+  for(i=0;i<varnum;i++) {
+    /* assure starting at feasible point */
+    qp->opt_xinit[i]=a[key[i]];
+    /* set linear part of objective function */
+    qp->opt_g0[i]=(learn_parm->eps-(double)label[key[i]]*c[key[i]])+qp->opt_g0[i]*(double)label[key[i]];
+  }
+
+  if(verbosity>=3) {
+    fprintf(stdout,"done\n");
+  }
+}
+
+long calculate_svm_model(DOC **docs, long
int *label, long int *unlabeled, + double *lin, double *a, double *a_old, double *c, + LEARN_PARM *learn_parm, long int *working2dnum, + long int *active2dnum, MODEL *model) + /* Compute decision function based on current values */ + /* of alpha. */ +{ + long i,ii,pos,b_calculated=0,first_low,first_high; + double ex_c,b_temp,b_low,b_high; + + if(verbosity>=3) { + printf("Calculating model..."); fflush(stdout); + } + + if(!learn_parm->biased_hyperplane) { + model->b=0; + b_calculated=1; + } + + for(ii=0;(i=working2dnum[ii])>=0;ii++) { + if((a_old[i]>0) && (a[i]==0)) { /* remove from model */ + pos=model->index[i]; + model->index[i]=-1; + (model->sv_num)--; + model->supvec[pos]=model->supvec[model->sv_num]; + model->alpha[pos]=model->alpha[model->sv_num]; + model->index[(model->supvec[pos])->docnum]=pos; + } + else if((a_old[i]==0) && (a[i]>0)) { /* add to model */ + model->supvec[model->sv_num]=docs[i]; + model->alpha[model->sv_num]=a[i]*(double)label[i]; + model->index[i]=model->sv_num; + (model->sv_num)++; + } + else if(a_old[i]==a[i]) { /* nothing to do */ + } + else { /* just update alpha */ + model->alpha[model->index[i]]=a[i]*(double)label[i]; + } + + ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; + if((a_old[i]>=ex_c) && (a[i]at_upper_bound)--; + } + else if((a_old[i]=ex_c)) { + (model->at_upper_bound)++; + } + + if((!b_calculated) + && (a[i]>learn_parm->epsilon_a) && (a[i]b=((double)label[i]*learn_parm->eps-c[i]+lin[i]); + /* model->b=(-(double)label[i]+lin[i]); */ + b_calculated=1; + } + } + + /* No alpha in the working set not at bounds, so b was not + calculated in the usual way. The following handles this special + case. */ + if(learn_parm->biased_hyperplane + && (!b_calculated) + && (model->sv_num-1 == model->at_upper_bound)) { + first_low=1; + first_high=1; + b_low=0; + b_high=0; + for(ii=0;(i=active2dnum[ii])>=0;ii++) { + ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; + if(a[i]0) { + b_temp=-(learn_parm->eps-c[i]+lin[i]); + if((b_temp>b_low) || (first_low)) { + b_low=b_temp; + first_low=0; + } + } + else { + b_temp=-(-learn_parm->eps-c[i]+lin[i]); + if((b_tempeps-c[i]+lin[i]); + if((b_temp>b_low) || (first_low)) { + b_low=b_temp; + first_low=0; + } + } + else { + b_temp=-(learn_parm->eps-c[i]+lin[i]); + if((b_tempb=-b_low; + } + else if(first_low) { + model->b=-b_high; + } + else { + model->b=-(b_high+b_low)/2.0; /* select b as the middle of range */ + /* printf("\nb_low=%f, b_high=%f,b=%f\n",b_low,b_high,model->b); */ + } + } + + if(verbosity>=3) { + printf("done\n"); fflush(stdout); + } + + return(model->sv_num-1); /* have to substract one, since element 0 is empty*/ +} + +long check_optimality(MODEL *model, long int *label, long int *unlabeled, + double *a, double *lin, double *c, long int totdoc, + LEARN_PARM *learn_parm, double *maxdiff, + double epsilon_crit_org, long int *misclassified, + long int *inconsistent, long int *active2dnum, + long int *last_suboptimal_at, + long int iteration, KERNEL_PARM *kernel_parm) + /* Check KT-conditions */ +{ + long i,ii,retrain; + double dist,ex_c,target; + + if(kernel_parm->kernel_type == LINEAR) { /* be optimistic */ + learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org; + } + else { /* be conservative */ + learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3; + } + retrain=0; + (*maxdiff)=0; + (*misclassified)=0; + for(ii=0;(i=active2dnum[ii])>=0;ii++) { + if((!inconsistent[i]) && label[i]) { + dist=(lin[i]-model->b)*(double)label[i];/* 'distance' from + hyperplane*/ + 
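/* NOTE (annotation added in this vendored copy; not part of the
+         upstream SVM-Light sources): the tests below encode the KKT
+         conditions of the dual, with target = -(eps - y_i*c_i) and
+         ex_c = C_i - epsilon_a. Up to the shrink/crit tolerances,
+         optimality of variable a_i requires, schematically:
+
+             a_i == 0        =>  dist >= target
+             0 < a_i < C_i   =>  dist == target
+             a_i == C_i      =>  dist <= target
+
+         The largest violation found is accumulated in *maxdiff and
+         drives the retrain decision. */
+ 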
target=-(learn_parm->eps-(double)label[i]*c[i]); + ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; + if(dist <= 0) { + (*misclassified)++; /* does not work due to deactivation of var */ + } + if((a[i]>learn_parm->epsilon_a) && (dist > target)) { + if((dist-target)>(*maxdiff)) /* largest violation */ + (*maxdiff)=dist-target; + } + else if((a[i](*maxdiff)) /* largest violation */ + (*maxdiff)=target-dist; + } + /* Count how long a variable was at lower/upper bound (and optimal).*/ + /* Variables, which were at the bound and optimal for a long */ + /* time are unlikely to become support vectors. In case our */ + /* cache is filled up, those variables are excluded to save */ + /* kernel evaluations. (See chapter 'Shrinking').*/ + if((a[i]>(learn_parm->epsilon_a)) + && (a[i]epsilon_a)) + && (dist < (target+learn_parm->epsilon_shrink))) { + last_suboptimal_at[i]=iteration; /* not likely optimal */ + } + else if((a[i]>=ex_c) + && (dist > (target-learn_parm->epsilon_shrink))) { + last_suboptimal_at[i]=iteration; /* not likely optimal */ + } + } + } + /* termination criterion */ + if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) { + retrain=1; + } + return(retrain); +} + +long check_optimality_sharedslack(DOC **docs, MODEL *model, long int *label, + double *a, double *lin, double *c, double *slack, + double *alphaslack, + long int totdoc, + LEARN_PARM *learn_parm, double *maxdiff, + double epsilon_crit_org, long int *misclassified, + long int *active2dnum, + long int *last_suboptimal_at, + long int iteration, KERNEL_PARM *kernel_parm) + /* Check KT-conditions */ +{ + long i,ii,retrain; + double dist,ex_c=0,target; + + if(kernel_parm->kernel_type == LINEAR) { /* be optimistic */ + learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org; + } + else { /* be conservative */ + learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3; + } + + retrain=0; + (*maxdiff)=0; + (*misclassified)=0; + for(ii=0;(i=active2dnum[ii])>=0;ii++) { + /* 'distance' from hyperplane*/ + dist=(lin[i]-model->b)*(double)label[i]+slack[docs[i]->slackid]; + target=-(learn_parm->eps-(double)label[i]*c[i]); + ex_c=learn_parm->svm_c-learn_parm->epsilon_a; + if((a[i]>learn_parm->epsilon_a) && (dist > target)) { + if((dist-target)>(*maxdiff)) { /* largest violation */ + (*maxdiff)=dist-target; + if(verbosity>=5) printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, alphaslack=%f\n",docs[i]->slackid,dist,target,slack[docs[i]->slackid],a[i],alphaslack[docs[i]->slackid]); + if(verbosity>=5) printf(" (single %f)\n",(*maxdiff)); + } + } + if((alphaslack[docs[i]->slackid]slackid]>0)) { + if((slack[docs[i]->slackid])>(*maxdiff)) { /* largest violation */ + (*maxdiff)=slack[docs[i]->slackid]; + if(verbosity>=5) printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, alphaslack=%f\n",docs[i]->slackid,dist,target,slack[docs[i]->slackid],a[i],alphaslack[docs[i]->slackid]); + if(verbosity>=5) printf(" (joint %f)\n",(*maxdiff)); + } + } + /* Count how long a variable was at lower/upper bound (and optimal).*/ + /* Variables, which were at the bound and optimal for a long */ + /* time are unlikely to become support vectors. In case our */ + /* cache is filled up, those variables are excluded to save */ + /* kernel evaluations. 
(See chapter 'Shrinking').*/ + if((a[i]>(learn_parm->epsilon_a)) + && (a[i]slackid]=iteration; /* not at bound */ + } + else if((a[i]<=(learn_parm->epsilon_a)) + && (dist < (target+learn_parm->epsilon_shrink))) { + last_suboptimal_at[docs[i]->slackid]=iteration; /* not likely optimal */ + } + else if((a[i]>=ex_c) + && (slack[docs[i]->slackid] < learn_parm->epsilon_shrink)) { + last_suboptimal_at[docs[i]->slackid]=iteration; /* not likely optimal */ + } + } + /* termination criterion */ + if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) { + retrain=1; + } + return(retrain); +} + +void compute_shared_slacks(DOC **docs, long int *label, + double *a, double *lin, + double *c, long int *active2dnum, + LEARN_PARM *learn_parm, + double *slack, double *alphaslack) + /* compute the value of shared slacks and the joint alphas */ +{ + long jj,i; + double dist,target; + + for(jj=0;(i=active2dnum[jj])>=0;jj++) { /* clear slack variables */ + slack[docs[i]->slackid]=0.0; + alphaslack[docs[i]->slackid]=0.0; + } + for(jj=0;(i=active2dnum[jj])>=0;jj++) { /* recompute slack variables */ + dist=(lin[i])*(double)label[i]; + target=-(learn_parm->eps-(double)label[i]*c[i]); + if((target-dist) > slack[docs[i]->slackid]) + slack[docs[i]->slackid]=target-dist; + alphaslack[docs[i]->slackid]+=a[i]; + } +} + + +long identify_inconsistent(double *a, long int *label, + long int *unlabeled, long int totdoc, + LEARN_PARM *learn_parm, + long int *inconsistentnum, long int *inconsistent) +{ + long i,retrain; + + /* Throw out examples with multipliers at upper bound. This */ + /* corresponds to the -i 1 option. */ + /* ATTENTION: this is just a heuristic for finding a close */ + /* to minimum number of examples to exclude to */ + /* make the problem separable with desired margin */ + retrain=0; + for(i=0;i=(learn_parm->svm_cost[i]-learn_parm->epsilon_a))) { + (*inconsistentnum)++; + inconsistent[i]=1; /* never choose again */ + retrain=2; /* start over */ + if(verbosity>=3) { + printf("inconsistent(%ld)..",i); fflush(stdout); + } + } + } + return(retrain); +} + +long identify_misclassified(double *lin, long int *label, + long int *unlabeled, long int totdoc, + MODEL *model, long int *inconsistentnum, + long int *inconsistent) +{ + long i,retrain; + double dist; + + /* Throw out misclassified examples. This */ + /* corresponds to the -i 2 option. */ + /* ATTENTION: this is just a heuristic for finding a close */ + /* to minimum number of examples to exclude to */ + /* make the problem separable with desired margin */ + retrain=0; + for(i=0;ib)*(double)label[i]; /* 'distance' from hyperplane*/ + if((!inconsistent[i]) && (!unlabeled[i]) && (dist <= 0)) { + (*inconsistentnum)++; + inconsistent[i]=1; /* never choose again */ + retrain=2; /* start over */ + if(verbosity>=3) { + printf("inconsistent(%ld)..",i); fflush(stdout); + } + } + } + return(retrain); +} + +long identify_one_misclassified(double *lin, long int *label, + long int *unlabeled, + long int totdoc, MODEL *model, + long int *inconsistentnum, + long int *inconsistent) +{ + long i,retrain,maxex=-1; + double dist,maxdist=0; + + /* Throw out the 'most misclassified' example. This */ + /* corresponds to the -i 3 option. 
*/ + /* ATTENTION: this is just a heuristic for finding a close */ + /* to minimum number of examples to exclude to */ + /* make the problem separable with desired margin */ + retrain=0; + for(i=0;ib)*(double)label[i];/* 'distance' from hyperplane*/ + if(dist=0) { + (*inconsistentnum)++; + inconsistent[maxex]=1; /* never choose again */ + retrain=2; /* start over */ + if(verbosity>=3) { + printf("inconsistent(%ld)..",i); fflush(stdout); + } + } + return(retrain); +} + +void update_linear_component(DOC **docs, long int *label, + long int *active2dnum, double *a, + double *a_old, long int *working2dnum, + long int totdoc, long int totwords, + KERNEL_PARM *kernel_parm, + KERNEL_CACHE *kernel_cache, + double *lin, CFLOAT *aicache, double *weights) + /* keep track of the linear component */ + /* lin of the gradient etc. by updating */ + /* based on the change of the variables */ + /* in the current working set */ +{ + register long i,ii,j,jj; + register double tec; + SVECTOR *f; + + if(kernel_parm->kernel_type==0) { /* special linear case */ + clear_vector_n(weights,totwords); + for(ii=0;(i=working2dnum[ii])>=0;ii++) { + if(a[i] != a_old[i]) { + for(f=docs[i]->fvec;f;f=f->next) + add_vector_ns(weights,f, + f->factor*((a[i]-a_old[i])*(double)label[i])); + } + } + for(jj=0;(j=active2dnum[jj])>=0;jj++) { + for(f=docs[j]->fvec;f;f=f->next) + lin[j]+=f->factor*sprod_ns(weights,f); + } + } + else { /* general case */ + for(jj=0;(i=working2dnum[jj])>=0;jj++) { + if(a[i] != a_old[i]) { + get_kernel_row(kernel_cache,docs,i,totdoc,active2dnum,aicache, + kernel_parm); + for(ii=0;(j=active2dnum[ii])>=0;ii++) { + tec=aicache[j]; + lin[j]+=(((a[i]*tec)-(a_old[i]*tec))*(double)label[i]); + } + } + } + } +} + + +long incorporate_unlabeled_examples(MODEL *model, long int *label, + long int *inconsistent, + long int *unlabeled, + double *a, double *lin, + long int totdoc, double *selcrit, + long int *select, long int *key, + long int transductcycle, + KERNEL_PARM *kernel_parm, + LEARN_PARM *learn_parm) +{ + long i,j,k,j1,j2,j3,j4,unsupaddnum1=0,unsupaddnum2=0; + long pos,neg,upos,uneg,orgpos,orgneg,nolabel,newpos,newneg,allunlab; + double dist,model_length,posratio,negratio; + long check_every=2; + double loss; + static double switchsens=0.0,switchsensorg=0.0; + double umin,umax,sumalpha; + long imin=0,imax=0; + static long switchnum=0; + + switchsens/=1.2; + + /* assumes that lin[] is up to date -> no inactive vars */ + + orgpos=0; + orgneg=0; + newpos=0; + newneg=0; + nolabel=0; + allunlab=0; + for(i=0;i 0) { + orgpos++; + } + else { + orgneg++; + } + } + else { + allunlab++; + if(unlabeled[i]) { + if(label[i] > 0) { + newpos++; + } + else if(label[i] < 0) { + newneg++; + } + } + } + if(label[i]==0) { + nolabel++; + } + } + + if(learn_parm->transduction_posratio >= 0) { + posratio=learn_parm->transduction_posratio; + } + else { + posratio=(double)orgpos/(double)(orgpos+orgneg); /* use ratio of pos/neg */ + } /* in training data */ + negratio=1.0-posratio; + + learn_parm->svm_costratio=1.0; /* global */ + if(posratio>0) { + learn_parm->svm_costratio_unlab=negratio/posratio; + } + else { + learn_parm->svm_costratio_unlab=1.0; + } + + pos=0; + neg=0; + upos=0; + uneg=0; + for(i=0;ib); /* 'distance' from hyperplane*/ + if(dist>0) { + pos++; + } + else { + neg++; + } + if(unlabeled[i]) { + if(dist>0) { + upos++; + } + else { + uneg++; + } + } + if((!unlabeled[i]) && (a[i]>(learn_parm->svm_cost[i]-learn_parm->epsilon_a))) { + /* printf("Ubounded %ld (class %ld, unlabeled %ld)\n",i,label[i],unlabeled[i]); */ + } + } 
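+  /* NOTE (annotation added in this vendored copy; not part of the
+     upstream SVM-Light sources): transductive training keeps the
+     fraction of unlabeled examples classified positive close to
+     posratio, which was set above either from
+     learn_parm->transduction_posratio or, if that value is negative,
+     from the labeled training data itself, schematically:
+
+         posratio = orgpos / (double)(orgpos + orgneg);
+
+     The counts just computed (pos/neg, upos/uneg) let the verbosity
+     output below report how the current model splits the data. */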
+ if(verbosity>=2) { + printf("POS=%ld, ORGPOS=%ld, ORGNEG=%ld\n",pos,orgpos,orgneg); + printf("POS=%ld, NEWPOS=%ld, NEWNEG=%ld\n",pos,newpos,newneg); + printf("pos ratio = %f (%f).\n",(double)(upos)/(double)(allunlab),posratio); + fflush(stdout); + } + + if(transductcycle == 0) { + j1=0; + j2=0; + j4=0; + for(i=0;ib); /* 'distance' from hyperplane*/ + if((label[i]==0) && (unlabeled[i])) { + selcrit[j4]=dist; + key[j4]=i; + j4++; + } + } + unsupaddnum1=0; + unsupaddnum2=0; + select_top_n(selcrit,j4,select,(long)(allunlab*posratio+0.5)); + for(k=0;(k<(long)(allunlab*posratio+0.5));k++) { + i=key[select[k]]; + label[i]=1; + unsupaddnum1++; + j1++; + } + for(i=0;isvm_cost[i]=learn_parm->svm_c* + learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound; + } + else if(label[i] == -1) { + learn_parm->svm_cost[i]=learn_parm->svm_c* + learn_parm->svm_unlabbound; + } + } + } + if(verbosity>=1) { + /* printf("costratio %f, costratio_unlab %f, unlabbound %f\n", + learn_parm->svm_costratio,learn_parm->svm_costratio_unlab, + learn_parm->svm_unlabbound); */ + printf("Classifying unlabeled data as %ld POS / %ld NEG.\n", + unsupaddnum1,unsupaddnum2); + fflush(stdout); + } + if(verbosity >= 1) + printf("Retraining."); + if(verbosity >= 2) printf("\n"); + return((long)3); + } + if((transductcycle % check_every) == 0) { + if(verbosity >= 1) + printf("Retraining."); + if(verbosity >= 2) printf("\n"); + j1=0; + j2=0; + unsupaddnum1=0; + unsupaddnum2=0; + for(i=0;isvm_cost[i]=learn_parm->svm_c* + learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound; + } + else if(label[i] == -1) { + learn_parm->svm_cost[i]=learn_parm->svm_c* + learn_parm->svm_unlabbound; + } + } + } + + if(verbosity>=2) { + /* printf("costratio %f, costratio_unlab %f, unlabbound %f\n", + learn_parm->svm_costratio,learn_parm->svm_costratio_unlab, + learn_parm->svm_unlabbound); */ + printf("%ld positive -> Added %ld POS / %ld NEG unlabeled examples.\n", + upos,unsupaddnum1,unsupaddnum2); + fflush(stdout); + } + + if(learn_parm->svm_unlabbound == 1) { + learn_parm->epsilon_crit=0.001; /* do the last run right */ + } + else { + learn_parm->epsilon_crit=0.01; /* otherwise, no need to be so picky */ + } + + return((long)3); + } + else if(((transductcycle % check_every) < check_every)) { + model_length=0; + sumalpha=0; + loss=0; + for(i=0;ib); /* 'distance' from hyperplane*/ + if((label[i]*dist)<(1.0-learn_parm->epsilon_crit)) { + loss+=(1.0-(label[i]*dist))*learn_parm->svm_cost[i]; + } + } + model_length=sqrt(model_length); + if(verbosity>=2) { + printf("Model-length = %f (%f), loss = %f, objective = %f\n", + model_length,sumalpha,loss,loss+0.5*model_length*model_length); + fflush(stdout); + } + j1=0; + j2=0; + j3=0; + j4=0; + unsupaddnum1=0; + unsupaddnum2=0; + umin=99999; + umax=-99999; + j4=1; + while(j4) { + umin=99999; + umax=-99999; + for(i=0;(ib); + if((label[i]>0) && (unlabeled[i]) && (!inconsistent[i]) + && (distumax)) { + umax=dist; + imax=i; + } + } + if((umin < (umax+switchsens-1E-4))) { + j1++; + j2++; + unsupaddnum1++; + unlabeled[imin]=3; + inconsistent[imin]=1; + unsupaddnum2++; + unlabeled[imax]=2; + inconsistent[imax]=1; + } + else + j4=0; + j4=0; + } + for(j=0;(j0) { + unlabeled[j]=2; + } + else if(label[j]<0) { + unlabeled[j]=3; + } + /* inconsistent[j]=1; */ + j3++; + } + } + switchnum+=unsupaddnum1+unsupaddnum2; + + /* stop and print out current margin + printf("switchnum %ld %ld\n",switchnum,kernel_parm->poly_degree); + if(switchnum == 2*kernel_parm->poly_degree) { + learn_parm->svm_unlabbound=1; + } + */ + + 
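/* NOTE (annotation added in this vendored copy; not part of the
+     upstream SVM-Light sources): the loop above is the label-switching
+     step of the transductive SVM. umin is the smallest margin among
+     tentatively positive unlabeled examples (at imin), umax the largest
+     among the tentatively negative ones (at imax); when
+     umin < umax + switchsens the two tentative labels are swapped,
+     which decreases the objective, and training is repeated until no
+     such pair remains. */
+ 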
if((!unsupaddnum1) && (!unsupaddnum2)) { + if((learn_parm->svm_unlabbound>=1) && ((newpos+newneg) == allunlab)) { + for(j=0;(jpredfile,model,lin,a,unlabeled,label, + totdoc,learn_parm); + if(verbosity>=1) + printf("Number of switches: %ld\n",switchnum); + return((long)0); + } + switchsens=switchsensorg; + learn_parm->svm_unlabbound*=1.5; + if(learn_parm->svm_unlabbound>1) { + learn_parm->svm_unlabbound=1; + } + model->at_upper_bound=0; /* since upper bound increased */ + if(verbosity>=1) + printf("Increasing influence of unlabeled examples to %f%% .", + learn_parm->svm_unlabbound*100.0); + } + else if(verbosity>=1) { + printf("%ld positive -> Switching labels of %ld POS / %ld NEG unlabeled examples.", + upos,unsupaddnum1,unsupaddnum2); + fflush(stdout); + } + + if(verbosity >= 2) printf("\n"); + + learn_parm->epsilon_crit=0.5; /* don't need to be so picky */ + + for(i=0;isvm_cost[i]=learn_parm->svm_c* + learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound; + } + else if(label[i] == -1) { + learn_parm->svm_cost[i]=learn_parm->svm_c* + learn_parm->svm_unlabbound; + } + } + } + + return((long)2); + } + + return((long)0); +} + +/*************************** Working set selection ***************************/ + +long select_next_qp_subproblem_grad(long int *label, + long int *unlabeled, + double *a, double *lin, + double *c, long int totdoc, + long int qp_size, + LEARN_PARM *learn_parm, + long int *inconsistent, + long int *active2dnum, + long int *working2dnum, + double *selcrit, + long int *select, + KERNEL_CACHE *kernel_cache, + long int cache_only, + long int *key, long int *chosen) + /* Use the feasible direction approach to select the next + qp-subproblem (see chapter 'Selecting a good working set'). If + 'cache_only' is true, then the variables are selected only among + those for which the kernel evaluations are cached. 
*/ +{ + long choosenum,i,j,k,activedoc,inum,valid; + double s; + + for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */ + choosenum=0; + activedoc=0; + for(i=0;(j=active2dnum[i])>=0;i++) { + s=-label[j]; + if(kernel_cache && cache_only) + valid=(kernel_cache->index[j]>=0); + else + valid=1; + if(valid + && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0))) + && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) + && (s>0))) + && (!chosen[j]) + && (label[j]) + && (!inconsistent[j])) + { + selcrit[activedoc]=(double)label[j]*(learn_parm->eps-(double)label[j]*c[j]+(double)label[j]*lin[j]); + /* selcrit[activedoc]=(double)label[j]*(-1.0+(double)label[j]*lin[j]); */ + key[activedoc]=j; + activedoc++; + } + } + select_top_n(selcrit,activedoc,select,(long)(qp_size/2)); + for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (kbiased_hyperplane || (selcrit[select[k]] > 0)) { */ + i=key[select[k]]; + chosen[i]=1; + working2dnum[inum+choosenum]=i; + choosenum+=1; + if(kernel_cache) + kernel_cache_touch(kernel_cache,i); /* make sure it does not get + kicked out of cache */ + /* } */ + } + + activedoc=0; + for(i=0;(j=active2dnum[i])>=0;i++) { + s=label[j]; + if(kernel_cache && cache_only) + valid=(kernel_cache->index[j]>=0); + else + valid=1; + if(valid + && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0))) + && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) + && (s>0))) + && (!chosen[j]) + && (label[j]) + && (!inconsistent[j])) + { + selcrit[activedoc]=-(double)label[j]*(learn_parm->eps-(double)label[j]*c[j]+(double)label[j]*lin[j]); + /* selcrit[activedoc]=-(double)(label[j]*(-1.0+(double)label[j]*lin[j])); */ + key[activedoc]=j; + activedoc++; + } + } + select_top_n(selcrit,activedoc,select,(long)(qp_size/2)); + for(k=0;(choosenumbiased_hyperplane || (selcrit[select[k]] > 0)) { */ + i=key[select[k]]; + chosen[i]=1; + working2dnum[inum+choosenum]=i; + choosenum+=1; + if(kernel_cache) + kernel_cache_touch(kernel_cache,i); /* make sure it does not get + kicked out of cache */ + /* } */ + } + working2dnum[inum+choosenum]=-1; /* complete index */ + return(choosenum); +} + +long select_next_qp_subproblem_rand(long int *label, + long int *unlabeled, + double *a, double *lin, + double *c, long int totdoc, + long int qp_size, + LEARN_PARM *learn_parm, + long int *inconsistent, + long int *active2dnum, + long int *working2dnum, + double *selcrit, + long int *select, + KERNEL_CACHE *kernel_cache, + long int *key, + long int *chosen, + long int iteration) +/* Use the feasible direction approach to select the next + qp-subproblem (see section 'Selecting a good working set'). Chooses + a feasible direction at (pseudo) random to help jump over numerical + problem. 
*/
+{
+  long choosenum,i,j,k,activedoc,inum;
+  double s;
+
+  for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
+  choosenum=0;
+  activedoc=0;
+  for(i=0;(j=active2dnum[i])>=0;i++) {
+    s=-label[j];
+    if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
+       && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
+             && (s>0)))
+       && (!inconsistent[j])
+       && (label[j])
+       && (!chosen[j])) {
+      selcrit[activedoc]=(j+iteration) % totdoc;
+      key[activedoc]=j;
+      activedoc++;
+    }
+  }
+  select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
+  for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
+    i=key[select[k]];
+    chosen[i]=1;
+    working2dnum[inum+choosenum]=i;
+    choosenum+=1;
+    if(kernel_cache)
+      kernel_cache_touch(kernel_cache,i); /* make sure it does not get
+                                             kicked out of cache */
+  }
+
+  activedoc=0;
+  for(i=0;(j=active2dnum[i])>=0;i++) {
+    s=label[j];
+    if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
+       && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
+             && (s>0)))
+       && (!inconsistent[j])
+       && (label[j])
+       && (!chosen[j])) {
+      selcrit[activedoc]=(j+iteration) % totdoc;
+      key[activedoc]=j;
+      activedoc++;
+    }
+  }
+  select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
+  for(k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
+    i=key[select[k]];
+    chosen[i]=1;
+    working2dnum[inum+choosenum]=i;
+    choosenum+=1;
+    if(kernel_cache)
+      kernel_cache_touch(kernel_cache,i); /* make sure it does not get
+                                             kicked out of cache */
+  }
+  working2dnum[inum+choosenum]=-1; /* complete index */
+  return(choosenum);
+}
+
+long select_next_qp_slackset(DOC **docs, long int *label,
+                             double *a, double *lin,
+                             double *slack, double *alphaslack,
+                             double *c, LEARN_PARM *learn_parm,
+                             long int *active2dnum, double *maxviol)
+     /* returns the slackset with the largest internal violation */
+{
+  long i,ii,maxdiffid;
+  double dist,target,maxdiff,ex_c;
+
+  maxdiff=0;
+  maxdiffid=0;
+  for(ii=0;(i=active2dnum[ii])>=0;ii++) {
+    ex_c=learn_parm->svm_c-learn_parm->epsilon_a;
+    if(alphaslack[docs[i]->slackid] >= ex_c) {
+      dist=(lin[i])*(double)label[i]+slack[docs[i]->slackid]; /* distance */
+      target=-(learn_parm->eps-(double)label[i]*c[i]); /* rhs of constraint */
+      if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
+        if((dist-target)>maxdiff) { /* largest violation */
+          maxdiff=dist-target;
+          maxdiffid=docs[i]->slackid;
+        }
+      }
+    }
+  }
+  (*maxviol)=maxdiff;
+  return(maxdiffid);
+}
+
+void select_top_n(double *selcrit, long int range, long int *select,
+                  long int n)
+{
+  register long i,j;
+
+  for(i=0;(i<n) && (i<range);i++) { /* Initialize with the first n elements */
+    for(j=i;j>=0;j--) {
+      if((j>0) && (selcrit[select[j-1]]<selcrit[i])){
+        select[j]=select[j-1];
+      }
+      else {
+        select[j]=i;
+        j=-1;
+      }
+    }
+  }
+  if(n>0) {
+    for(i=n;i<range;i++) {
+      if(selcrit[i]>selcrit[select[n-1]]) {
+        for(j=n-1;j>=0;j--) {
+          if((j>0) && (selcrit[select[j-1]]<selcrit[i])) {
+            select[j]=select[j-1];
+          }
+          else {
+            select[j]=i;
+            j=-1;
+          }
+        }
+      }
+    }
+  }
+}
+
+/******************************** Shrinking **********************************/
+
+void init_shrink_state(SHRINK_STATE *shrink_state, long int totdoc,
+                       long int maxhistory)
+{
+  long i;
+
+  shrink_state->deactnum=0;
+  shrink_state->active = (long *)my_malloc(sizeof(long)*totdoc);
+  shrink_state->inactive_since = (long *)my_malloc(sizeof(long)*totdoc);
+  shrink_state->a_history = (double **)my_malloc(sizeof(double *)*maxhistory);
+  shrink_state->maxhistory=maxhistory;
+  shrink_state->last_lin = (double *)my_malloc(sizeof(double)*totdoc);
+  shrink_state->last_a = (double *)my_malloc(sizeof(double)*totdoc);
+
+  for(i=0;i<totdoc;i++) {
+    shrink_state->active[i]=1;
+    shrink_state->inactive_since[i]=0;
+    shrink_state->last_a[i]=0;
+    shrink_state->last_lin[i]=0;
+  }
+}
+
+void shrink_state_cleanup(SHRINK_STATE *shrink_state)
+{
+  free(shrink_state->active);
+  free(shrink_state->inactive_since);
+  if(shrink_state->deactnum > 0)
+    free(shrink_state->a_history[shrink_state->deactnum-1]);
+  free(shrink_state->a_history);
+  free(shrink_state->last_a);
+  free(shrink_state->last_lin);
+}
+
+long shrink_problem(DOC **docs,
+                    LEARN_PARM *learn_parm,
+                    SHRINK_STATE *shrink_state,
+                    KERNEL_PARM *kernel_parm,
+                    long int *active2dnum,
+                    long int *last_suboptimal_at,
+                    long int iteration,
+                    long int totdoc,
+                    long int minshrink,
+                    double *a,
+                    long int *inconsistent)
+     /* Shrink some variables away. Do the shrinking only if at least
+        minshrink variables can be removed.
*/ +{ + long i,ii,change,activenum,lastiter; + double *a_old; + + activenum=0; + change=0; + for(ii=0;active2dnum[ii]>=0;ii++) { + i=active2dnum[ii]; + activenum++; + if(learn_parm->sharedslack) + lastiter=last_suboptimal_at[docs[i]->slackid]; + else + lastiter=last_suboptimal_at[i]; + if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink) + || (inconsistent[i])) { + change++; + } + } + if((change>=minshrink) /* shrink only if sufficiently many candidates */ + && (shrink_state->deactnummaxhistory)) { /* and enough memory */ + /* Shrink problem by removing those variables which are */ + /* optimal at a bound for a minimum number of iterations */ + if(verbosity>=2) { + printf(" Shrinking..."); fflush(stdout); + } + if(kernel_parm->kernel_type != LINEAR) { /* non-linear case save alphas */ + a_old=(double *)my_malloc(sizeof(double)*totdoc); + shrink_state->a_history[shrink_state->deactnum]=a_old; + for(i=0;i=0;ii++) { + i=active2dnum[ii]; + if(learn_parm->sharedslack) + lastiter=last_suboptimal_at[docs[i]->slackid]; + else + lastiter=last_suboptimal_at[i]; + if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink) + || (inconsistent[i])) { + shrink_state->active[i]=0; + shrink_state->inactive_since[i]=shrink_state->deactnum; + } + } + activenum=compute_index(shrink_state->active,totdoc,active2dnum); + shrink_state->deactnum++; + if(kernel_parm->kernel_type == LINEAR) { + shrink_state->deactnum=0; + } + if(verbosity>=2) { + printf("done.\n"); fflush(stdout); + printf(" Number of inactive variables = %ld\n",totdoc-activenum); + } + } + return(activenum); +} + + +void reactivate_inactive_examples(long int *label, + long int *unlabeled, + double *a, + SHRINK_STATE *shrink_state, + double *lin, + double *c, + long int totdoc, + long int totwords, + long int iteration, + LEARN_PARM *learn_parm, + long int *inconsistent, + DOC **docs, + KERNEL_PARM *kernel_parm, + KERNEL_CACHE *kernel_cache, + MODEL *model, + CFLOAT *aicache, + double *weights, + double *maxdiff) + /* Make all variables active again which had been removed by + shrinking. */ + /* Computes lin for those variables from scratch. 
*/ +{ + register long i,j,ii,jj,t,*changed2dnum,*inactive2dnum; + long *changed,*inactive; + register double kernel_val,*a_old,dist; + double ex_c,target; + SVECTOR *f; + + if(kernel_parm->kernel_type == LINEAR) { /* special linear case */ + a_old=shrink_state->last_a; + clear_vector_n(weights,totwords); + for(i=0;ifvec;f;f=f->next) + add_vector_ns(weights,f, + f->factor*((a[i]-a_old[i])*(double)label[i])); + a_old[i]=a[i]; + } + } + for(i=0;iactive[i]) { + for(f=docs[i]->fvec;f;f=f->next) + lin[i]=shrink_state->last_lin[i]+f->factor*sprod_ns(weights,f); + } + shrink_state->last_lin[i]=lin[i]; + } + } + else { + changed=(long *)my_malloc(sizeof(long)*totdoc); + changed2dnum=(long *)my_malloc(sizeof(long)*(totdoc+11)); + inactive=(long *)my_malloc(sizeof(long)*totdoc); + inactive2dnum=(long *)my_malloc(sizeof(long)*(totdoc+11)); + for(t=shrink_state->deactnum-1;(t>=0) && shrink_state->a_history[t];t--) { + if(verbosity>=2) { + printf("%ld..",t); fflush(stdout); + } + a_old=shrink_state->a_history[t]; + for(i=0;iactive[i]) + && (shrink_state->inactive_since[i] == t)); + changed[i]= (a[i] != a_old[i]); + } + compute_index(inactive,totdoc,inactive2dnum); + compute_index(changed,totdoc,changed2dnum); + + for(ii=0;(i=changed2dnum[ii])>=0;ii++) { + get_kernel_row(kernel_cache,docs,i,totdoc,inactive2dnum,aicache, + kernel_parm); + for(jj=0;(j=inactive2dnum[jj])>=0;jj++) { + kernel_val=aicache[j]; + lin[j]+=(((a[i]*kernel_val)-(a_old[i]*kernel_val))*(double)label[i]); + } + } + } + free(changed); + free(changed2dnum); + free(inactive); + free(inactive2dnum); + } + (*maxdiff)=0; + for(i=0;iinactive_since[i]=shrink_state->deactnum-1; + if(!inconsistent[i]) { + dist=(lin[i]-model->b)*(double)label[i]; + target=-(learn_parm->eps-(double)label[i]*c[i]); + ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; + if((a[i]>learn_parm->epsilon_a) && (dist > target)) { + if((dist-target)>(*maxdiff)) /* largest violation */ + (*maxdiff)=dist-target; + } + else if((a[i](*maxdiff)) /* largest violation */ + (*maxdiff)=target-dist; + } + if((a[i]>(0+learn_parm->epsilon_a)) + && (a[i]active[i]=1; /* not at bound */ + } + else if((a[i]<=(0+learn_parm->epsilon_a)) && (dist < (target+learn_parm->epsilon_shrink))) { + shrink_state->active[i]=1; + } + else if((a[i]>=ex_c) + && (dist > (target-learn_parm->epsilon_shrink))) { + shrink_state->active[i]=1; + } + else if(learn_parm->sharedslack) { /* make all active when sharedslack */ + shrink_state->active[i]=1; + } + } + } + if(kernel_parm->kernel_type != LINEAR) { /* update history for non-linear */ + for(i=0;ia_history[shrink_state->deactnum-1])[i]=a[i]; + } + for(t=shrink_state->deactnum-2;(t>=0) && shrink_state->a_history[t];t--) { + free(shrink_state->a_history[t]); + shrink_state->a_history[t]=0; + } + } +} + +/****************************** Cache handling *******************************/ + +void get_kernel_row(KERNEL_CACHE *kernel_cache, DOC **docs, + long int docnum, long int totdoc, + long int *active2dnum, CFLOAT *buffer, + KERNEL_PARM *kernel_parm) + /* Get's a row of the matrix of kernel values This matrix has the + same form as the Hessian, just that the elements are not + multiplied by */ + /* y_i * y_j * a_i * a_j */ + /* Takes the values from the cache if available. */ +{ + register long i,j,start; + DOC *ex; + + ex=docs[docnum]; + + if(kernel_cache->index[docnum] != -1) { /* row is cached? 
*/ + kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */ + start=kernel_cache->activenum*kernel_cache->index[docnum]; + for(i=0;(j=active2dnum[i])>=0;i++) { + if(kernel_cache->totdoc2active[j] >= 0) { /* column is cached? */ + buffer[j]=kernel_cache->buffer[start+kernel_cache->totdoc2active[j]]; + } + else { + buffer[j]=(CFLOAT)kernel(kernel_parm,ex,docs[j]); + } + } + } + else { + for(i=0;(j=active2dnum[i])>=0;i++) { + buffer[j]=(CFLOAT)kernel(kernel_parm,ex,docs[j]); + } + } +} + + +void cache_kernel_row(KERNEL_CACHE *kernel_cache, DOC **docs, + long int m, KERNEL_PARM *kernel_parm) + /* Fills cache for the row m */ +{ + register DOC *ex; + register long j,k,l; + register CFLOAT *cache; + + if(!kernel_cache_check(kernel_cache,m)) { /* not cached yet*/ + cache = kernel_cache_clean_and_malloc(kernel_cache,m); + if(cache) { + l=kernel_cache->totdoc2active[m]; + ex=docs[m]; + for(j=0;jactivenum;j++) { /* fill cache */ + k=kernel_cache->active2totdoc[j]; + if((kernel_cache->index[k] != -1) && (l != -1) && (k != m)) { + cache[j]=kernel_cache->buffer[kernel_cache->activenum + *kernel_cache->index[k]+l]; + } + else { + cache[j]=kernel(kernel_parm,ex,docs[k]); + } + } + } + else { + perror("Error: Kernel cache full! => increase cache size"); + } + } +} + + +void cache_multiple_kernel_rows(KERNEL_CACHE *kernel_cache, DOC **docs, + long int *key, long int varnum, + KERNEL_PARM *kernel_parm) + /* Fills cache for the rows in key */ +{ + register long i; + + for(i=0;i=2) { + printf(" Reorganizing cache..."); fflush(stdout); + } + + keep=(long *)my_malloc(sizeof(long)*totdoc); + for(j=0;jactivenum) && (scountactive2totdoc[jj]; + if(!after[j]) { + scount++; + keep[j]=0; + } + } + + for(i=0;imax_elems;i++) { + for(jj=0;jjactivenum;jj++) { + j=kernel_cache->active2totdoc[jj]; + if(!keep[j]) { + from++; + } + else { + kernel_cache->buffer[to]=kernel_cache->buffer[from]; + to++; + from++; + } + } + } + + kernel_cache->activenum=0; + for(j=0;jtotdoc2active[j] != -1)) { + kernel_cache->active2totdoc[kernel_cache->activenum]=j; + kernel_cache->totdoc2active[j]=kernel_cache->activenum; + kernel_cache->activenum++; + } + else { + kernel_cache->totdoc2active[j]=-1; + } + } + + kernel_cache->max_elems=(long)(kernel_cache->buffsize/kernel_cache->activenum); + if(kernel_cache->max_elems>totdoc) { + kernel_cache->max_elems=totdoc; + } + + free(keep); + + if(verbosity>=2) { + printf("done.\n"); fflush(stdout); + printf(" Cache-size in rows = %ld\n",kernel_cache->max_elems); + } +} + +KERNEL_CACHE *kernel_cache_init(long int totdoc, long int buffsize) +{ + long i; + KERNEL_CACHE *kernel_cache; + + kernel_cache=(KERNEL_CACHE *)my_malloc(sizeof(KERNEL_CACHE)); + kernel_cache->index = (long *)my_malloc(sizeof(long)*totdoc); + kernel_cache->occu = (long *)my_malloc(sizeof(long)*totdoc); + kernel_cache->lru = (long *)my_malloc(sizeof(long)*totdoc); + kernel_cache->invindex = (long *)my_malloc(sizeof(long)*totdoc); + kernel_cache->active2totdoc = (long *)my_malloc(sizeof(long)*totdoc); + kernel_cache->totdoc2active = (long *)my_malloc(sizeof(long)*totdoc); + kernel_cache->buffer = (CFLOAT *)my_malloc((size_t)(buffsize)*1024*1024); + + kernel_cache->buffsize=(long)(buffsize/sizeof(CFLOAT)*1024*1024); + + kernel_cache->max_elems=(long)(kernel_cache->buffsize/totdoc); + if(kernel_cache->max_elems>totdoc) { + kernel_cache->max_elems=totdoc; + } + + if(verbosity>=2) { + printf(" Cache-size in rows = %ld\n",kernel_cache->max_elems); + printf(" Kernel evals so far: %ld\n",kernel_cache_statistic); + } + + 
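/* NOTE (annotation added in this vendored copy; not part of the
+     upstream SVM-Light sources): the cache is one flat buffer holding
+     up to max_elems rows of the kernel matrix, each row activenum
+     entries wide. index[doc] maps a document to its row slot (-1 if
+     uncached), invindex[] is the reverse map, and lru[] stores the
+     iteration of last use for eviction. A row pointer is plain
+     arithmetic, schematically:
+
+         CFLOAT *row = kernel_cache->buffer
+                       + kernel_cache->activenum * kernel_cache->index[docnum];
+  */
+ 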
kernel_cache->elems=0;   /* initialize cache */
+  for(i=0;i<totdoc;i++) {
+    kernel_cache->index[i]=-1;
+    kernel_cache->lru[i]=0;
+  }
+  for(i=0;i<totdoc;i++) {
+    kernel_cache->occu[i]=0;
+    kernel_cache->invindex[i]=-1;
+  }
+
+  kernel_cache->activenum=totdoc;
+  for(i=0;i<totdoc;i++) {
+      kernel_cache->active2totdoc[i]=i;
+      kernel_cache->totdoc2active[i]=i;
+  }
+
+  kernel_cache->time=0;
+
+  return(kernel_cache);
+}
+
+void kernel_cache_reset_lru(KERNEL_CACHE *kernel_cache)
+{
+  long maxlru=0,k;
+
+  for(k=0;k<kernel_cache->max_elems;k++) {
+    if(maxlru < kernel_cache->lru[k])
+      maxlru=kernel_cache->lru[k];
+  }
+  for(k=0;k<kernel_cache->max_elems;k++) {
+    kernel_cache->lru[k]-=maxlru;
+  }
+}
+
+void kernel_cache_cleanup(KERNEL_CACHE *kernel_cache)
+{
+  free(kernel_cache->index);
+  free(kernel_cache->occu);
+  free(kernel_cache->lru);
+  free(kernel_cache->invindex);
+  free(kernel_cache->active2totdoc);
+  free(kernel_cache->totdoc2active);
+  free(kernel_cache->buffer);
+  free(kernel_cache);
+}
+
+long kernel_cache_malloc(KERNEL_CACHE *kernel_cache)
+{
+  long i;
+
+  if(kernel_cache_space_available(kernel_cache)) {
+    for(i=0;i<kernel_cache->max_elems;i++) {
+      if(!kernel_cache->occu[i]) {
+        kernel_cache->occu[i]=1;
+        kernel_cache->elems++;
+        return(i);
+      }
+    }
+  }
+  return(-1);
+}
+
+void kernel_cache_free(KERNEL_CACHE *kernel_cache, long int i)
+{
+  kernel_cache->occu[i]=0;
+  kernel_cache->elems--;
+}
+
+long kernel_cache_free_lru(KERNEL_CACHE *kernel_cache)
+     /* remove least recently used cache element */
+{
+  register long k,least_elem=-1,least_time;
+
+  least_time=kernel_cache->time+1;
+  for(k=0;k<kernel_cache->max_elems;k++) {
+    if(kernel_cache->invindex[k] != -1) {
+      if(kernel_cache->lru[k]<least_time) {
+        least_time=kernel_cache->lru[k];
+        least_elem=k;
+      }
+    }
+  }
+  if(least_elem != -1) {
+    kernel_cache_free(kernel_cache,least_elem);
+    kernel_cache->index[kernel_cache->invindex[least_elem]]=-1;
+    kernel_cache->invindex[least_elem]=-1;
+    return(1);
+  }
+  return(0);
+}
+
+
+CFLOAT *kernel_cache_clean_and_malloc(KERNEL_CACHE *kernel_cache,
+                                      long int docnum)
+     /* Get a free cache entry. In case cache is full, the lru element
+        is removed. */
+{
+  long result;
+  if((result = kernel_cache_malloc(kernel_cache)) == -1) {
+    if(kernel_cache_free_lru(kernel_cache)) {
+      result = kernel_cache_malloc(kernel_cache);
+    }
+  }
+  kernel_cache->index[docnum]=result;
+  if(result == -1) {
+    return(0);
+  }
+  kernel_cache->invindex[result]=docnum;
+  kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */
+  return((CFLOAT *)((long)kernel_cache->buffer
+                    +(kernel_cache->activenum*sizeof(CFLOAT)*
+                      kernel_cache->index[docnum])));
+}
+
+long kernel_cache_touch(KERNEL_CACHE *kernel_cache, long int docnum)
+     /* Update lru time to avoid removal from cache. */
+{
+  if(kernel_cache && kernel_cache->index[docnum] != -1) {
+    kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */
+    return(1);
+  }
+  return(0);
+}
+
+long kernel_cache_check(KERNEL_CACHE *kernel_cache, long int docnum)
+     /* Is that row cached? */
+{
+  return(kernel_cache->index[docnum] != -1);
+}
+
+long kernel_cache_space_available(KERNEL_CACHE *kernel_cache)
+     /* Is there room for one more row? */
+{
+  return(kernel_cache->elems < kernel_cache->max_elems);
+}
+
+/************************** Compute estimates ******************************/
+
+void compute_xa_estimates(MODEL *model, long int *label,
+                          long int *unlabeled, long int totdoc,
+                          DOC **docs, double *lin, double *a,
+                          KERNEL_PARM *kernel_parm,
+                          LEARN_PARM *learn_parm, double *error,
+                          double *recall, double *precision)
+     /* Computes xa-estimate of error rate, recall, and precision. See
+        T.
Joachims, Estimating the Generalization Performance of an SVM + Efficiently, IMCL, 2000. */ +{ + long i,looerror,looposerror,loonegerror; + long totex,totposex; + double xi,r_delta,r_delta_sq,sim=0; + long *sv2dnum=NULL,*sv=NULL,svnum; + + r_delta=estimate_r_delta(docs,totdoc,kernel_parm); + r_delta_sq=r_delta*r_delta; + + looerror=0; + looposerror=0; + loonegerror=0; + totex=0; + totposex=0; + svnum=0; + + if(learn_parm->xa_depth > 0) { + sv = (long *)my_malloc(sizeof(long)*(totdoc+11)); + for(i=0;isv_num;i++) + if(a[model->supvec[i]->docnum] + < (learn_parm->svm_cost[model->supvec[i]->docnum] + -learn_parm->epsilon_a)) { + sv[model->supvec[i]->docnum]=1; + svnum++; + } + sv2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); + clear_index(sv2dnum); + compute_index(sv,totdoc,sv2dnum); + } + + for(i=0;ib)*(double)label[i]); + if(xi<0) xi=0; + if(label[i]>0) { + totposex++; + } + if((learn_parm->rho*a[i]*r_delta_sq+xi) >= 1.0) { + if(learn_parm->xa_depth > 0) { /* makes assumptions */ + sim=distribute_alpha_t_greedily(sv2dnum,svnum,docs,a,i,label, + kernel_parm,learn_parm, + (double)((1.0-xi-a[i]*r_delta_sq)/(2.0*a[i]))); + } + if((learn_parm->xa_depth == 0) || + ((a[i]*kernel(kernel_parm,docs[i],docs[i])+a[i]*2.0*sim+xi) >= 1.0)) { + looerror++; + if(label[i]>0) { + looposerror++; + } + else { + loonegerror++; + } + } + } + totex++; + } + } + + (*error)=((double)looerror/(double)totex)*100.0; + (*recall)=(1.0-(double)looposerror/(double)totposex)*100.0; + (*precision)=(((double)totposex-(double)looposerror) + /((double)totposex-(double)looposerror+(double)loonegerror))*100.0; + + free(sv); + free(sv2dnum); +} + + +double distribute_alpha_t_greedily(long int *sv2dnum, long int svnum, + DOC **docs, double *a, + long int docnum, + long int *label, + KERNEL_PARM *kernel_parm, + LEARN_PARM *learn_parm, double thresh) + /* Experimental Code improving plain XiAlpha Estimates by + computing a better bound using a greedy optimzation strategy. */ +{ + long best_depth=0; + long i,j,k,d,skip,allskip; + double best,best_val[101],val,init_val_sq,init_val_lin; + long best_ex[101]; + CFLOAT *cache,*trow; + + cache=(CFLOAT *)my_malloc(sizeof(CFLOAT)*learn_parm->xa_depth*svnum); + trow = (CFLOAT *)my_malloc(sizeof(CFLOAT)*svnum); + + for(k=0;kxa_depth;d++) { + allskip=1; + if(d>=1) { + init_val_sq+=cache[best_ex[d-1]+svnum*(d-1)]; + for(k=0;kkernel_type == LINEAR) + val+=docs[sv2dnum[i]]->fvec->twonorm_sq; + else + val+=kernel(kernel_parm,docs[sv2dnum[i]],docs[sv2dnum[i]]); + for(j=0;jxa_depth; + } + } + + free(cache); + free(trow); + + /* printf("Distribute[%ld](%ld)=%f, ",docnum,best_depth,best); */ + return(best); +} + + +void estimate_transduction_quality(MODEL *model, long int *label, + long int *unlabeled, + long int totdoc, DOC **docs, double *lin) + /* Loo-bound based on observation that loo-errors must have an + equal distribution in both training and test examples, given + that the test examples are classified correctly. Compare + chapter "Constraints on the Transductive Hyperplane" in my + Dissertation. 
*/ +{ + long i,j,l=0,ulab=0,lab=0,labpos=0,labneg=0,ulabpos=0,ulabneg=0,totulab=0; + double totlab=0,totlabpos=0,totlabneg=0,labsum=0,ulabsum=0; + double r_delta,r_delta_sq,xi,xisum=0,asum=0; + + r_delta=estimate_r_delta(docs,totdoc,&(model->kernel_parm)); + r_delta_sq=r_delta*r_delta; + + for(j=0;j 0) + totlabpos++; + else + totlabneg++; + } + } + for(j=1;jsv_num;j++) { + i=model->supvec[j]->docnum; + xi=1.0-((lin[i]-model->b)*(double)label[i]); + if(xi<0) xi=0; + + xisum+=xi; + asum+=fabs(model->alpha[j]); + if(unlabeled[i]) { + ulabsum+=(fabs(model->alpha[j])*r_delta_sq+xi); + } + else { + labsum+=(fabs(model->alpha[j])*r_delta_sq+xi); + } + if((fabs(model->alpha[j])*r_delta_sq+xi) >= 1) { + l++; + if(unlabeled[model->supvec[j]->docnum]) { + ulab++; + if(model->alpha[j] > 0) + ulabpos++; + else + ulabneg++; + } + else { + lab++; + if(model->alpha[j] > 0) + labpos++; + else + labneg++; + } + } + } + printf("xacrit>=1: labeledpos=%.5f labeledneg=%.5f default=%.5f\n",(double)labpos/(double)totlab*100.0,(double)labneg/(double)totlab*100.0,(double)totlabpos/(double)(totlab)*100.0); + printf("xacrit>=1: unlabelpos=%.5f unlabelneg=%.5f\n",(double)ulabpos/(double)totulab*100.0,(double)ulabneg/(double)totulab*100.0); + printf("xacrit>=1: labeled=%.5f unlabled=%.5f all=%.5f\n",(double)lab/(double)totlab*100.0,(double)ulab/(double)totulab*100.0,(double)l/(double)(totdoc)*100.0); + printf("xacritsum: labeled=%.5f unlabled=%.5f all=%.5f\n",(double)labsum/(double)totlab*100.0,(double)ulabsum/(double)totulab*100.0,(double)(labsum+ulabsum)/(double)(totdoc)*100.0); + printf("r_delta_sq=%.5f xisum=%.5f asum=%.5f\n",r_delta_sq,xisum,asum); +} + +double estimate_margin_vcdim(MODEL *model, double w, double R, + KERNEL_PARM *kernel_parm) + /* optional: length of model vector in feature space */ + /* optional: radius of ball containing the data */ +{ + double h; + + /* follows chapter 5.6.4 in [Vapnik/95] */ + + if(w<0) { + w=model_length_s(model,kernel_parm); + } + if(R<0) { + R=estimate_sphere(model,kernel_parm); + } + h = w*w * R*R +1; + return(h); +} + +double estimate_sphere(MODEL *model, KERNEL_PARM *kernel_parm) + /* Approximates the radius of the ball containing */ + /* the support vectors by bounding it with the */ +{ /* length of the longest support vector. This is */ + register long j; /* pretty good for text categorization, since all */ + double xlen,maxxlen=0; /* documents have feature vectors of length 1. It */ + DOC *nulldoc; /* assumes that the center of the ball is at the */ + WORD nullword; /* origin of the space. */ + + nullword.wnum=0; + nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0)); + + for(j=1;jsv_num;j++) { + xlen=sqrt(kernel(kernel_parm,model->supvec[j],model->supvec[j]) + -2*kernel(kernel_parm,model->supvec[j],nulldoc) + +kernel(kernel_parm,nulldoc,nulldoc)); + if(xlen>maxxlen) { + maxxlen=xlen; + } + } + + free_example(nulldoc,1); + return(maxxlen); +} + +double estimate_r_delta(DOC **docs, long int totdoc, KERNEL_PARM *kernel_parm) +{ + long i; + double maxxlen,xlen; + DOC *nulldoc; /* assumes that the center of the ball is at the */ + WORD nullword; /* origin of the space. 
*/
+
+  nullword.wnum=0;
+  nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
+
+  maxxlen=0;
+  for(i=0;i<totdoc;i++) {
+    xlen=sqrt(kernel(kernel_parm,docs[i],docs[i])
+              -2*kernel(kernel_parm,docs[i],nulldoc)
+              +kernel(kernel_parm,nulldoc,nulldoc));
+    if(xlen>maxxlen) {
+      maxxlen=xlen;
+    }
+  }
+
+  free_example(nulldoc,1);
+  return(maxxlen);
+}
+
+double estimate_r_delta_average(DOC **docs, long int totdoc,
+                                KERNEL_PARM *kernel_parm)
+{
+  long i;
+  double avgxlen;
+  DOC *nulldoc;  /* assumes that the center of the ball is at the */
+  WORD nullword; /* origin of the space. */
+
+  nullword.wnum=0;
+  nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
+
+  avgxlen=0;
+  for(i=0;i<totdoc;i++) {
+    avgxlen+=sqrt(kernel(kernel_parm,docs[i],docs[i])
+                  -2*kernel(kernel_parm,docs[i],nulldoc)
+                  +kernel(kernel_parm,nulldoc,nulldoc));
+  }
+
+  free_example(nulldoc,1);
+  return(avgxlen/totdoc);
+}
+
+double length_of_longest_document_vector(DOC **docs, long int totdoc,
+                                         KERNEL_PARM *kernel_parm)
+{
+  long i;
+  double maxxlen,xlen;
+
+  maxxlen=0;
+  for(i=0;i<totdoc;i++) {
+    xlen=sqrt(kernel(kernel_parm,docs[i],docs[i]));
+    if(xlen>maxxlen) {
+      maxxlen=xlen;
+    }
+  }
+
+  return(maxxlen);
+}
+
+/****************************** IO-handling **********************************/
+
+void write_prediction(char *predfile, MODEL *model, double *lin,
+                      double *a, long int *unlabeled,
+                      long int *label, long int totdoc,
+                      LEARN_PARM *learn_parm)
+{
+  FILE *predfl;
+  long i;
+  double dist,a_max;
+
+  if(verbosity>=1) {
+    printf("Writing prediction file..."); fflush(stdout);
+  }
+  if ((predfl = fopen (predfile, "w")) == NULL)
+  { perror (predfile); exit (1); }
+  a_max=learn_parm->epsilon_a;
+  for(i=0;i<totdoc;i++) {
+    if((unlabeled[i]) && (a[i]>a_max)) {
+      a_max=a[i];
+    }
+  }
+  for(i=0;i<totdoc;i++) {
+    if(unlabeled[i]) {
+      if((a[i]>(learn_parm->epsilon_a))) {
+        dist=(double)label[i]*(1.0-learn_parm->epsilon_crit-a[i]/(a_max*2.0));
+      }
+      else {
+        dist=(lin[i]-model->b);
+      }
+      if(dist>0) {
+        fprintf(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist);
+      }
+      else {
+        fprintf(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist);
+      }
+    }
+  }
+  fclose(predfl);
+  if(verbosity>=1) {
+    printf("done\n");
+  }
+}
+
+void write_alphas(char *alphafile, double *a,
+                  long int *label, long int totdoc)
+{
+  FILE *alphafl;
+  long i;
+
+  if(verbosity>=1) {
+    printf("Writing alpha file..."); fflush(stdout);
+  }
+  if ((alphafl = fopen (alphafile, "w")) == NULL)
+  { perror (alphafile); exit (1); }
+  for(i=0;i<totdoc;i++) {
+    fprintf(alphafl,"%.18g\n",a[i]*(double)label[i]);
+  }
+  fclose(alphafl);
+  if(verbosity>=1) {
+    printf("done\n");
+  }
+}
+
diff --git a/thirdparty/svmlight/svm_learn.h b/thirdparty/svmlight/svm_learn.h
new file mode 100755
index 000000000..b1968ee5c
--- /dev/null
+++ b/thirdparty/svmlight/svm_learn.h
@@ -0,0 +1,169 @@
+/***********************************************************************/
+/*                                                                     */
+/*   svm_learn.h                                                       */
+/*                                                                     */
+/*   Declarations for learning module of Support Vector Machine.       */
+/*                                                                     */
+/*   Author: Thorsten Joachims                                         */
+/*   Date: 02.07.02                                                    */
+/*                                                                     */
+/*   Copyright (c) 2002  Thorsten Joachims - All rights reserved       */
+/*                                                                     */
+/*   This software is available for non-commercial use only. It must   */
+/*   not be modified and distributed without prior permission of the   */
+/*   author. The author is not responsible for implications from the   */
+/*   use of this software.
*/ +/* */ +/***********************************************************************/ + +#ifndef SVM_LEARN +#define SVM_LEARN + +void svm_learn_classification(DOC **, double *, long, long, LEARN_PARM *, + KERNEL_PARM *, KERNEL_CACHE *, MODEL *, + double *); +void svm_learn_regression(DOC **, double *, long, long, LEARN_PARM *, + KERNEL_PARM *, KERNEL_CACHE **, MODEL *); +void svm_learn_ranking(DOC **, double *, long, long, LEARN_PARM *, + KERNEL_PARM *, KERNEL_CACHE **, MODEL *); +void svm_learn_optimization(DOC **, double *, long, long, LEARN_PARM *, + KERNEL_PARM *, KERNEL_CACHE *, MODEL *, + double *); +long optimize_to_convergence(DOC **, long *, long, long, LEARN_PARM *, + KERNEL_PARM *, KERNEL_CACHE *, SHRINK_STATE *, + MODEL *, long *, long *, double *, + double *, double *, + TIMING *, double *, long, long); +long optimize_to_convergence_sharedslack(DOC **, long *, long, long, + LEARN_PARM *, + KERNEL_PARM *, KERNEL_CACHE *, SHRINK_STATE *, + MODEL *, double *, double *, double *, + TIMING *, double *); +double compute_objective_function(double *, double *, double *, double, + long *, long *); +void clear_index(long *); +void add_to_index(long *, long); +long compute_index(long *,long, long *); +void optimize_svm(DOC **, long *, long *, long *, double, long *, long *, + MODEL *, + long, long *, long, double *, double *, double *, + LEARN_PARM *, CFLOAT *, KERNEL_PARM *, QP *, double *); +void compute_matrices_for_optimization(DOC **, long *, long *, long *, double, + long *, + long *, long *, MODEL *, double *, + double *, double *, long, long, LEARN_PARM *, + CFLOAT *, KERNEL_PARM *, QP *); +long calculate_svm_model(DOC **, long *, long *, double *, double *, + double *, double *, LEARN_PARM *, long *, + long *, MODEL *); +long check_optimality(MODEL *, long *, long *, double *, double *, + double *, long, + LEARN_PARM *,double *, double, long *, long *, long *, + long *, long, KERNEL_PARM *); +long check_optimality_sharedslack(DOC **docs, MODEL *model, long int *label, + double *a, double *lin, double *c, double *slack, + double *alphaslack, long int totdoc, + LEARN_PARM *learn_parm, double *maxdiff, + double epsilon_crit_org, long int *misclassified, + long int *active2dnum, + long int *last_suboptimal_at, + long int iteration, KERNEL_PARM *kernel_parm); +void compute_shared_slacks(DOC **docs, long int *label, double *a, + double *lin, double *c, long int *active2dnum, + LEARN_PARM *learn_parm, + double *slack, double *alphaslack); +long identify_inconsistent(double *, long *, long *, long, LEARN_PARM *, + long *, long *); +long identify_misclassified(double *, long *, long *, long, + MODEL *, long *, long *); +long identify_one_misclassified(double *, long *, long *, long, + MODEL *, long *, long *); +long incorporate_unlabeled_examples(MODEL *, long *,long *, long *, + double *, double *, long, double *, + long *, long *, long, KERNEL_PARM *, + LEARN_PARM *); +void update_linear_component(DOC **, long *, long *, double *, double *, + long *, long, long, KERNEL_PARM *, + KERNEL_CACHE *, double *, + CFLOAT *, double *); +long select_next_qp_subproblem_grad(long *, long *, double *, + double *, double *, long, + long, LEARN_PARM *, long *, long *, + long *, double *, long *, KERNEL_CACHE *, + long, long *, long *); +long select_next_qp_subproblem_rand(long *, long *, double *, + double *, double *, long, + long, LEARN_PARM *, long *, long *, + long *, double *, long *, KERNEL_CACHE *, + long *, long *, long); +long select_next_qp_slackset(DOC **docs, long int *label, double *a, + 
double *lin, double *slack, double *alphaslack, + double *c, LEARN_PARM *learn_parm, + long int *active2dnum, double *maxviol); +void select_top_n(double *, long, long *, long); +void init_shrink_state(SHRINK_STATE *, long, long); +void shrink_state_cleanup(SHRINK_STATE *); +long shrink_problem(DOC **, LEARN_PARM *, SHRINK_STATE *, KERNEL_PARM *, + long *, long *, long, long, long, double *, long *); +void reactivate_inactive_examples(long *, long *, double *, SHRINK_STATE *, + double *, double*, long, long, long, LEARN_PARM *, + long *, DOC **, KERNEL_PARM *, + KERNEL_CACHE *, MODEL *, CFLOAT *, + double *, double *); + +/* cache kernel evaluations to improve speed */ +KERNEL_CACHE *kernel_cache_init(long, long); +void kernel_cache_cleanup(KERNEL_CACHE *); +void get_kernel_row(KERNEL_CACHE *,DOC **, long, long, long *, CFLOAT *, + KERNEL_PARM *); +void cache_kernel_row(KERNEL_CACHE *,DOC **, long, KERNEL_PARM *); +void cache_multiple_kernel_rows(KERNEL_CACHE *,DOC **, long *, long, + KERNEL_PARM *); +void kernel_cache_shrink(KERNEL_CACHE *,long, long, long *); +void kernel_cache_reset_lru(KERNEL_CACHE *); +long kernel_cache_malloc(KERNEL_CACHE *); +void kernel_cache_free(KERNEL_CACHE *,long); +long kernel_cache_free_lru(KERNEL_CACHE *); +CFLOAT *kernel_cache_clean_and_malloc(KERNEL_CACHE *,long); +long kernel_cache_touch(KERNEL_CACHE *,long); +long kernel_cache_check(KERNEL_CACHE *,long); +long kernel_cache_space_available(KERNEL_CACHE *); + +void compute_xa_estimates(MODEL *, long *, long *, long, DOC **, + double *, double *, KERNEL_PARM *, + LEARN_PARM *, double *, double *, double *); +double xa_estimate_error(MODEL *, long *, long *, long, DOC **, + double *, double *, KERNEL_PARM *, + LEARN_PARM *); +double xa_estimate_recall(MODEL *, long *, long *, long, DOC **, + double *, double *, KERNEL_PARM *, + LEARN_PARM *); +double xa_estimate_precision(MODEL *, long *, long *, long, DOC **, + double *, double *, KERNEL_PARM *, + LEARN_PARM *); +void avg_similarity_of_sv_of_one_class(MODEL *, DOC **, double *, long *, KERNEL_PARM *, double *, double *); +double most_similar_sv_of_same_class(MODEL *, DOC **, double *, long, long *, KERNEL_PARM *, LEARN_PARM *); +double distribute_alpha_t_greedily(long *, long, DOC **, double *, long, long *, KERNEL_PARM *, LEARN_PARM *, double); +double distribute_alpha_t_greedily_noindex(MODEL *, DOC **, double *, long, long *, KERNEL_PARM *, LEARN_PARM *, double); +void estimate_transduction_quality(MODEL *, long *, long *, long, DOC **, double *); +double estimate_margin_vcdim(MODEL *, double, double, KERNEL_PARM *); +double estimate_sphere(MODEL *, KERNEL_PARM *); +double estimate_r_delta_average(DOC **, long, KERNEL_PARM *); +double estimate_r_delta(DOC **, long, KERNEL_PARM *); +double length_of_longest_document_vector(DOC **, long, KERNEL_PARM *); + +void write_model(char *, MODEL *); +void write_prediction(char *, MODEL *, double *, double *, long *, long *, + long, LEARN_PARM *); +void write_alphas(char *, double *, long *, long); + +typedef struct cache_parm_s { + KERNEL_CACHE *kernel_cache; + CFLOAT *cache; + DOC **docs; + long m; + KERNEL_PARM *kernel_parm; + long offset,stepsize; +} cache_parm_t; + +#endif
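The kernel-cache functions declared above memoize kernel evaluations so that rows of the kernel matrix are not recomputed on every iteration. A minimal sketch of the cache lifecycle, mirroring how svm_learn_main.c below drives it; the example count and the 40 MB budget are placeholder values:

    /* Illustrative sketch only: allocate a cache sized for 'totdoc'
       training examples within a 40 MB budget, let training query it,
       then release it. A cache must not be reused across two training
       runs (see the comment in main() below). */
    static void kernel_cache_lifecycle_sketch(long totdoc)
    {
      KERNEL_CACHE *cache = kernel_cache_init(totdoc, 40); /* rows, MB */
      /* ... the svm_learn_* routines fetch rows via get_kernel_row() ... */
      kernel_cache_cleanup(cache);
    }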
diff --git a/thirdparty/svmlight/svm_learn_main.c b/thirdparty/svmlight/svm_learn_main.c new file mode 100755 index 000000000..e2a157dad --- /dev/null +++ b/thirdparty/svmlight/svm_learn_main.c @@ -0,0 +1,397 @@ +/***********************************************************************/ +/* */ +/* svm_learn_main.c */ +/* */ +/* Command line interface to the learning module of the */ +/* Support Vector Machine. */ +/* */ +/* Author: Thorsten Joachims */ +/* Date: 02.07.02 */ +/* */ +/* Copyright (c) 2000 Thorsten Joachims - All rights reserved */ +/* */ +/* This software is available for non-commercial use only. It must */ +/* not be modified and distributed without prior permission of the */ +/* author. The author is not responsible for implications from the */ +/* use of this software. */ +/* */ +/***********************************************************************/ + + +/* if svm_learn is used from C++, declare the interface as extern "C" */ +#ifdef __cplusplus +extern "C" { +#endif + +# include "svm_common.h" +# include "svm_learn.h" + +#ifdef __cplusplus +} +#endif + +char docfile[200]; /* file with training examples */ +char modelfile[200]; /* file for resulting classifier */ +char restartfile[200]; /* file with initial alphas */ + +void read_input_parameters(int, char **, char *, char *, char *, long *, + LEARN_PARM *, KERNEL_PARM *); +void wait_any_key(); +void print_help(); + + + +int main (int argc, char* argv[]) +{ + DOC **docs; /* training examples */ + long totwords,totdoc,i; + double *target; + double *alpha_in=NULL; + KERNEL_CACHE *kernel_cache; + LEARN_PARM learn_parm; + KERNEL_PARM kernel_parm; + MODEL *model=(MODEL *)my_malloc(sizeof(MODEL)); + + read_input_parameters(argc,argv,docfile,modelfile,restartfile,&verbosity, + &learn_parm,&kernel_parm); + read_documents(docfile,&docs,&target,&totwords,&totdoc); + if(restartfile[0]) alpha_in=read_alphas(restartfile,totdoc); + + if(kernel_parm.kernel_type == LINEAR) { /* don't need the cache */ + kernel_cache=NULL; + } + else { + /* Always get a new kernel cache. It is not possible to use the + same cache for two different training runs */ + kernel_cache=kernel_cache_init(totdoc,learn_parm.kernel_cache_size); + } + + if(learn_parm.type == CLASSIFICATION) { + svm_learn_classification(docs,target,totdoc,totwords,&learn_parm, + &kernel_parm,kernel_cache,model,alpha_in); + } + else if(learn_parm.type == REGRESSION) { + svm_learn_regression(docs,target,totdoc,totwords,&learn_parm, + &kernel_parm,&kernel_cache,model); + } + else if(learn_parm.type == RANKING) { + svm_learn_ranking(docs,target,totdoc,totwords,&learn_parm, + &kernel_parm,&kernel_cache,model); + } + else if(learn_parm.type == OPTIMIZATION) { + svm_learn_optimization(docs,target,totdoc,totwords,&learn_parm, + &kernel_parm,kernel_cache,model,alpha_in); + } + + if(kernel_cache) { + /* Free the memory used for the cache. */ + kernel_cache_cleanup(kernel_cache); + } + + /* Warning: The model contains references to the original data 'docs'. + If you want to free the original data, and only keep the model, you + have to make a deep copy of 'model'.
*/ + /* deep_copy_of_model=copy_model(model); */ + write_model(modelfile,model); + + free(alpha_in); + free_model(model,0); + for(i=0;i<totdoc;i++) free_example(docs[i],1); + free(docs); + free(target); + + return(0); +}
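As the warning in main() notes, freeing 'docs' invalidates the support-vector pointers inside 'model'. A hedged sketch of the deep-copy route hinted at by the commented-out copy_model() call; it assumes copy_model() duplicates the support vectors, and that passing 1 as free_model()'s second argument frees those copies as well:

    /* Sketch only: persist the model independently of the training data.
       Assumes copy_model() deep-copies the support vectors, as the
       commented-out line in main() suggests. */
    static void keep_model_only(MODEL *model, DOC **docs, long totdoc,
                                char *modelfile)
    {
      long i;
      MODEL *deep = copy_model(model);
      for(i = 0; i < totdoc; i++) free_example(docs[i], 1);
      free(docs);
      write_model(modelfile, deep);
      free_model(deep, 1); /* 1: also free the copied support vectors */
    }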
+ +void read_input_parameters(int argc,char *argv[],char *docfile,char *modelfile, + char *restartfile,long *verbosity, + LEARN_PARM *learn_parm,KERNEL_PARM *kernel_parm) +{ + long i; + char type[100]; + + /* set default */ + strcpy (modelfile, "svm_model"); + strcpy (learn_parm->predfile, "trans_predictions"); + strcpy (learn_parm->alphafile, ""); + strcpy (restartfile, ""); + (*verbosity)=1; + learn_parm->biased_hyperplane=1; + learn_parm->sharedslack=0; + learn_parm->remove_inconsistent=0; + learn_parm->skip_final_opt_check=0; + learn_parm->svm_maxqpsize=10; + learn_parm->svm_newvarsinqp=0; + learn_parm->svm_iter_to_shrink=-9999; + learn_parm->maxiter=100000; + learn_parm->kernel_cache_size=40; + learn_parm->svm_c=0.0; + learn_parm->eps=0.1; + learn_parm->transduction_posratio=-1.0; + learn_parm->svm_costratio=1.0; + learn_parm->svm_costratio_unlab=1.0; + learn_parm->svm_unlabbound=1E-5; + learn_parm->epsilon_crit=0.001; + learn_parm->epsilon_a=1E-15; + learn_parm->compute_loo=0; + learn_parm->rho=1.0; + learn_parm->xa_depth=0; + kernel_parm->kernel_type=0; + kernel_parm->poly_degree=3; + kernel_parm->rbf_gamma=1.0; + kernel_parm->coef_lin=1; + kernel_parm->coef_const=1; + strcpy(kernel_parm->custom,"empty"); + strcpy(type,"c"); + + for(i=1;(i<argc) && ((argv[i])[0] == '-');i++) { + switch ((argv[i])[1]) + { + case '?': print_help(); exit(0); + case 'z': i++; strcpy(type,argv[i]); break; + case 'v': i++; (*verbosity)=atol(argv[i]); break; + case 'b': i++; learn_parm->biased_hyperplane=atol(argv[i]); break; + case 'i': i++; learn_parm->remove_inconsistent=atol(argv[i]); break; + case 'f': i++; learn_parm->skip_final_opt_check=!atol(argv[i]); break; + case 'q': i++; learn_parm->svm_maxqpsize=atol(argv[i]); break; + case 'n': i++; learn_parm->svm_newvarsinqp=atol(argv[i]); break; + case '#': i++; learn_parm->maxiter=atol(argv[i]); break; + case 'h': i++; learn_parm->svm_iter_to_shrink=atol(argv[i]); break; + case 'm': i++; learn_parm->kernel_cache_size=atol(argv[i]); break; + case 'c': i++; learn_parm->svm_c=atof(argv[i]); break; + case 'w': i++; learn_parm->eps=atof(argv[i]); break; + case 'p': i++; learn_parm->transduction_posratio=atof(argv[i]); break; + case 'j': i++; learn_parm->svm_costratio=atof(argv[i]); break; + case 'e': i++; learn_parm->epsilon_crit=atof(argv[i]); break; + case 'o': i++; learn_parm->rho=atof(argv[i]); break; + case 'k': i++; learn_parm->xa_depth=atol(argv[i]); break; + case 'x': i++; learn_parm->compute_loo=atol(argv[i]); break; + case 't': i++; kernel_parm->kernel_type=atol(argv[i]); break; + case 'd': i++; kernel_parm->poly_degree=atol(argv[i]); break; + case 'g': i++; kernel_parm->rbf_gamma=atof(argv[i]); break; + case 's': i++; kernel_parm->coef_lin=atof(argv[i]); break; + case 'r': i++; kernel_parm->coef_const=atof(argv[i]); break; + case 'u': i++; strcpy(kernel_parm->custom,argv[i]); break; + case 'l': i++; strcpy(learn_parm->predfile,argv[i]); break; + case 'a': i++; strcpy(learn_parm->alphafile,argv[i]); break; + case 'y': i++; strcpy(restartfile,argv[i]); break; + default: printf("\nUnrecognized option %s!\n\n",argv[i]); + print_help(); + exit(0); + } + } + if(i>=argc) { + printf("\nNot enough input parameters!\n\n"); + wait_any_key(); + print_help(); + exit(0); + } + strcpy (docfile, argv[i]); + if((i+1)<argc) { + strcpy (modelfile, argv[i+1]); + } + if(learn_parm->svm_iter_to_shrink == -9999) { + if(kernel_parm->kernel_type == LINEAR) + learn_parm->svm_iter_to_shrink=2; + else + learn_parm->svm_iter_to_shrink=100; + } + if(strcmp(type,"c")==0) { + learn_parm->type=CLASSIFICATION; + } + else if(strcmp(type,"r")==0) { + learn_parm->type=REGRESSION; + } + else if(strcmp(type,"p")==0) { + learn_parm->type=RANKING; + } + else if(strcmp(type,"o")==0) { + learn_parm->type=OPTIMIZATION; + } + else if(strcmp(type,"s")==0) { + learn_parm->type=OPTIMIZATION; + learn_parm->sharedslack=1; + } + else { + printf("\nUnknown type '%s': Valid types are 'c' (classification), 'r' (regression), and 'p' (preference ranking).\n",type); + wait_any_key(); + print_help(); + exit(0); + } + if((learn_parm->skip_final_opt_check) + && (kernel_parm->kernel_type == LINEAR)) { + printf("\nIt does not make sense to skip the final optimality check for linear kernels.\n\n"); + learn_parm->skip_final_opt_check=0; + } + if((learn_parm->skip_final_opt_check) + && (learn_parm->remove_inconsistent)) { + printf("\nIt is necessary to do the final optimality check when removing inconsistent \nexamples.\n"); + wait_any_key(); + print_help(); + exit(0); + } + if((learn_parm->svm_maxqpsize<2)) { + printf("\nMaximum size of QP-subproblems not in valid range: %ld [2..]\n",learn_parm->svm_maxqpsize); + wait_any_key(); + print_help(); + exit(0); + } + if((learn_parm->svm_maxqpsize<learn_parm->svm_newvarsinqp)) { + printf("\nMaximum size of QP-subproblems [%ld] must be larger than the number of\n",learn_parm->svm_maxqpsize); + printf("new variables [%ld] entering the working set in each iteration.\n",learn_parm->svm_newvarsinqp); + wait_any_key(); + print_help(); + exit(0); + } + if(learn_parm->svm_iter_to_shrink<1) { + printf("\nMaximum number of iterations for shrinking not in valid range: %ld [1,..]\n",learn_parm->svm_iter_to_shrink); + wait_any_key(); + print_help(); + exit(0); + } + if(learn_parm->svm_c<0) { + printf("\nThe C parameter must be greater than zero!\n\n"); + wait_any_key(); + print_help(); + exit(0); + } + if(learn_parm->transduction_posratio>1) { + printf("\nThe fraction of unlabeled examples to classify as positives must\n"); + printf("be less than 1.0 !!!\n\n"); + wait_any_key(); + print_help(); + exit(0); + } + if(learn_parm->svm_costratio<=0) { + printf("\nThe COSTRATIO parameter must be greater than zero!\n\n"); + wait_any_key(); + print_help(); + exit(0); + } + if(learn_parm->epsilon_crit<=0) { + printf("\nThe epsilon parameter must be greater than zero!\n\n"); + wait_any_key(); + print_help(); + exit(0); + } + if(learn_parm->rho<0) { + printf("\nThe parameter rho for xi/alpha-estimates and leave-one-out pruning must\n"); + printf("be greater than zero (typically 1.0 or 2.0, see T. Joachims, Estimating the\n"); + printf("Generalization Performance of an SVM Efficiently, ICML, 2000.)!\n\n"); + wait_any_key(); + print_help(); + exit(0); + } + if((learn_parm->xa_depth<0) || (learn_parm->xa_depth>100)) { + printf("\nThe parameter depth for ext. xi/alpha-estimates must be in [0..100] (zero\n"); + printf("for switching to the conventional xa/estimates described in T. Joachims,\n"); + printf("Estimating the Generalization Performance of an SVM Efficiently, ICML, 2000.)\n"); + wait_any_key(); + print_help(); + exit(0); + } +} + +void wait_any_key() +{ + printf("\n(more)\n"); + (void)getc(stdin); +} + +void print_help() +{ + printf("\nSVM-light %s: Support Vector Machine, learning module %s\n",VERSION,VERSION_DATE); + copyright_notice(); + printf(" usage: svm_learn [options] example_file model_file\n\n"); + printf("Arguments:\n"); + printf(" example_file-> file with training data\n"); + printf(" model_file -> file to store learned decision rule in\n"); + + printf("General options:\n"); + printf(" -? -> this help\n"); + printf(" -v [0..3] -> verbosity level (default 1)\n"); + printf("Learning options:\n"); + printf(" -z {c,r,p} -> select between classification (c), regression (r),\n"); + printf(" and preference ranking (p) (default classification)\n"); + printf(" -c float -> C: trade-off between training error\n"); + printf(" and margin (default [avg. x*x]^-1)\n"); + printf(" -w [0..]
-> epsilon width of tube for regression\n"); + printf(" (default 0.1)\n"); + printf(" -j float -> Cost: cost-factor, by which training errors on\n"); + printf(" positive examples outweigh errors on negative\n"); + printf(" examples (default 1) (see [4])\n"); + printf(" -b [0,1] -> use biased hyperplane (i.e. x*w+b>0) instead\n"); + printf(" of unbiased hyperplane (i.e. x*w>0) (default 1)\n"); + printf(" -i [0,1] -> remove inconsistent training examples\n"); + printf(" and retrain (default 0)\n"); + printf("Performance estimation options:\n"); + printf(" -x [0,1] -> compute leave-one-out estimates (default 0)\n"); + printf(" (see [5])\n"); + printf(" -o ]0..2] -> value of rho for XiAlpha-estimator and for pruning\n"); + printf(" leave-one-out computation (default 1.0) (see [2])\n"); + printf(" -k [0..100] -> search depth for extended XiAlpha-estimator \n"); + printf(" (default 0)\n"); + printf("Transduction options (see [3]):\n"); + printf(" -p [0..1] -> fraction of unlabeled examples to be classified\n"); + printf(" into the positive class (default is the ratio of\n"); + printf(" positive and negative examples in the training data)\n"); + printf("Kernel options:\n"); + printf(" -t int -> type of kernel function:\n"); + printf(" 0: linear (default)\n"); + printf(" 1: polynomial (s a*b+c)^d\n"); + printf(" 2: radial basis function exp(-gamma ||a-b||^2)\n"); + printf(" 3: sigmoid tanh(s a*b + c)\n"); + printf(" 4: user defined kernel from kernel.h\n"); + printf(" -d int -> parameter d in polynomial kernel\n"); + printf(" -g float -> parameter gamma in rbf kernel\n"); + printf(" -s float -> parameter s in sigmoid/poly kernel\n"); + printf(" -r float -> parameter c in sigmoid/poly kernel\n"); + printf(" -u string -> parameter of user defined kernel\n"); + printf("Optimization options (see [1]):\n"); + printf(" -q [2..] -> maximum size of QP-subproblems (default 10)\n"); + printf(" -n [2..q] -> number of new variables entering the working set\n"); + printf(" in each iteration (default n = q). Set n<q to prevent\n"); + printf(" zig-zagging.\n"); + printf(" -m [5..] -> size of cache for kernel evaluations in MB (default 40)\n"); + printf(" The larger the faster...\n"); + printf(" -e float -> eps: Allow that error for termination criterion\n"); + printf(" [y [w*x+b] - 1] >= eps (default 0.001)\n"); + printf(" -h [5..] -> number of iterations a variable needs to be\n"); + printf(" optimal before considered for shrinking (default 100)\n"); + printf(" -f [0,1] -> do final optimality check for variables removed\n"); + printf(" by shrinking. Although this test is usually \n"); + printf(" positive, there is no guarantee that the optimum\n"); + printf(" was found if the test is omitted. (default 1)\n"); + printf(" -y string -> if option is given, reads alphas from file with given\n"); + printf(" name and uses them as starting point. (default 'disabled')\n"); + printf(" -# int -> terminate optimization, if no progress after this\n"); + printf(" number of iterations. (default 100000)\n"); + printf("Output options:\n"); + printf(" -l string -> file to write predicted labels of unlabeled\n"); + printf(" examples into after transductive learning\n"); + printf(" -a string -> write all alphas to this file after learning\n"); + printf(" (in the same order as in the training set)\n"); + wait_any_key(); + printf("\nMore details in:\n"); + printf("[1] T. Joachims, Making Large-Scale SVM Learning Practical.
Advances in\n"); + printf(" Kernel Methods - Support Vector Learning, B. Schölkopf and C. Burges and\n"); + printf(" A. Smola (ed.), MIT Press, 1999.\n"); + printf("[2] T. Joachims, Estimating the Generalization Performance of an SVM\n"); + printf(" Efficiently. International Conference on Machine Learning (ICML), 2000.\n"); + printf("[3] T. Joachims, Transductive Inference for Text Classification using Support\n"); + printf(" Vector Machines. International Conference on Machine Learning (ICML),\n"); + printf(" 1999.\n"); + printf("[4] K. Morik, P. Brockhausen, and T. Joachims, Combining statistical learning\n"); + printf(" with a knowledge-based approach - A case study in intensive care \n"); + printf(" monitoring. International Conference on Machine Learning (ICML), 1999.\n"); + printf("[5] T. Joachims, Learning to Classify Text Using Support Vector\n"); + printf(" Machines: Methods, Theory, and Algorithms. Dissertation, Kluwer,\n"); + printf(" 2002.\n\n"); +} + +
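For reference, two invocations consistent with the help text above; train.dat and model are placeholder file names:

    svm_learn -t 0 -c 1.0 train.dat model               (linear kernel, C=1.0)
    svm_learn -z r -t 2 -g 0.5 -m 100 train.dat model   (RBF regression, 100 MB kernel cache)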
diff --git a/thirdparty/svmlight/svm_loqo.c b/thirdparty/svmlight/svm_loqo.c new file mode 100755 index 000000000..ff31a655f --- /dev/null +++ b/thirdparty/svmlight/svm_loqo.c @@ -0,0 +1,211 @@ +/***********************************************************************/ +/* */ +/* svm_loqo.c */ +/* */ +/* Interface to the PR_LOQO optimization package for SVM. */ +/* */ +/* Author: Thorsten Joachims */ +/* Date: 19.07.99 */ +/* */ +/* Copyright (c) 1999 Universitaet Dortmund - All rights reserved */ +/* */ +/* This software is available for non-commercial use only. It must */ +/* not be modified and distributed without prior permission of the */ +/* author. The author is not responsible for implications from the */ +/* use of this software. */ +/* */ +/***********************************************************************/ + +# include <math.h> +# include "pr_loqo/pr_loqo.h" +# include "svm_common.h" + +/* Common Block Declarations */ + +long verbosity; + +/* /////////////////////////////////////////////////////////////// */ + +# define DEF_PRECISION_LINEAR 1E-8 +# define DEF_PRECISION_NONLINEAR 1E-14 + +double *optimize_qp(); +double *primal=0,*dual=0; +double init_margin=0.15; +long init_iter=500,precision_violations=0; +double model_b; +double opt_precision=DEF_PRECISION_LINEAR; + +/* /////////////////////////////////////////////////////////////// */ + +void *my_malloc(); + +double *optimize_qp(qp,epsilon_crit,nx,threshold,learn_parm) +QP *qp; +double *epsilon_crit; +long nx; /* Maximum number of variables in QP */ +double *threshold; +LEARN_PARM *learn_parm; +/* start the optimizer and return the optimal values */ +{ + register long i,j,result; + double margin,obj_before,obj_after; + double sigdig,dist,epsilon_loqo; + int iter; + + if(!primal) { /* allocate memory at first call */ + primal=(double *)my_malloc(sizeof(double)*nx*3); + dual=(double *)my_malloc(sizeof(double)*(nx*2+1)); + } + + if(verbosity>=4) { /* really verbose */ + printf("\n\n"); + for(i=0;i<qp->opt_n;i++) { + printf("%f: ",qp->opt_g0[i]); + for(j=0;j<qp->opt_n;j++) { + printf("%f ",qp->opt_g[i*qp->opt_n+j]); + } + printf(": a%ld=%.10f < %f",i,qp->opt_xinit[i],qp->opt_up[i]); + printf(": y=%f\n",qp->opt_ce[i]); + } + for(j=0;j<qp->opt_m;j++) { + printf("EQ-%ld: %f*a0",j,qp->opt_ce[j]); + for(i=1;i<qp->opt_n;i++) { + printf(" + %f*a%ld",qp->opt_ce[i],i); + } + printf(" = %f\n\n",-qp->opt_ce0[0]); + } +} + + obj_before=0; /* calculate objective before optimization */ + for(i=0;i<qp->opt_n;i++) { + obj_before+=(qp->opt_g0[i]*qp->opt_xinit[i]); + obj_before+=(0.5*qp->opt_xinit[i]*qp->opt_xinit[i]*qp->opt_g[i*qp->opt_n+i]); + for(j=0;j<i;j++) { + obj_before+=(qp->opt_xinit[j]*qp->opt_xinit[i]*qp->opt_g[j*qp->opt_n+i]); + } + } + + result=STILL_RUNNING; + qp->opt_ce0[0]*=(-1.0); + /* Run pr_loqo. If a run fails, try again with parameters which lead */ + /* to a slower, but more robust setting. */ + for(margin=init_margin,iter=init_iter; + (margin<=0.9999999) && (result!=OPTIMAL_SOLUTION);) { + sigdig=-log10(opt_precision); + + result=pr_loqo((int)qp->opt_n,(int)qp->opt_m, + (double *)qp->opt_g0,(double *)qp->opt_g, + (double *)qp->opt_ce,(double *)qp->opt_ce0, + (double *)qp->opt_low,(double *)qp->opt_up, + (double *)primal,(double *)dual, + (int)(verbosity-2), + (double)sigdig,(int)iter, + (double)margin,(double)(qp->opt_up[0])/4.0,(int)0); + + if(isnan(dual[0])) { /* check for choldc problem */ + if(verbosity>=2) { + printf("NOTICE: Restarting PR_LOQO with more conservative parameters.\n"); + } + if(init_margin<0.80) { /* become more conservative in general */ + init_margin=(4.0*margin+1.0)/5.0; + } + margin=(margin+1.0)/2.0; + (opt_precision)*=10.0; /* reduce precision */ + if(verbosity>=2) { + printf("NOTICE: Reducing precision of PR_LOQO.\n"); + } + } + else if(result!=OPTIMAL_SOLUTION) { + iter+=2000; + init_iter+=10; + (opt_precision)*=10.0; /* reduce precision */ + if(verbosity>=2) { + printf("NOTICE: Reducing precision of PR_LOQO due to (%ld).\n",result); + } + } + } + + if(qp->opt_m) /* Thanks to Alex Smola for this hint */ + model_b=dual[0]; + else + model_b=0; + + /* Check the precision of the alphas. If results of current optimization */ + /* violate KT-Conditions, relax the epsilon on the bounds on alphas. */ + epsilon_loqo=1E-10; + for(i=0;i<qp->opt_n;i++) { + dist=-model_b*qp->opt_ce[i]; + dist+=(qp->opt_g0[i]+1.0); + for(j=0;j<i;j++) { + dist+=(primal[j]*qp->opt_g[j*qp->opt_n+i]); + } + for(j=i;j<qp->opt_n;j++) { + dist+=(primal[j]*qp->opt_g[i*qp->opt_n+j]); + } + /* printf("LOQO: a[%d]=%f, dist=%f, b=%f\n",i,primal[i],dist,dual[0]); */ + if((primal[i]<(qp->opt_up[i]-epsilon_loqo)) && (dist < (1.0-(*epsilon_crit)))) { + epsilon_loqo=(qp->opt_up[i]-primal[i])*2.0; + } + else if((primal[i]>(0+epsilon_loqo)) && (dist > (1.0+(*epsilon_crit)))) { + epsilon_loqo=primal[i]*2.0; + } + } + + for(i=0;i<qp->opt_n;i++) { /* clip alphas to bounds */ + if(primal[i]<=(0+epsilon_loqo)) { + primal[i]=0; + } + else if(primal[i]>=(qp->opt_up[i]-epsilon_loqo)) { + primal[i]=qp->opt_up[i]; + } + } + + obj_after=0; /* calculate objective after optimization */ + for(i=0;i<qp->opt_n;i++) { + obj_after+=(qp->opt_g0[i]*primal[i]); + obj_after+=(0.5*primal[i]*primal[i]*qp->opt_g[i*qp->opt_n+i]); + for(j=0;j<i;j++) { + obj_after+=(primal[j]*primal[i]*qp->opt_g[j*qp->opt_n+i]); + } + } + + /* if optimizer returned NAN values, reset and retry with smaller */ + /* working set. */ + if(isnan(obj_after) || isnan(model_b)) { + for(i=0;i<qp->opt_n;i++) { + primal[i]=qp->opt_xinit[i]; + } + model_b=0; + if(learn_parm->svm_maxqpsize>2) { + learn_parm->svm_maxqpsize--; /* decrease size of qp-subproblems */ + } + } + + if(obj_after >= obj_before) { /* check whether there was progress */ + (opt_precision)/=100.0; + precision_violations++; + if(verbosity>=2) { + printf("NOTICE: Increasing Precision of PR_LOQO.\n"); + } + } + + if(precision_violations > 500) { + (*epsilon_crit)*=10.0; + precision_violations=0; + if(verbosity>=1) { + printf("\nWARNING: Relaxing epsilon on KT-Conditions.\n"); + } + } + + (*threshold)=model_b; + + if(result!=OPTIMAL_SOLUTION) { + printf("\nERROR: PR_LOQO did not converge. \n"); + return(qp->opt_xinit); + } + else { + return(primal); + } +}
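The obj_before/obj_after loops in optimize_qp() evaluate the quadratic objective g0'x + 0.5*x'Gx by visiting only the diagonal and the strict lower triangle of G, which is valid because the Hessian G is symmetric (G[i][j] = G[j][i]), so each off-diagonal pair contributes twice and the 0.5 cancels. A self-contained sketch of the same accumulation; the function name is illustrative, not part of SVM-Light:

    /* Sketch: evaluate f(x) = g0.x + 0.5*x'Gx for a symmetric n-by-n
       matrix G stored row-major, visiting each off-diagonal pair once,
       exactly as the loops in optimize_qp() do. */
    static double qp_objective(long n, const double *g0, const double *g,
                               const double *x)
    {
      double obj = 0.0;
      long i, j;
      for(i = 0; i < n; i++) {
        obj += g0[i] * x[i];
        obj += 0.5 * x[i] * x[i] * g[i * n + i];   /* diagonal term */
        for(j = 0; j < i; j++)                     /* strict lower triangle */
          obj += x[j] * x[i] * g[j * n + i];       /* covers (i,j) and (j,i) */
      }
      return obj;
    }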