diff --git a/thirdparty/svmlight/CMakeLists.txt b/thirdparty/svmlight/CMakeLists.txt new file mode 100644 index 000000000..c26ab1f3d --- /dev/null +++ b/thirdparty/svmlight/CMakeLists.txt @@ -0,0 +1,17 @@ +set (svm_learn_SRC + kernel.h + svm_common.c + svm_common.h + svm_hideo.c + svm_learn.c + svm_learn.h + svm_learn_main.c) + +add_executable (svm_learn + ${svm_learn_SRC}) + +set_target_properties (svm_learn + PROPERTIES + RUNTIME_OUTPUT_DIRECTORY ${TOOLS_DIR}/train/) + +target_link_libraries (svm_learn m) diff --git a/thirdparty/svmlight/LICENSE.txt b/thirdparty/svmlight/LICENSE.txt new file mode 100755 index 000000000..28d6db09f --- /dev/null +++ b/thirdparty/svmlight/LICENSE.txt @@ -0,0 +1,59 @@ +SVM-Light +--------- + +Available at http://svmlight.joachims.org/ + +Author: Thorsten Joachims + thorsten@joachims.org + + Cornell University + Department of Computer Science + 4153 Upson Hall + Ithaca, NY 14853 + USA + +LICENSING TERMS + +This program is granted free of charge for research and education +purposes. However you must obtain a license from the author to use it +for commercial purposes. + +Scientific results produced using the software provided shall +acknowledge the use of SVM-Light. Please cite as + + T. Joachims, Making large-Scale SVM Learning + Practical. Advances in Kernel Methods - Support Vector + Learning, B. Schölkopf and C. Burges and A. Smola (ed.), + MIT-Press, 1999. + http://www-ai.cs.uni-dortmund.de/DOKUMENTE/joachims_99a.pdf + +Moreover shall the author of SVM-Light be informed about the +publication. + +The software must not be modified and distributed without prior +permission of the author. + +By using SVM-Light you agree to the licensing terms. + + +NO WARRANTY + +BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT +WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER +PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, +EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + +IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF +THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO +LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY +OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED +OF THE POSSIBILITY OF SUCH DAMAGES. 
diff --git a/thirdparty/svmlight/Makefile b/thirdparty/svmlight/Makefile new file mode 100755 index 000000000..8b5e19acd --- /dev/null +++ b/thirdparty/svmlight/Makefile @@ -0,0 +1,105 @@ +# +# makefile for svm_light +# +# Thorsten Joachims, 2002 +# + +#Use the following to compile under unix or cygwin +CC = gcc +LD = gcc + +#Uncomment the following line to make CYGWIN produce stand-alone Windows executables +#SFLAGS= -mno-cygwin + +CFLAGS= $(SFLAGS) -O3 # release C-Compiler flags +LFLAGS= $(SFLAGS) -O3 # release linker flags +#CFLAGS= $(SFLAGS) -pg -Wall -pedantic # debugging C-Compiler flags +#LFLAGS= $(SFLAGS) -pg # debugging linker flags +LIBS=-L. -lm # used libraries + +all: svm_learn_hideo svm_classify + +tidy: + rm -f *.o + rm -f pr_loqo/*.o + +clean: tidy + rm -f svm_learn + rm -f svm_classify + rm -f libsvmlight.so + +help: info + +info: + @echo + @echo "make for SVM-light Thorsten Joachims, 1998" + @echo + @echo "Thanks to Ralf Herbrich for the initial version." + @echo + @echo "USAGE: make [svm_learn | svm_learn_loqo | svm_learn_hideo | " + @echo " libsvmlight_hideo | libsvmlight_loqo | " + @echo " svm_classify | all | clean | tidy]" + @echo + @echo " svm_learn builds the learning module (prefers HIDEO)" + @echo " svm_learn_hideo builds the learning module using HIDEO optimizer" + @echo " svm_learn_loqo builds the learning module using PR_LOQO optimizer" + @echo " svm_classify builds the classfication module" + @echo " libsvmlight_hideo builds shared object library that can be linked into" + @echo " other code using HIDEO" + @echo " libsvmlight_loqo builds shared object library that can be linked into" + @echo " other code using PR_LOQO" + @echo " all (default) builds svm_learn + svm_classify" + @echo " clean removes .o and target files" + @echo " tidy removes .o files" + @echo + +# Create executables svm_learn and svm_classify + +svm_learn_hideo: svm_learn_main.o svm_learn.o svm_common.o svm_hideo.o + $(LD) $(LFLAGS) svm_learn_main.o svm_learn.o svm_common.o svm_hideo.o -o svm_learn $(LIBS) + +#svm_learn_loqo: svm_learn_main.o svm_learn.o svm_common.o svm_loqo.o loqo +# $(LD) $(LFLAGS) svm_learn_main.o svm_learn.o svm_common.o svm_loqo.o pr_loqo/pr_loqo.o -o svm_learn $(LIBS) + +svm_classify: svm_classify.o svm_common.o + $(LD) $(LFLAGS) svm_classify.o svm_common.o -o svm_classify $(LIBS) + + +# Create library libsvmlight.so, so that external code can get access to the +# learning and classification functions of svm-light by linking this library. 
+ +svm_learn_hideo_noexe: svm_learn_main.o svm_learn.o svm_common.o svm_hideo.o + +libsvmlight_hideo: svm_learn_main.o svm_learn.o svm_common.o svm_hideo.o + $(LD) -shared svm_learn.o svm_common.o svm_hideo.o -o libsvmlight.so + +#svm_learn_loqo_noexe: svm_learn_main.o svm_learn.o svm_common.o svm_loqo.o loqo + +#libsvmlight_loqo: svm_learn_main.o svm_learn.o svm_common.o svm_loqo.o +# $(LD) -shared svm_learn.o svm_common.o svm_loqo.o pr_loqo/pr_loqo.o -o libsvmlight.so + +# Compile components + +svm_hideo.o: svm_hideo.c + $(CC) -c $(CFLAGS) svm_hideo.c -o svm_hideo.o + +#svm_loqo.o: svm_loqo.c +# $(CC) -c $(CFLAGS) svm_loqo.c -o svm_loqo.o + +svm_common.o: svm_common.c svm_common.h kernel.h + $(CC) -c $(CFLAGS) svm_common.c -o svm_common.o + +svm_learn.o: svm_learn.c svm_common.h + $(CC) -c $(CFLAGS) svm_learn.c -o svm_learn.o + +svm_learn_main.o: svm_learn_main.c svm_learn.h svm_common.h + $(CC) -c $(CFLAGS) svm_learn_main.c -o svm_learn_main.o + +svm_classify.o: svm_classify.c svm_common.h kernel.h + $(CC) -c $(CFLAGS) svm_classify.c -o svm_classify.o + +#loqo: pr_loqo/pr_loqo.o + +#pr_loqo/pr_loqo.o: pr_loqo/pr_loqo.c +# $(CC) -c $(CFLAGS) pr_loqo/pr_loqo.c -o pr_loqo/pr_loqo.o + diff --git a/thirdparty/svmlight/kernel.h b/thirdparty/svmlight/kernel.h new file mode 100755 index 000000000..0133b0066 --- /dev/null +++ b/thirdparty/svmlight/kernel.h @@ -0,0 +1,40 @@ +/************************************************************************/ +/* */ +/* kernel.h */ +/* */ +/* User defined kernel function. Feel free to plug in your own. */ +/* */ +/* Copyright: Thorsten Joachims */ +/* Date: 16.12.97 */ +/* */ +/************************************************************************/ + +/* KERNEL_PARM is defined in svm_common.h The field 'custom' is reserved for */ +/* parameters of the user defined kernel. You can also access and use */ +/* the parameters of the other kernels. Just replace the line + return((double)(1.0)); + with your own kernel. */ + + /* Example: The following computes the polynomial kernel. sprod_ss + computes the inner product between two sparse vectors. + + return((CFLOAT)pow(kernel_parm->coef_lin*sprod_ss(a->words,b->words) + +kernel_parm->coef_const,(double)kernel_parm->poly_degree)); + */ + +/* If you are implementing a kernel that is not based on a + feature/value representation, you might want to make use of the + field "userdefined" in SVECTOR. By default, this field will contain + whatever string you put behind a # sign in the example file. So, if + a line in your training file looks like + + -1 1:3 5:6 #abcdefg + + then the SVECTOR field "words" will contain the vector 1:3 5:6, and + "userdefined" will contain the string "abcdefg". */ + +double custom_kernel(KERNEL_PARM *kernel_parm, SVECTOR *a, SVECTOR *b) + /* plug in you favorite kernel */ +{ + return((double)(1.0)); +} diff --git a/thirdparty/svmlight/svm_classify.c b/thirdparty/svmlight/svm_classify.c new file mode 100755 index 000000000..0b0333b05 --- /dev/null +++ b/thirdparty/svmlight/svm_classify.c @@ -0,0 +1,197 @@ +/***********************************************************************/ +/* */ +/* svm_classify.c */ +/* */ +/* Classification module of Support Vector Machine. */ +/* */ +/* Author: Thorsten Joachims */ +/* Date: 02.07.02 */ +/* */ +/* Copyright (c) 2002 Thorsten Joachims - All rights reserved */ +/* */ +/* This software is available for non-commercial use only. It must */ +/* not be modified and distributed without prior permission of the */ +/* author. 
The author is not responsible for implications from the */ +/* use of this software. */ +/* */ +/************************************************************************/ + +# include "svm_common.h" + +char docfile[200]; +char modelfile[200]; +char predictionsfile[200]; + +void read_input_parameters(int, char **, char *, char *, char *, long *, + long *); +void print_help(void); + + +int main (int argc, char* argv[]) +{ + DOC *doc; /* test example */ + WORD *words; + long max_docs,max_words_doc,lld; + long totdoc=0,queryid,slackid; + long correct=0,incorrect=0,no_accuracy=0; + long res_a=0,res_b=0,res_c=0,res_d=0,wnum,pred_format; + long j; + double t1,runtime=0; + double dist,doc_label,costfactor; + char *line,*comment; + FILE *predfl,*docfl; + MODEL *model; + + read_input_parameters(argc,argv,docfile,modelfile,predictionsfile, + &verbosity,&pred_format); + + nol_ll(docfile,&max_docs,&max_words_doc,&lld); /* scan size of input file */ + max_words_doc+=2; + lld+=2; + + line = (char *)my_malloc(sizeof(char)*lld); + words = (WORD *)my_malloc(sizeof(WORD)*(max_words_doc+10)); + + model=read_model(modelfile); + + if(model->kernel_parm.kernel_type == 0) { /* linear kernel */ + /* compute weight vector */ + add_weight_vector_to_linear_model(model); + } + + if(verbosity>=2) { + printf("Classifying test examples.."); fflush(stdout); + } + + if ((docfl = fopen (docfile, "r")) == NULL) + { perror (docfile); exit (1); } + if ((predfl = fopen (predictionsfile, "w")) == NULL) + { perror (predictionsfile); exit (1); } + + while((!feof(docfl)) && fgets(line,(int)lld,docfl)) { + if(line[0] == '#') continue; /* line contains comments */ + parse_document(line,words,&doc_label,&queryid,&slackid,&costfactor,&wnum, + max_words_doc,&comment); + totdoc++; + if(model->kernel_parm.kernel_type == 0) { /* linear kernel */ + for(j=0;(words[j]).wnum != 0;j++) { /* Check if feature numbers */ + if((words[j]).wnum>model->totwords) /* are not larger than in */ + (words[j]).wnum=0; /* model. Remove feature if */ + } /* necessary. */ + doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0)); + t1=get_runtime(); + dist=classify_example_linear(model,doc); + runtime+=(get_runtime()-t1); + free_example(doc,1); + } + else { /* non-linear kernel */ + doc = create_example(-1,0,0,0.0,create_svector(words,comment,1.0)); + t1=get_runtime(); + dist=classify_example(model,doc); + runtime+=(get_runtime()-t1); + free_example(doc,1); + } + if(dist>0) { + if(pred_format==0) { /* old weired output format */ + fprintf(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist); + } + if(doc_label>0) correct++; else incorrect++; + if(doc_label>0) res_a++; else res_b++; + } + else { + if(pred_format==0) { /* old weired output format */ + fprintf(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist); + } + if(doc_label<0) correct++; else incorrect++; + if(doc_label>0) res_c++; else res_d++; + } + if(pred_format==1) { /* output the value of decision function */ + fprintf(predfl,"%.8g\n",dist); + } + if((int)(0.01+(doc_label*doc_label)) != 1) + { no_accuracy=1; } /* test data is not binary labeled */ + if(verbosity>=2) { + if(totdoc % 100 == 0) { + printf("%ld..",totdoc); fflush(stdout); + } + } + } + fclose(predfl); + fclose(docfl); + free(line); + free(words); + free_model(model,1); + + if(verbosity>=2) { + printf("done\n"); + +/* Note by Gary Boone Date: 29 April 2000 */ +/* o Timing is inaccurate. The timer has 0.01 second resolution. */ +/* Because classification of a single vector takes less than */ +/* 0.01 secs, the timer was underflowing. 
 */
+    printf("Runtime (without IO) in cpu-seconds: %.2f\n",
+	   (float)(runtime/100.0));
+
+  }
+  if((!no_accuracy) && (verbosity>=1)) {
+    printf("Accuracy on test set: %.2f%% (%ld correct, %ld incorrect, %ld total)\n",(float)(correct)*100.0/totdoc,correct,incorrect,totdoc);
+    printf("Precision/recall on test set: %.2f%%/%.2f%%\n",(float)(res_a)*100.0/(res_a+res_b),(float)(res_a)*100.0/(res_a+res_c));
+  }
+
+  return(0);
+}
+
+void read_input_parameters(int argc, char **argv, char *docfile,
+			   char *modelfile, char *predictionsfile,
+			   long int *verbosity, long int *pred_format)
+{
+  long i;
+
+  /* set default */
+  strcpy (modelfile, "svm_model");
+  strcpy (predictionsfile, "svm_predictions");
+  (*verbosity)=2;
+  (*pred_format)=1;
+
+  for(i=1;(i<argc) && ((argv[i])[0] == '-');i++) {
+    switch ((argv[i])[1])
+      {
+      case 'h': print_help(); exit(0);
+      case 'v': i++; (*verbosity)=atol(argv[i]); break;
+      case 'f': i++; (*pred_format)=atol(argv[i]); break;
+      default: printf("\nUnrecognized option %s!\n\n",argv[i]);
+	       print_help();
+	       exit(0);
+      }
+  }
+  if((i+1)>=argc) {
+    printf("\nNot enough input parameters!\n\n");
+    print_help();
+    exit(0);
+  }
+  strcpy (docfile, argv[i]);
+  strcpy (modelfile, argv[i+1]);
+  if((i+2)<argc) {
+    strcpy (predictionsfile, argv[i+2]);
+  }
+  if(((*pred_format) != 0) && ((*pred_format) != 1)) {
+    printf("\nOutput format can only be 0 or 1!\n\n");
+    print_help();
+    exit(0);
+  }
+}
+
+void print_help(void)
+{
+  printf("\nSVM-light %s: Support Vector Machine, classification module     %s\n",VERSION,VERSION_DATE);
+  copyright_notice();
+  printf("   usage: svm_classify [options] example_file model_file output_file\n\n");
+  printf("options: -h         -> this help\n");
+  printf("         -v [0..3]  -> verbosity level (default 2)\n");
+  printf("         -f [0,1]   -> 0: old output format of V1.0\n");
+  printf("                    -> 1: output the value of decision function (default)\n\n");
+}
+
+
+
diff --git a/thirdparty/svmlight/svm_common.c b/thirdparty/svmlight/svm_common.c
new file mode 100755
index 000000000..61e72800e
--- /dev/null
+++ b/thirdparty/svmlight/svm_common.c
@@ -0,0 +1,985 @@
+/************************************************************************/
+/*                                                                      */
+/*   svm_common.c                                                       */
+/*                                                                      */
+/*   Definitions and functions used in both svm_learn and svm_classify. */
+/*                                                                      */
+/*   Author: Thorsten Joachims                                          */
+/*   Date: 02.07.04                                                     */
+/*                                                                      */
+/*   Copyright (c) 2004  Thorsten Joachims - All rights reserved        */
+/*                                                                      */
+/*   This software is available for non-commercial use only. It must    */
+/*   not be modified and distributed without prior permission of the    */
+/*   author. The author is not responsible for implications from the    */
+/*   use of this software.                                              */
+/*                                                                      */
+/************************************************************************/
+
+# include "ctype.h"
+# include "svm_common.h"
+# include "kernel.h"           /* this contains a user supplied kernel */
+
+long   verbosity;              /* verbosity level (0-4) */
+long   kernel_cache_statistic;
+
+double classify_example(MODEL *model, DOC *ex)
+     /* classifies one example */
+{
+  register long i;
+  register double dist;
+
+  if((model->kernel_parm.kernel_type == LINEAR) && (model->lin_weights))
+    return(classify_example_linear(model,ex));
+
+  dist=0;
+  for(i=1;i<model->sv_num;i++) {
+    dist+=kernel(&model->kernel_parm,model->supvec[i],ex)*model->alpha[i];
+  }
+  return(dist-model->b);
+}
+
+double classify_example_linear(MODEL *model, DOC *ex)
+     /* classifies example for linear kernel */
+
+     /* important: the model must have the linear weight vector computed */
+     /* use: add_weight_vector_to_linear_model(&model); */
+
+
+     /* important: the feature numbers in the example to classify must */
+     /*            not be larger than the weight vector!
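+
+	Example (a minimal sketch, not part of the original SVM-Light
+	sources): build one sparse example and classify it, assuming a
+	MODEL *model from read_model() on which
+	add_weight_vector_to_linear_model(model) has already been
+	called, and feature numbers within model->totwords:
+
+	  WORD words[3];
+	  words[0].wnum=1; words[0].weight=0.5;
+	  words[1].wnum=3; words[1].weight=2.0;
+	  words[2].wnum=0;           (a zero wnum terminates the vector)
+	  DOC *doc=create_example(-1,0,0,0.0,create_svector(words,"",1.0));
+	  double dist=classify_example_linear(model,doc);
+	  free_example(doc,1);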
 */
+{
+  double sum=0;
+  SVECTOR *f;
+
+  for(f=ex->fvec;f;f=f->next)
+    sum+=f->factor*sprod_ns(model->lin_weights,f);
+  return(sum-model->b);
+}
+
+
+double kernel(KERNEL_PARM *kernel_parm, DOC *a, DOC *b)
+     /* calculate the kernel function */
+{
+  double sum=0;
+  SVECTOR *fa,*fb;
+
+  /* in case the constraints are sums of feature vector as represented
+     as a list of SVECTOR's with their coefficient factor in the sum,
+     take the kernel between all pairs */
+  for(fa=a->fvec;fa;fa=fa->next) {
+    for(fb=b->fvec;fb;fb=fb->next) {
+      if(fa->kernel_id == fb->kernel_id)
+	sum+=fa->factor*fb->factor*single_kernel(kernel_parm,fa,fb);
+    }
+  }
+  return(sum);
+}
+
+double single_kernel(KERNEL_PARM *kernel_parm, SVECTOR *a, SVECTOR *b)
+     /* calculate the kernel function between two vectors */
+{
+  kernel_cache_statistic++;
+  switch(kernel_parm->kernel_type) {
+    case 0: /* linear */
+            return(sprod_ss(a,b));
+    case 1: /* polynomial */
+            return(pow(kernel_parm->coef_lin*sprod_ss(a,b)+kernel_parm->coef_const,(double)kernel_parm->poly_degree));
+    case 2: /* radial basis function */
+            return(exp(-kernel_parm->rbf_gamma*(a->twonorm_sq-2*sprod_ss(a,b)+b->twonorm_sq)));
+    case 3: /* sigmoid neural net */
+            return(tanh(kernel_parm->coef_lin*sprod_ss(a,b)+kernel_parm->coef_const));
+    case 4: /* custom-kernel supplied in file kernel.h*/
+            return(custom_kernel(kernel_parm,a,b));
+    default: printf("Error: Unknown kernel function\n"); exit(1);
+  }
+}
+
+
+SVECTOR *create_svector(WORD *words,char *userdefined,double factor)
+{
+  SVECTOR *vec;
+  long    fnum,i;
+
+  fnum=0;
+  while(words[fnum].wnum) {
+    fnum++;
+  }
+  fnum++;
+  vec = (SVECTOR *)my_malloc(sizeof(SVECTOR));
+  vec->words = (WORD *)my_malloc(sizeof(WORD)*(fnum));
+  for(i=0;i<fnum;i++) {
+    vec->words[i]=words[i];
+  }
+  vec->twonorm_sq=sprod_ss(vec,vec);
+
+  fnum=0;
+  while(userdefined[fnum]) {
+    fnum++;
+  }
+  fnum++;
+  vec->userdefined = (char *)my_malloc(sizeof(char)*(fnum));
+  for(i=0;i<fnum;i++) {
+    vec->userdefined[i]=userdefined[i];
+  }
+  vec->kernel_id=0;
+  vec->next=NULL;
+  vec->factor=factor;
+  return(vec);
+}
+
+SVECTOR *copy_svector(SVECTOR *vec)
+{
+  SVECTOR *newvec=NULL;
+  if(vec) {
+    newvec=create_svector(vec->words,vec->userdefined,vec->factor);
+    newvec->next=copy_svector(vec->next);
+  }
+  return(newvec);
+}
+
+void free_svector(SVECTOR *vec)
+{
+  if(vec) {
+    free(vec->words);
+    if(vec->userdefined)
+      free(vec->userdefined);
+    free_svector(vec->next);
+    free(vec);
+  }
+}
+
+double sprod_ss(SVECTOR *a, SVECTOR *b)
+     /* compute the inner product of two sparse vectors */
+{
+    register double sum=0;
+    register WORD *ai,*bj;
+    ai=a->words;
+    bj=b->words;
+    while (ai->wnum && bj->wnum) {
+      if(ai->wnum > bj->wnum) {
+	bj++;
+      }
+      else if (ai->wnum < bj->wnum) {
+	ai++;
+      }
+      else {
+	sum+=(ai->weight) * (bj->weight);
+	ai++;
+	bj++;
+      }
+    }
+    return((double)sum);
+}
+
+SVECTOR* sub_ss(SVECTOR *a, SVECTOR *b)
+     /* compute the difference a-b of two sparse vectors */
+     /* Note: SVECTOR lists are not followed, but only the first
+	SVECTOR is used */
+{
+    SVECTOR *vec;
+    register WORD *sum,*sumi;
+    register WORD *ai,*bj;
+    long veclength;
+
+    ai=a->words;
+    bj=b->words;
+    veclength=0;
+    while (ai->wnum && bj->wnum) {
+      if(ai->wnum > bj->wnum) {
+	veclength++;
+	bj++;
+      }
+      else if (ai->wnum < bj->wnum) {
+	veclength++;
+	ai++;
+      }
+      else {
+	veclength++;
+	ai++;
+	bj++;
+      }
+    }
+    while (bj->wnum) {
+      veclength++;
+      bj++;
+    }
+    while (ai->wnum) {
+      veclength++;
+      ai++;
+    }
+    veclength++;
+
+    sum=(WORD *)my_malloc(sizeof(WORD)*veclength);
+    sumi=sum;
+    ai=a->words;
+    bj=b->words;
+    while (ai->wnum &&
bj->wnum) { + if(ai->wnum > bj->wnum) { + (*sumi)=(*bj); + sumi->weight*=(-1); + sumi++; + bj++; + } + else if (ai->wnum < bj->wnum) { + (*sumi)=(*ai); + sumi++; + ai++; + } + else { + (*sumi)=(*ai); + sumi->weight-=bj->weight; + if(sumi->weight != 0) + sumi++; + ai++; + bj++; + } + } + while (bj->wnum) { + (*sumi)=(*bj); + sumi->weight*=(-1); + sumi++; + bj++; + } + while (ai->wnum) { + (*sumi)=(*ai); + sumi++; + ai++; + } + sumi->wnum=0; + + vec=create_svector(sum,"",1.0); + free(sum); + + return(vec); +} + +SVECTOR* add_ss(SVECTOR *a, SVECTOR *b) + /* compute the sum a+b of two sparse vectors */ + /* Note: SVECTOR lists are not followed, but only the first + SVECTOR is used */ +{ + SVECTOR *vec; + register WORD *sum,*sumi; + register WORD *ai,*bj; + long veclength; + + ai=a->words; + bj=b->words; + veclength=0; + while (ai->wnum && bj->wnum) { + if(ai->wnum > bj->wnum) { + veclength++; + bj++; + } + else if (ai->wnum < bj->wnum) { + veclength++; + ai++; + } + else { + veclength++; + ai++; + bj++; + } + } + while (bj->wnum) { + veclength++; + bj++; + } + while (ai->wnum) { + veclength++; + ai++; + } + veclength++; + + /*** is veclength=lengSequence(a)+lengthSequence(b)? ***/ + + sum=(WORD *)my_malloc(sizeof(WORD)*veclength); + sumi=sum; + ai=a->words; + bj=b->words; + while (ai->wnum && bj->wnum) { + if(ai->wnum > bj->wnum) { + (*sumi)=(*bj); + sumi++; + bj++; + } + else if (ai->wnum < bj->wnum) { + (*sumi)=(*ai); + sumi++; + ai++; + } + else { + (*sumi)=(*ai); + sumi->weight+=bj->weight; + if(sumi->weight != 0) + sumi++; + ai++; + bj++; + } + } + while (bj->wnum) { + (*sumi)=(*bj); + sumi++; + bj++; + } + while (ai->wnum) { + (*sumi)=(*ai); + sumi++; + ai++; + } + sumi->wnum=0; + + vec=create_svector(sum,"",1.0); + free(sum); + + return(vec); +} + +SVECTOR* add_list_ss(SVECTOR *a) + /* computes the linear combination of the SVECTOR list weighted + by the factor of each SVECTOR */ +{ + SVECTOR *scaled,*oldsum,*sum,*f; + WORD empty[2]; + + if(a){ + sum=smult_s(a,a->factor); + for(f=a->next;f;f=f->next) { + scaled=smult_s(f,f->factor); + oldsum=sum; + sum=add_ss(sum,scaled); + free_svector(oldsum); + free_svector(scaled); + } + sum->factor=1.0; + } + else { + empty[0].wnum=0; + sum=create_svector(empty,"",1.0); + } + return(sum); +} + +void append_svector_list(SVECTOR *a, SVECTOR *b) + /* appends SVECTOR b to the end of SVECTOR a. 
 */
+{
+  SVECTOR *f;
+
+  for(f=a;f->next;f=f->next);  /* find end of first vector list */
+  f->next=b;                   /* append the two vector lists */
+}
+
+SVECTOR* smult_s(SVECTOR *a, double factor)
+     /* scale sparse vector a by factor */
+{
+    SVECTOR *vec;
+    register WORD *sum,*sumi;
+    register WORD *ai;
+    long veclength;
+
+    ai=a->words;
+    veclength=0;
+    while (ai->wnum) {
+      veclength++;
+      ai++;
+    }
+    veclength++;
+
+    sum=(WORD *)my_malloc(sizeof(WORD)*veclength);
+    sumi=sum;
+    ai=a->words;
+    while (ai->wnum) {
+      (*sumi)=(*ai);
+      sumi->weight*=factor;
+      if(sumi->weight != 0)
+	sumi++;
+      ai++;
+    }
+    sumi->wnum=0;
+
+    vec=create_svector(sum,a->userdefined,a->factor);
+    free(sum);
+
+    return(vec);
+}
+
+int featvec_eq(SVECTOR *a, SVECTOR *b)
+     /* tests two sparse vectors for equality */
+{
+    register WORD *ai,*bj;
+    ai=a->words;
+    bj=b->words;
+    while (ai->wnum && bj->wnum) {
+      if(ai->wnum > bj->wnum) {
+	if((bj->weight) != 0)
+	  return(0);
+	bj++;
+      }
+      else if (ai->wnum < bj->wnum) {
+	if((ai->weight) != 0)
+	  return(0);
+	ai++;
+      }
+      else {
+	if((ai->weight) != (bj->weight))
+	  return(0);
+	ai++;
+	bj++;
+      }
+    }
+    return(1);
+}
+
+double model_length_s(MODEL *model, KERNEL_PARM *kernel_parm)
+     /* compute length of weight vector */
+{
+  register long i,j;
+  register double sum=0,alphai;
+  register DOC *supveci;
+
+  for(i=1;i<model->sv_num;i++) {
+    alphai=model->alpha[i];
+    supveci=model->supvec[i];
+    for(j=1;j<model->sv_num;j++) {
+      sum+=alphai*model->alpha[j]
+	   *kernel(kernel_parm,supveci,model->supvec[j]);
+    }
+  }
+  return(sqrt(sum));
+}
+
+void clear_vector_n(double *vec, long int n)
+{
+  register long i;
+  for(i=0;i<=n;i++) vec[i]=0;
+}
+
+void add_vector_ns(double *vec_n, SVECTOR *vec_s, double faktor)
+{
+  register WORD *ai;
+  ai=vec_s->words;
+  while (ai->wnum) {
+    vec_n[ai->wnum]+=(faktor*ai->weight);
+    ai++;
+  }
+}
+
+double sprod_ns(double *vec_n, SVECTOR *vec_s)
+{
+  register double sum=0;
+  register WORD *ai;
+  ai=vec_s->words;
+  while (ai->wnum) {
+    sum+=(vec_n[ai->wnum]*ai->weight);
+    ai++;
+  }
+  return(sum);
+}
+
+void add_weight_vector_to_linear_model(MODEL *model)
+     /* compute weight vector in linear case and add to model */
+{
+  long i;
+  SVECTOR *f;
+
+  model->lin_weights=(double *)my_malloc(sizeof(double)*(model->totwords+1));
+  clear_vector_n(model->lin_weights,model->totwords);
+  for(i=1;i<model->sv_num;i++) {
+    for(f=(model->supvec[i])->fvec;f;f=f->next)
+      add_vector_ns(model->lin_weights,f,f->factor*model->alpha[i]);
+  }
+}
+
+
+DOC *create_example(long docnum, long queryid, long slackid,
+		    double costfactor, SVECTOR *fvec)
+{
+  DOC *example;
+  example = (DOC *)my_malloc(sizeof(DOC));
+  example->docnum=docnum;
+  example->queryid=queryid;
+  example->slackid=slackid;
+  example->costfactor=costfactor;
+  example->fvec=fvec;
+  return(example);
+}
+
+void free_example(DOC *example, long deep)
+{
+  if(example) {
+    if(deep) {
+      if(example->fvec)
+	free_svector(example->fvec);
+    }
+    free(example);
+  }
+}
+
+void write_model(char *modelfile, MODEL *model)
+{
+  FILE *modelfl;
+  long j,i,sv_num;
+  SVECTOR *v;
+
+  if(verbosity>=1) {
+    printf("Writing model file..."); fflush(stdout);
+  }
+  if ((modelfl = fopen (modelfile, "w")) == NULL)
+  { perror (modelfile); exit (1); }
+  fprintf(modelfl,"SVM-light Version %s\n",VERSION);
+  fprintf(modelfl,"%ld # kernel type\n",
+	  model->kernel_parm.kernel_type);
+  fprintf(modelfl,"%ld # kernel parameter -d \n",
+	  model->kernel_parm.poly_degree);
+  fprintf(modelfl,"%.8g # kernel parameter -g \n",
+	  model->kernel_parm.rbf_gamma);
+  fprintf(modelfl,"%.8g # kernel parameter -s \n",
+	  model->kernel_parm.coef_lin);
+  fprintf(modelfl,"%.8g # kernel parameter -r \n",
+	  model->kernel_parm.coef_const);
+  fprintf(modelfl,"%s# kernel parameter -u \n",model->kernel_parm.custom);
+  fprintf(modelfl,"%ld # highest feature index \n",model->totwords);
+  fprintf(modelfl,"%ld # number of training documents \n",model->totdoc);
+
+  sv_num=1;
+  for(i=1;i<model->sv_num;i++) {
+    for(v=model->supvec[i]->fvec;v;v=v->next)
+      sv_num++;
+  }
+  fprintf(modelfl,"%ld # number of support vectors plus 1 \n",sv_num);
+  fprintf(modelfl,"%.8g # threshold b, each following line is a SV (starting with alpha*y)\n",model->b);
+
+  for(i=1;i<model->sv_num;i++) {
+    for(v=model->supvec[i]->fvec;v;v=v->next) {
+      fprintf(modelfl,"%.32g ",model->alpha[i]*v->factor);
+      for (j=0; (v->words[j]).wnum; j++) {
+	fprintf(modelfl,"%ld:%.8g ",
+		(long)(v->words[j]).wnum,
+		(double)(v->words[j]).weight);
+      }
+      fprintf(modelfl,"#%s\n",v->userdefined);
+    /* NOTE: this could be made more efficient by summing the
+       alpha's of identical vectors before writing them to the
+       file. */
+    }
+  }
+  fclose(modelfl);
+  if(verbosity>=1) {
+    printf("done\n");
+  }
+}
+
+
+MODEL *read_model(char *modelfile)
+{
+  FILE *modelfl;
+  long i,queryid,slackid;
+  double costfactor;
+  long max_sv,max_words,ll,wpos;
+  char *line,*comment;
+  WORD *words;
+  char version_buffer[100];
+  MODEL *model;
+
+  if(verbosity>=1) {
+    printf("Reading model..."); fflush(stdout);
+  }
+
+  nol_ll(modelfile,&max_sv,&max_words,&ll); /* scan size of model file */
+  max_words+=2;
+  ll+=2;
+
+  words = (WORD *)my_malloc(sizeof(WORD)*(max_words+10));
+  line = (char *)my_malloc(sizeof(char)*ll);
+  model = (MODEL *)my_malloc(sizeof(MODEL));
+
+  if ((modelfl = fopen (modelfile, "r")) == NULL)
+  { perror (modelfile); exit (1); }
+
+  fscanf(modelfl,"SVM-light Version %s\n",version_buffer);
+  if(strcmp(version_buffer,VERSION)) {
+    perror ("Version of model-file does not match version of svm_classify!");
+    exit (1);
+  }
+  fscanf(modelfl,"%ld%*[^\n]\n", &model->kernel_parm.kernel_type);
+  fscanf(modelfl,"%ld%*[^\n]\n", &model->kernel_parm.poly_degree);
+  fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.rbf_gamma);
+  fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.coef_lin);
+  fscanf(modelfl,"%lf%*[^\n]\n", &model->kernel_parm.coef_const);
+  fscanf(modelfl,"%[^#]%*[^\n]\n", model->kernel_parm.custom);
+
+  fscanf(modelfl,"%ld%*[^\n]\n", &model->totwords);
+  fscanf(modelfl,"%ld%*[^\n]\n", &model->totdoc);
+  fscanf(modelfl,"%ld%*[^\n]\n", &model->sv_num);
+  fscanf(modelfl,"%lf%*[^\n]\n", &model->b);
+
+  model->supvec = (DOC **)my_malloc(sizeof(DOC *)*model->sv_num);
+  model->alpha = (double *)my_malloc(sizeof(double)*model->sv_num);
+  model->index=NULL;
+  model->lin_weights=NULL;
+
+  for(i=1;i<model->sv_num;i++) {
+    fgets(line,(int)ll,modelfl);
+    if(!parse_document(line,words,&(model->alpha[i]),&queryid,&slackid,
+		       &costfactor,&wpos,max_words,&comment)) {
+      printf("\nParsing error while reading model file in SV %ld!\n%s",
+	     i,line);
+      exit(1);
+    }
+    model->supvec[i] = create_example(-1,
+				      0,0,
+				      0.0,
+				      create_svector(words,comment,1.0));
+  }
+  fclose(modelfl);
+  free(line);
+  free(words);
+  if(verbosity>=1) {
+    fprintf(stdout, "OK. (%d support vectors read)\n",(int)(model->sv_num-1));
+  }
+  return(model);
+}
+
+MODEL *copy_model(MODEL *model)
+{
+  MODEL *newmodel;
+  long i;
+
+  newmodel=(MODEL *)my_malloc(sizeof(MODEL));
+  (*newmodel)=(*model);
+  newmodel->supvec = (DOC **)my_malloc(sizeof(DOC *)*model->sv_num);
+  newmodel->alpha = (double *)my_malloc(sizeof(double)*model->sv_num);
+  newmodel->index = NULL; /* index is not copied */
+  newmodel->supvec[0] = NULL;
+  newmodel->alpha[0] = 0;
+  for(i=1;i<model->sv_num;i++) {
+    newmodel->alpha[i]=model->alpha[i];
+    newmodel->supvec[i]=create_example(model->supvec[i]->docnum,
+				       model->supvec[i]->queryid,0,
+				       model->supvec[i]->costfactor,
+				       copy_svector(model->supvec[i]->fvec));
+  }
+  if(model->lin_weights) {
+    newmodel->lin_weights = (double *)my_malloc(sizeof(double)*(model->totwords+1));
+    for(i=0;i<model->totwords+1;i++)
+      newmodel->lin_weights[i]=model->lin_weights[i];
+  }
+  return(newmodel);
+}
+
+void free_model(MODEL *model, int deep)
+{
+  long i;
+
+  if(model->supvec) {
+    if(deep) {
+      for(i=1;i<model->sv_num;i++) {
+	free_example(model->supvec[i],1);
+      }
+    }
+    free(model->supvec);
+  }
+  if(model->alpha) free(model->alpha);
+  if(model->index) free(model->index);
+  if(model->lin_weights) free(model->lin_weights);
+  free(model);
+}
+
+
+void read_documents(char *docfile, DOC ***docs, double **label,
+		    long int *totwords, long int *totdoc)
+{
+  char *line,*comment;
+  WORD *words;
+  long dnum=0,wpos,dpos=0,dneg=0,dunlab=0,queryid,slackid,max_docs;
+  long max_words_doc, ll;
+  double doc_label,costfactor;
+  FILE *docfl;
+
+  if(verbosity>=1) {
+    printf("Scanning examples..."); fflush(stdout);
+  }
+  nol_ll(docfile,&max_docs,&max_words_doc,&ll); /* scan size of input file */
+  max_words_doc+=2;
+  ll+=2;
+  max_docs+=2;
+  if(verbosity>=1) {
+    printf("done\n"); fflush(stdout);
+  }
+
+  (*docs) = (DOC **)my_malloc(sizeof(DOC *)*max_docs);     /* feature vectors */
+  (*label) = (double *)my_malloc(sizeof(double)*max_docs); /* target values */
+  line = (char *)my_malloc(sizeof(char)*ll);
+
+  if ((docfl = fopen (docfile, "r")) == NULL)
+  { perror (docfile); exit (1); }
+
+  words = (WORD *)my_malloc(sizeof(WORD)*(max_words_doc+10));
+  if(verbosity>=1) {
+    printf("Reading examples into memory..."); fflush(stdout);
+  }
+  dnum=0;
+  (*totwords)=0;
+  while((!feof(docfl)) && fgets(line,(int)ll,docfl)) {
+    if(line[0] == '#') continue;  /* line contains comments */
+    if(!parse_document(line,words,&doc_label,&queryid,&slackid,&costfactor,
+		       &wpos,max_words_doc,&comment)) {
+      printf("\nParsing error in line %ld!\n%s",dnum,line);
+      exit(1);
+    }
+    (*label)[dnum]=doc_label;
+    /* printf("docnum=%ld: Class=%f ",dnum,doc_label); */
+    if(doc_label > 0) dpos++;
+    if (doc_label < 0) dneg++;
+    if (doc_label == 0) dunlab++;
+    if((wpos>1) && ((words[wpos-2]).wnum>(*totwords)))
+      (*totwords)=(words[wpos-2]).wnum;
+    if((*totwords) > MAXFEATNUM) {
+      printf("\nMaximum feature number exceeds limit defined in MAXFEATNUM!\n");
+      printf("LINE: %s\n",line);
+      exit(1);
+    }
+    (*docs)[dnum] = create_example(dnum,queryid,slackid,costfactor,
+				   create_svector(words,comment,1.0));
+    /* printf("\nNorm=%f\n",((*docs)[dnum]->fvec)->twonorm_sq); */
+    dnum++;
+    if(verbosity>=1) {
+      if((dnum % 100) == 0) {
+	printf("%ld..",dnum); fflush(stdout);
+      }
+    }
+  }
+
+  fclose(docfl);
+  free(line);
+  free(words);
+  if(verbosity>=1) {
+    fprintf(stdout, "OK. (%ld examples read)\n", dnum);
+  }
+  (*totdoc)=dnum;
+}
+
+int parse_document(char *line, WORD *words, double *label,
+		   long *queryid, long *slackid, double *costfactor,
+		   long int *numwords, long int max_words_doc,
+		   char **comment)
+{
+  register long wpos,pos;
+  long wnum;
+  double weight;
+  int numread;
+  char featurepair[1000],junk[1000];
+
+  (*queryid)=0;
+  (*slackid)=0;
+  (*costfactor)=1;
+
+  pos=0;
+  (*comment)=NULL;
+  while(line[pos] ) {      /* cut off comments */
+    if((line[pos] == '#') && (!(*comment))) {
+      line[pos]=0;
+      (*comment)=&(line[pos+1]);
+    }
+    if(line[pos] == '\n') { /* strip the CR */
+      line[pos]=0;
+    }
+    pos++;
+  }
+  if(!(*comment)) (*comment)=&(line[pos]);
+  /* printf("Comment: '%s'\n",(*comment)); */
+
+  wpos=0;
+  /* check, that line starts with target value or zero, but not with
+     feature pair */
+  if(sscanf(line,"%s",featurepair) == EOF) return(0);
+  pos=0;
+  while((featurepair[pos] != ':') && featurepair[pos]) pos++;
+  if(featurepair[pos] == ':') {
+	perror ("Line must start with label or 0!!!\n");
+	printf("LINE: %s\n",line);
+	exit (1);
+  }
+  /* read the target value */
+  if(sscanf(line,"%lf",label) == EOF) return(0);
+  pos=0;
+  while(space_or_null((int)line[pos])) pos++;
+  while((!space_or_null((int)line[pos])) && line[pos]) pos++;
+  while(((numread=sscanf(line+pos,"%s",featurepair)) != EOF) &&
+	(numread > 0) &&
+	(wpos<max_words_doc)) {
+    /* printf("%s\n",featurepair); */
+    while(space_or_null((int)line[pos])) pos++;
+    while((!space_or_null((int)line[pos])) && line[pos]) pos++;
+    if(sscanf(featurepair,"qid:%ld%s",&wnum,junk)==1) {
+      /* it is the query id */
+      (*queryid)=(long)wnum;
+    }
+    else if(sscanf(featurepair,"sid:%ld%s",&wnum,junk)==1) {
+      /* it is the slack id */
+      if(wnum > 0)
+	(*slackid)=(long)wnum;
+      else {
+	perror ("Slack-id must be greater or equal to 1!!!\n");
+	printf("LINE: %s\n",line);
+	exit (1);
+      }
+    }
+    else if(sscanf(featurepair,"cost:%lf%s",&weight,junk)==1) {
+      /* it is the example-dependent cost factor */
+      (*costfactor)=(double)weight;
+    }
+    else if(sscanf(featurepair,"%ld:%lf%s",&wnum,&weight,junk)==2) {
+      /* it is a regular feature */
+      if(wnum<=0) {
+	perror ("Feature numbers must be larger or equal to 1!!!\n");
+	printf("LINE: %s\n",line);
+	exit (1);
+      }
+      if((wpos>0) && ((words[wpos-1]).wnum >= wnum)) {
+	perror ("Features must be in increasing order!!!\n");
+	printf("LINE: %s\n",line);
+	exit (1);
+      }
+      (words[wpos]).wnum=wnum;
+      (words[wpos]).weight=(FVAL)weight;
+      wpos++;
+    }
+    else {
+      perror ("Cannot parse feature/value pair!!!\n");
+      printf("'%s' in LINE: %s\n",featurepair,line);
+      exit (1);
+    }
+  }
+  (words[wpos]).wnum=0;
+  (*numwords)=wpos+1;
+  return(1);
+}
+
+double *read_alphas(char *alphafile,long totdoc)
+     /* reads the alpha vector from a file as written by the
+        write_alphas function */
+{
+  FILE *fl;
+  double *alpha;
+  long dnum;
+
+  if ((fl = fopen (alphafile, "r")) == NULL)
+  { perror (alphafile); exit (1); }
+
+  alpha = (double *)my_malloc(sizeof(double)*totdoc);
+  if(verbosity>=1) {
+    printf("Reading alphas..."); fflush(stdout);
+  }
+  dnum=0;
+  while((!feof(fl)) && fscanf(fl,"%lf\n",&alpha[dnum]) && (dnum<totdoc)) {
+    dnum++;
+  }
+  fclose(fl);
+
+  if(verbosity>=1) {
+    printf("done\n"); fflush(stdout);
+  }
+
+  return(alpha);
+}
+
+void nol_ll(char *file, long int *nol, long int *wol, long int *ll)
+     /* Grep through file and count number of lines, maximum number of
+        spaces per line, and longest line.
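+
+	For reference, a typical line in the example files scanned
+	here, as accepted by parse_document() above (the numbers are
+	made up for illustration):
+
+	  +1 qid:3 cost:2.0 1:0.43 31:0.12 9284:0.2 # abcdef
+
+	The target value comes first, then the optional qid/sid/cost
+	special features, then feature:value pairs with strictly
+	increasing feature numbers, and an optional comment after the
+	# sign.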
 */
+{
+  FILE *fl;
+  int ic;
+  char c;
+  long current_length,current_wol;
+
+  if ((fl = fopen (file, "r")) == NULL)
+  { perror (file); exit (1); }
+  current_length=0;
+  current_wol=0;
+  (*ll)=0;
+  (*nol)=1;
+  (*wol)=0;
+  while((ic=getc(fl)) != EOF) {
+    c=(char)ic;
+    current_length++;
+    if(space_or_null((int)c)) {
+      current_wol++;
+    }
+    if(c == '\n') {
+      (*nol)++;
+      if(current_length>(*ll)) {
+	(*ll)=current_length;
+      }
+      if(current_wol>(*wol)) {
+	(*wol)=current_wol;
+      }
+      current_length=0;
+      current_wol=0;
+    }
+  }
+  fclose(fl);
+}
+
+long minl(long int a, long int b)
+{
+  if(a<b)
+    return(a);
+  else
+    return(b);
+}
+
+long maxl(long int a, long int b)
+{
+  if(a>b)
+    return(a);
+  else
+    return(b);
+}
+
+long get_runtime(void)
+{
+  clock_t start;
+  start = clock();
+  return((long)((double)start*100.0/(double)CLOCKS_PER_SEC));
+}
+
+
+# ifdef _MSC_VER
+
+int isnan(double a)
+{
+  return(_isnan(a));
+}
+
+# endif
+
+int space_or_null(int c) {
+  if (c==0)
+    return 1;
+  return isspace((unsigned char)c);
+}
+
+void *my_malloc(size_t size)
+{
+  void *ptr;
+  if(size<=0) size=1; /* for AIX compatibility */
+  ptr=(void *)malloc(size);
+  if(!ptr) {
+    perror ("Out of memory!\n");
+    exit (1);
+  }
+  return(ptr);
+}
+
+void copyright_notice(void)
+{
+  printf("\nCopyright: Thorsten Joachims, thorsten@joachims.org\n\n");
+  printf("This software is available for non-commercial use only. It must not\n");
+  printf("be modified and distributed without prior permission of the author.\n");
+  printf("The author is not responsible for implications from the use of this\n");
+  printf("software.\n\n");
+}
diff --git a/thirdparty/svmlight/svm_common.h b/thirdparty/svmlight/svm_common.h
new file mode 100755
index 000000000..6487fa1d1
--- /dev/null
+++ b/thirdparty/svmlight/svm_common.h
@@ -0,0 +1,301 @@
+/************************************************************************/
+/*                                                                      */
+/*   svm_common.h                                                       */
+/*                                                                      */
+/*   Definitions and functions used in both svm_learn and svm_classify. */
+/*                                                                      */
+/*   Author: Thorsten Joachims                                          */
+/*   Date: 02.07.02                                                     */
+/*                                                                      */
+/*   Copyright (c) 2002  Thorsten Joachims - All rights reserved        */
+/*                                                                      */
+/*   This software is available for non-commercial use only. It must    */
+/*   not be modified and distributed without prior permission of the    */
+/*   author. The author is not responsible for implications from the    */
+/*   use of this software.                                              */
+/*                                                                      */
+/************************************************************************/
+
+#ifndef SVM_COMMON
+#define SVM_COMMON
+
+# include <stdio.h>
+# include <ctype.h>
+# include <math.h>
+# include <string.h>
+# include <stdlib.h>
+# include <time.h>
+# include <float.h>
+
+# define VERSION       "V6.02"
+# define VERSION_DATE  "14.08.08"
+
+# define CFLOAT  float       /* the type of float to use for caching */
+                             /* kernel evaluations. Using float saves */
+                             /* us some memory, but you can use double, too */
+# define FNUM    long        /* the type used for storing feature ids */
+# define FVAL    float       /* the type used for storing feature values */
+# define MAXFEATNUM 99999999 /* maximum feature number (must be in
+			        valid range of FNUM type and long int!)
*/ + +# define LINEAR 0 /* linear kernel type */ +# define POLY 1 /* polynoial kernel type */ +# define RBF 2 /* rbf kernel type */ +# define SIGMOID 3 /* sigmoid kernel type */ + +# define CLASSIFICATION 1 /* train classification model */ +# define REGRESSION 2 /* train regression model */ +# define RANKING 3 /* train ranking model */ +# define OPTIMIZATION 4 /* train on general set of constraints */ + +# define MAXSHRINK 50000 /* maximum number of shrinking rounds */ + +typedef struct word { + FNUM wnum; /* word number */ + FVAL weight; /* word weight */ +} WORD; + +typedef struct svector { + WORD *words; /* The features/values in the vector by + increasing feature-number. Feature + numbers that are skipped are + interpreted as having value zero. */ + double twonorm_sq; /* The squared euclidian length of the + vector. Used to speed up the RBF kernel. */ + char *userdefined; /* You can put additional information + here. This can be useful, if you are + implementing your own kernel that + does not work with feature/values + representations (for example a + string kernel). By default, + svm-light will put here the string + after the # sign from each line of + the input file. */ + long kernel_id; /* Feature vectors with different + kernel_id's are orthogonal (ie. the + feature number do not match). This + is used for computing component + kernels for linear constraints which + are a sum of several different + weight vectors. (currently not + implemented). */ + struct svector *next; /* Let's you set up a list of SVECTOR's + for linear constraints which are a + sum of multiple feature + vectors. List is terminated by + NULL. */ + double factor; /* Factor by which this feature vector + is multiplied in the sum. */ +} SVECTOR; + +typedef struct doc { + long docnum; /* Document ID. This has to be the position of + the document in the training set array. */ + long queryid; /* for learning rankings, constraints are + generated for documents with the same + queryID. */ + double costfactor; /* Scales the cost of misclassifying this + document by this factor. The effect of this + value is, that the upper bound on the alpha + for this example is scaled by this factor. + The factors are set by the feature + 'cost:' in the training data. */ + long slackid; /* Index of the slack variable + corresponding to this + constraint. All constraints with the + same slackid share the same slack + variable. This can only be used for + svm_learn_optimization. */ + SVECTOR *fvec; /* Feature vector of the example. The + feature vector can actually be a + list of feature vectors. For + example, the list will have two + elements, if this DOC is a + preference constraint. The one + vector that is supposed to be ranked + higher, will have a factor of +1, + the lower ranked one should have a + factor of -1. */ +} DOC; + +typedef struct learn_parm { + long type; /* selects between regression and + classification */ + double svm_c; /* upper bound C on alphas */ + double eps; /* regression epsilon (eps=1.0 for + classification */ + double svm_costratio; /* factor to multiply C for positive examples */ + double transduction_posratio;/* fraction of unlabeled examples to be */ + /* classified as positives */ + long biased_hyperplane; /* if nonzero, use hyperplane w*x+b=0 + otherwise w*x=0 */ + long sharedslack; /* if nonzero, it will use the shared + slack variable mode in + svm_learn_optimization. 
It requires + that the slackid is set for every + training example */ + long svm_maxqpsize; /* size q of working set */ + long svm_newvarsinqp; /* new variables to enter the working set + in each iteration */ + long kernel_cache_size; /* size of kernel cache in megabytes */ + double epsilon_crit; /* tolerable error for distances used + in stopping criterion */ + double epsilon_shrink; /* how much a multiplier should be above + zero for shrinking */ + long svm_iter_to_shrink; /* iterations h after which an example can + be removed by shrinking */ + long maxiter; /* number of iterations after which the + optimizer terminates, if there was + no progress in maxdiff */ + long remove_inconsistent; /* exclude examples with alpha at C and + retrain */ + long skip_final_opt_check; /* do not check KT-Conditions at the end of + optimization for examples removed by + shrinking. WARNING: This might lead to + sub-optimal solutions! */ + long compute_loo; /* if nonzero, computes leave-one-out + estimates */ + double rho; /* parameter in xi/alpha-estimates and for + pruning leave-one-out range [1..2] */ + long xa_depth; /* parameter in xi/alpha-estimates upper + bounding the number of SV the current + alpha_t is distributed over */ + char predfile[200]; /* file for predicitions on unlabeled examples + in transduction */ + char alphafile[200]; /* file to store optimal alphas in. use + empty string if alphas should not be + output */ + + /* you probably do not want to touch the following */ + double epsilon_const; /* tolerable error on eq-constraint */ + double epsilon_a; /* tolerable error on alphas at bounds */ + double opt_precision; /* precision of solver, set to e.g. 1e-21 + if you get convergence problems */ + + /* the following are only for internal use */ + long svm_c_steps; /* do so many steps for finding optimal C */ + double svm_c_factor; /* increase C by this factor every step */ + double svm_costratio_unlab; + double svm_unlabbound; + double *svm_cost; /* individual upper bounds for each var */ + long totwords; /* number of features */ +} LEARN_PARM; + +typedef struct kernel_parm { + long kernel_type; /* 0=linear, 1=poly, 2=rbf, 3=sigmoid, 4=custom */ + long poly_degree; + double rbf_gamma; + double coef_lin; + double coef_const; + char custom[50]; /* for user supplied kernel */ +} KERNEL_PARM; + +typedef struct model { + long sv_num; + long at_upper_bound; + double b; + DOC **supvec; + double *alpha; + long *index; /* index from docnum to position in model */ + long totwords; /* number of features */ + long totdoc; /* number of training documents */ + KERNEL_PARM kernel_parm; /* kernel */ + + /* the following values are not written to file */ + double loo_error,loo_recall,loo_precision; /* leave-one-out estimates */ + double xa_error,xa_recall,xa_precision; /* xi/alpha estimates */ + double *lin_weights; /* weights for linear case using + folding */ + double maxdiff; /* precision, up to which this + model is accurate */ +} MODEL; + +typedef struct quadratic_program { + long opt_n; /* number of variables */ + long opt_m; /* number of linear equality constraints */ + double *opt_ce,*opt_ce0; /* linear equality constraints */ + double *opt_g; /* hessian of objective */ + double *opt_g0; /* linear part of objective */ + double *opt_xinit; /* initial value for variables */ + double *opt_low,*opt_up; /* box constraints */ +} QP; + +typedef struct kernel_cache { + long *index; /* cache some kernel evalutations */ + CFLOAT *buffer; /* to improve speed */ + long *invindex; + long *active2totdoc; + long 
*totdoc2active; + long *lru; + long *occu; + long elems; + long max_elems; + long time; + long activenum; + long buffsize; +} KERNEL_CACHE; + + +typedef struct timing_profile { + long time_kernel; + long time_opti; + long time_shrink; + long time_update; + long time_model; + long time_check; + long time_select; +} TIMING; + +typedef struct shrink_state { + long *active; + long *inactive_since; + long deactnum; + double **a_history; /* for shrinking with non-linear kernel */ + long maxhistory; + double *last_a; /* for shrinking with linear kernel */ + double *last_lin; /* for shrinking with linear kernel */ +} SHRINK_STATE; + +double classify_example(MODEL *, DOC *); +double classify_example_linear(MODEL *, DOC *); +double kernel(KERNEL_PARM *, DOC *, DOC *); +double single_kernel(KERNEL_PARM *, SVECTOR *, SVECTOR *); +double custom_kernel(KERNEL_PARM *, SVECTOR *, SVECTOR *); +SVECTOR *create_svector(WORD *, char *, double); +SVECTOR *copy_svector(SVECTOR *); +void free_svector(SVECTOR *); +double sprod_ss(SVECTOR *, SVECTOR *); +SVECTOR* sub_ss(SVECTOR *, SVECTOR *); +SVECTOR* add_ss(SVECTOR *, SVECTOR *); +SVECTOR* add_list_ss(SVECTOR *); +void append_svector_list(SVECTOR *a, SVECTOR *b); +SVECTOR* smult_s(SVECTOR *, double); +int featvec_eq(SVECTOR *, SVECTOR *); +double model_length_s(MODEL *, KERNEL_PARM *); +void clear_vector_n(double *, long); +void add_vector_ns(double *, SVECTOR *, double); +double sprod_ns(double *, SVECTOR *); +void add_weight_vector_to_linear_model(MODEL *); +DOC *create_example(long, long, long, double, SVECTOR *); +void free_example(DOC *, long); +MODEL *read_model(char *); +MODEL *copy_model(MODEL *); +void free_model(MODEL *, int); +void read_documents(char *, DOC ***, double **, long *, long *); +int parse_document(char *, WORD *, double *, long *, long *, double *, long *, long, char **); +double *read_alphas(char *,long); +void nol_ll(char *, long *, long *, long *); +long minl(long, long); +long maxl(long, long); +long get_runtime(void); +int space_or_null(int); +void *my_malloc(size_t); +void copyright_notice(void); +# ifdef _MSC_VER + int isnan(double); +# endif + +extern long verbosity; /* verbosity level (0-4) */ +extern long kernel_cache_statistic; + +#endif diff --git a/thirdparty/svmlight/svm_hideo.c b/thirdparty/svmlight/svm_hideo.c new file mode 100755 index 000000000..ffad2d3c1 --- /dev/null +++ b/thirdparty/svmlight/svm_hideo.c @@ -0,0 +1,1062 @@ +/***********************************************************************/ +/* */ +/* svm_hideo.c */ +/* */ +/* The Hildreth and D'Espo solver specialized for SVMs. */ +/* */ +/* Author: Thorsten Joachims */ +/* Date: 02.07.02 */ +/* */ +/* Copyright (c) 2002 Thorsten Joachims - All rights reserved */ +/* */ +/* This software is available for non-commercial use only. It must */ +/* not be modified and distributed without prior permission of the */ +/* author. The author is not responsible for implications from the */ +/* use of this software. 
 */
+/*                                                                     */
+/***********************************************************************/
+
+# include <math.h>
+# include "svm_common.h"
+
+/*
+ solve the quadratic programming problem
+
+ minimize   g0 * x + 1/2 x' * G * x
+ subject to ce*x = ce0
+            l <= x <= u
+
+ The linear constraint vector ce can only have -1/+1 as entries
+*/
+
+/* Common Block Declarations */
+
+long verbosity;
+
+# define PRIMAL_OPTIMAL      1
+# define DUAL_OPTIMAL        2
+# define MAXITER_EXCEEDED    3
+# define NAN_SOLUTION        4
+# define ONLY_ONE_VARIABLE   5
+
+# define LARGEROUND          0
+# define SMALLROUND          1
+
+/* /////////////////////////////////////////////////////////////// */
+
+# define DEF_PRECISION          1E-5
+# define DEF_MAX_ITERATIONS     200
+# define DEF_LINDEP_SENSITIVITY 1E-8
+# define EPSILON_HIDEO          1E-20
+# define EPSILON_EQ             1E-5
+
+double *optimize_qp(QP *, double *, long, double *, LEARN_PARM *);
+double *primal=0,*dual=0;
+long   precision_violations=0;
+double opt_precision=DEF_PRECISION;
+long   maxiter=DEF_MAX_ITERATIONS;
+double lindep_sensitivity=DEF_LINDEP_SENSITIVITY;
+double *buffer;
+long   *nonoptimal;
+
+long  smallroundcount=0;
+long  roundnumber=0;
+
+/* /////////////////////////////////////////////////////////////// */
+
+void *my_malloc();
+
+int optimize_hildreth_despo(long,long,double,double,double,long,long,long,double,double *,
+			    double *,double *,double *,double *,double *,
+			    double *,double *,double *,long *,double *,double *);
+int solve_dual(long,long,double,double,long,double *,double *,double *,
+	       double *,double *,double *,double *,double *,double *,
+	       double *,double *,double *,double *,long);
+
+void linvert_matrix(double *, long, double *, double, long *);
+void lprint_matrix(double *, long);
+void ladd_matrix(double *, long, double);
+void lcopy_matrix(double *, long, double *);
+void lswitch_rows_matrix(double *, long, long, long);
+void lswitchrk_matrix(double *, long, long, long);
+
+double calculate_qp_objective(long, double *, double *, double *);
+
+
+
+double *optimize_qp(qp,epsilon_crit,nx,threshold,learn_parm)
+QP *qp;
+double *epsilon_crit;
+long nx; /* Maximum number of variables in QP */
+double *threshold;
+LEARN_PARM *learn_parm;
+/* start the optimizer and return the optimal values */
+/* The HIDEO optimizer does not necessarily fully solve the problem. */
+/* Since it requires a strictly positive definite hessian, the solution */
+/* is restricted to a linear independent subset in case the matrix is */
+/* only semi-definite.
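+
+   (For orientation, an interpretive note: in the SVM case the
+   variables x are the alphas of the current working set, ce holds
+   the -1/+1 labels, the single equality constraint m=1 stems from a
+   biased hyperplane, and low/up are the box constraints on the
+   alphas; the difference of the two equality-constraint dual
+   variables is returned through *threshold as the bias b.)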
 */
+{
+  long i,j;
+  int result;
+  double eq,progress;
+
+  roundnumber++;
+
+  if(!primal) { /* allocate memory at first call */
+    primal=(double *)my_malloc(sizeof(double)*nx);
+    dual=(double *)my_malloc(sizeof(double)*((nx+1)*2));
+    nonoptimal=(long *)my_malloc(sizeof(long)*(nx));
+    buffer=(double *)my_malloc(sizeof(double)*((nx+1)*2*(nx+1)*2+
+					       nx*nx+2*(nx+1)*2+2*nx+1+2*nx+
+					       nx+nx+nx*nx));
+    (*threshold)=0;
+    for(i=0;i<nx;i++) {
+      primal[i]=0;
+    }
+  }
+
+  if(verbosity>=4) { /* really verbose */
+    printf("\n\n");
+    eq=qp->opt_ce0[0];
+    for(i=0;i<qp->opt_n;i++) {
+      eq+=qp->opt_xinit[i]*qp->opt_ce[i];
+      printf("%f: ",qp->opt_g0[i]);
+      for(j=0;j<qp->opt_n;j++) {
+	printf("%f ",qp->opt_g[i*qp->opt_n+j]);
+      }
+      printf(": a=%.10f < %f",qp->opt_xinit[i],qp->opt_up[i]);
+      printf(": y=%f\n",qp->opt_ce[i]);
+    }
+    if(qp->opt_m) {
+      printf("EQ: %f*x0",qp->opt_ce[0]);
+      for(i=1;i<qp->opt_n;i++) {
+	printf(" + %f*x%ld",qp->opt_ce[i],i);
+      }
+      printf(" = %f\n\n",-qp->opt_ce0[0]);
+    }
+  }
+
+  result=optimize_hildreth_despo(qp->opt_n,qp->opt_m,
+				 opt_precision,(*epsilon_crit),
+				 learn_parm->epsilon_a,maxiter,
+				 /* (long)PRIMAL_OPTIMAL, */
+				 (long)0, (long)0,
+				 lindep_sensitivity,
+				 qp->opt_g,qp->opt_g0,qp->opt_ce,qp->opt_ce0,
+				 qp->opt_low,qp->opt_up,primal,qp->opt_xinit,
+				 dual,nonoptimal,buffer,&progress);
+  if(verbosity>=3) {
+    printf("return(%d)...",result);
+  }
+
+  if(learn_parm->totwords < learn_parm->svm_maxqpsize) {
+    /* larger working sets will be linear dependent anyway */
+    learn_parm->svm_maxqpsize=maxl(learn_parm->totwords,(long)2);
+  }
+
+  if(result == NAN_SOLUTION) {
+    lindep_sensitivity*=2;  /* throw out linear dependent examples more */
+			    /* generously */
+    if(learn_parm->svm_maxqpsize>2) {
+      learn_parm->svm_maxqpsize--;  /* decrease size of qp-subproblems */
+    }
+    precision_violations++;
+  }
+
+  /* take one round of only two variable to get unstuck */
+  if((result != PRIMAL_OPTIMAL) || (!(roundnumber % 31)) || (progress <= 0)) {
+
+    smallroundcount++;
+
+    result=optimize_hildreth_despo(qp->opt_n,qp->opt_m,
+				   opt_precision,(*epsilon_crit),
+				   learn_parm->epsilon_a,(long)maxiter,
+				   (long)PRIMAL_OPTIMAL,(long)SMALLROUND,
+				   lindep_sensitivity,
+				   qp->opt_g,qp->opt_g0,qp->opt_ce,qp->opt_ce0,
+				   qp->opt_low,qp->opt_up,primal,qp->opt_xinit,
+				   dual,nonoptimal,buffer,&progress);
+    if(verbosity>=3) {
+      printf("return_srd(%d)...",result);
+    }
+
+    if(result != PRIMAL_OPTIMAL) {
+      if(result != ONLY_ONE_VARIABLE)
+	precision_violations++;
+      if(result == MAXITER_EXCEEDED)
+	maxiter+=100;
+      if(result == NAN_SOLUTION) {
+	lindep_sensitivity*=2;  /* throw out linear dependent examples more */
+				/* generously */
+	/* results not valid, so return inital values */
+	for(i=0;i<qp->opt_n;i++) {
+	  primal[i]=qp->opt_xinit[i];
+	}
+      }
+    }
+  }
+
+
+  if(precision_violations > 50) {
+    precision_violations=0;
+    (*epsilon_crit)*=10.0;
+    if(verbosity>=1) {
+      printf("\nWARNING: Relaxing epsilon on KT-Conditions (%f).\n",
+	     (*epsilon_crit));
+    }
+  }
+
+  if((qp->opt_m>0) && (result != NAN_SOLUTION) && (!isnan(dual[1]-dual[0])))
+    (*threshold)=dual[1]-dual[0];
+  else
+    (*threshold)=0;
+
+  if(verbosity>=4) { /* really verbose */
+    printf("\n\n");
+    eq=qp->opt_ce0[0];
+    for(i=0;i<qp->opt_n;i++) {
+      eq+=primal[i]*qp->opt_ce[i];
+      printf("%f: ",qp->opt_g0[i]);
+      for(j=0;j<qp->opt_n;j++) {
+	printf("%f ",qp->opt_g[i*qp->opt_n+j]);
+      }
+      printf(": a=%.30f",primal[i]);
+      printf(": nonopti=%ld",nonoptimal[i]);
+      printf(": y=%f\n",qp->opt_ce[i]);
+    }
+    printf("eq-constraint=%.30f\n",eq);
+    printf("b=%f\n",(*threshold));
+    printf(" smallroundcount=%ld ",smallroundcount);
+  }
+
+  return(primal);
+}
+
+
+int optimize_hildreth_despo(n,m,precision,epsilon_crit,epsilon_a,maxiter,goal,
+			    smallround,lindep_sensitivity,g,g0,ce,ce0,low,up,
+			    primal,init,dual,lin_dependent,buffer,progress)
+     long   n;                   /* number of variables */
+     long   m;                   /* number of linear equality constraints [0,1] */
+     double precision;           /* solve at least to this dual precision */
+     double epsilon_crit;        /* stop, if KT-Conditions approx fulfilled */
+     double epsilon_a;           /* precision of alphas at bounds */
+     long   maxiter;             /* stop after this many iterations */
+     long   goal;                /* keep going until goal fulfilled */
+     long   smallround;          /* use only two variables of steepest descent */
+     double lindep_sensitivity;  /* epsilon for detecting linear dependent ex */
+     double *g;                  /* hessian of objective */
+     double *g0;                 /* linear part of objective */
+     double *ce,*ce0;            /* linear equality constraints */
+     double *low,*up;            /* box constraints */
+     double *primal;             /* primal variables */
+     double *init;               /* initial values of primal */
+     double *dual;               /* dual variables */
+     long   *lin_dependent;
+     double *buffer;
+     double *progress;           /* delta in the objective function between
+				    before and after */
+{
+  long i,j,k,from,to,n_indep,changed;
+  double sum,bmin=0,bmax=0;
+  double *d,*d0,*ig,*dual_old,*temp,*start;
+  double *g0_new,*g_new,*ce_new,*ce0_new,*low_new,*up_new;
+  double add,t;
+  int result;
+  double obj_before,obj_after;
+  long b1,b2;
+  double g0_b1,g0_b2,ce0_b;
+
+  g0_new=&(buffer[0]);    /* claim regions of buffer */
+  d=&(buffer[n]);
+  d0=&(buffer[n+(n+m)*2*(n+m)*2]);
+  ce_new=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2]);
+  ce0_new=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n]);
+  ig=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m]);
+  dual_old=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m+n*n]);
+  low_new=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m+n*n+(n+m)*2]);
+  up_new=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m+n*n+(n+m)*2+n]);
+  start=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m+n*n+(n+m)*2+n+n]);
+  g_new=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m+n*n+(n+m)*2+n+n+n]);
+  temp=&(buffer[n+(n+m)*2*(n+m)*2+(n+m)*2+n+m+n*n+(n+m)*2+n+n+n+n*n]);
+
+  b1=-1;
+  b2=-1;
+  for(i=0;i<n;i++) {   /* get variables with steepest descent */
+    sum=g0[i];
+    for(j=0;j<n;j++)
+      sum+=init[j]*g[i*n+j];
+    sum=sum*ce[i];
+    if(((b1==-1) || (sum<bmin))
+       && (!((init[i]<=(low[i]+epsilon_a)) && (ce[i]<0.0)))
+       && (!((init[i]>=( up[i]-epsilon_a)) && (ce[i]>0.0)))
+       ) {
+      bmin=sum;
+      b1=i;
+    }
+    if(((b2==-1) || (sum>=bmax))
+       && (!((init[i]<=(low[i]+epsilon_a)) && (ce[i]>0.0)))
+       && (!((init[i]>=( up[i]-epsilon_a)) && (ce[i]<0.0)))
+       ) {
+      bmax=sum;
+      b2=i;
+    }
+  }
+  /* in case of unbiased hyperplane, the previous projection on */
+  /* equality constraint can lead to b1 or b2 being -1.
*/ + if((b1 == -1) || (b2 == -1)) { + b1=maxl(b1,b2); + b2=maxl(b1,b2); + } + + for(i=0;i g0_b2) { /* set b2 to upper bound */ + /* printf("case +=>\n"); */ + changed=1; + t=up[b2]-init[b2]; + if((init[b1]-low[b1]) < t) { + t=init[b1]-low[b1]; + } + start[b1]=init[b1]-t; + start[b2]=init[b2]+t; + } + } + else if(((g[b1*n+b1]>0) || (g[b2*n+b2]>0))) { /* (ce[b1] != ce[b2]) */ + /* printf("case +!\n"); */ + t=((ce[b2]/ce[b1])*g0[b1]-g0[b2]+ce0[0]*(g[b1*n+b1]*ce[b2]/ce[b1]-g[b1*n+b2]/ce[b1]))/((ce[b2]*ce[b2]/(ce[b1]*ce[b1]))*g[b1*n+b1]+g[b2*n+b2]-2*(g[b1*n+b2]*ce[b2]/ce[b1]))-init[b2]; + changed=1; + if((up[b2]-init[b2]) < t) { + t=up[b2]-init[b2]; + } + if((init[b2]-low[b2]) < -t) { + t=-(init[b2]-low[b2]); + } + if((up[b1]-init[b1]) < t) { + t=(up[b1]-init[b1]); + } + if((init[b1]-low[b1]) < -t) { + t=-(init[b1]-low[b1]); + } + start[b1]=init[b1]+t; + start[b2]=init[b2]+t; + } + } + if((-g[b1*n+b2] == g[b1*n+b1]) && (-g[b1*n+b2] == g[b2*n+b2])) { + /* printf("diffeuqal\n"); */ + if(ce[b1] != ce[b2]) { + if((g0_b1+g0_b2) < 0) { /* set b1 and b2 to upper bound */ + /* printf("case -!<\n"); */ + changed=1; + t=up[b1]-init[b1]; + if((up[b2]-init[b2]) < t) { + t=up[b2]-init[b2]; + } + start[b1]=init[b1]+t; + start[b2]=init[b2]+t; + } + else if((g0_b1+g0_b2) >= 0) { /* set b1 and b2 to lower bound */ + /* printf("case -!>\n"); */ + changed=1; + t=init[b1]-low[b1]; + if((init[b2]-low[b2]) < t) { + t=init[b2]-low[b2]; + } + start[b1]=init[b1]-t; + start[b2]=init[b2]-t; + } + } + else if(((g[b1*n+b1]>0) || (g[b2*n+b2]>0))) { /* (ce[b1]==ce[b2]) */ + /* printf("case -=\n"); */ + t=((ce[b2]/ce[b1])*g0[b1]-g0[b2]+ce0[0]*(g[b1*n+b1]*ce[b2]/ce[b1]-g[b1*n+b2]/ce[b1]))/((ce[b2]*ce[b2]/(ce[b1]*ce[b1]))*g[b1*n+b1]+g[b2*n+b2]-2*(g[b1*n+b2]*ce[b2]/ce[b1]))-init[b2]; + changed=1; + if((up[b2]-init[b2]) < t) { + t=up[b2]-init[b2]; + } + if((init[b2]-low[b2]) < -t) { + t=-(init[b2]-low[b2]); + } + if((up[b1]-init[b1]) < -t) { + t=-(up[b1]-init[b1]); + } + if((init[b1]-low[b1]) < t) { + t=init[b1]-low[b1]; + } + start[b1]=init[b1]-t; + start[b2]=init[b2]+t; + } + } + } + /* if we have a biased hyperplane, then adding a constant to the */ + /* hessian does not change the solution. So that is done for examples */ + /* with zero diagonal entry, since HIDEO cannot handle them. 
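+
+   (Sketch of why this is safe: with the equality constraint ce*x
+   fixed to a constant on the feasible set, replacing G by
+   G + add*ce*ce' changes the objective g0*x + 1/2 x'Gx only by the
+   constant add/2*(ce*x)^2, so the constrained minimizer is
+   unchanged.)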
*/ + if((m>0) + && ((fabs(g[b1*n+b1]) < lindep_sensitivity) + || (fabs(g[b2*n+b2]) < lindep_sensitivity))) { + /* printf("Case 0\n"); */ + add+=0.093274; + } + /* in case both examples are linear dependent */ + else if((m>0) + && (g[b1*n+b2] != 0 && g[b2*n+b2] != 0) + && (fabs(g[b1*n+b1]/g[b1*n+b2] - g[b1*n+b2]/g[b2*n+b2]) + < lindep_sensitivity)) { + /* printf("Case lindep\n"); */ + add+=0.078274; + } + + /* special case for zero diagonal entry on unbiased hyperplane */ + if((m==0) && (b1>=0)) { + if(fabs(g[b1*n+b1]) < lindep_sensitivity) { + /* printf("Case 0b1\n"); */ + for(i=0;i=0) + start[b1]=low[b1]; + } + } + if((m==0) && (b2>=0)) { + if(fabs(g[b2*n+b2]) < lindep_sensitivity) { + /* printf("Case 0b2\n"); */ + for(i=0;i=0) + start[b2]=low[b2]; + } + } + + /* printf("b1=%ld,b2=%ld\n",b1,b2); */ + + lcopy_matrix(g,n,d); + if((m==1) && (add>0.0)) { + for(j=0;j2) { /* switch, so that variables are better mixed */ + lswitchrk_matrix(d,n,b1,(long)0); + if(b2 == 0) + lswitchrk_matrix(d,n,b1,(long)1); + else + lswitchrk_matrix(d,n,b2,(long)1); + } + if(smallround == SMALLROUND) { + for(i=2;i0) { /* for biased hyperplane, pick two variables */ + lin_dependent[0]=0; + lin_dependent[1]=0; + } + else { /* for unbiased hyperplane, pick only one variable */ + lin_dependent[0]=smallroundcount % 2; + lin_dependent[1]=(smallroundcount+1) % 2; + } + } + else { + for(i=0;i2) { /* now switch back */ + if(b2 == 0) { + lswitchrk_matrix(ig,n,b1,(long)1); + i=lin_dependent[1]; + lin_dependent[1]=lin_dependent[b1]; + lin_dependent[b1]=i; + } + else { + lswitchrk_matrix(ig,n,b2,(long)1); + i=lin_dependent[1]; + lin_dependent[1]=lin_dependent[b2]; + lin_dependent[b2]=i; + } + lswitchrk_matrix(ig,n,b1,(long)0); + i=lin_dependent[0]; + lin_dependent[0]=lin_dependent[b1]; + lin_dependent[b1]=i; + } + /* lprint_matrix(d,n); */ + /* lprint_matrix(ig,n); */ + + lcopy_matrix(g,n,g_new); /* restore g_new matrix */ + if(add>0) + for(j=0;j0) ce0_new[0]=-ce0[0]; + for(i=0;i0) ce0_new[0]-=(start[i]*ce[i]); + } + } + from=0; /* remove linear dependent vectors */ + to=0; + n_indep=0; + for(i=0;i=3) { + printf("real_qp_size(%ld)...",n_indep); + } + + /* cannot optimize with only one variable */ + if((n_indep<=1) && (m>0) && (!changed)) { + for(i=n-1;i>=0;i--) { + primal[i]=init[i]; + } + return((int)ONLY_ONE_VARIABLE); + } + + if((!changed) || (n_indep>1)) { + result=solve_dual(n_indep,m,precision,epsilon_crit,maxiter,g_new,g0_new, + ce_new,ce0_new,low_new,up_new,primal,d,d0,ig, + dual,dual_old,temp,goal); + } + else { + result=PRIMAL_OPTIMAL; + } + + j=n_indep; + for(i=n-1;i>=0;i--) { + if(!lin_dependent[i]) { + j--; + primal[i]=primal[j]; + } + else { + primal[i]=start[i]; /* leave as is */ + } + temp[i]=primal[i]; + } + + obj_before=calculate_qp_objective(n,g,g0,init); + obj_after=calculate_qp_objective(n,g,g0,primal); + (*progress)=obj_before-obj_after; + if(verbosity>=3) { + printf("before(%.30f)...after(%.30f)...result_sd(%d)...", + obj_before,obj_after,result); + } + + return((int)result); +} + + +int solve_dual(n,m,precision,epsilon_crit,maxiter,g,g0,ce,ce0,low,up,primal, + d,d0,ig,dual,dual_old,temp,goal) + /* Solves the dual using the method of Hildreth and D'Espo. */ + /* Can only handle problems with zero or exactly one */ + /* equality constraints. 
*/ + + long n; /* number of variables */ + long m; /* number of linear equality constraints */ + double precision; /* solve at least to this dual precision */ + double epsilon_crit; /* stop, if KT-Conditions approx fulfilled */ + long maxiter; /* stop after that many iterations */ + double *g; + double *g0; /* linear part of objective */ + double *ce,*ce0; /* linear equality constraints */ + double *low,*up; /* box constraints */ + double *primal; /* variables (with initial values) */ + double *d,*d0,*ig,*dual,*dual_old,*temp; /* buffer */ + long goal; +{ + long i,j,k,iter; + double sum,w,maxviol,viol,temp1,temp2,isnantest; + double model_b,dist; + long retrain,maxfaktor,primal_optimal=0,at_bound,scalemaxiter; + double epsilon_a=1E-15,epsilon_hideo; + double eq; + + if((m<0) || (m>1)) + perror("SOLVE DUAL: inappropriate number of eq-constrains!"); + + /* + printf("\n"); + for(i=0;i0) { + sum=0; /* dual hessian for eq constraints */ + for(j=0;j0) { + sum=0; /* dual linear component for eq constraints */ + for(j=0;j 0) && (iter < (scalemaxiter*maxfaktor))) { + iter++; + + while((maxviol > precision) && (iter < (scalemaxiter*maxfaktor))) { + iter++; + maxviol=0; + for(i=0;i<2*(n+m);i++) { + sum=d0[i]; + for(j=0;j<2*(n+m);j++) { + sum+=d[i*2*(n+m)+j]*dual_old[j]; + } + sum-=d[i*2*(n+m)+i]*dual_old[i]; + dual[i]=-sum/d[i*2*(n+m)+i]; + if(dual[i]<0) dual[i]=0; + + viol=fabs(dual[i]-dual_old[i]); + if(viol>maxviol) + maxviol=viol; + dual_old[i]=dual[i]; + } + /* + printf("%d) maxviol=%20f precision=%f\n",iter,maxviol,precision); + */ + } + + if(m>0) { + for(i=0;i=(up[i])) { + primal[i]=up[i]; + } + } + + if(m>0) + model_b=dual[n+n+1]-dual[n+n]; + else + model_b=0; + + epsilon_hideo=EPSILON_HIDEO; + for(i=0;i(low[i]+epsilon_hideo)) &&(dist>(1.0+epsilon_crit))) { + epsilon_hideo=(primal[i]-low[i])*2.0; + } + } + /* printf("\nEPSILON_HIDEO=%.30f\n",epsilon_hideo); */ + + for(i=0;i=(up[i]-epsilon_hideo)) { + primal[i]=up[i]; + } + } + + retrain=0; + primal_optimal=1; + at_bound=0; + for(i=0;(i(low[i]+epsilon_a)) && (dist > (1.0+epsilon_crit))) { + retrain=1; + primal_optimal=0; + } + if((primal[i]<=(low[i]+epsilon_a)) || (primal[i]>=(up[i]-epsilon_a))) { + at_bound++; + } + /* printf("HIDEOtemp: a[%ld]=%.30f, dist=%.6f, b=%f, at_bound=%ld\n",i,primal[i],dist,model_b,at_bound); */ + } + if(m>0) { + eq=-ce0[0]; /* check precision of eq-constraint */ + for(i=0;i=(up[i]-epsilon_a)) { + primal[i]=up[i]; + } + } + } + + isnantest=0; + for(i=0;i0) { + temp1=dual[n+n+1]; /* copy the dual variables for the eq */ + temp2=dual[n+n]; /* constraints to a handier location */ + for(i=n+n+1;i>=2;i--) { + dual[i]=dual[i-2]; + } + dual[0]=temp2; + dual[1]=temp1; + isnantest+=temp1+temp2; + } + + if(isnan(isnantest)) { + return((int)NAN_SOLUTION); + } + else if(primal_optimal) { + return((int)PRIMAL_OPTIMAL); + } + else if(maxviol == 0.0) { + return((int)DUAL_OPTIMAL); + } + else { + return((int)MAXITER_EXCEEDED); + } +} + + +void linvert_matrix(matrix,depth,inverse,lindep_sensitivity,lin_dependent) +double *matrix; +long depth; +double *inverse,lindep_sensitivity; +long *lin_dependent; /* indicates the active parts of matrix on + input and output*/ +{ + long i,j,k; + double factor; + + for(i=0;i=0;i--) { + if(!lin_dependent[i]) { + factor=1/matrix[i*depth+i]; + for(k=0;k=0;j--) { + factor=matrix[j*depth+i]; + matrix[j*depth+i]=0; + for(k=0;ktotwords=totwords; + + /* make sure -n value is reasonable */ + if((learn_parm->svm_newvarsinqp < 2) + || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) { + 
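+    /* -n (svm_newvarsinqp) must lie in [2,svm_maxqpsize]: each QP
+       subproblem needs at least a pair of new variables and can never
+       use more than the -q working set size, so out-of-range values
+       fall back to svm_maxqpsize here. */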
learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; + } + + init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK); + + label = (long *)my_malloc(sizeof(long)*totdoc); + inconsistent = (long *)my_malloc(sizeof(long)*totdoc); + unlabeled = (long *)my_malloc(sizeof(long)*totdoc); + c = (double *)my_malloc(sizeof(double)*totdoc); + a = (double *)my_malloc(sizeof(double)*totdoc); + a_fullset = (double *)my_malloc(sizeof(double)*totdoc); + xi_fullset = (double *)my_malloc(sizeof(double)*totdoc); + lin = (double *)my_malloc(sizeof(double)*totdoc); + learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc); + model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2)); + model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2)); + model->index = (long *)my_malloc(sizeof(long)*(totdoc+2)); + + model->at_upper_bound=0; + model->b=0; + model->supvec[0]=0; /* element 0 reserved and empty for now */ + model->alpha[0]=0; + model->lin_weights=NULL; + model->totwords=totwords; + model->totdoc=totdoc; + model->kernel_parm=(*kernel_parm); + model->sv_num=1; + model->loo_error=-1; + model->loo_recall=-1; + model->loo_precision=-1; + model->xa_error=-1; + model->xa_recall=-1; + model->xa_precision=-1; + inconsistentnum=0; + transduction=0; + + r_delta=estimate_r_delta(docs,totdoc,kernel_parm); + r_delta_sq=r_delta*r_delta; + + r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm); + if(learn_parm->svm_c == 0.0) { /* default value for C */ + learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg); + if(verbosity>=1) + printf("Setting default regularization parameter C=%.4f\n", + learn_parm->svm_c); + } + + learn_parm->eps=-1.0; /* equivalent regression epsilon for + classification */ + + for(i=0;idocnum=i; + inconsistent[i]=0; + a[i]=0; + lin[i]=0; + c[i]=0.0; + unlabeled[i]=0; + if(class[i] == 0) { + unlabeled[i]=1; + label[i]=0; + transduction=1; + } + if(class[i] > 0) { + learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio* + docs[i]->costfactor; + label[i]=1; + trainpos++; + } + else if(class[i] < 0) { + learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor; + label[i]=-1; + trainneg++; + } + else { + learn_parm->svm_cost[i]=0; + } + } + if(verbosity>=2) { + printf("%ld positive, %ld negative, and %ld unlabeled examples.\n",trainpos,trainneg,totdoc-trainpos-trainneg); fflush(stdout); + } + + /* caching makes no sense for linear kernel */ + if(kernel_parm->kernel_type == LINEAR) { + kernel_cache = NULL; + } + + /* compute starting state for initial alpha values */ + if(alpha) { + if(verbosity>=1) { + printf("Computing starting state..."); fflush(stdout); + } + index = (long *)my_malloc(sizeof(long)*totdoc); + index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); + weights=(double *)my_malloc(sizeof(double)*(totwords+1)); + aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc); + for(i=0;ilearn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i]; + } + if(kernel_parm->kernel_type != LINEAR) { + for(i=0;i0) && (alpha[i]svm_cost[i]) + && (kernel_cache_space_available(kernel_cache))) + cache_kernel_row(kernel_cache,docs,i,kernel_parm); + for(i=0;isvm_cost[i]) + && (kernel_cache_space_available(kernel_cache))) + cache_kernel_row(kernel_cache,docs,i,kernel_parm); + } + (void)compute_index(index,totdoc,index2dnum); + update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc, + totwords,kernel_parm,kernel_cache,lin,aicache, + weights); + (void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c, + learn_parm,index2dnum,index2dnum,model); + for(i=0;i=1) { 
+ printf("done.\n"); fflush(stdout); + } + } + + if(transduction) { + learn_parm->svm_iter_to_shrink=99999999; + if(verbosity >= 1) + printf("\nDeactivating Shrinking due to an incompatibility with the transductive \nlearner in the current version.\n\n"); + } + + if(transduction && learn_parm->compute_loo) { + learn_parm->compute_loo=0; + if(verbosity >= 1) + printf("\nCannot compute leave-one-out estimates for transductive learner.\n\n"); + } + + if(learn_parm->remove_inconsistent && learn_parm->compute_loo) { + learn_parm->compute_loo=0; + printf("\nCannot compute leave-one-out estimates when removing inconsistent examples.\n\n"); + } + + if(learn_parm->compute_loo && ((trainpos == 1) || (trainneg == 1))) { + learn_parm->compute_loo=0; + printf("\nCannot compute leave-one-out with only one example in one class.\n\n"); + } + + + if(verbosity==1) { + printf("Optimizing"); fflush(stdout); + } + + /* train the svm */ + iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm, + kernel_parm,kernel_cache,&shrink_state,model, + inconsistent,unlabeled,a,lin, + c,&timing_profile, + &maxdiff,(long)-1, + (long)1); + + if(verbosity>=1) { + if(verbosity==1) printf("done. (%ld iterations)\n",iterations); + + misclassified=0; + for(i=0;(ib)*(double)label[i] <= 0.0) + misclassified++; + } + + printf("Optimization finished (%ld misclassified, maxdiff=%.5f).\n", + misclassified,maxdiff); + + runtime_end=get_runtime(); + if(verbosity>=2) { + printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n", + ((float)runtime_end-(float)runtime_start)/100.0, + (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start)); + } + else { + printf("Runtime in cpu-seconds: %.2f\n", + (runtime_end-runtime_start)/100.0); + } + + if(learn_parm->remove_inconsistent) { + inconsistentnum=0; + for(i=0;isv_num-1,inconsistentnum); + } + else { + upsupvecnum=0; + for(i=1;isv_num;i++) { + if(fabs(model->alpha[i]) >= + (learn_parm->svm_cost[(model->supvec[i])->docnum]- + learn_parm->epsilon_a)) + upsupvecnum++; + } + printf("Number of SV: %ld (including %ld at upper bound)\n", + model->sv_num-1,upsupvecnum); + } + + if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) { + loss=0; + model_length=0; + for(i=0;ib)*(double)label[i] < 1.0-learn_parm->epsilon_crit) + loss+=1.0-(lin[i]-model->b)*(double)label[i]; + model_length+=a[i]*label[i]*lin[i]; + } + model_length=sqrt(model_length); + fprintf(stdout,"L1 loss: loss=%.5f\n",loss); + fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length); + example_length=estimate_sphere(model,kernel_parm); + fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n", + length_of_longest_document_vector(docs,totdoc,kernel_parm)); + fprintf(stdout,"Estimated VCdim of classifier: VCdim<=%.5f\n", + estimate_margin_vcdim(model,model_length,example_length, + kernel_parm)); + if((!learn_parm->remove_inconsistent) && (!transduction)) { + runtime_start_xa=get_runtime(); + if(verbosity>=1) { + printf("Computing XiAlpha-estimates..."); 
fflush(stdout);
+      }
+      compute_xa_estimates(model,label,unlabeled,totdoc,docs,lin,a,
+			   kernel_parm,learn_parm,&(model->xa_error),
+			   &(model->xa_recall),&(model->xa_precision));
+      if(verbosity>=1) {
+	printf("done\n");
+      }
+      printf("Runtime for XiAlpha-estimates in cpu-seconds: %.2f\n",
+	     (get_runtime()-runtime_start_xa)/100.0);
+
+      fprintf(stdout,"XiAlpha-estimate of the error: error<=%.2f%% (rho=%.2f,depth=%ld)\n",
+	      model->xa_error,learn_parm->rho,learn_parm->xa_depth);
+      fprintf(stdout,"XiAlpha-estimate of the recall: recall=>%.2f%% (rho=%.2f,depth=%ld)\n",
+	      model->xa_recall,learn_parm->rho,learn_parm->xa_depth);
+      fprintf(stdout,"XiAlpha-estimate of the precision: precision=>%.2f%% (rho=%.2f,depth=%ld)\n",
+	      model->xa_precision,learn_parm->rho,learn_parm->xa_depth);
+    }
+    else if(!learn_parm->remove_inconsistent) {
+      estimate_transduction_quality(model,label,unlabeled,totdoc,docs,lin);
+    }
+  }
+  if(verbosity>=1) {
+    printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
+  }
+  }
+
+
+  /* leave-one-out testing starts now */
+  if(learn_parm->compute_loo) {
+    /* save results of training on full dataset for leave-one-out */
+    runtime_start_loo=get_runtime();
+    for(i=0;i<totdoc;i++) {
+      xi_fullset[i]=1.0-((lin[i]-model->b)*(double)label[i]);
+      if(xi_fullset[i]<0) xi_fullset[i]=0;
+      a_fullset[i]=a[i];
+    }
+    if(verbosity>=1) {
+      printf("Computing leave-one-out");
+    }
+
+    /* repeat this loop for every held-out example */
+    for(heldout=0;(heldout<totdoc);heldout++) {
+      if(learn_parm->rho*a_fullset[heldout]*r_delta_sq+xi_fullset[heldout]
+	 < 1.0) {
+	/* guaranteed to not produce a leave-one-out error */
+	if(verbosity==1) {
+	  printf("+"); fflush(stdout);
+	}
+      }
+      else if(xi_fullset[heldout] > 1.0) {
+	/* guaranteed to produce a leave-one-out error */
+	loo_count++;
+	if(label[heldout] > 0) loo_count_pos++; else loo_count_neg++;
+	if(verbosity==1) {
+	  printf("-"); fflush(stdout);
+	}
+      }
+      else {
+	loocomputed++;
+	heldout_c=learn_parm->svm_cost[heldout]; /* set upper bound to zero */
+	learn_parm->svm_cost[heldout]=0;
+	/* make sure heldout example is not currently */
+	/* shrunk away. Assumes that lin is up to date!
*/ + shrink_state.active[heldout]=1; + if(verbosity>=2) + printf("\nLeave-One-Out test on example %ld\n",heldout); + if(verbosity>=1) { + printf("(?[%ld]",heldout); fflush(stdout); + } + + optimize_to_convergence(docs,label,totdoc,totwords,learn_parm, + kernel_parm, + kernel_cache,&shrink_state,model,inconsistent,unlabeled, + a,lin,c,&timing_profile, + &maxdiff,heldout,(long)2); + + /* printf("%.20f\n",(lin[heldout]-model->b)*(double)label[heldout]); */ + + if(((lin[heldout]-model->b)*(double)label[heldout]) <= 0.0) { + loo_count++; /* there was a loo-error */ + if(label[heldout] > 0) loo_count_pos++; else loo_count_neg++; + if(verbosity>=1) { + printf("-)"); fflush(stdout); + } + } + else { + if(verbosity>=1) { + printf("+)"); fflush(stdout); + } + } + /* now we need to restore the original data set*/ + learn_parm->svm_cost[heldout]=heldout_c; /* restore upper bound */ + } + } /* end of leave-one-out loop */ + + + if(verbosity>=1) { + printf("\nRetrain on full problem"); fflush(stdout); + } + optimize_to_convergence(docs,label,totdoc,totwords,learn_parm, + kernel_parm, + kernel_cache,&shrink_state,model,inconsistent,unlabeled, + a,lin,c,&timing_profile, + &maxdiff,(long)-1,(long)1); + if(verbosity >= 1) + printf("done.\n"); + + + /* after all leave-one-out computed */ + model->loo_error=100.0*loo_count/(double)totdoc; + model->loo_recall=(1.0-(double)loo_count_pos/(double)trainpos)*100.0; + model->loo_precision=(trainpos-loo_count_pos)/ + (double)(trainpos-loo_count_pos+loo_count_neg)*100.0; + if(verbosity >= 1) { + fprintf(stdout,"Leave-one-out estimate of the error: error=%.2f%%\n", + model->loo_error); + fprintf(stdout,"Leave-one-out estimate of the recall: recall=%.2f%%\n", + model->loo_recall); + fprintf(stdout,"Leave-one-out estimate of the precision: precision=%.2f%%\n", + model->loo_precision); + fprintf(stdout,"Actual leave-one-outs computed: %ld (rho=%.2f)\n", + loocomputed,learn_parm->rho); + printf("Runtime for leave-one-out in cpu-seconds: %.2f\n", + (double)(get_runtime()-runtime_start_loo)/100.0); + } + } + + if(learn_parm->alphafile[0]) + write_alphas(learn_parm->alphafile,a,label,totdoc); + + shrink_state_cleanup(&shrink_state); + free(label); + free(inconsistent); + free(unlabeled); + free(c); + free(a); + free(a_fullset); + free(xi_fullset); + free(lin); + free(learn_parm->svm_cost); +} + + +/* Learns an SVM regression model based on the training data in + docs/label. The resulting model is returned in the structure + model. */ + +void svm_learn_regression(DOC **docs, double *value, long int totdoc, + long int totwords, LEARN_PARM *learn_parm, + KERNEL_PARM *kernel_parm, + KERNEL_CACHE **kernel_cache, MODEL *model) + /* docs: Training vectors (x-part) */ + /* class: Training value (y-part) */ + /* totdoc: Number of examples in docs/label */ + /* totwords: Number of features (i.e. highest feature index) */ + /* learn_parm: Learning paramenters */ + /* kernel_parm: Kernel paramenters */ + /* kernel_cache:Initialized Cache, if using a kernel. NULL if + linear. 
Note that it will be free'd and reassigned */ + /* model: Returns learning result (assumed empty before called) */ +{ + long *inconsistent,i,j; + long inconsistentnum; + long upsupvecnum; + double loss,model_length,example_length; + double maxdiff,*lin,*a,*c; + long runtime_start,runtime_end; + long iterations,kernel_cache_size; + long *unlabeled; + double r_delta_sq=0,r_delta,r_delta_avg; + double *xi_fullset; /* buffer for storing xi on full sample in loo */ + double *a_fullset; /* buffer for storing alpha on full sample in loo */ + TIMING timing_profile; + SHRINK_STATE shrink_state; + DOC **docs_org; + long *label; + + /* set up regression problem in standard form */ + docs_org=docs; + docs = (DOC **)my_malloc(sizeof(DOC)*2*totdoc); + label = (long *)my_malloc(sizeof(long)*2*totdoc); + c = (double *)my_malloc(sizeof(double)*2*totdoc); + for(i=0;icostfactor,docs_org[i]->fvec); + label[i]=+1; + c[i]=value[i]; + docs[j]=create_example(j,0,0,docs_org[i]->costfactor,docs_org[i]->fvec); + label[j]=-1; + c[j]=value[i]; + } + totdoc*=2; + + /* need to get a bigger kernel cache */ + if(*kernel_cache) { + kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024); + kernel_cache_cleanup(*kernel_cache); + (*kernel_cache)=kernel_cache_init(totdoc,kernel_cache_size); + } + + runtime_start=get_runtime(); + timing_profile.time_kernel=0; + timing_profile.time_opti=0; + timing_profile.time_shrink=0; + timing_profile.time_update=0; + timing_profile.time_model=0; + timing_profile.time_check=0; + timing_profile.time_select=0; + kernel_cache_statistic=0; + + learn_parm->totwords=totwords; + + /* make sure -n value is reasonable */ + if((learn_parm->svm_newvarsinqp < 2) + || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) { + learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; + } + + init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK); + + inconsistent = (long *)my_malloc(sizeof(long)*totdoc); + unlabeled = (long *)my_malloc(sizeof(long)*totdoc); + a = (double *)my_malloc(sizeof(double)*totdoc); + a_fullset = (double *)my_malloc(sizeof(double)*totdoc); + xi_fullset = (double *)my_malloc(sizeof(double)*totdoc); + lin = (double *)my_malloc(sizeof(double)*totdoc); + learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc); + model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2)); + model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2)); + model->index = (long *)my_malloc(sizeof(long)*(totdoc+2)); + + model->at_upper_bound=0; + model->b=0; + model->supvec[0]=0; /* element 0 reserved and empty for now */ + model->alpha[0]=0; + model->lin_weights=NULL; + model->totwords=totwords; + model->totdoc=totdoc; + model->kernel_parm=(*kernel_parm); + model->sv_num=1; + model->loo_error=-1; + model->loo_recall=-1; + model->loo_precision=-1; + model->xa_error=-1; + model->xa_recall=-1; + model->xa_precision=-1; + inconsistentnum=0; + + r_delta=estimate_r_delta(docs,totdoc,kernel_parm); + r_delta_sq=r_delta*r_delta; + + r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm); + if(learn_parm->svm_c == 0.0) { /* default value for C */ + learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg); + if(verbosity>=1) + printf("Setting default regularization parameter C=%.4f\n", + learn_parm->svm_c); + } + + for(i=0;i 0) { + learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio* + docs[i]->costfactor; + } + else if(label[i] < 0) { + learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor; + } + } + + /* caching makes no sense for linear kernel */ + 
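+  /* For the linear kernel a dot product is cheaper than the cache
+     bookkeeping, so a supplied cache only adds overhead; unlike the
+     classification learner above, which silently drops the cache,
+     this code just warns. */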
if((kernel_parm->kernel_type == LINEAR) && (*kernel_cache)) { + printf("WARNING: Using a kernel cache for linear case will slow optimization down!\n"); + } + + if(verbosity==1) { + printf("Optimizing"); fflush(stdout); + } + + /* train the svm */ + iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm, + kernel_parm,*kernel_cache,&shrink_state, + model,inconsistent,unlabeled,a,lin,c, + &timing_profile,&maxdiff,(long)-1, + (long)1); + + if(verbosity>=1) { + if(verbosity==1) printf("done. (%ld iterations)\n",iterations); + + printf("Optimization finished (maxdiff=%.5f).\n",maxdiff); + + runtime_end=get_runtime(); + if(verbosity>=2) { + printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n", + ((float)runtime_end-(float)runtime_start)/100.0, + (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start)); + } + else { + printf("Runtime in cpu-seconds: %.2f\n", + (runtime_end-runtime_start)/100.0); + } + + if(learn_parm->remove_inconsistent) { + inconsistentnum=0; + for(i=0;isv_num-1,inconsistentnum); + } + else { + upsupvecnum=0; + for(i=1;isv_num;i++) { + if(fabs(model->alpha[i]) >= + (learn_parm->svm_cost[(model->supvec[i])->docnum]- + learn_parm->epsilon_a)) + upsupvecnum++; + } + printf("Number of SV: %ld (including %ld at upper bound)\n", + model->sv_num-1,upsupvecnum); + } + + if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) { + loss=0; + model_length=0; + for(i=0;ib)*(double)label[i] < (-learn_parm->eps+(double)label[i]*c[i])-learn_parm->epsilon_crit) + loss+=-learn_parm->eps+(double)label[i]*c[i]-(lin[i]-model->b)*(double)label[i]; + model_length+=a[i]*label[i]*lin[i]; + } + model_length=sqrt(model_length); + fprintf(stdout,"L1 loss: loss=%.5f\n",loss); + fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length); + example_length=estimate_sphere(model,kernel_parm); + fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n", + length_of_longest_document_vector(docs,totdoc,kernel_parm)); + } + if(verbosity>=1) { + printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic); + } + } + + if(learn_parm->alphafile[0]) + write_alphas(learn_parm->alphafile,a,label,totdoc); + + /* this makes sure the model we return does not contain pointers to the + temporary documents */ + for(i=1;isv_num;i++) { + j=model->supvec[i]->docnum; + if(j >= (totdoc/2)) { + j=totdoc-j-1; + } + model->supvec[i]=docs_org[j]; + } + + shrink_state_cleanup(&shrink_state); + for(i=0;isvm_cost); +} + +void svm_learn_ranking(DOC **docs, double *rankvalue, long int totdoc, + long int totwords, LEARN_PARM *learn_parm, + KERNEL_PARM *kernel_parm, KERNEL_CACHE **kernel_cache, + MODEL *model) + /* docs: Training vectors (x-part) */ + /* rankvalue: Training target values that determine the ranking */ + /* totdoc: Number of examples in docs/label */ + /* totwords: Number of features (i.e. 
highest feature index) */ + /* learn_parm: Learning paramenters */ + /* kernel_parm: Kernel paramenters */ + /* kernel_cache:Initialized pointer to Cache of size 1*totdoc, if + using a kernel. NULL if linear. NOTE: Cache is + getting reinitialized in this function */ + /* model: Returns learning result (assumed empty before called) */ +{ + DOC **docdiff; + long i,j,k,totpair,kernel_cache_size; + double *target,*alpha,cost; + long *greater,*lesser; + MODEL *pairmodel; + SVECTOR *flow,*fhigh; + + totpair=0; + for(i=0;iqueryid==docs[j]->queryid) && (rankvalue[i] != rankvalue[j])) { + totpair++; + } + } + } + + printf("Constructing %ld rank constraints...",totpair); fflush(stdout); + docdiff=(DOC **)my_malloc(sizeof(DOC)*totpair); + target=(double *)my_malloc(sizeof(double)*totpair); + greater=(long *)my_malloc(sizeof(long)*totpair); + lesser=(long *)my_malloc(sizeof(long)*totpair); + + k=0; + for(i=0;iqueryid == docs[j]->queryid) { + cost=(docs[i]->costfactor+docs[j]->costfactor)/2.0; + if(rankvalue[i] > rankvalue[j]) { + if(kernel_parm->kernel_type == LINEAR) + docdiff[k]=create_example(k,0,0,cost, + sub_ss(docs[i]->fvec,docs[j]->fvec)); + else { + flow=copy_svector(docs[j]->fvec); + flow->factor=-1.0; + flow->next=NULL; + fhigh=copy_svector(docs[i]->fvec); + fhigh->factor=1.0; + fhigh->next=flow; + docdiff[k]=create_example(k,0,0,cost,fhigh); + } + target[k]=1; + greater[k]=i; + lesser[k]=j; + k++; + } + else if(rankvalue[i] < rankvalue[j]) { + if(kernel_parm->kernel_type == LINEAR) + docdiff[k]=create_example(k,0,0,cost, + sub_ss(docs[i]->fvec,docs[j]->fvec)); + else { + flow=copy_svector(docs[j]->fvec); + flow->factor=-1.0; + flow->next=NULL; + fhigh=copy_svector(docs[i]->fvec); + fhigh->factor=1.0; + fhigh->next=flow; + docdiff[k]=create_example(k,0,0,cost,fhigh); + } + target[k]=-1; + greater[k]=i; + lesser[k]=j; + k++; + } + } + } + } + printf("done.\n"); fflush(stdout); + + /* need to get a bigger kernel cache */ + if(*kernel_cache) { + kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024); + kernel_cache_cleanup(*kernel_cache); + (*kernel_cache)=kernel_cache_init(totpair,kernel_cache_size); + } + + /* must use unbiased hyperplane on difference vectors */ + learn_parm->biased_hyperplane=0; + pairmodel=(MODEL *)my_malloc(sizeof(MODEL)); + svm_learn_classification(docdiff,target,totpair,totwords,learn_parm, + kernel_parm,(*kernel_cache),pairmodel,NULL); + + /* Transfer the result into a more compact model. If you would like + to output the original model on pairs of documents, see below. 
*/ + alpha=(double *)my_malloc(sizeof(double)*totdoc); + for(i=0;isv_num;i++) { + alpha[lesser[(pairmodel->supvec[i])->docnum]]-=pairmodel->alpha[i]; + alpha[greater[(pairmodel->supvec[i])->docnum]]+=pairmodel->alpha[i]; + } + model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2)); + model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2)); + model->index = (long *)my_malloc(sizeof(long)*(totdoc+2)); + model->supvec[0]=0; /* element 0 reserved and empty for now */ + model->alpha[0]=0; + model->sv_num=1; + for(i=0;isupvec[model->sv_num]=docs[i]; + model->alpha[model->sv_num]=alpha[i]; + model->index[i]=model->sv_num; + model->sv_num++; + } + else { + model->index[i]=-1; + } + } + model->at_upper_bound=0; + model->b=0; + model->lin_weights=NULL; + model->totwords=totwords; + model->totdoc=totdoc; + model->kernel_parm=(*kernel_parm); + model->loo_error=-1; + model->loo_recall=-1; + model->loo_precision=-1; + model->xa_error=-1; + model->xa_recall=-1; + model->xa_precision=-1; + + free(alpha); + free(greater); + free(lesser); + free(target); + + /* If you would like to output the original model on pairs of + document, replace the following lines with '(*model)=(*pairmodel);' */ + for(i=0;i rhs_i - \xi_i + + This corresponds to the -z o option. */ + +void svm_learn_optimization(DOC **docs, double *rhs, long int + totdoc, long int totwords, + LEARN_PARM *learn_parm, + KERNEL_PARM *kernel_parm, + KERNEL_CACHE *kernel_cache, MODEL *model, + double *alpha) + /* docs: Left-hand side of inequalities (x-part) */ + /* rhs: Right-hand side of inequalities */ + /* totdoc: Number of examples in docs/label */ + /* totwords: Number of features (i.e. highest feature index) */ + /* learn_parm: Learning paramenters */ + /* kernel_parm: Kernel paramenters */ + /* kernel_cache:Initialized Cache of size 1*totdoc, if using a kernel. + NULL if linear.*/ + /* model: Returns solution as SV expansion (assumed empty before called) */ + /* alpha: Start values for the alpha variables or NULL + pointer. The new alpha values are returned after + optimization if not NULL. Array must be of size totdoc. 
*/ +{ + long i,*label; + long misclassified,upsupvecnum; + double loss,model_length,example_length; + double maxdiff,*lin,*a,*c; + long runtime_start,runtime_end; + long iterations,maxslackid,svsetnum; + long *unlabeled,*inconsistent; + double r_delta_sq=0,r_delta,r_delta_avg; + long *index,*index2dnum; + double *weights,*slack,*alphaslack; + CFLOAT *aicache; /* buffer to keep one row of hessian */ + + TIMING timing_profile; + SHRINK_STATE shrink_state; + + runtime_start=get_runtime(); + timing_profile.time_kernel=0; + timing_profile.time_opti=0; + timing_profile.time_shrink=0; + timing_profile.time_update=0; + timing_profile.time_model=0; + timing_profile.time_check=0; + timing_profile.time_select=0; + kernel_cache_statistic=0; + + learn_parm->totwords=totwords; + + /* make sure -n value is reasonable */ + if((learn_parm->svm_newvarsinqp < 2) + || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) { + learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; + } + + init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK); + + label = (long *)my_malloc(sizeof(long)*totdoc); + unlabeled = (long *)my_malloc(sizeof(long)*totdoc); + inconsistent = (long *)my_malloc(sizeof(long)*totdoc); + c = (double *)my_malloc(sizeof(double)*totdoc); + a = (double *)my_malloc(sizeof(double)*totdoc); + lin = (double *)my_malloc(sizeof(double)*totdoc); + learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc); + model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2)); + model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2)); + model->index = (long *)my_malloc(sizeof(long)*(totdoc+2)); + + model->at_upper_bound=0; + model->b=0; + model->supvec[0]=0; /* element 0 reserved and empty for now */ + model->alpha[0]=0; + model->lin_weights=NULL; + model->totwords=totwords; + model->totdoc=totdoc; + model->kernel_parm=(*kernel_parm); + model->sv_num=1; + model->loo_error=-1; + model->loo_recall=-1; + model->loo_precision=-1; + model->xa_error=-1; + model->xa_recall=-1; + model->xa_precision=-1; + + r_delta=estimate_r_delta(docs,totdoc,kernel_parm); + r_delta_sq=r_delta*r_delta; + + r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm); + if(learn_parm->svm_c == 0.0) { /* default value for C */ + learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg); + if(verbosity>=1) + printf("Setting default regularization parameter C=%.4f\n", + learn_parm->svm_c); + } + + learn_parm->biased_hyperplane=0; /* learn an unbiased hyperplane */ + + learn_parm->eps=0.0; /* No margin, unless explicitly handcoded + in the right-hand side in the training + set. 
*/ + + for(i=0;idocnum=i; + a[i]=0; + lin[i]=0; + c[i]=rhs[i]; /* set right-hand side */ + unlabeled[i]=0; + inconsistent[i]=0; + learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio* + docs[i]->costfactor; + label[i]=1; + } + if(learn_parm->sharedslack) /* if shared slacks are used, they must */ + for(i=0;islackid) { + perror("Error: Missing shared slacks definitions in some of the examples."); + exit(0); + } + + /* compute starting state for initial alpha values */ + if(alpha) { + if(verbosity>=1) { + printf("Computing starting state..."); fflush(stdout); + } + index = (long *)my_malloc(sizeof(long)*totdoc); + index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); + weights=(double *)my_malloc(sizeof(double)*(totwords+1)); + aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc); + for(i=0;ilearn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i]; + } + if(kernel_parm->kernel_type != LINEAR) { + for(i=0;i0) && (alpha[i]svm_cost[i]) + && (kernel_cache_space_available(kernel_cache))) + cache_kernel_row(kernel_cache,docs,i,kernel_parm); + for(i=0;isvm_cost[i]) + && (kernel_cache_space_available(kernel_cache))) + cache_kernel_row(kernel_cache,docs,i,kernel_parm); + } + (void)compute_index(index,totdoc,index2dnum); + update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc, + totwords,kernel_parm,kernel_cache,lin,aicache, + weights); + (void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c, + learn_parm,index2dnum,index2dnum,model); + for(i=0;i=1) { + printf("done.\n"); fflush(stdout); + } + } + + /* removing inconsistent does not work for general optimization problem */ + if(learn_parm->remove_inconsistent) { + learn_parm->remove_inconsistent = 0; + printf("'remove inconsistent' not available in this mode. Switching option off!"); fflush(stdout); + } + + /* caching makes no sense for linear kernel */ + if(kernel_parm->kernel_type == LINEAR) { + kernel_cache = NULL; + } + + if(verbosity==1) { + printf("Optimizing"); fflush(stdout); + } + + /* train the svm */ + if(learn_parm->sharedslack) + iterations=optimize_to_convergence_sharedslack(docs,label,totdoc, + totwords,learn_parm,kernel_parm, + kernel_cache,&shrink_state,model, + a,lin,c,&timing_profile, + &maxdiff); + else + iterations=optimize_to_convergence(docs,label,totdoc, + totwords,learn_parm,kernel_parm, + kernel_cache,&shrink_state,model, + inconsistent,unlabeled, + a,lin,c,&timing_profile, + &maxdiff,(long)-1,(long)1); + + if(verbosity>=1) { + if(verbosity==1) printf("done. 
(%ld iterations)\n",iterations); + + misclassified=0; + for(i=0;(ib)*(double)label[i] <= 0.0) + misclassified++; + } + + printf("Optimization finished (maxdiff=%.5f).\n",maxdiff); + + runtime_end=get_runtime(); + if(verbosity>=2) { + printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n", + ((float)runtime_end-(float)runtime_start)/100.0, + (100.0*timing_profile.time_kernel)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_opti)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_shrink)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_update)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_model)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_check)/(float)(runtime_end-runtime_start), + (100.0*timing_profile.time_select)/(float)(runtime_end-runtime_start)); + } + else { + printf("Runtime in cpu-seconds: %.2f\n", + (runtime_end-runtime_start)/100.0); + } + } + if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) { + loss=0; + model_length=0; + for(i=0;ib)*(double)label[i] < c[i]-learn_parm->epsilon_crit) + loss+=c[i]-(lin[i]-model->b)*(double)label[i]; + model_length+=a[i]*label[i]*lin[i]; + } + model_length=sqrt(model_length); + fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length); + } + + if(learn_parm->sharedslack) { + index = (long *)my_malloc(sizeof(long)*totdoc); + index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); + maxslackid=0; + for(i=0;islackid) + maxslackid=docs[i]->slackid; + } + (void)compute_index(index,totdoc,index2dnum); + slack=(double *)my_malloc(sizeof(double)*(maxslackid+1)); + alphaslack=(double *)my_malloc(sizeof(double)*(maxslackid+1)); + for(i=0;i<=maxslackid;i++) { /* init shared slacks */ + slack[i]=0; + alphaslack[i]=0; + } + compute_shared_slacks(docs,label,a,lin,c,index2dnum,learn_parm, + slack,alphaslack); + loss=0; + model->at_upper_bound=0; + svsetnum=0; + for(i=0;i<=maxslackid;i++) { /* create full index */ + loss+=slack[i]; + if(alphaslack[i] > (learn_parm->svm_c - learn_parm->epsilon_a)) + model->at_upper_bound++; + if(alphaslack[i] > learn_parm->epsilon_a) + svsetnum++; + } + free(index); + free(index2dnum); + free(slack); + free(alphaslack); + } + + if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) { + if(learn_parm->sharedslack) { + printf("Number of SV: %ld\n", + model->sv_num-1); + printf("Number of non-zero slack variables: %ld (out of %ld)\n", + model->at_upper_bound,svsetnum); + fprintf(stdout,"L1 loss: loss=%.5f\n",loss); + } + else { + upsupvecnum=0; + for(i=1;isv_num;i++) { + if(fabs(model->alpha[i]) >= + (learn_parm->svm_cost[(model->supvec[i])->docnum]- + learn_parm->epsilon_a)) + upsupvecnum++; + } + printf("Number of SV: %ld (including %ld at upper bound)\n", + model->sv_num-1,upsupvecnum); + fprintf(stdout,"L1 loss: loss=%.5f\n",loss); + } + example_length=estimate_sphere(model,kernel_parm); + fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n", + length_of_longest_document_vector(docs,totdoc,kernel_parm)); + } + if(verbosity>=1) { + printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic); + } + + if(alpha) { + for(i=0;ialphafile[0]) + write_alphas(learn_parm->alphafile,a,label,totdoc); + + shrink_state_cleanup(&shrink_state); + free(label); + free(unlabeled); + free(inconsistent); + free(c); + free(a); + free(lin); + free(learn_parm->svm_cost); +} + + +long optimize_to_convergence(DOC **docs, 
long int *label, long int totdoc,
+			     long int totwords, LEARN_PARM *learn_parm,
+			     KERNEL_PARM *kernel_parm,
+			     KERNEL_CACHE *kernel_cache,
+			     SHRINK_STATE *shrink_state, MODEL *model,
+			     long int *inconsistent, long int *unlabeled,
+			     double *a, double *lin, double *c,
+			     TIMING *timing_profile, double *maxdiff,
+			     long int heldout, long int retrain)
+     /* docs: Training vectors (x-part) */
+     /* label: Training labels/value (y-part, zero if test example for
+	       transduction) */
+     /* totdoc: Number of examples in docs/label */
+     /* totwords: Number of features (i.e. highest feature index) */
+     /* learn_parm: Learning parameters */
+     /* kernel_parm: Kernel parameters */
+     /* kernel_cache: Initialized/partly filled Cache, if using a kernel.
+                      NULL if linear. */
+     /* shrink_state: State of active variables */
+     /* model: Returns learning result */
+     /* inconsistent: examples thrown out as inconsistent */
+     /* unlabeled: test examples for transduction */
+     /* a: alphas */
+     /* lin: linear component of gradient */
+     /* c: right hand side of inequalities (margin) */
+     /* maxdiff: returns maximum violation of KT-conditions */
+     /* heldout: marks held-out example for leave-one-out (or -1) */
+     /* retrain: selects training mode (1=regular / 2=holdout) */
+{
+  long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink;
+  long inconsistentnum,choosenum,already_chosen=0,iteration;
+  long misclassified,supvecnum=0,*active2dnum,inactivenum;
+  long *working2dnum,*selexam;
+  long activenum;
+  double criterion,eq;
+  double *a_old;
+  long t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */
+  long transductcycle;
+  long transduction;
+  double epsilon_crit_org;
+  double bestmaxdiff;
+  long bestmaxdiffiter,terminate;
+
+  double *selcrit;  /* buffer for sorting */
+  CFLOAT *aicache;  /* buffer to keep one row of hessian */
+  double *weights;  /* buffer for weight vector in linear case */
+  QP qp;            /* buffer for one quadratic program */
+
+  epsilon_crit_org=learn_parm->epsilon_crit; /* save org */
+  if(kernel_parm->kernel_type == LINEAR) {
+    learn_parm->epsilon_crit=2.0;
+    kernel_cache=NULL;   /* caching makes no sense for linear kernel */
+  }
+  learn_parm->epsilon_shrink=2;
+  (*maxdiff)=1;
+
+  learn_parm->totwords=totwords;
+
+  chosen = (long *)my_malloc(sizeof(long)*totdoc);
+  last_suboptimal_at = (long *)my_malloc(sizeof(long)*totdoc);
+  key = (long *)my_malloc(sizeof(long)*(totdoc+11));
+  selcrit = (double *)my_malloc(sizeof(double)*totdoc);
+  selexam = (long *)my_malloc(sizeof(long)*totdoc);
+  a_old = (double *)my_malloc(sizeof(double)*totdoc);
+  aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
+  working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
+  active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
+  qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
+  qp.opt_ce0 = (double *)my_malloc(sizeof(double));
+  qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize
+				 *learn_parm->svm_maxqpsize);
+  qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
+  qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
+  qp.opt_low=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
+  qp.opt_up=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
+  weights=(double *)my_malloc(sizeof(double)*(totwords+1));
+
+  choosenum=0;
+  inconsistentnum=0;
+  transductcycle=0;
+  transduction=0;
+  if(!retrain) retrain=1;
+  iteration=1;
+  bestmaxdiffiter=1;
+  bestmaxdiff=999999999;
+  terminate=0;
+
+  if(kernel_cache) {
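+    /* The kernel cache evicts least-recently-used rows; stamping it
+       with the iteration counter (here and at the top of every
+       iteration below) is what drives that LRU ordering. */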
kernel_cache->time=iteration; /* for lru cache */ + kernel_cache_reset_lru(kernel_cache); + } + + for(i=0;iactive,totdoc,active2dnum); + inactivenum=totdoc-activenum; + clear_index(working2dnum); + + /* repeat this loop until we have convergence */ + for(;retrain && (!terminate);iteration++) { + + if(kernel_cache) + kernel_cache->time=iteration; /* for lru cache */ + if(verbosity>=2) { + printf( + "Iteration %ld: ",iteration); fflush(stdout); + } + else if(verbosity==1) { + printf("."); fflush(stdout); + } + + if(verbosity>=2) t0=get_runtime(); + if(verbosity>=3) { + printf("\nSelecting working set... "); fflush(stdout); + } + + if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize) + learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; + + i=0; + for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */ + if((chosen[j]>=(learn_parm->svm_maxqpsize/ + minl(learn_parm->svm_maxqpsize, + learn_parm->svm_newvarsinqp))) + || (inconsistent[j]) + || (j == heldout)) { + chosen[j]=0; + choosenum--; + } + else { + chosen[j]++; + working2dnum[i++]=j; + } + } + working2dnum[i]=-1; + + if(retrain == 2) { + choosenum=0; + for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* fully clear working set */ + chosen[j]=0; + } + clear_index(working2dnum); + for(i=0;ibiased_hyperplane) { + eq=0; + for(i=0;i learn_parm->epsilon_a);i++) { + if((eq*label[i] > 0) && (a[i] > 0)) { + chosen[i]=88888; + choosenum++; + if((eq*label[i]) > a[i]) { + eq-=(a[i]*label[i]); + a[i]=0; + } + else { + a[i]-=(eq*label[i]); + eq=0; + } + } + } + } + compute_index(chosen,totdoc,working2dnum); + } + else { /* select working set according to steepest gradient */ + if(iteration % 101) { + already_chosen=0; + if((minl(learn_parm->svm_newvarsinqp, + learn_parm->svm_maxqpsize-choosenum)>=4) + && (kernel_parm->kernel_type != LINEAR)) { + /* select part of the working set from cache */ + already_chosen=select_next_qp_subproblem_grad( + label,unlabeled,a,lin,c,totdoc, + (long)(minl(learn_parm->svm_maxqpsize-choosenum, + learn_parm->svm_newvarsinqp) + /2), + learn_parm,inconsistent,active2dnum, + working2dnum,selcrit,selexam,kernel_cache,1, + key,chosen); + choosenum+=already_chosen; + } + choosenum+=select_next_qp_subproblem_grad( + label,unlabeled,a,lin,c,totdoc, + minl(learn_parm->svm_maxqpsize-choosenum, + learn_parm->svm_newvarsinqp-already_chosen), + learn_parm,inconsistent,active2dnum, + working2dnum,selcrit,selexam,kernel_cache,0,key, + chosen); + } + else { /* once in a while, select a somewhat random working set + to get unlocked of infinite loops due to numerical + inaccuracies in the core qp-solver */ + choosenum+=select_next_qp_subproblem_rand( + label,unlabeled,a,lin,c,totdoc, + minl(learn_parm->svm_maxqpsize-choosenum, + learn_parm->svm_newvarsinqp), + learn_parm,inconsistent,active2dnum, + working2dnum,selcrit,selexam,kernel_cache,key, + chosen,iteration); + } + } + + if(verbosity>=2) { + printf(" %ld vectors chosen\n",choosenum); fflush(stdout); + } + + if(verbosity>=2) t1=get_runtime(); + + if(kernel_cache) + cache_multiple_kernel_rows(kernel_cache,docs,working2dnum, + choosenum,kernel_parm); + + if(verbosity>=2) t2=get_runtime(); + if(retrain != 2) { + optimize_svm(docs,label,unlabeled,inconsistent,0.0,chosen,active2dnum, + model,totdoc,working2dnum,choosenum,a,lin,c,learn_parm, + aicache,kernel_parm,&qp,&epsilon_crit_org); + } + + if(verbosity>=2) t3=get_runtime(); + update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc, + totwords,kernel_parm,kernel_cache,lin,aicache, + weights); + + 
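+    /* lin[i] caches the gradient's linear component
+       sum_j a[j]*label[j]*K(x_i,x_j). Only the working-set alphas
+       changed in this QP step, so the call above folds in the deltas
+       (a[j]-a_old[j]) for j in working2dnum instead of recomputing
+       the full sums (in the linear case it updates the explicit
+       weight vector instead). */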
if(verbosity>=2) t4=get_runtime(); + supvecnum=calculate_svm_model(docs,label,unlabeled,lin,a,a_old,c, + learn_parm,working2dnum,active2dnum,model); + + if(verbosity>=2) t5=get_runtime(); + + /* The following computation of the objective function works only */ + /* relative to the active variables */ + if(verbosity>=3) { + criterion=compute_objective_function(a,lin,c,learn_parm->eps,label, + active2dnum); + printf("Objective function (over active variables): %.16f\n",criterion); + fflush(stdout); + } + + for(jj=0;(i=working2dnum[jj])>=0;jj++) { + a_old[i]=a[i]; + } + + if(retrain == 2) { /* reset inconsistent unlabeled examples */ + for(i=0;(i=2) { + t6=get_runtime(); + timing_profile->time_select+=t1-t0; + timing_profile->time_kernel+=t2-t1; + timing_profile->time_opti+=t3-t2; + timing_profile->time_update+=t4-t3; + timing_profile->time_model+=t5-t4; + timing_profile->time_check+=t6-t5; + } + + /* checking whether optimizer got stuck */ + if((*maxdiff) < bestmaxdiff) { + bestmaxdiff=(*maxdiff); + bestmaxdiffiter=iteration; + } + if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) { + /* long time no progress? */ + terminate=1; + retrain=0; + if(verbosity>=1) + printf("\nWARNING: Relaxing KT-Conditions due to slow progress! Terminating!\n"); + } + + noshrink=0; + if((!retrain) && (inactivenum>0) + && ((!learn_parm->skip_final_opt_check) + || (kernel_parm->kernel_type == LINEAR))) { + if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR)) + || (verbosity>=2)) { + if(verbosity==1) { + printf("\n"); + } + printf(" Checking optimality of inactive variables..."); + fflush(stdout); + } + t1=get_runtime(); + reactivate_inactive_examples(label,unlabeled,a,shrink_state,lin,c,totdoc, + totwords,iteration,learn_parm,inconsistent, + docs,kernel_parm,kernel_cache,model,aicache, + weights,maxdiff); + /* Update to new active variables. */ + activenum=compute_index(shrink_state->active,totdoc,active2dnum); + inactivenum=totdoc-activenum; + /* reset watchdog */ + bestmaxdiff=(*maxdiff); + bestmaxdiffiter=iteration; + /* termination criterion */ + noshrink=1; + retrain=0; + if((*maxdiff) > learn_parm->epsilon_crit) + retrain=1; + timing_profile->time_shrink+=get_runtime()-t1; + if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR)) + || (verbosity>=2)) { + printf("done.\n"); fflush(stdout); + printf(" Number of inactive variables = %ld\n",inactivenum); + } + } + + if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff))) + learn_parm->epsilon_crit=(*maxdiff); + if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) { + learn_parm->epsilon_crit/=2.0; + retrain=1; + noshrink=1; + } + if(learn_parm->epsilon_critepsilon_crit=epsilon_crit_org; + + if(verbosity>=2) { + printf(" => (%ld SV (incl. 
%ld SV at u-bound), max violation=%.5f)\n", + supvecnum,model->at_upper_bound,(*maxdiff)); + fflush(stdout); + } + if(verbosity>=3) { + printf("\n"); + } + + if((!retrain) && (transduction)) { + for(i=0;(iactive[i]=1; + } + activenum=compute_index(shrink_state->active,totdoc,active2dnum); + inactivenum=0; + if(verbosity==1) printf("done\n"); + retrain=incorporate_unlabeled_examples(model,label,inconsistent, + unlabeled,a,lin,totdoc, + selcrit,selexam,key, + transductcycle,kernel_parm, + learn_parm); + epsilon_crit_org=learn_parm->epsilon_crit; + if(kernel_parm->kernel_type == LINEAR) + learn_parm->epsilon_crit=1; + transductcycle++; + /* reset watchdog */ + bestmaxdiff=(*maxdiff); + bestmaxdiffiter=iteration; + } + else if(((iteration % 10) == 0) && (!noshrink)) { + activenum=shrink_problem(docs,learn_parm,shrink_state,kernel_parm, + active2dnum,last_suboptimal_at,iteration,totdoc, + maxl((long)(activenum/10), + maxl((long)(totdoc/500),100)), + a,inconsistent); + inactivenum=totdoc-activenum; + if((kernel_cache) + && (supvecnum>kernel_cache->max_elems) + && ((kernel_cache->activenum-activenum)>maxl((long)(activenum/10),500))) { + kernel_cache_shrink(kernel_cache,totdoc, + minl((kernel_cache->activenum-activenum), + (kernel_cache->activenum-supvecnum)), + shrink_state->active); + } + } + + if((!retrain) && learn_parm->remove_inconsistent) { + if(verbosity>=1) { + printf(" Moving training errors to inconsistent examples..."); + fflush(stdout); + } + if(learn_parm->remove_inconsistent == 1) { + retrain=identify_inconsistent(a,label,unlabeled,totdoc,learn_parm, + &inconsistentnum,inconsistent); + } + else if(learn_parm->remove_inconsistent == 2) { + retrain=identify_misclassified(lin,label,unlabeled,totdoc, + model,&inconsistentnum,inconsistent); + } + else if(learn_parm->remove_inconsistent == 3) { + retrain=identify_one_misclassified(lin,label,unlabeled,totdoc, + model,&inconsistentnum,inconsistent); + } + if(retrain) { + if(kernel_parm->kernel_type == LINEAR) { /* reinit shrinking */ + learn_parm->epsilon_crit=2.0; + } + } + if(verbosity>=1) { + printf("done.\n"); + if(retrain) { + printf(" Now %ld inconsistent examples.\n",inconsistentnum); + } + } + } + } /* end of loop */ + + free(chosen); + free(last_suboptimal_at); + free(key); + free(selcrit); + free(selexam); + free(a_old); + free(aicache); + free(working2dnum); + free(active2dnum); + free(qp.opt_ce); + free(qp.opt_ce0); + free(qp.opt_g); + free(qp.opt_g0); + free(qp.opt_xinit); + free(qp.opt_low); + free(qp.opt_up); + free(weights); + + learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */ + model->maxdiff=(*maxdiff); + + return(iteration); +} + +long optimize_to_convergence_sharedslack(DOC **docs, long int *label, + long int totdoc, + long int totwords, LEARN_PARM *learn_parm, + KERNEL_PARM *kernel_parm, + KERNEL_CACHE *kernel_cache, + SHRINK_STATE *shrink_state, MODEL *model, + double *a, double *lin, double *c, + TIMING *timing_profile, double *maxdiff) + /* docs: Training vectors (x-part) */ + /* label: Training labels/value (y-part, zero if test example for + transduction) */ + /* totdoc: Number of examples in docs/label */ + /* totwords: Number of features (i.e. highest feature index) */ + /* learn_parm: Learning paramenters */ + /* kernel_parm: Kernel paramenters */ + /* kernel_cache: Initialized/partly filled Cache, if using a kernel. + NULL if linear. 
*/ + /* shrink_state: State of active variables */ + /* model: Returns learning result */ + /* a: alphas */ + /* lin: linear component of gradient */ + /* c: right hand side of inequalities (margin) */ + /* maxdiff: returns maximum violation of KT-conditions */ +{ + long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink,*unlabeled; + long *inconsistent,choosenum,already_chosen=0,iteration; + long misclassified,supvecnum=0,*active2dnum,inactivenum; + long *working2dnum,*selexam,*ignore; + long activenum,retrain,maxslackid,slackset,jointstep; + double criterion,eq_target; + double *a_old,*alphaslack; + long t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */ + double epsilon_crit_org,maxsharedviol; + double bestmaxdiff; + long bestmaxdiffiter,terminate; + + double *selcrit; /* buffer for sorting */ + CFLOAT *aicache; /* buffer to keep one row of hessian */ + double *weights; /* buffer for weight vector in linear case */ + QP qp; /* buffer for one quadratic program */ + double *slack; /* vector of slack variables for optimization with + shared slacks */ + + epsilon_crit_org=learn_parm->epsilon_crit; /* save org */ + if(kernel_parm->kernel_type == LINEAR) { + learn_parm->epsilon_crit=2.0; + kernel_cache=NULL; /* caching makes no sense for linear kernel */ + } + learn_parm->epsilon_shrink=2; + (*maxdiff)=1; + + learn_parm->totwords=totwords; + + chosen = (long *)my_malloc(sizeof(long)*totdoc); + unlabeled = (long *)my_malloc(sizeof(long)*totdoc); + inconsistent = (long *)my_malloc(sizeof(long)*totdoc); + ignore = (long *)my_malloc(sizeof(long)*totdoc); + key = (long *)my_malloc(sizeof(long)*(totdoc+11)); + selcrit = (double *)my_malloc(sizeof(double)*totdoc); + selexam = (long *)my_malloc(sizeof(long)*totdoc); + a_old = (double *)my_malloc(sizeof(double)*totdoc); + aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc); + working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); + active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); + qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); + qp.opt_ce0 = (double *)my_malloc(sizeof(double)); + qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize + *learn_parm->svm_maxqpsize); + qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); + qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); + qp.opt_low=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); + qp.opt_up=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); + weights=(double *)my_malloc(sizeof(double)*(totwords+1)); + maxslackid=0; + for(i=0;islackid) + maxslackid=docs[i]->slackid; + } + slack=(double *)my_malloc(sizeof(double)*(maxslackid+1)); + alphaslack=(double *)my_malloc(sizeof(double)*(maxslackid+1)); + last_suboptimal_at = (long *)my_malloc(sizeof(long)*(maxslackid+1)); + for(i=0;i<=maxslackid;i++) { /* init shared slacks */ + slack[i]=0; + alphaslack[i]=0; + last_suboptimal_at[i]=1; + } + + choosenum=0; + retrain=1; + iteration=1; + bestmaxdiffiter=1; + bestmaxdiff=999999999; + terminate=0; + + if(kernel_cache) { + kernel_cache->time=iteration; /* for lru cache */ + kernel_cache_reset_lru(kernel_cache); + } + + for(i=0;iactive,totdoc,active2dnum); + inactivenum=totdoc-activenum; + clear_index(working2dnum); + + /* call to init slack and alphaslack */ + compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm, + slack,alphaslack); + + /* repeat this loop until we have convergence */ + for(;retrain && (!terminate);iteration++) { + + if(kernel_cache) + 
kernel_cache->time=iteration; /* for lru cache */ + if(verbosity>=2) { + printf( + "Iteration %ld: ",iteration); fflush(stdout); + } + else if(verbosity==1) { + printf("."); fflush(stdout); + } + + if(verbosity>=2) t0=get_runtime(); + if(verbosity>=3) { + printf("\nSelecting working set... "); fflush(stdout); + } + + if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize) + learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; + + /* select working set according to steepest gradient */ + jointstep=0; + eq_target=0; + if(iteration % 101) { + slackset=select_next_qp_slackset(docs,label,a,lin,slack,alphaslack,c, + learn_parm,active2dnum,&maxsharedviol); + if((iteration % 2) + || (!slackset) || (maxsharedviolepsilon_crit)){ + /* do a step with examples from different slack sets */ + if(verbosity >= 2) { + printf("(i-step)"); fflush(stdout); + } + i=0; + for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear old part of working set */ + if((chosen[j]>=(learn_parm->svm_maxqpsize/ + minl(learn_parm->svm_maxqpsize, + learn_parm->svm_newvarsinqp)))) { + chosen[j]=0; + choosenum--; + } + else { + chosen[j]++; + working2dnum[i++]=j; + } + } + working2dnum[i]=-1; + + already_chosen=0; + if((minl(learn_parm->svm_newvarsinqp, + learn_parm->svm_maxqpsize-choosenum)>=4) + && (kernel_parm->kernel_type != LINEAR)) { + /* select part of the working set from cache */ + already_chosen=select_next_qp_subproblem_grad( + label,unlabeled,a,lin,c,totdoc, + (long)(minl(learn_parm->svm_maxqpsize-choosenum, + learn_parm->svm_newvarsinqp) + /2), + learn_parm,inconsistent,active2dnum, + working2dnum,selcrit,selexam,kernel_cache, + (long)1,key,chosen); + choosenum+=already_chosen; + } + choosenum+=select_next_qp_subproblem_grad( + label,unlabeled,a,lin,c,totdoc, + minl(learn_parm->svm_maxqpsize-choosenum, + learn_parm->svm_newvarsinqp-already_chosen), + learn_parm,inconsistent,active2dnum, + working2dnum,selcrit,selexam,kernel_cache, + (long)0,key,chosen); + } + else { /* do a step with all examples from same slack set */ + if(verbosity >= 2) { + printf("(j-step on %ld)",slackset); fflush(stdout); + } + jointstep=1; + for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */ + chosen[j]=0; + } + working2dnum[0]=-1; + eq_target=alphaslack[slackset]; + for(j=0;j=0;jj++) { */ + if(docs[j]->slackid != slackset) + ignore[j]=1; + else { + ignore[j]=0; + learn_parm->svm_cost[j]=learn_parm->svm_c; + /* printf("Inslackset(%ld,%ld)",j,shrink_state->active[j]); */ + } + } + learn_parm->biased_hyperplane=1; + choosenum=select_next_qp_subproblem_grad( + label,unlabeled,a,lin,c,totdoc, + learn_parm->svm_maxqpsize, + learn_parm,ignore,active2dnum, + working2dnum,selcrit,selexam,kernel_cache, + (long)0,key,chosen); + learn_parm->biased_hyperplane=0; + } + } + else { /* once in a while, select a somewhat random working set + to get unlocked of infinite loops due to numerical + inaccuracies in the core qp-solver */ + choosenum+=select_next_qp_subproblem_rand( + label,unlabeled,a,lin,c,totdoc, + minl(learn_parm->svm_maxqpsize-choosenum, + learn_parm->svm_newvarsinqp), + learn_parm,inconsistent,active2dnum, + working2dnum,selcrit,selexam,kernel_cache,key, + chosen,iteration); + } + + if(verbosity>=2) { + printf(" %ld vectors chosen\n",choosenum); fflush(stdout); + } + + if(verbosity>=2) t1=get_runtime(); + + if(kernel_cache) + cache_multiple_kernel_rows(kernel_cache,docs,working2dnum, + choosenum,kernel_parm); + + if(verbosity>=2) t2=get_runtime(); + if(jointstep) learn_parm->biased_hyperplane=1; + 
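+      /* For a joint step all chosen examples share one slack: their
+         alphas must keep summing to eq_target (=alphaslack[slackset]),
+         which is exactly an equality constraint of the biased-hyperplane
+         QP. The flag is therefore switched on just for this call and
+         switched back off right below. */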
optimize_svm(docs,label,unlabeled,ignore,eq_target,chosen,active2dnum, + model,totdoc,working2dnum,choosenum,a,lin,c,learn_parm, + aicache,kernel_parm,&qp,&epsilon_crit_org); + learn_parm->biased_hyperplane=0; + + for(jj=0;(i=working2dnum[jj])>=0;jj++) /* recompute sums of alphas */ + alphaslack[docs[i]->slackid]+=(a[i]-a_old[i]); + for(jj=0;(i=working2dnum[jj])>=0;jj++) { /* reduce alpha to fulfill + constraints */ + if(alphaslack[docs[i]->slackid] > learn_parm->svm_c) { + if(a[i] < (alphaslack[docs[i]->slackid]-learn_parm->svm_c)) { + alphaslack[docs[i]->slackid]-=a[i]; + a[i]=0; + } + else { + a[i]-=(alphaslack[docs[i]->slackid]-learn_parm->svm_c); + alphaslack[docs[i]->slackid]=learn_parm->svm_c; + } + } + } + for(jj=0;(i=active2dnum[jj])>=0;jj++) + learn_parm->svm_cost[i]=a[i]+(learn_parm->svm_c + -alphaslack[docs[i]->slackid]); + + if(verbosity>=2) t3=get_runtime(); + update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc, + totwords,kernel_parm,kernel_cache,lin,aicache, + weights); + compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm, + slack,alphaslack); + + if(verbosity>=2) t4=get_runtime(); + supvecnum=calculate_svm_model(docs,label,unlabeled,lin,a,a_old,c, + learn_parm,working2dnum,active2dnum,model); + + if(verbosity>=2) t5=get_runtime(); + + /* The following computation of the objective function works only */ + /* relative to the active variables */ + if(verbosity>=3) { + criterion=compute_objective_function(a,lin,c,learn_parm->eps,label, + active2dnum); + printf("Objective function (over active variables): %.16f\n",criterion); + fflush(stdout); + } + + for(jj=0;(i=working2dnum[jj])>=0;jj++) { + a_old[i]=a[i]; + } + + retrain=check_optimality_sharedslack(docs,model,label,a,lin,c, + slack,alphaslack,totdoc,learn_parm, + maxdiff,epsilon_crit_org,&misclassified, + active2dnum,last_suboptimal_at, + iteration,kernel_parm); + + if(verbosity>=2) { + t6=get_runtime(); + timing_profile->time_select+=t1-t0; + timing_profile->time_kernel+=t2-t1; + timing_profile->time_opti+=t3-t2; + timing_profile->time_update+=t4-t3; + timing_profile->time_model+=t5-t4; + timing_profile->time_check+=t6-t5; + } + + /* checking whether optimizer got stuck */ + if((*maxdiff) < bestmaxdiff) { + bestmaxdiff=(*maxdiff); + bestmaxdiffiter=iteration; + } + if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) { + /* long time no progress? */ + terminate=1; + retrain=0; + if(verbosity>=1) + printf("\nWARNING: Relaxing KT-Conditions due to slow progress! Terminating!\n"); + } + + noshrink=0; + + if((!retrain) && (inactivenum>0) + && ((!learn_parm->skip_final_opt_check) + || (kernel_parm->kernel_type == LINEAR))) { + if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR)) + || (verbosity>=2)) { + if(verbosity==1) { + printf("\n"); + } + printf(" Checking optimality of inactive variables..."); + fflush(stdout); + } + t1=get_runtime(); + reactivate_inactive_examples(label,unlabeled,a,shrink_state,lin,c,totdoc, + totwords,iteration,learn_parm,inconsistent, + docs,kernel_parm,kernel_cache,model,aicache, + weights,maxdiff); + /* Update to new active variables. 
*/ + activenum=compute_index(shrink_state->active,totdoc,active2dnum); + inactivenum=totdoc-activenum; + /* check optimality, since check in reactivate does not work for + sharedslacks */ + retrain=check_optimality_sharedslack(docs,model,label,a,lin,c, + slack,alphaslack,totdoc,learn_parm, + maxdiff,epsilon_crit_org,&misclassified, + active2dnum,last_suboptimal_at, + iteration,kernel_parm); + + /* reset watchdog */ + bestmaxdiff=(*maxdiff); + bestmaxdiffiter=iteration; + /* termination criterion */ + noshrink=1; + retrain=0; + if((*maxdiff) > learn_parm->epsilon_crit) + retrain=1; + timing_profile->time_shrink+=get_runtime()-t1; + if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR)) + || (verbosity>=2)) { + printf("done.\n"); fflush(stdout); + printf(" Number of inactive variables = %ld\n",inactivenum); + } + } + + if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff))) + learn_parm->epsilon_crit=(*maxdiff); + if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) { + learn_parm->epsilon_crit/=2.0; + retrain=1; + noshrink=1; + } + if(learn_parm->epsilon_critepsilon_crit=epsilon_crit_org; + + if(verbosity>=2) { + printf(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n", + supvecnum,model->at_upper_bound,(*maxdiff)); + fflush(stdout); + } + if(verbosity>=3) { + printf("\n"); + } + + if(((iteration % 10) == 0) && (!noshrink)) { + activenum=shrink_problem(docs,learn_parm,shrink_state, + kernel_parm,active2dnum, + last_suboptimal_at,iteration,totdoc, + maxl((long)(activenum/10), + maxl((long)(totdoc/500),100)), + a,inconsistent); + inactivenum=totdoc-activenum; + if((kernel_cache) + && (supvecnum>kernel_cache->max_elems) + && ((kernel_cache->activenum-activenum)>maxl((long)(activenum/10),500))) { + kernel_cache_shrink(kernel_cache,totdoc, + minl((kernel_cache->activenum-activenum), + (kernel_cache->activenum-supvecnum)), + shrink_state->active); + } + } + + } /* end of loop */ + + + free(alphaslack); + free(slack); + free(chosen); + free(unlabeled); + free(inconsistent); + free(ignore); + free(last_suboptimal_at); + free(key); + free(selcrit); + free(selexam); + free(a_old); + free(aicache); + free(working2dnum); + free(active2dnum); + free(qp.opt_ce); + free(qp.opt_ce0); + free(qp.opt_g); + free(qp.opt_g0); + free(qp.opt_xinit); + free(qp.opt_low); + free(qp.opt_up); + free(weights); + + learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */ + model->maxdiff=(*maxdiff); + + return(iteration); +} + + +double compute_objective_function(double *a, double *lin, double *c, + double eps, long int *label, + long int *active2dnum) + /* Return value of objective function. */ + /* Works only relative to the active variables! 
*/
+{
+  long i,ii;
+  double criterion;
+
+  /* calculate value of objective function */
+  criterion=0;
+  for(ii=0;active2dnum[ii]>=0;ii++) {
+    i=active2dnum[ii];
+    criterion=criterion+(eps-(double)label[i]*c[i])*a[i]+0.5*a[i]*label[i]*lin[i];
+  }
+  return(criterion);
+}
+
+void clear_index(long int *index)
+     /* initializes and empties index */
+{
+  index[0]=-1;
+}
+
+void add_to_index(long int *index, long int elem)
+     /* appends an element to a -1 terminated index */
+{
+  register long i;
+  for(i=0;index[i] != -1;i++);
+  index[i]=elem;
+  index[i+1]=-1;
+}
+
+long compute_index(long int *binfeature, long int range, long int *index)
+     /* create an inverted index of binfeature */
+{
+  register long i,ii;
+
+  ii=0;
+  for(i=0;i<range;i++) {
+    if(binfeature[i]) {
+      index[ii]=i;
+      ii++;
+    }
+  }
+  for(i=0;i<4;i++) {
+    index[ii+i]=-1;
+  }
+  return(ii);
+}
+
+/***************************** Working set methods ***************************/
+
+void optimize_svm(DOC **docs, long int *label, long int *unlabeled,
+                  long int *exclude_from_eq_const, double eq_target,
+                  long int *chosen, long int *active2dnum, MODEL *model,
+                  long int totdoc, long int *working2dnum, long int varnum,
+                  double *a, double *lin, double *c, LEARN_PARM *learn_parm,
+                  CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp,
+                  double *epsilon_crit_target)
+     /* Do optimization on the working set. */
+{
+    long i;
+    double *a_v;
+
+    compute_matrices_for_optimization(docs,label,unlabeled,
+                                      exclude_from_eq_const,eq_target,chosen,
+                                      active2dnum,working2dnum,model,a,lin,c,
+                                      varnum,totdoc,learn_parm,aicache,
+                                      kernel_parm,qp);
+
+    if(verbosity>=3) {
+      printf("Running optimizer..."); fflush(stdout);
+    }
+    /* call the qp-subsolver */
+    a_v=optimize_qp(qp,epsilon_crit_target,
+                    learn_parm->svm_maxqpsize,
+                    &(model->b),   /* in case the optimizer gives us */
+                                   /* the threshold for free. otherwise */
+                                   /* b is calculated in calculate_model. */
+                    learn_parm);
+    if(verbosity>=3) {
+      printf("done\n");
+    }
+
+    for(i=0;i<varnum;i++) {
+      a[working2dnum[i]]=a_v[i];
+      /*
+      if(a_v[i]<=(0+learn_parm->epsilon_a)) {
+        a[working2dnum[i]]=0;
+      }
+      else if(a_v[i]>=(learn_parm->svm_cost[working2dnum[i]]-learn_parm->epsilon_a)) {
+        a[working2dnum[i]]=learn_parm->svm_cost[working2dnum[i]];
+      }
+      */
+    }
+}
+
+void compute_matrices_for_optimization(DOC **docs, long int *label,
+          long int *unlabeled, long *exclude_from_eq_const, double eq_target,
+          long int *chosen, long int *active2dnum,
+          long int *key, MODEL *model, double *a, double *lin, double *c,
+          long int varnum, long int totdoc, LEARN_PARM *learn_parm,
+          CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp)
+{
+  register long ki,kj,i,j;
+  register double kernel_temp;
+
+  if(verbosity>=3) {
+    fprintf(stdout,"Computing qp-matrices (type %ld kernel [degree %ld, rbf_gamma %f, coef_lin %f, coef_const %f])...",kernel_parm->kernel_type,kernel_parm->poly_degree,kernel_parm->rbf_gamma,kernel_parm->coef_lin,kernel_parm->coef_const);
+    fflush(stdout);
+  }
+
+  qp->opt_n=varnum;
+  qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */
+  for(j=1;j<model->sv_num;j++) { /* start at 1 */
+    if((!chosen[(model->supvec[j])->docnum])
+       && (!exclude_from_eq_const[(model->supvec[j])->docnum])) {
+      qp->opt_ce0[0]+=model->alpha[j];
+    }
+  }
+  if(learn_parm->biased_hyperplane)
+    qp->opt_m=1;
+  else
+    qp->opt_m=0;  /* eq-constraint will be ignored */
+
+  /* init linear part of objective function */
+  for(i=0;i<varnum;i++) {
+    qp->opt_g0[i]=lin[key[i]];
+  }
+
+  for(i=0;i<varnum;i++) {
+    ki=key[i];
+
+    /* Compute the matrix for equality constraints */
+    qp->opt_ce[i]=label[ki];
+    qp->opt_low[i]=0;
+    qp->opt_up[i]=learn_parm->svm_cost[ki];
+
+    kernel_temp=(double)kernel(kernel_parm,docs[ki],docs[ki]);
+    /* compute linear part of objective function */
+    qp->opt_g0[i]-=(kernel_temp*a[ki]*(double)label[ki]);
+    /* compute quadratic part of objective function */
+    qp->opt_g[varnum*i+i]=kernel_temp;
+    for(j=i+1;j<varnum;j++) {
+      kj=key[j];
+      kernel_temp=(double)kernel(kernel_parm,docs[ki],docs[kj]);
+      /* compute linear part of objective function */
+      qp->opt_g0[i]-=(kernel_temp*a[kj]*(double)label[kj]);
+      qp->opt_g0[j]-=(kernel_temp*a[ki]*(double)label[ki]);
+      /* compute quadratic part of objective function */
+      qp->opt_g[varnum*i+j]=(double)label[ki]*(double)label[kj]*kernel_temp;
+      qp->opt_g[varnum*j+i]=(double)label[ki]*(double)label[kj]*kernel_temp;
+    }
+
+    if(verbosity>=3) {
+      if(i % 20 == 0) {
+        fprintf(stdout,"%ld..",i); fflush(stdout);
+      }
+    }
+  }
+
+  for(i=0;i<varnum;i++) {
+    /* assure starting at feasible point */
+    qp->opt_xinit[i]=a[key[i]];
+    /* set linear part of objective function */
+    qp->opt_g0[i]=(learn_parm->eps-(double)label[key[i]]*c[key[i]])+qp->opt_g0[i]*(double)label[key[i]];
+  }
+
+  if(verbosity>=3) {
+    fprintf(stdout,"done\n");
+  }
+}
+
+long calculate_svm_model(DOC **docs, long
int *label, long int *unlabeled, + double *lin, double *a, double *a_old, double *c, + LEARN_PARM *learn_parm, long int *working2dnum, + long int *active2dnum, MODEL *model) + /* Compute decision function based on current values */ + /* of alpha. */ +{ + long i,ii,pos,b_calculated=0,first_low,first_high; + double ex_c,b_temp,b_low,b_high; + + if(verbosity>=3) { + printf("Calculating model..."); fflush(stdout); + } + + if(!learn_parm->biased_hyperplane) { + model->b=0; + b_calculated=1; + } + + for(ii=0;(i=working2dnum[ii])>=0;ii++) { + if((a_old[i]>0) && (a[i]==0)) { /* remove from model */ + pos=model->index[i]; + model->index[i]=-1; + (model->sv_num)--; + model->supvec[pos]=model->supvec[model->sv_num]; + model->alpha[pos]=model->alpha[model->sv_num]; + model->index[(model->supvec[pos])->docnum]=pos; + } + else if((a_old[i]==0) && (a[i]>0)) { /* add to model */ + model->supvec[model->sv_num]=docs[i]; + model->alpha[model->sv_num]=a[i]*(double)label[i]; + model->index[i]=model->sv_num; + (model->sv_num)++; + } + else if(a_old[i]==a[i]) { /* nothing to do */ + } + else { /* just update alpha */ + model->alpha[model->index[i]]=a[i]*(double)label[i]; + } + + ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; + if((a_old[i]>=ex_c) && (a[i]at_upper_bound)--; + } + else if((a_old[i]=ex_c)) { + (model->at_upper_bound)++; + } + + if((!b_calculated) + && (a[i]>learn_parm->epsilon_a) && (a[i]b=((double)label[i]*learn_parm->eps-c[i]+lin[i]); + /* model->b=(-(double)label[i]+lin[i]); */ + b_calculated=1; + } + } + + /* No alpha in the working set not at bounds, so b was not + calculated in the usual way. The following handles this special + case. */ + if(learn_parm->biased_hyperplane + && (!b_calculated) + && (model->sv_num-1 == model->at_upper_bound)) { + first_low=1; + first_high=1; + b_low=0; + b_high=0; + for(ii=0;(i=active2dnum[ii])>=0;ii++) { + ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; + if(a[i]0) { + b_temp=-(learn_parm->eps-c[i]+lin[i]); + if((b_temp>b_low) || (first_low)) { + b_low=b_temp; + first_low=0; + } + } + else { + b_temp=-(-learn_parm->eps-c[i]+lin[i]); + if((b_tempeps-c[i]+lin[i]); + if((b_temp>b_low) || (first_low)) { + b_low=b_temp; + first_low=0; + } + } + else { + b_temp=-(learn_parm->eps-c[i]+lin[i]); + if((b_tempb=-b_low; + } + else if(first_low) { + model->b=-b_high; + } + else { + model->b=-(b_high+b_low)/2.0; /* select b as the middle of range */ + /* printf("\nb_low=%f, b_high=%f,b=%f\n",b_low,b_high,model->b); */ + } + } + + if(verbosity>=3) { + printf("done\n"); fflush(stdout); + } + + return(model->sv_num-1); /* have to substract one, since element 0 is empty*/ +} + +long check_optimality(MODEL *model, long int *label, long int *unlabeled, + double *a, double *lin, double *c, long int totdoc, + LEARN_PARM *learn_parm, double *maxdiff, + double epsilon_crit_org, long int *misclassified, + long int *inconsistent, long int *active2dnum, + long int *last_suboptimal_at, + long int iteration, KERNEL_PARM *kernel_parm) + /* Check KT-conditions */ +{ + long i,ii,retrain; + double dist,ex_c,target; + + if(kernel_parm->kernel_type == LINEAR) { /* be optimistic */ + learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org; + } + else { /* be conservative */ + learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3; + } + retrain=0; + (*maxdiff)=0; + (*misclassified)=0; + for(ii=0;(i=active2dnum[ii])>=0;ii++) { + if((!inconsistent[i]) && label[i]) { + dist=(lin[i]-model->b)*(double)label[i];/* 'distance' from + hyperplane*/ + 
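/* NOTE (annotation added in this vendored copy; not part of the
+         upstream SVM-Light sources): the tests below encode the KKT
+         conditions of the dual, with target = -(eps - y_i*c_i) and
+         ex_c = C_i - epsilon_a. Up to the shrink/crit tolerances,
+         optimality of variable a_i requires, schematically:
+
+             a_i == 0        =>  dist >= target
+             0 < a_i < C_i   =>  dist == target
+             a_i == C_i      =>  dist <= target
+
+         The largest violation found is accumulated in *maxdiff and
+         drives the retrain decision. */
+ 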
target=-(learn_parm->eps-(double)label[i]*c[i]); + ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; + if(dist <= 0) { + (*misclassified)++; /* does not work due to deactivation of var */ + } + if((a[i]>learn_parm->epsilon_a) && (dist > target)) { + if((dist-target)>(*maxdiff)) /* largest violation */ + (*maxdiff)=dist-target; + } + else if((a[i](*maxdiff)) /* largest violation */ + (*maxdiff)=target-dist; + } + /* Count how long a variable was at lower/upper bound (and optimal).*/ + /* Variables, which were at the bound and optimal for a long */ + /* time are unlikely to become support vectors. In case our */ + /* cache is filled up, those variables are excluded to save */ + /* kernel evaluations. (See chapter 'Shrinking').*/ + if((a[i]>(learn_parm->epsilon_a)) + && (a[i]epsilon_a)) + && (dist < (target+learn_parm->epsilon_shrink))) { + last_suboptimal_at[i]=iteration; /* not likely optimal */ + } + else if((a[i]>=ex_c) + && (dist > (target-learn_parm->epsilon_shrink))) { + last_suboptimal_at[i]=iteration; /* not likely optimal */ + } + } + } + /* termination criterion */ + if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) { + retrain=1; + } + return(retrain); +} + +long check_optimality_sharedslack(DOC **docs, MODEL *model, long int *label, + double *a, double *lin, double *c, double *slack, + double *alphaslack, + long int totdoc, + LEARN_PARM *learn_parm, double *maxdiff, + double epsilon_crit_org, long int *misclassified, + long int *active2dnum, + long int *last_suboptimal_at, + long int iteration, KERNEL_PARM *kernel_parm) + /* Check KT-conditions */ +{ + long i,ii,retrain; + double dist,ex_c=0,target; + + if(kernel_parm->kernel_type == LINEAR) { /* be optimistic */ + learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org; + } + else { /* be conservative */ + learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3; + } + + retrain=0; + (*maxdiff)=0; + (*misclassified)=0; + for(ii=0;(i=active2dnum[ii])>=0;ii++) { + /* 'distance' from hyperplane*/ + dist=(lin[i]-model->b)*(double)label[i]+slack[docs[i]->slackid]; + target=-(learn_parm->eps-(double)label[i]*c[i]); + ex_c=learn_parm->svm_c-learn_parm->epsilon_a; + if((a[i]>learn_parm->epsilon_a) && (dist > target)) { + if((dist-target)>(*maxdiff)) { /* largest violation */ + (*maxdiff)=dist-target; + if(verbosity>=5) printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, alphaslack=%f\n",docs[i]->slackid,dist,target,slack[docs[i]->slackid],a[i],alphaslack[docs[i]->slackid]); + if(verbosity>=5) printf(" (single %f)\n",(*maxdiff)); + } + } + if((alphaslack[docs[i]->slackid]slackid]>0)) { + if((slack[docs[i]->slackid])>(*maxdiff)) { /* largest violation */ + (*maxdiff)=slack[docs[i]->slackid]; + if(verbosity>=5) printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, alphaslack=%f\n",docs[i]->slackid,dist,target,slack[docs[i]->slackid],a[i],alphaslack[docs[i]->slackid]); + if(verbosity>=5) printf(" (joint %f)\n",(*maxdiff)); + } + } + /* Count how long a variable was at lower/upper bound (and optimal).*/ + /* Variables, which were at the bound and optimal for a long */ + /* time are unlikely to become support vectors. In case our */ + /* cache is filled up, those variables are excluded to save */ + /* kernel evaluations. 
(See chapter 'Shrinking').*/ + if((a[i]>(learn_parm->epsilon_a)) + && (a[i]slackid]=iteration; /* not at bound */ + } + else if((a[i]<=(learn_parm->epsilon_a)) + && (dist < (target+learn_parm->epsilon_shrink))) { + last_suboptimal_at[docs[i]->slackid]=iteration; /* not likely optimal */ + } + else if((a[i]>=ex_c) + && (slack[docs[i]->slackid] < learn_parm->epsilon_shrink)) { + last_suboptimal_at[docs[i]->slackid]=iteration; /* not likely optimal */ + } + } + /* termination criterion */ + if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) { + retrain=1; + } + return(retrain); +} + +void compute_shared_slacks(DOC **docs, long int *label, + double *a, double *lin, + double *c, long int *active2dnum, + LEARN_PARM *learn_parm, + double *slack, double *alphaslack) + /* compute the value of shared slacks and the joint alphas */ +{ + long jj,i; + double dist,target; + + for(jj=0;(i=active2dnum[jj])>=0;jj++) { /* clear slack variables */ + slack[docs[i]->slackid]=0.0; + alphaslack[docs[i]->slackid]=0.0; + } + for(jj=0;(i=active2dnum[jj])>=0;jj++) { /* recompute slack variables */ + dist=(lin[i])*(double)label[i]; + target=-(learn_parm->eps-(double)label[i]*c[i]); + if((target-dist) > slack[docs[i]->slackid]) + slack[docs[i]->slackid]=target-dist; + alphaslack[docs[i]->slackid]+=a[i]; + } +} + + +long identify_inconsistent(double *a, long int *label, + long int *unlabeled, long int totdoc, + LEARN_PARM *learn_parm, + long int *inconsistentnum, long int *inconsistent) +{ + long i,retrain; + + /* Throw out examples with multipliers at upper bound. This */ + /* corresponds to the -i 1 option. */ + /* ATTENTION: this is just a heuristic for finding a close */ + /* to minimum number of examples to exclude to */ + /* make the problem separable with desired margin */ + retrain=0; + for(i=0;i=(learn_parm->svm_cost[i]-learn_parm->epsilon_a))) { + (*inconsistentnum)++; + inconsistent[i]=1; /* never choose again */ + retrain=2; /* start over */ + if(verbosity>=3) { + printf("inconsistent(%ld)..",i); fflush(stdout); + } + } + } + return(retrain); +} + +long identify_misclassified(double *lin, long int *label, + long int *unlabeled, long int totdoc, + MODEL *model, long int *inconsistentnum, + long int *inconsistent) +{ + long i,retrain; + double dist; + + /* Throw out misclassified examples. This */ + /* corresponds to the -i 2 option. */ + /* ATTENTION: this is just a heuristic for finding a close */ + /* to minimum number of examples to exclude to */ + /* make the problem separable with desired margin */ + retrain=0; + for(i=0;ib)*(double)label[i]; /* 'distance' from hyperplane*/ + if((!inconsistent[i]) && (!unlabeled[i]) && (dist <= 0)) { + (*inconsistentnum)++; + inconsistent[i]=1; /* never choose again */ + retrain=2; /* start over */ + if(verbosity>=3) { + printf("inconsistent(%ld)..",i); fflush(stdout); + } + } + } + return(retrain); +} + +long identify_one_misclassified(double *lin, long int *label, + long int *unlabeled, + long int totdoc, MODEL *model, + long int *inconsistentnum, + long int *inconsistent) +{ + long i,retrain,maxex=-1; + double dist,maxdist=0; + + /* Throw out the 'most misclassified' example. This */ + /* corresponds to the -i 3 option. 
*/ + /* ATTENTION: this is just a heuristic for finding a close */ + /* to minimum number of examples to exclude to */ + /* make the problem separable with desired margin */ + retrain=0; + for(i=0;ib)*(double)label[i];/* 'distance' from hyperplane*/ + if(dist=0) { + (*inconsistentnum)++; + inconsistent[maxex]=1; /* never choose again */ + retrain=2; /* start over */ + if(verbosity>=3) { + printf("inconsistent(%ld)..",i); fflush(stdout); + } + } + return(retrain); +} + +void update_linear_component(DOC **docs, long int *label, + long int *active2dnum, double *a, + double *a_old, long int *working2dnum, + long int totdoc, long int totwords, + KERNEL_PARM *kernel_parm, + KERNEL_CACHE *kernel_cache, + double *lin, CFLOAT *aicache, double *weights) + /* keep track of the linear component */ + /* lin of the gradient etc. by updating */ + /* based on the change of the variables */ + /* in the current working set */ +{ + register long i,ii,j,jj; + register double tec; + SVECTOR *f; + + if(kernel_parm->kernel_type==0) { /* special linear case */ + clear_vector_n(weights,totwords); + for(ii=0;(i=working2dnum[ii])>=0;ii++) { + if(a[i] != a_old[i]) { + for(f=docs[i]->fvec;f;f=f->next) + add_vector_ns(weights,f, + f->factor*((a[i]-a_old[i])*(double)label[i])); + } + } + for(jj=0;(j=active2dnum[jj])>=0;jj++) { + for(f=docs[j]->fvec;f;f=f->next) + lin[j]+=f->factor*sprod_ns(weights,f); + } + } + else { /* general case */ + for(jj=0;(i=working2dnum[jj])>=0;jj++) { + if(a[i] != a_old[i]) { + get_kernel_row(kernel_cache,docs,i,totdoc,active2dnum,aicache, + kernel_parm); + for(ii=0;(j=active2dnum[ii])>=0;ii++) { + tec=aicache[j]; + lin[j]+=(((a[i]*tec)-(a_old[i]*tec))*(double)label[i]); + } + } + } + } +} + + +long incorporate_unlabeled_examples(MODEL *model, long int *label, + long int *inconsistent, + long int *unlabeled, + double *a, double *lin, + long int totdoc, double *selcrit, + long int *select, long int *key, + long int transductcycle, + KERNEL_PARM *kernel_parm, + LEARN_PARM *learn_parm) +{ + long i,j,k,j1,j2,j3,j4,unsupaddnum1=0,unsupaddnum2=0; + long pos,neg,upos,uneg,orgpos,orgneg,nolabel,newpos,newneg,allunlab; + double dist,model_length,posratio,negratio; + long check_every=2; + double loss; + static double switchsens=0.0,switchsensorg=0.0; + double umin,umax,sumalpha; + long imin=0,imax=0; + static long switchnum=0; + + switchsens/=1.2; + + /* assumes that lin[] is up to date -> no inactive vars */ + + orgpos=0; + orgneg=0; + newpos=0; + newneg=0; + nolabel=0; + allunlab=0; + for(i=0;i 0) { + orgpos++; + } + else { + orgneg++; + } + } + else { + allunlab++; + if(unlabeled[i]) { + if(label[i] > 0) { + newpos++; + } + else if(label[i] < 0) { + newneg++; + } + } + } + if(label[i]==0) { + nolabel++; + } + } + + if(learn_parm->transduction_posratio >= 0) { + posratio=learn_parm->transduction_posratio; + } + else { + posratio=(double)orgpos/(double)(orgpos+orgneg); /* use ratio of pos/neg */ + } /* in training data */ + negratio=1.0-posratio; + + learn_parm->svm_costratio=1.0; /* global */ + if(posratio>0) { + learn_parm->svm_costratio_unlab=negratio/posratio; + } + else { + learn_parm->svm_costratio_unlab=1.0; + } + + pos=0; + neg=0; + upos=0; + uneg=0; + for(i=0;ib); /* 'distance' from hyperplane*/ + if(dist>0) { + pos++; + } + else { + neg++; + } + if(unlabeled[i]) { + if(dist>0) { + upos++; + } + else { + uneg++; + } + } + if((!unlabeled[i]) && (a[i]>(learn_parm->svm_cost[i]-learn_parm->epsilon_a))) { + /* printf("Ubounded %ld (class %ld, unlabeled %ld)\n",i,label[i],unlabeled[i]); */ + } + } 
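+  /* NOTE (annotation added in this vendored copy; not part of the
+     upstream SVM-Light sources): transductive training keeps the
+     fraction of unlabeled examples classified positive close to
+     posratio, which was set above either from
+     learn_parm->transduction_posratio or, if that value is negative,
+     from the labeled training data itself, schematically:
+
+         posratio = orgpos / (double)(orgpos + orgneg);
+
+     The counts just computed (pos/neg, upos/uneg) let the verbosity
+     output below report how the current model splits the data. */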
+ if(verbosity>=2) { + printf("POS=%ld, ORGPOS=%ld, ORGNEG=%ld\n",pos,orgpos,orgneg); + printf("POS=%ld, NEWPOS=%ld, NEWNEG=%ld\n",pos,newpos,newneg); + printf("pos ratio = %f (%f).\n",(double)(upos)/(double)(allunlab),posratio); + fflush(stdout); + } + + if(transductcycle == 0) { + j1=0; + j2=0; + j4=0; + for(i=0;ib); /* 'distance' from hyperplane*/ + if((label[i]==0) && (unlabeled[i])) { + selcrit[j4]=dist; + key[j4]=i; + j4++; + } + } + unsupaddnum1=0; + unsupaddnum2=0; + select_top_n(selcrit,j4,select,(long)(allunlab*posratio+0.5)); + for(k=0;(k<(long)(allunlab*posratio+0.5));k++) { + i=key[select[k]]; + label[i]=1; + unsupaddnum1++; + j1++; + } + for(i=0;isvm_cost[i]=learn_parm->svm_c* + learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound; + } + else if(label[i] == -1) { + learn_parm->svm_cost[i]=learn_parm->svm_c* + learn_parm->svm_unlabbound; + } + } + } + if(verbosity>=1) { + /* printf("costratio %f, costratio_unlab %f, unlabbound %f\n", + learn_parm->svm_costratio,learn_parm->svm_costratio_unlab, + learn_parm->svm_unlabbound); */ + printf("Classifying unlabeled data as %ld POS / %ld NEG.\n", + unsupaddnum1,unsupaddnum2); + fflush(stdout); + } + if(verbosity >= 1) + printf("Retraining."); + if(verbosity >= 2) printf("\n"); + return((long)3); + } + if((transductcycle % check_every) == 0) { + if(verbosity >= 1) + printf("Retraining."); + if(verbosity >= 2) printf("\n"); + j1=0; + j2=0; + unsupaddnum1=0; + unsupaddnum2=0; + for(i=0;isvm_cost[i]=learn_parm->svm_c* + learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound; + } + else if(label[i] == -1) { + learn_parm->svm_cost[i]=learn_parm->svm_c* + learn_parm->svm_unlabbound; + } + } + } + + if(verbosity>=2) { + /* printf("costratio %f, costratio_unlab %f, unlabbound %f\n", + learn_parm->svm_costratio,learn_parm->svm_costratio_unlab, + learn_parm->svm_unlabbound); */ + printf("%ld positive -> Added %ld POS / %ld NEG unlabeled examples.\n", + upos,unsupaddnum1,unsupaddnum2); + fflush(stdout); + } + + if(learn_parm->svm_unlabbound == 1) { + learn_parm->epsilon_crit=0.001; /* do the last run right */ + } + else { + learn_parm->epsilon_crit=0.01; /* otherwise, no need to be so picky */ + } + + return((long)3); + } + else if(((transductcycle % check_every) < check_every)) { + model_length=0; + sumalpha=0; + loss=0; + for(i=0;ib); /* 'distance' from hyperplane*/ + if((label[i]*dist)<(1.0-learn_parm->epsilon_crit)) { + loss+=(1.0-(label[i]*dist))*learn_parm->svm_cost[i]; + } + } + model_length=sqrt(model_length); + if(verbosity>=2) { + printf("Model-length = %f (%f), loss = %f, objective = %f\n", + model_length,sumalpha,loss,loss+0.5*model_length*model_length); + fflush(stdout); + } + j1=0; + j2=0; + j3=0; + j4=0; + unsupaddnum1=0; + unsupaddnum2=0; + umin=99999; + umax=-99999; + j4=1; + while(j4) { + umin=99999; + umax=-99999; + for(i=0;(ib); + if((label[i]>0) && (unlabeled[i]) && (!inconsistent[i]) + && (distumax)) { + umax=dist; + imax=i; + } + } + if((umin < (umax+switchsens-1E-4))) { + j1++; + j2++; + unsupaddnum1++; + unlabeled[imin]=3; + inconsistent[imin]=1; + unsupaddnum2++; + unlabeled[imax]=2; + inconsistent[imax]=1; + } + else + j4=0; + j4=0; + } + for(j=0;(j0) { + unlabeled[j]=2; + } + else if(label[j]<0) { + unlabeled[j]=3; + } + /* inconsistent[j]=1; */ + j3++; + } + } + switchnum+=unsupaddnum1+unsupaddnum2; + + /* stop and print out current margin + printf("switchnum %ld %ld\n",switchnum,kernel_parm->poly_degree); + if(switchnum == 2*kernel_parm->poly_degree) { + learn_parm->svm_unlabbound=1; + } + */ + + 
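/* NOTE (annotation added in this vendored copy; not part of the
+     upstream SVM-Light sources): the loop above is the label-switching
+     step of the transductive SVM. umin is the smallest margin among
+     tentatively positive unlabeled examples (at imin), umax the largest
+     among the tentatively negative ones (at imax); when
+     umin < umax + switchsens the two tentative labels are swapped,
+     which decreases the objective, and training is repeated until no
+     such pair remains. */
+ 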
if((!unsupaddnum1) && (!unsupaddnum2)) { + if((learn_parm->svm_unlabbound>=1) && ((newpos+newneg) == allunlab)) { + for(j=0;(jpredfile,model,lin,a,unlabeled,label, + totdoc,learn_parm); + if(verbosity>=1) + printf("Number of switches: %ld\n",switchnum); + return((long)0); + } + switchsens=switchsensorg; + learn_parm->svm_unlabbound*=1.5; + if(learn_parm->svm_unlabbound>1) { + learn_parm->svm_unlabbound=1; + } + model->at_upper_bound=0; /* since upper bound increased */ + if(verbosity>=1) + printf("Increasing influence of unlabeled examples to %f%% .", + learn_parm->svm_unlabbound*100.0); + } + else if(verbosity>=1) { + printf("%ld positive -> Switching labels of %ld POS / %ld NEG unlabeled examples.", + upos,unsupaddnum1,unsupaddnum2); + fflush(stdout); + } + + if(verbosity >= 2) printf("\n"); + + learn_parm->epsilon_crit=0.5; /* don't need to be so picky */ + + for(i=0;isvm_cost[i]=learn_parm->svm_c* + learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound; + } + else if(label[i] == -1) { + learn_parm->svm_cost[i]=learn_parm->svm_c* + learn_parm->svm_unlabbound; + } + } + } + + return((long)2); + } + + return((long)0); +} + +/*************************** Working set selection ***************************/ + +long select_next_qp_subproblem_grad(long int *label, + long int *unlabeled, + double *a, double *lin, + double *c, long int totdoc, + long int qp_size, + LEARN_PARM *learn_parm, + long int *inconsistent, + long int *active2dnum, + long int *working2dnum, + double *selcrit, + long int *select, + KERNEL_CACHE *kernel_cache, + long int cache_only, + long int *key, long int *chosen) + /* Use the feasible direction approach to select the next + qp-subproblem (see chapter 'Selecting a good working set'). If + 'cache_only' is true, then the variables are selected only among + those for which the kernel evaluations are cached. 
*/ +{ + long choosenum,i,j,k,activedoc,inum,valid; + double s; + + for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */ + choosenum=0; + activedoc=0; + for(i=0;(j=active2dnum[i])>=0;i++) { + s=-label[j]; + if(kernel_cache && cache_only) + valid=(kernel_cache->index[j]>=0); + else + valid=1; + if(valid + && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0))) + && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) + && (s>0))) + && (!chosen[j]) + && (label[j]) + && (!inconsistent[j])) + { + selcrit[activedoc]=(double)label[j]*(learn_parm->eps-(double)label[j]*c[j]+(double)label[j]*lin[j]); + /* selcrit[activedoc]=(double)label[j]*(-1.0+(double)label[j]*lin[j]); */ + key[activedoc]=j; + activedoc++; + } + } + select_top_n(selcrit,activedoc,select,(long)(qp_size/2)); + for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (kbiased_hyperplane || (selcrit[select[k]] > 0)) { */ + i=key[select[k]]; + chosen[i]=1; + working2dnum[inum+choosenum]=i; + choosenum+=1; + if(kernel_cache) + kernel_cache_touch(kernel_cache,i); /* make sure it does not get + kicked out of cache */ + /* } */ + } + + activedoc=0; + for(i=0;(j=active2dnum[i])>=0;i++) { + s=label[j]; + if(kernel_cache && cache_only) + valid=(kernel_cache->index[j]>=0); + else + valid=1; + if(valid + && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0))) + && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) + && (s>0))) + && (!chosen[j]) + && (label[j]) + && (!inconsistent[j])) + { + selcrit[activedoc]=-(double)label[j]*(learn_parm->eps-(double)label[j]*c[j]+(double)label[j]*lin[j]); + /* selcrit[activedoc]=-(double)(label[j]*(-1.0+(double)label[j]*lin[j])); */ + key[activedoc]=j; + activedoc++; + } + } + select_top_n(selcrit,activedoc,select,(long)(qp_size/2)); + for(k=0;(choosenumbiased_hyperplane || (selcrit[select[k]] > 0)) { */ + i=key[select[k]]; + chosen[i]=1; + working2dnum[inum+choosenum]=i; + choosenum+=1; + if(kernel_cache) + kernel_cache_touch(kernel_cache,i); /* make sure it does not get + kicked out of cache */ + /* } */ + } + working2dnum[inum+choosenum]=-1; /* complete index */ + return(choosenum); +} + +long select_next_qp_subproblem_rand(long int *label, + long int *unlabeled, + double *a, double *lin, + double *c, long int totdoc, + long int qp_size, + LEARN_PARM *learn_parm, + long int *inconsistent, + long int *active2dnum, + long int *working2dnum, + double *selcrit, + long int *select, + KERNEL_CACHE *kernel_cache, + long int *key, + long int *chosen, + long int iteration) +/* Use the feasible direction approach to select the next + qp-subproblem (see section 'Selecting a good working set'). Chooses + a feasible direction at (pseudo) random to help jump over numerical + problem. 
*/
+{
+  long choosenum,i,j,k,activedoc,inum;
+  double s;
+
+  for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
+  choosenum=0;
+  activedoc=0;
+  for(i=0;(j=active2dnum[i])>=0;i++) {
+    s=-label[j];
+    if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
+       && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
+             && (s>0)))
+       && (!inconsistent[j])
+       && (label[j])
+       && (!chosen[j])) {
+      selcrit[activedoc]=(j+iteration) % totdoc;
+      key[activedoc]=j;
+      activedoc++;
+    }
+  }
+  select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
+  for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
+    i=key[select[k]];
+    chosen[i]=1;
+    working2dnum[inum+choosenum]=i;
+    choosenum+=1;
+    if(kernel_cache)
+      kernel_cache_touch(kernel_cache,i); /* make sure it does not get
+                                             kicked out of cache */
+  }
+
+  activedoc=0;
+  for(i=0;(j=active2dnum[i])>=0;i++) {
+    s=label[j];
+    if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
+       && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
+             && (s>0)))
+       && (!inconsistent[j])
+       && (label[j])
+       && (!chosen[j])) {
+      selcrit[activedoc]=(j+iteration) % totdoc;
+      key[activedoc]=j;
+      activedoc++;
+    }
+  }
+  select_top_n(selcrit,activedoc,select,(long)(qp_size/2));
+  for(k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
+    i=key[select[k]];
+    chosen[i]=1;
+    working2dnum[inum+choosenum]=i;
+    choosenum+=1;
+    if(kernel_cache)
+      kernel_cache_touch(kernel_cache,i); /* make sure it does not get
+                                             kicked out of cache */
+  }
+  working2dnum[inum+choosenum]=-1; /* complete index */
+  return(choosenum);
+}
+
+long select_next_qp_slackset(DOC **docs, long int *label,
+                             double *a, double *lin,
+                             double *slack, double *alphaslack,
+                             double *c, LEARN_PARM *learn_parm,
+                             long int *active2dnum, double *maxviol)
+     /* returns the slackset with the largest internal violation */
+{
+  long i,ii,maxdiffid;
+  double dist,target,maxdiff,ex_c;
+
+  maxdiff=0;
+  maxdiffid=0;
+  for(ii=0;(i=active2dnum[ii])>=0;ii++) {
+    ex_c=learn_parm->svm_c-learn_parm->epsilon_a;
+    if(alphaslack[docs[i]->slackid] >= ex_c) {
+      dist=(lin[i])*(double)label[i]+slack[docs[i]->slackid]; /* distance */
+      target=-(learn_parm->eps-(double)label[i]*c[i]); /* rhs of constraint */
+      if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
+        if((dist-target)>maxdiff) { /* largest violation */
+          maxdiff=dist-target;
+          maxdiffid=docs[i]->slackid;
+        }
+      }
+    }
+  }
+  (*maxviol)=maxdiff;
+  return(maxdiffid);
+}
+
+void select_top_n(double *selcrit, long int range, long int *select,
+                  long int n)
+{
+  register long i,j;
+
+  for(i=0;(i<n) && (i<range);i++) { /* Initialize with the first n elements */
+    for(j=i;j>=0;j--) {
+      if((j>0) && (selcrit[select[j-1]]<selcrit[i])){
+        select[j]=select[j-1];
+      }
+      else {
+        select[j]=i;
+        j=-1;
+      }
+    }
+  }
+  if(n>0) {
+    for(i=n;i<range;i++) {
+      if(selcrit[i]>selcrit[select[n-1]]) {
+        for(j=n-1;j>=0;j--) {
+          if((j>0) && (selcrit[select[j-1]]<selcrit[i])) {
+            select[j]=select[j-1];
+          }
+          else {
+            select[j]=i;
+            j=-1;
+          }
+        }
+      }
+    }
+  }
+}
+
+/******************************** Shrinking **********************************/
+
+void init_shrink_state(SHRINK_STATE *shrink_state, long int totdoc,
+                       long int maxhistory)
+{
+  long i;
+
+  shrink_state->deactnum=0;
+  shrink_state->active = (long *)my_malloc(sizeof(long)*totdoc);
+  shrink_state->inactive_since = (long *)my_malloc(sizeof(long)*totdoc);
+  shrink_state->a_history = (double **)my_malloc(sizeof(double *)*maxhistory);
+  shrink_state->maxhistory=maxhistory;
+  shrink_state->last_lin = (double *)my_malloc(sizeof(double)*totdoc);
+  shrink_state->last_a = (double *)my_malloc(sizeof(double)*totdoc);
+
+  for(i=0;i<totdoc;i++) {
+    shrink_state->active[i]=1;
+    shrink_state->inactive_since[i]=0;
+    shrink_state->last_a[i]=0;
+    shrink_state->last_lin[i]=0;
+  }
+}
+
+void shrink_state_cleanup(SHRINK_STATE *shrink_state)
+{
+  free(shrink_state->active);
+  free(shrink_state->inactive_since);
+  if(shrink_state->deactnum > 0)
+    free(shrink_state->a_history[shrink_state->deactnum-1]);
+  free(shrink_state->a_history);
+  free(shrink_state->last_a);
+  free(shrink_state->last_lin);
+}
+
+long shrink_problem(DOC **docs,
+                    LEARN_PARM *learn_parm,
+                    SHRINK_STATE *shrink_state,
+                    KERNEL_PARM *kernel_parm,
+                    long int *active2dnum,
+                    long int *last_suboptimal_at,
+                    long int iteration,
+                    long int totdoc,
+                    long int minshrink,
+                    double *a,
+                    long int *inconsistent)
+     /* Shrink some variables away. Do the shrinking only if at least
+        minshrink variables can be removed.
*/ +{ + long i,ii,change,activenum,lastiter; + double *a_old; + + activenum=0; + change=0; + for(ii=0;active2dnum[ii]>=0;ii++) { + i=active2dnum[ii]; + activenum++; + if(learn_parm->sharedslack) + lastiter=last_suboptimal_at[docs[i]->slackid]; + else + lastiter=last_suboptimal_at[i]; + if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink) + || (inconsistent[i])) { + change++; + } + } + if((change>=minshrink) /* shrink only if sufficiently many candidates */ + && (shrink_state->deactnummaxhistory)) { /* and enough memory */ + /* Shrink problem by removing those variables which are */ + /* optimal at a bound for a minimum number of iterations */ + if(verbosity>=2) { + printf(" Shrinking..."); fflush(stdout); + } + if(kernel_parm->kernel_type != LINEAR) { /* non-linear case save alphas */ + a_old=(double *)my_malloc(sizeof(double)*totdoc); + shrink_state->a_history[shrink_state->deactnum]=a_old; + for(i=0;i=0;ii++) { + i=active2dnum[ii]; + if(learn_parm->sharedslack) + lastiter=last_suboptimal_at[docs[i]->slackid]; + else + lastiter=last_suboptimal_at[i]; + if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink) + || (inconsistent[i])) { + shrink_state->active[i]=0; + shrink_state->inactive_since[i]=shrink_state->deactnum; + } + } + activenum=compute_index(shrink_state->active,totdoc,active2dnum); + shrink_state->deactnum++; + if(kernel_parm->kernel_type == LINEAR) { + shrink_state->deactnum=0; + } + if(verbosity>=2) { + printf("done.\n"); fflush(stdout); + printf(" Number of inactive variables = %ld\n",totdoc-activenum); + } + } + return(activenum); +} + + +void reactivate_inactive_examples(long int *label, + long int *unlabeled, + double *a, + SHRINK_STATE *shrink_state, + double *lin, + double *c, + long int totdoc, + long int totwords, + long int iteration, + LEARN_PARM *learn_parm, + long int *inconsistent, + DOC **docs, + KERNEL_PARM *kernel_parm, + KERNEL_CACHE *kernel_cache, + MODEL *model, + CFLOAT *aicache, + double *weights, + double *maxdiff) + /* Make all variables active again which had been removed by + shrinking. */ + /* Computes lin for those variables from scratch. 
*/ +{ + register long i,j,ii,jj,t,*changed2dnum,*inactive2dnum; + long *changed,*inactive; + register double kernel_val,*a_old,dist; + double ex_c,target; + SVECTOR *f; + + if(kernel_parm->kernel_type == LINEAR) { /* special linear case */ + a_old=shrink_state->last_a; + clear_vector_n(weights,totwords); + for(i=0;ifvec;f;f=f->next) + add_vector_ns(weights,f, + f->factor*((a[i]-a_old[i])*(double)label[i])); + a_old[i]=a[i]; + } + } + for(i=0;iactive[i]) { + for(f=docs[i]->fvec;f;f=f->next) + lin[i]=shrink_state->last_lin[i]+f->factor*sprod_ns(weights,f); + } + shrink_state->last_lin[i]=lin[i]; + } + } + else { + changed=(long *)my_malloc(sizeof(long)*totdoc); + changed2dnum=(long *)my_malloc(sizeof(long)*(totdoc+11)); + inactive=(long *)my_malloc(sizeof(long)*totdoc); + inactive2dnum=(long *)my_malloc(sizeof(long)*(totdoc+11)); + for(t=shrink_state->deactnum-1;(t>=0) && shrink_state->a_history[t];t--) { + if(verbosity>=2) { + printf("%ld..",t); fflush(stdout); + } + a_old=shrink_state->a_history[t]; + for(i=0;iactive[i]) + && (shrink_state->inactive_since[i] == t)); + changed[i]= (a[i] != a_old[i]); + } + compute_index(inactive,totdoc,inactive2dnum); + compute_index(changed,totdoc,changed2dnum); + + for(ii=0;(i=changed2dnum[ii])>=0;ii++) { + get_kernel_row(kernel_cache,docs,i,totdoc,inactive2dnum,aicache, + kernel_parm); + for(jj=0;(j=inactive2dnum[jj])>=0;jj++) { + kernel_val=aicache[j]; + lin[j]+=(((a[i]*kernel_val)-(a_old[i]*kernel_val))*(double)label[i]); + } + } + } + free(changed); + free(changed2dnum); + free(inactive); + free(inactive2dnum); + } + (*maxdiff)=0; + for(i=0;iinactive_since[i]=shrink_state->deactnum-1; + if(!inconsistent[i]) { + dist=(lin[i]-model->b)*(double)label[i]; + target=-(learn_parm->eps-(double)label[i]*c[i]); + ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; + if((a[i]>learn_parm->epsilon_a) && (dist > target)) { + if((dist-target)>(*maxdiff)) /* largest violation */ + (*maxdiff)=dist-target; + } + else if((a[i](*maxdiff)) /* largest violation */ + (*maxdiff)=target-dist; + } + if((a[i]>(0+learn_parm->epsilon_a)) + && (a[i]active[i]=1; /* not at bound */ + } + else if((a[i]<=(0+learn_parm->epsilon_a)) && (dist < (target+learn_parm->epsilon_shrink))) { + shrink_state->active[i]=1; + } + else if((a[i]>=ex_c) + && (dist > (target-learn_parm->epsilon_shrink))) { + shrink_state->active[i]=1; + } + else if(learn_parm->sharedslack) { /* make all active when sharedslack */ + shrink_state->active[i]=1; + } + } + } + if(kernel_parm->kernel_type != LINEAR) { /* update history for non-linear */ + for(i=0;ia_history[shrink_state->deactnum-1])[i]=a[i]; + } + for(t=shrink_state->deactnum-2;(t>=0) && shrink_state->a_history[t];t--) { + free(shrink_state->a_history[t]); + shrink_state->a_history[t]=0; + } + } +} + +/****************************** Cache handling *******************************/ + +void get_kernel_row(KERNEL_CACHE *kernel_cache, DOC **docs, + long int docnum, long int totdoc, + long int *active2dnum, CFLOAT *buffer, + KERNEL_PARM *kernel_parm) + /* Get's a row of the matrix of kernel values This matrix has the + same form as the Hessian, just that the elements are not + multiplied by */ + /* y_i * y_j * a_i * a_j */ + /* Takes the values from the cache if available. */ +{ + register long i,j,start; + DOC *ex; + + ex=docs[docnum]; + + if(kernel_cache->index[docnum] != -1) { /* row is cached? 
*/ + kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */ + start=kernel_cache->activenum*kernel_cache->index[docnum]; + for(i=0;(j=active2dnum[i])>=0;i++) { + if(kernel_cache->totdoc2active[j] >= 0) { /* column is cached? */ + buffer[j]=kernel_cache->buffer[start+kernel_cache->totdoc2active[j]]; + } + else { + buffer[j]=(CFLOAT)kernel(kernel_parm,ex,docs[j]); + } + } + } + else { + for(i=0;(j=active2dnum[i])>=0;i++) { + buffer[j]=(CFLOAT)kernel(kernel_parm,ex,docs[j]); + } + } +} + + +void cache_kernel_row(KERNEL_CACHE *kernel_cache, DOC **docs, + long int m, KERNEL_PARM *kernel_parm) + /* Fills cache for the row m */ +{ + register DOC *ex; + register long j,k,l; + register CFLOAT *cache; + + if(!kernel_cache_check(kernel_cache,m)) { /* not cached yet*/ + cache = kernel_cache_clean_and_malloc(kernel_cache,m); + if(cache) { + l=kernel_cache->totdoc2active[m]; + ex=docs[m]; + for(j=0;jactivenum;j++) { /* fill cache */ + k=kernel_cache->active2totdoc[j]; + if((kernel_cache->index[k] != -1) && (l != -1) && (k != m)) { + cache[j]=kernel_cache->buffer[kernel_cache->activenum + *kernel_cache->index[k]+l]; + } + else { + cache[j]=kernel(kernel_parm,ex,docs[k]); + } + } + } + else { + perror("Error: Kernel cache full! => increase cache size"); + } + } +} + + +void cache_multiple_kernel_rows(KERNEL_CACHE *kernel_cache, DOC **docs, + long int *key, long int varnum, + KERNEL_PARM *kernel_parm) + /* Fills cache for the rows in key */ +{ + register long i; + + for(i=0;i=2) { + printf(" Reorganizing cache..."); fflush(stdout); + } + + keep=(long *)my_malloc(sizeof(long)*totdoc); + for(j=0;jactivenum) && (scountactive2totdoc[jj]; + if(!after[j]) { + scount++; + keep[j]=0; + } + } + + for(i=0;imax_elems;i++) { + for(jj=0;jjactivenum;jj++) { + j=kernel_cache->active2totdoc[jj]; + if(!keep[j]) { + from++; + } + else { + kernel_cache->buffer[to]=kernel_cache->buffer[from]; + to++; + from++; + } + } + } + + kernel_cache->activenum=0; + for(j=0;jtotdoc2active[j] != -1)) { + kernel_cache->active2totdoc[kernel_cache->activenum]=j; + kernel_cache->totdoc2active[j]=kernel_cache->activenum; + kernel_cache->activenum++; + } + else { + kernel_cache->totdoc2active[j]=-1; + } + } + + kernel_cache->max_elems=(long)(kernel_cache->buffsize/kernel_cache->activenum); + if(kernel_cache->max_elems>totdoc) { + kernel_cache->max_elems=totdoc; + } + + free(keep); + + if(verbosity>=2) { + printf("done.\n"); fflush(stdout); + printf(" Cache-size in rows = %ld\n",kernel_cache->max_elems); + } +} + +KERNEL_CACHE *kernel_cache_init(long int totdoc, long int buffsize) +{ + long i; + KERNEL_CACHE *kernel_cache; + + kernel_cache=(KERNEL_CACHE *)my_malloc(sizeof(KERNEL_CACHE)); + kernel_cache->index = (long *)my_malloc(sizeof(long)*totdoc); + kernel_cache->occu = (long *)my_malloc(sizeof(long)*totdoc); + kernel_cache->lru = (long *)my_malloc(sizeof(long)*totdoc); + kernel_cache->invindex = (long *)my_malloc(sizeof(long)*totdoc); + kernel_cache->active2totdoc = (long *)my_malloc(sizeof(long)*totdoc); + kernel_cache->totdoc2active = (long *)my_malloc(sizeof(long)*totdoc); + kernel_cache->buffer = (CFLOAT *)my_malloc((size_t)(buffsize)*1024*1024); + + kernel_cache->buffsize=(long)(buffsize/sizeof(CFLOAT)*1024*1024); + + kernel_cache->max_elems=(long)(kernel_cache->buffsize/totdoc); + if(kernel_cache->max_elems>totdoc) { + kernel_cache->max_elems=totdoc; + } + + if(verbosity>=2) { + printf(" Cache-size in rows = %ld\n",kernel_cache->max_elems); + printf(" Kernel evals so far: %ld\n",kernel_cache_statistic); + } + + 
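/* NOTE (annotation added in this vendored copy; not part of the
+     upstream SVM-Light sources): the cache is one flat buffer holding
+     up to max_elems rows of the kernel matrix, each row activenum
+     entries wide. index[doc] maps a document to its row slot (-1 if
+     uncached), invindex[] is the reverse map, and lru[] stores the
+     iteration of last use for eviction. A row pointer is plain
+     arithmetic, schematically:
+
+         CFLOAT *row = kernel_cache->buffer
+                       + kernel_cache->activenum * kernel_cache->index[docnum];
+  */
+ 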
kernel_cache->elems=0;   /* initialize cache */
+  for(i=0;i<totdoc;i++) {
+    kernel_cache->index[i]=-1;
+    kernel_cache->lru[i]=0;
+  }
+  for(i=0;i<totdoc;i++) {
+    kernel_cache->occu[i]=0;
+    kernel_cache->invindex[i]=-1;
+  }
+
+  kernel_cache->activenum=totdoc;
+  for(i=0;i<totdoc;i++) {
+      kernel_cache->active2totdoc[i]=i;
+      kernel_cache->totdoc2active[i]=i;
+  }
+
+  kernel_cache->time=0;
+
+  return(kernel_cache);
+}
+
+void kernel_cache_reset_lru(KERNEL_CACHE *kernel_cache)
+{
+  long maxlru=0,k;
+
+  for(k=0;k<kernel_cache->max_elems;k++) {
+    if(maxlru < kernel_cache->lru[k])
+      maxlru=kernel_cache->lru[k];
+  }
+  for(k=0;k<kernel_cache->max_elems;k++) {
+    kernel_cache->lru[k]-=maxlru;
+  }
+}
+
+void kernel_cache_cleanup(KERNEL_CACHE *kernel_cache)
+{
+  free(kernel_cache->index);
+  free(kernel_cache->occu);
+  free(kernel_cache->lru);
+  free(kernel_cache->invindex);
+  free(kernel_cache->active2totdoc);
+  free(kernel_cache->totdoc2active);
+  free(kernel_cache->buffer);
+  free(kernel_cache);
+}
+
+long kernel_cache_malloc(KERNEL_CACHE *kernel_cache)
+{
+  long i;
+
+  if(kernel_cache_space_available(kernel_cache)) {
+    for(i=0;i<kernel_cache->max_elems;i++) {
+      if(!kernel_cache->occu[i]) {
+        kernel_cache->occu[i]=1;
+        kernel_cache->elems++;
+        return(i);
+      }
+    }
+  }
+  return(-1);
+}
+
+void kernel_cache_free(KERNEL_CACHE *kernel_cache, long int i)
+{
+  kernel_cache->occu[i]=0;
+  kernel_cache->elems--;
+}
+
+long kernel_cache_free_lru(KERNEL_CACHE *kernel_cache)
+     /* remove least recently used cache element */
+{
+  register long k,least_elem=-1,least_time;
+
+  least_time=kernel_cache->time+1;
+  for(k=0;k<kernel_cache->max_elems;k++) {
+    if(kernel_cache->invindex[k] != -1) {
+      if(kernel_cache->lru[k]<least_time) {
+        least_time=kernel_cache->lru[k];
+        least_elem=k;
+      }
+    }
+  }
+  if(least_elem != -1) {
+    kernel_cache_free(kernel_cache,least_elem);
+    kernel_cache->index[kernel_cache->invindex[least_elem]]=-1;
+    kernel_cache->invindex[least_elem]=-1;
+    return(1);
+  }
+  return(0);
+}
+
+
+CFLOAT *kernel_cache_clean_and_malloc(KERNEL_CACHE *kernel_cache,
+                                      long int docnum)
+     /* Get a free cache entry. In case cache is full, the lru element
+        is removed. */
+{
+  long result;
+  if((result = kernel_cache_malloc(kernel_cache)) == -1) {
+    if(kernel_cache_free_lru(kernel_cache)) {
+      result = kernel_cache_malloc(kernel_cache);
+    }
+  }
+  kernel_cache->index[docnum]=result;
+  if(result == -1) {
+    return(0);
+  }
+  kernel_cache->invindex[result]=docnum;
+  kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */
+  return((CFLOAT *)((long)kernel_cache->buffer
+                    +(kernel_cache->activenum*sizeof(CFLOAT)*
+                      kernel_cache->index[docnum])));
+}
+
+long kernel_cache_touch(KERNEL_CACHE *kernel_cache, long int docnum)
+     /* Update lru time to avoid removal from cache. */
+{
+  if(kernel_cache && kernel_cache->index[docnum] != -1) {
+    kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */
+    return(1);
+  }
+  return(0);
+}
+
+long kernel_cache_check(KERNEL_CACHE *kernel_cache, long int docnum)
+     /* Is that row cached? */
+{
+  return(kernel_cache->index[docnum] != -1);
+}
+
+long kernel_cache_space_available(KERNEL_CACHE *kernel_cache)
+     /* Is there room for one more row? */
+{
+  return(kernel_cache->elems < kernel_cache->max_elems);
+}
+
+/************************** Compute estimates ******************************/
+
+void compute_xa_estimates(MODEL *model, long int *label,
+                          long int *unlabeled, long int totdoc,
+                          DOC **docs, double *lin, double *a,
+                          KERNEL_PARM *kernel_parm,
+                          LEARN_PARM *learn_parm, double *error,
+                          double *recall, double *precision)
+     /* Computes xa-estimate of error rate, recall, and precision. See
+        T.
Joachims, Estimating the Generalization Performance of an SVM + Efficiently, IMCL, 2000. */ +{ + long i,looerror,looposerror,loonegerror; + long totex,totposex; + double xi,r_delta,r_delta_sq,sim=0; + long *sv2dnum=NULL,*sv=NULL,svnum; + + r_delta=estimate_r_delta(docs,totdoc,kernel_parm); + r_delta_sq=r_delta*r_delta; + + looerror=0; + looposerror=0; + loonegerror=0; + totex=0; + totposex=0; + svnum=0; + + if(learn_parm->xa_depth > 0) { + sv = (long *)my_malloc(sizeof(long)*(totdoc+11)); + for(i=0;isv_num;i++) + if(a[model->supvec[i]->docnum] + < (learn_parm->svm_cost[model->supvec[i]->docnum] + -learn_parm->epsilon_a)) { + sv[model->supvec[i]->docnum]=1; + svnum++; + } + sv2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); + clear_index(sv2dnum); + compute_index(sv,totdoc,sv2dnum); + } + + for(i=0;ib)*(double)label[i]); + if(xi<0) xi=0; + if(label[i]>0) { + totposex++; + } + if((learn_parm->rho*a[i]*r_delta_sq+xi) >= 1.0) { + if(learn_parm->xa_depth > 0) { /* makes assumptions */ + sim=distribute_alpha_t_greedily(sv2dnum,svnum,docs,a,i,label, + kernel_parm,learn_parm, + (double)((1.0-xi-a[i]*r_delta_sq)/(2.0*a[i]))); + } + if((learn_parm->xa_depth == 0) || + ((a[i]*kernel(kernel_parm,docs[i],docs[i])+a[i]*2.0*sim+xi) >= 1.0)) { + looerror++; + if(label[i]>0) { + looposerror++; + } + else { + loonegerror++; + } + } + } + totex++; + } + } + + (*error)=((double)looerror/(double)totex)*100.0; + (*recall)=(1.0-(double)looposerror/(double)totposex)*100.0; + (*precision)=(((double)totposex-(double)looposerror) + /((double)totposex-(double)looposerror+(double)loonegerror))*100.0; + + free(sv); + free(sv2dnum); +} + + +double distribute_alpha_t_greedily(long int *sv2dnum, long int svnum, + DOC **docs, double *a, + long int docnum, + long int *label, + KERNEL_PARM *kernel_parm, + LEARN_PARM *learn_parm, double thresh) + /* Experimental Code improving plain XiAlpha Estimates by + computing a better bound using a greedy optimzation strategy. */ +{ + long best_depth=0; + long i,j,k,d,skip,allskip; + double best,best_val[101],val,init_val_sq,init_val_lin; + long best_ex[101]; + CFLOAT *cache,*trow; + + cache=(CFLOAT *)my_malloc(sizeof(CFLOAT)*learn_parm->xa_depth*svnum); + trow = (CFLOAT *)my_malloc(sizeof(CFLOAT)*svnum); + + for(k=0;kxa_depth;d++) { + allskip=1; + if(d>=1) { + init_val_sq+=cache[best_ex[d-1]+svnum*(d-1)]; + for(k=0;kkernel_type == LINEAR) + val+=docs[sv2dnum[i]]->fvec->twonorm_sq; + else + val+=kernel(kernel_parm,docs[sv2dnum[i]],docs[sv2dnum[i]]); + for(j=0;jxa_depth; + } + } + + free(cache); + free(trow); + + /* printf("Distribute[%ld](%ld)=%f, ",docnum,best_depth,best); */ + return(best); +} + + +void estimate_transduction_quality(MODEL *model, long int *label, + long int *unlabeled, + long int totdoc, DOC **docs, double *lin) + /* Loo-bound based on observation that loo-errors must have an + equal distribution in both training and test examples, given + that the test examples are classified correctly. Compare + chapter "Constraints on the Transductive Hyperplane" in my + Dissertation. 
*/ +{ + long i,j,l=0,ulab=0,lab=0,labpos=0,labneg=0,ulabpos=0,ulabneg=0,totulab=0; + double totlab=0,totlabpos=0,totlabneg=0,labsum=0,ulabsum=0; + double r_delta,r_delta_sq,xi,xisum=0,asum=0; + + r_delta=estimate_r_delta(docs,totdoc,&(model->kernel_parm)); + r_delta_sq=r_delta*r_delta; + + for(j=0;j 0) + totlabpos++; + else + totlabneg++; + } + } + for(j=1;jsv_num;j++) { + i=model->supvec[j]->docnum; + xi=1.0-((lin[i]-model->b)*(double)label[i]); + if(xi<0) xi=0; + + xisum+=xi; + asum+=fabs(model->alpha[j]); + if(unlabeled[i]) { + ulabsum+=(fabs(model->alpha[j])*r_delta_sq+xi); + } + else { + labsum+=(fabs(model->alpha[j])*r_delta_sq+xi); + } + if((fabs(model->alpha[j])*r_delta_sq+xi) >= 1) { + l++; + if(unlabeled[model->supvec[j]->docnum]) { + ulab++; + if(model->alpha[j] > 0) + ulabpos++; + else + ulabneg++; + } + else { + lab++; + if(model->alpha[j] > 0) + labpos++; + else + labneg++; + } + } + } + printf("xacrit>=1: labeledpos=%.5f labeledneg=%.5f default=%.5f\n",(double)labpos/(double)totlab*100.0,(double)labneg/(double)totlab*100.0,(double)totlabpos/(double)(totlab)*100.0); + printf("xacrit>=1: unlabelpos=%.5f unlabelneg=%.5f\n",(double)ulabpos/(double)totulab*100.0,(double)ulabneg/(double)totulab*100.0); + printf("xacrit>=1: labeled=%.5f unlabled=%.5f all=%.5f\n",(double)lab/(double)totlab*100.0,(double)ulab/(double)totulab*100.0,(double)l/(double)(totdoc)*100.0); + printf("xacritsum: labeled=%.5f unlabled=%.5f all=%.5f\n",(double)labsum/(double)totlab*100.0,(double)ulabsum/(double)totulab*100.0,(double)(labsum+ulabsum)/(double)(totdoc)*100.0); + printf("r_delta_sq=%.5f xisum=%.5f asum=%.5f\n",r_delta_sq,xisum,asum); +} + +double estimate_margin_vcdim(MODEL *model, double w, double R, + KERNEL_PARM *kernel_parm) + /* optional: length of model vector in feature space */ + /* optional: radius of ball containing the data */ +{ + double h; + + /* follows chapter 5.6.4 in [Vapnik/95] */ + + if(w<0) { + w=model_length_s(model,kernel_parm); + } + if(R<0) { + R=estimate_sphere(model,kernel_parm); + } + h = w*w * R*R +1; + return(h); +} + +double estimate_sphere(MODEL *model, KERNEL_PARM *kernel_parm) + /* Approximates the radius of the ball containing */ + /* the support vectors by bounding it with the */ +{ /* length of the longest support vector. This is */ + register long j; /* pretty good for text categorization, since all */ + double xlen,maxxlen=0; /* documents have feature vectors of length 1. It */ + DOC *nulldoc; /* assumes that the center of the ball is at the */ + WORD nullword; /* origin of the space. */ + + nullword.wnum=0; + nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0)); + + for(j=1;jsv_num;j++) { + xlen=sqrt(kernel(kernel_parm,model->supvec[j],model->supvec[j]) + -2*kernel(kernel_parm,model->supvec[j],nulldoc) + +kernel(kernel_parm,nulldoc,nulldoc)); + if(xlen>maxxlen) { + maxxlen=xlen; + } + } + + free_example(nulldoc,1); + return(maxxlen); +} + +double estimate_r_delta(DOC **docs, long int totdoc, KERNEL_PARM *kernel_parm) +{ + long i; + double maxxlen,xlen; + DOC *nulldoc; /* assumes that the center of the ball is at the */ + WORD nullword; /* origin of the space. 
*/
+
+  nullword.wnum=0;
+  nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
+
+  maxxlen=0;
+  for(i=0;i<totdoc;i++) {
+    xlen=sqrt(kernel(kernel_parm,docs[i],docs[i])
+              -2*kernel(kernel_parm,docs[i],nulldoc)
+              +kernel(kernel_parm,nulldoc,nulldoc));
+    if(xlen>maxxlen) {
+      maxxlen=xlen;
+    }
+  }
+
+  free_example(nulldoc,1);
+  return(maxxlen);
+}
+
+double estimate_r_delta_average(DOC **docs, long int totdoc,
+                                KERNEL_PARM *kernel_parm)
+{
+  long i;
+  double avgxlen;
+  DOC *nulldoc;  /* assumes that the center of the ball is at the */
+  WORD nullword; /* origin of the space. */
+
+  nullword.wnum=0;
+  nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0));
+
+  avgxlen=0;
+  for(i=0;i<totdoc;i++) {
+    avgxlen+=sqrt(kernel(kernel_parm,docs[i],docs[i])
+                  -2*kernel(kernel_parm,docs[i],nulldoc)
+                  +kernel(kernel_parm,nulldoc,nulldoc));
+  }
+
+  free_example(nulldoc,1);
+  return(avgxlen/totdoc);
+}
+
+double length_of_longest_document_vector(DOC **docs, long int totdoc,
+                                         KERNEL_PARM *kernel_parm)
+{
+  long i;
+  double maxxlen,xlen;
+
+  maxxlen=0;
+  for(i=0;i<totdoc;i++) {
+    xlen=sqrt(kernel(kernel_parm,docs[i],docs[i]));
+    if(xlen>maxxlen) {
+      maxxlen=xlen;
+    }
+  }
+
+  return(maxxlen);
+}
+
+/****************************** IO-handling **********************************/
+
+void write_prediction(char *predfile, MODEL *model, double *lin,
+                      double *a, long int *unlabeled,
+                      long int *label, long int totdoc,
+                      LEARN_PARM *learn_parm)
+{
+  FILE *predfl;
+  long i;
+  double dist,a_max;
+
+  if(verbosity>=1) {
+    printf("Writing prediction file..."); fflush(stdout);
+  }
+  if ((predfl = fopen (predfile, "w")) == NULL)
+  { perror (predfile); exit (1); }
+  a_max=learn_parm->epsilon_a;
+  for(i=0;i<totdoc;i++) {
+    if((unlabeled[i]) && (a[i]>a_max)) {
+      a_max=a[i];
+    }
+  }
+  for(i=0;i<totdoc;i++) {
+    if(unlabeled[i]) {
+      if((a[i]>(learn_parm->epsilon_a))) {
+        dist=(double)label[i]*(1.0-learn_parm->epsilon_crit-a[i]/(a_max*2.0));
+      }
+      else {
+        dist=(lin[i]-model->b);
+      }
+      if(dist>0) {
+        fprintf(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist);
+      }
+      else {
+        fprintf(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist);
+      }
+    }
+  }
+  fclose(predfl);
+  if(verbosity>=1) {
+    printf("done\n");
+  }
+}
+
+void write_alphas(char *alphafile, double *a,
+                  long int *label, long int totdoc)
+{
+  FILE *alphafl;
+  long i;
+
+  if(verbosity>=1) {
+    printf("Writing alpha file..."); fflush(stdout);
+  }
+  if ((alphafl = fopen (alphafile, "w")) == NULL)
+  { perror (alphafile); exit (1); }
+  for(i=0;i<totdoc;i++) {
+    fprintf(alphafl,"%.18g\n",a[i]*(double)label[i]);
+  }
+  fclose(alphafl);
+  if(verbosity>=1) {
+    printf("done\n");
+  }
+}
+
diff --git a/thirdparty/svmlight/svm_learn.h b/thirdparty/svmlight/svm_learn.h
new file mode 100755
index 000000000..b1968ee5c
--- /dev/null
+++ b/thirdparty/svmlight/svm_learn.h
@@ -0,0 +1,169 @@
+/***********************************************************************/
+/*                                                                     */
+/*   svm_learn.h                                                       */
+/*                                                                     */
+/*   Declarations for learning module of Support Vector Machine.       */
+/*                                                                     */
+/*   Author: Thorsten Joachims                                         */
+/*   Date: 02.07.02                                                    */
+/*                                                                     */
+/*   Copyright (c) 2002  Thorsten Joachims - All rights reserved       */
+/*                                                                     */
+/*   This software is available for non-commercial use only. It must   */
+/*   not be modified and distributed without prior permission of the   */
+/*   author. The author is not responsible for implications from the   */
+/*   use of this software.
*/ +/* */ +/***********************************************************************/ + +#ifndef SVM_LEARN +#define SVM_LEARN + +void svm_learn_classification(DOC **, double *, long, long, LEARN_PARM *, + KERNEL_PARM *, KERNEL_CACHE *, MODEL *, + double *); +void svm_learn_regression(DOC **, double *, long, long, LEARN_PARM *, + KERNEL_PARM *, KERNEL_CACHE **, MODEL *); +void svm_learn_ranking(DOC **, double *, long, long, LEARN_PARM *, + KERNEL_PARM *, KERNEL_CACHE **, MODEL *); +void svm_learn_optimization(DOC **, double *, long, long, LEARN_PARM *, + KERNEL_PARM *, KERNEL_CACHE *, MODEL *, + double *); +long optimize_to_convergence(DOC **, long *, long, long, LEARN_PARM *, + KERNEL_PARM *, KERNEL_CACHE *, SHRINK_STATE *, + MODEL *, long *, long *, double *, + double *, double *, + TIMING *, double *, long, long); +long optimize_to_convergence_sharedslack(DOC **, long *, long, long, + LEARN_PARM *, + KERNEL_PARM *, KERNEL_CACHE *, SHRINK_STATE *, + MODEL *, double *, double *, double *, + TIMING *, double *); +double compute_objective_function(double *, double *, double *, double, + long *, long *); +void clear_index(long *); +void add_to_index(long *, long); +long compute_index(long *,long, long *); +void optimize_svm(DOC **, long *, long *, long *, double, long *, long *, + MODEL *, + long, long *, long, double *, double *, double *, + LEARN_PARM *, CFLOAT *, KERNEL_PARM *, QP *, double *); +void compute_matrices_for_optimization(DOC **, long *, long *, long *, double, + long *, + long *, long *, MODEL *, double *, + double *, double *, long, long, LEARN_PARM *, + CFLOAT *, KERNEL_PARM *, QP *); +long calculate_svm_model(DOC **, long *, long *, double *, double *, + double *, double *, LEARN_PARM *, long *, + long *, MODEL *); +long check_optimality(MODEL *, long *, long *, double *, double *, + double *, long, + LEARN_PARM *,double *, double, long *, long *, long *, + long *, long, KERNEL_PARM *); +long check_optimality_sharedslack(DOC **docs, MODEL *model, long int *label, + double *a, double *lin, double *c, double *slack, + double *alphaslack, long int totdoc, + LEARN_PARM *learn_parm, double *maxdiff, + double epsilon_crit_org, long int *misclassified, + long int *active2dnum, + long int *last_suboptimal_at, + long int iteration, KERNEL_PARM *kernel_parm); +void compute_shared_slacks(DOC **docs, long int *label, double *a, + double *lin, double *c, long int *active2dnum, + LEARN_PARM *learn_parm, + double *slack, double *alphaslack); +long identify_inconsistent(double *, long *, long *, long, LEARN_PARM *, + long *, long *); +long identify_misclassified(double *, long *, long *, long, + MODEL *, long *, long *); +long identify_one_misclassified(double *, long *, long *, long, + MODEL *, long *, long *); +long incorporate_unlabeled_examples(MODEL *, long *,long *, long *, + double *, double *, long, double *, + long *, long *, long, KERNEL_PARM *, + LEARN_PARM *); +void update_linear_component(DOC **, long *, long *, double *, double *, + long *, long, long, KERNEL_PARM *, + KERNEL_CACHE *, double *, + CFLOAT *, double *); +long select_next_qp_subproblem_grad(long *, long *, double *, + double *, double *, long, + long, LEARN_PARM *, long *, long *, + long *, double *, long *, KERNEL_CACHE *, + long, long *, long *); +long select_next_qp_subproblem_rand(long *, long *, double *, + double *, double *, long, + long, LEARN_PARM *, long *, long *, + long *, double *, long *, KERNEL_CACHE *, + long *, long *, long); +long select_next_qp_slackset(DOC **docs, long int *label, double *a, + 
double *lin, double *slack, double *alphaslack, + double *c, LEARN_PARM *learn_parm, + long int *active2dnum, double *maxviol); +void select_top_n(double *, long, long *, long); +void init_shrink_state(SHRINK_STATE *, long, long); +void shrink_state_cleanup(SHRINK_STATE *); +long shrink_problem(DOC **, LEARN_PARM *, SHRINK_STATE *, KERNEL_PARM *, + long *, long *, long, long, long, double *, long *); +void reactivate_inactive_examples(long *, long *, double *, SHRINK_STATE *, + double *, double*, long, long, long, LEARN_PARM *, + long *, DOC **, KERNEL_PARM *, + KERNEL_CACHE *, MODEL *, CFLOAT *, + double *, double *); + +/* cache kernel evaluations to improve speed */ +KERNEL_CACHE *kernel_cache_init(long, long); +void kernel_cache_cleanup(KERNEL_CACHE *); +void get_kernel_row(KERNEL_CACHE *,DOC **, long, long, long *, CFLOAT *, + KERNEL_PARM *); +void cache_kernel_row(KERNEL_CACHE *,DOC **, long, KERNEL_PARM *); +void cache_multiple_kernel_rows(KERNEL_CACHE *,DOC **, long *, long, + KERNEL_PARM *); +void kernel_cache_shrink(KERNEL_CACHE *,long, long, long *); +void kernel_cache_reset_lru(KERNEL_CACHE *); +long kernel_cache_malloc(KERNEL_CACHE *); +void kernel_cache_free(KERNEL_CACHE *,long); +long kernel_cache_free_lru(KERNEL_CACHE *); +CFLOAT *kernel_cache_clean_and_malloc(KERNEL_CACHE *,long); +long kernel_cache_touch(KERNEL_CACHE *,long); +long kernel_cache_check(KERNEL_CACHE *,long); +long kernel_cache_space_available(KERNEL_CACHE *); + +void compute_xa_estimates(MODEL *, long *, long *, long, DOC **, + double *, double *, KERNEL_PARM *, + LEARN_PARM *, double *, double *, double *); +double xa_estimate_error(MODEL *, long *, long *, long, DOC **, + double *, double *, KERNEL_PARM *, + LEARN_PARM *); +double xa_estimate_recall(MODEL *, long *, long *, long, DOC **, + double *, double *, KERNEL_PARM *, + LEARN_PARM *); +double xa_estimate_precision(MODEL *, long *, long *, long, DOC **, + double *, double *, KERNEL_PARM *, + LEARN_PARM *); +void avg_similarity_of_sv_of_one_class(MODEL *, DOC **, double *, long *, KERNEL_PARM *, double *, double *); +double most_similar_sv_of_same_class(MODEL *, DOC **, double *, long, long *, KERNEL_PARM *, LEARN_PARM *); +double distribute_alpha_t_greedily(long *, long, DOC **, double *, long, long *, KERNEL_PARM *, LEARN_PARM *, double); +double distribute_alpha_t_greedily_noindex(MODEL *, DOC **, double *, long, long *, KERNEL_PARM *, LEARN_PARM *, double); +void estimate_transduction_quality(MODEL *, long *, long *, long, DOC **, double *); +double estimate_margin_vcdim(MODEL *, double, double, KERNEL_PARM *); +double estimate_sphere(MODEL *, KERNEL_PARM *); +double estimate_r_delta_average(DOC **, long, KERNEL_PARM *); +double estimate_r_delta(DOC **, long, KERNEL_PARM *); +double length_of_longest_document_vector(DOC **, long, KERNEL_PARM *); + +void write_model(char *, MODEL *); +void write_prediction(char *, MODEL *, double *, double *, long *, long *, + long, LEARN_PARM *); +void write_alphas(char *, double *, long *, long); + +typedef struct cache_parm_s { + KERNEL_CACHE *kernel_cache; + CFLOAT *cache; + DOC **docs; + long m; + KERNEL_PARM *kernel_parm; + long offset,stepsize; +} cache_parm_t; + +#endif
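The kernel-cache functions declared above memoize kernel evaluations so that rows of the kernel matrix are not recomputed on every iteration. A minimal sketch of the cache lifecycle, mirroring how svm_learn_main.c below drives it; the example count and the 40 MB budget are placeholder values:

    /* Illustrative sketch only: allocate a cache sized for 'totdoc'
       training examples within a 40 MB budget, let training query it,
       then release it. A cache must not be reused across two training
       runs (see the comment in main() below). */
    static void kernel_cache_lifecycle_sketch(long totdoc)
    {
      KERNEL_CACHE *cache = kernel_cache_init(totdoc, 40); /* rows, MB */
      /* ... the svm_learn_* routines fetch rows via get_kernel_row() ... */
      kernel_cache_cleanup(cache);
    }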
diff --git a/thirdparty/svmlight/svm_learn_main.c b/thirdparty/svmlight/svm_learn_main.c new file mode 100755 index 000000000..e2a157dad --- /dev/null +++ b/thirdparty/svmlight/svm_learn_main.c @@ -0,0 +1,397 @@ +/***********************************************************************/ +/* */ +/* svm_learn_main.c */ +/* */ +/* Command line interface to the learning module of the */ +/* Support Vector Machine. */ +/* */ +/* Author: Thorsten Joachims */ +/* Date: 02.07.02 */ +/* */ +/* Copyright (c) 2000 Thorsten Joachims - All rights reserved */ +/* */ +/* This software is available for non-commercial use only. It must */ +/* not be modified and distributed without prior permission of the */ +/* author. The author is not responsible for implications from the */ +/* use of this software. */ +/* */ +/***********************************************************************/ + + +/* if svm_learn is used from C++, declare the interface as extern "C" */ +#ifdef __cplusplus +extern "C" { +#endif + +# include "svm_common.h" +# include "svm_learn.h" + +#ifdef __cplusplus +} +#endif + +char docfile[200]; /* file with training examples */ +char modelfile[200]; /* file for resulting classifier */ +char restartfile[200]; /* file with initial alphas */ + +void read_input_parameters(int, char **, char *, char *, char *, long *, + LEARN_PARM *, KERNEL_PARM *); +void wait_any_key(); +void print_help(); + + + +int main (int argc, char* argv[]) +{ + DOC **docs; /* training examples */ + long totwords,totdoc,i; + double *target; + double *alpha_in=NULL; + KERNEL_CACHE *kernel_cache; + LEARN_PARM learn_parm; + KERNEL_PARM kernel_parm; + MODEL *model=(MODEL *)my_malloc(sizeof(MODEL)); + + read_input_parameters(argc,argv,docfile,modelfile,restartfile,&verbosity, + &learn_parm,&kernel_parm); + read_documents(docfile,&docs,&target,&totwords,&totdoc); + if(restartfile[0]) alpha_in=read_alphas(restartfile,totdoc); + + if(kernel_parm.kernel_type == LINEAR) { /* don't need the cache */ + kernel_cache=NULL; + } + else { + /* Always get a new kernel cache. It is not possible to use the + same cache for two different training runs */ + kernel_cache=kernel_cache_init(totdoc,learn_parm.kernel_cache_size); + } + + if(learn_parm.type == CLASSIFICATION) { + svm_learn_classification(docs,target,totdoc,totwords,&learn_parm, + &kernel_parm,kernel_cache,model,alpha_in); + } + else if(learn_parm.type == REGRESSION) { + svm_learn_regression(docs,target,totdoc,totwords,&learn_parm, + &kernel_parm,&kernel_cache,model); + } + else if(learn_parm.type == RANKING) { + svm_learn_ranking(docs,target,totdoc,totwords,&learn_parm, + &kernel_parm,&kernel_cache,model); + } + else if(learn_parm.type == OPTIMIZATION) { + svm_learn_optimization(docs,target,totdoc,totwords,&learn_parm, + &kernel_parm,kernel_cache,model,alpha_in); + } + + if(kernel_cache) { + /* Free the memory used for the cache. */ + kernel_cache_cleanup(kernel_cache); + } + + /* Warning: The model contains references to the original data 'docs'. + If you want to free the original data, and only keep the model, you + have to make a deep copy of 'model'.
*/ + /* deep_copy_of_model=copy_model(model); */ + write_model(modelfile,model); + + free(alpha_in); + free_model(model,0); + for(i=0;i<totdoc;i++) free_example(docs[i],1); + free(docs); + free(target); + + return(0); +}
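As the warning in main() notes, freeing 'docs' invalidates the support-vector pointers inside 'model'. A hedged sketch of the deep-copy route hinted at by the commented-out copy_model() call; it assumes copy_model() duplicates the support vectors, and that passing 1 as free_model()'s second argument frees those copies as well:

    /* Sketch only: persist the model independently of the training data.
       Assumes copy_model() deep-copies the support vectors, as the
       commented-out line in main() suggests. */
    static void keep_model_only(MODEL *model, DOC **docs, long totdoc,
                                char *modelfile)
    {
      long i;
      MODEL *deep = copy_model(model);
      for(i = 0; i < totdoc; i++) free_example(docs[i], 1);
      free(docs);
      write_model(modelfile, deep);
      free_model(deep, 1); /* 1: also free the copied support vectors */
    }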
+ +void read_input_parameters(int argc,char *argv[],char *docfile,char *modelfile, + char *restartfile,long *verbosity, + LEARN_PARM *learn_parm,KERNEL_PARM *kernel_parm) +{ + long i; + char type[100]; + + /* set default */ + strcpy (modelfile, "svm_model"); + strcpy (learn_parm->predfile, "trans_predictions"); + strcpy (learn_parm->alphafile, ""); + strcpy (restartfile, ""); + (*verbosity)=1; + learn_parm->biased_hyperplane=1; + learn_parm->sharedslack=0; + learn_parm->remove_inconsistent=0; + learn_parm->skip_final_opt_check=0; + learn_parm->svm_maxqpsize=10; + learn_parm->svm_newvarsinqp=0; + learn_parm->svm_iter_to_shrink=-9999; + learn_parm->maxiter=100000; + learn_parm->kernel_cache_size=40; + learn_parm->svm_c=0.0; + learn_parm->eps=0.1; + learn_parm->transduction_posratio=-1.0; + learn_parm->svm_costratio=1.0; + learn_parm->svm_costratio_unlab=1.0; + learn_parm->svm_unlabbound=1E-5; + learn_parm->epsilon_crit=0.001; + learn_parm->epsilon_a=1E-15; + learn_parm->compute_loo=0; + learn_parm->rho=1.0; + learn_parm->xa_depth=0; + kernel_parm->kernel_type=0; + kernel_parm->poly_degree=3; + kernel_parm->rbf_gamma=1.0; + kernel_parm->coef_lin=1; + kernel_parm->coef_const=1; + strcpy(kernel_parm->custom,"empty"); + strcpy(type,"c"); + + for(i=1;(i<argc) && ((argv[i])[0] == '-');i++) { + switch ((argv[i])[1]) + { + case '?': print_help(); exit(0); + case 'z': i++; strcpy(type,argv[i]); break; + case 'v': i++; (*verbosity)=atol(argv[i]); break; + case 'b': i++; learn_parm->biased_hyperplane=atol(argv[i]); break; + case 'i': i++; learn_parm->remove_inconsistent=atol(argv[i]); break; + case 'f': i++; learn_parm->skip_final_opt_check=!atol(argv[i]); break; + case 'q': i++; learn_parm->svm_maxqpsize=atol(argv[i]); break; + case 'n': i++; learn_parm->svm_newvarsinqp=atol(argv[i]); break; + case '#': i++; learn_parm->maxiter=atol(argv[i]); break; + case 'h': i++; learn_parm->svm_iter_to_shrink=atol(argv[i]); break; + case 'm': i++; learn_parm->kernel_cache_size=atol(argv[i]); break; + case 'c': i++; learn_parm->svm_c=atof(argv[i]); break; + case 'w': i++; learn_parm->eps=atof(argv[i]); break; + case 'p': i++; learn_parm->transduction_posratio=atof(argv[i]); break; + case 'j': i++; learn_parm->svm_costratio=atof(argv[i]); break; + case 'e': i++; learn_parm->epsilon_crit=atof(argv[i]); break; + case 'o': i++; learn_parm->rho=atof(argv[i]); break; + case 'k': i++; learn_parm->xa_depth=atol(argv[i]); break; + case 'x': i++; learn_parm->compute_loo=atol(argv[i]); break; + case 't': i++; kernel_parm->kernel_type=atol(argv[i]); break; + case 'd': i++; kernel_parm->poly_degree=atol(argv[i]); break; + case 'g': i++; kernel_parm->rbf_gamma=atof(argv[i]); break; + case 's': i++; kernel_parm->coef_lin=atof(argv[i]); break; + case 'r': i++; kernel_parm->coef_const=atof(argv[i]); break; + case 'u': i++; strcpy(kernel_parm->custom,argv[i]); break; + case 'l': i++; strcpy(learn_parm->predfile,argv[i]); break; + case 'a': i++; strcpy(learn_parm->alphafile,argv[i]); break; + case 'y': i++; strcpy(restartfile,argv[i]); break; + default: printf("\nUnrecognized option %s!\n\n",argv[i]); + print_help(); + exit(0); + } + } + if(i>=argc) { + printf("\nNot enough input parameters!\n\n"); + wait_any_key(); + print_help(); + exit(0); + } + strcpy (docfile, argv[i]); + if((i+1)<argc) { + strcpy (modelfile, argv[i+1]); + } + if(learn_parm->svm_iter_to_shrink == -9999) { + if(kernel_parm->kernel_type == LINEAR) + learn_parm->svm_iter_to_shrink=2; + else + learn_parm->svm_iter_to_shrink=100; + } + if(strcmp(type,"c")==0) { + learn_parm->type=CLASSIFICATION; + } + else if(strcmp(type,"r")==0) { + learn_parm->type=REGRESSION; + } + else if(strcmp(type,"p")==0) { + learn_parm->type=RANKING; + } + else if(strcmp(type,"o")==0) { + learn_parm->type=OPTIMIZATION; + } + else if(strcmp(type,"s")==0) { + learn_parm->type=OPTIMIZATION; + learn_parm->sharedslack=1; + } + else { + printf("\nUnknown type '%s': Valid types are 'c' (classification), 'r' (regression), and 'p' (preference ranking).\n",type); + wait_any_key(); + print_help(); + exit(0); + } + if((learn_parm->skip_final_opt_check) + && (kernel_parm->kernel_type == LINEAR)) { + printf("\nIt does not make sense to skip the final optimality check for linear kernels.\n\n"); + learn_parm->skip_final_opt_check=0; + } + if((learn_parm->skip_final_opt_check) + && (learn_parm->remove_inconsistent)) { + printf("\nIt is necessary to do the final optimality check when removing inconsistent \nexamples.\n"); + wait_any_key(); + print_help(); + exit(0); + } + if((learn_parm->svm_maxqpsize<2)) { + printf("\nMaximum size of QP-subproblems not in valid range: %ld [2..]\n",learn_parm->svm_maxqpsize); + wait_any_key(); + print_help(); + exit(0); + } + if((learn_parm->svm_maxqpsize<learn_parm->svm_newvarsinqp)) { + printf("\nMaximum size of QP-subproblems [%ld] must be larger than the number of\n",learn_parm->svm_maxqpsize); + printf("new variables [%ld] entering the working set in each iteration.\n",learn_parm->svm_newvarsinqp); + wait_any_key(); + print_help(); + exit(0); + } + if(learn_parm->svm_iter_to_shrink<1) { + printf("\nMaximum number of iterations for shrinking not in valid range: %ld [1,..]\n",learn_parm->svm_iter_to_shrink); + wait_any_key(); + print_help(); + exit(0); + } + if(learn_parm->svm_c<0) { + printf("\nThe C parameter must be greater than zero!\n\n"); + wait_any_key(); + print_help(); + exit(0); + } + if(learn_parm->transduction_posratio>1) { + printf("\nThe fraction of unlabeled examples to classify as positives must\n"); + printf("be less than 1.0 !!!\n\n"); + wait_any_key(); + print_help(); + exit(0); + } + if(learn_parm->svm_costratio<=0) { + printf("\nThe COSTRATIO parameter must be greater than zero!\n\n"); + wait_any_key(); + print_help(); + exit(0); + } + if(learn_parm->epsilon_crit<=0) { + printf("\nThe epsilon parameter must be greater than zero!\n\n"); + wait_any_key(); + print_help(); + exit(0); + } + if(learn_parm->rho<0) { + printf("\nThe parameter rho for xi/alpha-estimates and leave-one-out pruning must\n"); + printf("be greater than zero (typically 1.0 or 2.0, see T. Joachims, Estimating the\n"); + printf("Generalization Performance of an SVM Efficiently, ICML, 2000.)!\n\n"); + wait_any_key(); + print_help(); + exit(0); + } + if((learn_parm->xa_depth<0) || (learn_parm->xa_depth>100)) { + printf("\nThe parameter depth for ext. xi/alpha-estimates must be in [0..100] (zero\n"); + printf("for switching to the conventional xa/estimates described in T. Joachims,\n"); + printf("Estimating the Generalization Performance of an SVM Efficiently, ICML, 2000.)\n"); + wait_any_key(); + print_help(); + exit(0); + } +} + +void wait_any_key() +{ + printf("\n(more)\n"); + (void)getc(stdin); +} + +void print_help() +{ + printf("\nSVM-light %s: Support Vector Machine, learning module %s\n",VERSION,VERSION_DATE); + copyright_notice(); + printf(" usage: svm_learn [options] example_file model_file\n\n"); + printf("Arguments:\n"); + printf(" example_file-> file with training data\n"); + printf(" model_file -> file to store learned decision rule in\n"); + + printf("General options:\n"); + printf(" -? -> this help\n"); + printf(" -v [0..3] -> verbosity level (default 1)\n"); + printf("Learning options:\n"); + printf(" -z {c,r,p} -> select between classification (c), regression (r),\n"); + printf(" and preference ranking (p) (default classification)\n"); + printf(" -c float -> C: trade-off between training error\n"); + printf(" and margin (default [avg. x*x]^-1)\n"); + printf(" -w [0..]
-> epsilon width of tube for regression\n"); + printf(" (default 0.1)\n"); + printf(" -j float -> Cost: cost-factor, by which training errors on\n"); + printf(" positive examples outweigh errors on negative\n"); + printf(" examples (default 1) (see [4])\n"); + printf(" -b [0,1] -> use biased hyperplane (i.e. x*w+b>0) instead\n"); + printf(" of unbiased hyperplane (i.e. x*w>0) (default 1)\n"); + printf(" -i [0,1] -> remove inconsistent training examples\n"); + printf(" and retrain (default 0)\n"); + printf("Performance estimation options:\n"); + printf(" -x [0,1] -> compute leave-one-out estimates (default 0)\n"); + printf(" (see [5])\n"); + printf(" -o ]0..2] -> value of rho for XiAlpha-estimator and for pruning\n"); + printf(" leave-one-out computation (default 1.0) (see [2])\n"); + printf(" -k [0..100] -> search depth for extended XiAlpha-estimator \n"); + printf(" (default 0)\n"); + printf("Transduction options (see [3]):\n"); + printf(" -p [0..1] -> fraction of unlabeled examples to be classified\n"); + printf(" into the positive class (default is the ratio of\n"); + printf(" positive and negative examples in the training data)\n"); + printf("Kernel options:\n"); + printf(" -t int -> type of kernel function:\n"); + printf(" 0: linear (default)\n"); + printf(" 1: polynomial (s a*b+c)^d\n"); + printf(" 2: radial basis function exp(-gamma ||a-b||^2)\n"); + printf(" 3: sigmoid tanh(s a*b + c)\n"); + printf(" 4: user defined kernel from kernel.h\n"); + printf(" -d int -> parameter d in polynomial kernel\n"); + printf(" -g float -> parameter gamma in rbf kernel\n"); + printf(" -s float -> parameter s in sigmoid/poly kernel\n"); + printf(" -r float -> parameter c in sigmoid/poly kernel\n"); + printf(" -u string -> parameter of user defined kernel\n"); + printf("Optimization options (see [1]):\n"); + printf(" -q [2..] -> maximum size of QP-subproblems (default 10)\n"); + printf(" -n [2..q] -> number of new variables entering the working set\n"); + printf(" in each iteration (default n = q). Set n<q to prevent\n"); + printf(" zig-zagging.\n"); + printf(" -m [5..] -> size of cache for kernel evaluations in MB (default 40)\n"); + printf(" The larger the faster...\n"); + printf(" -e float -> eps: Allow that error for termination criterion\n"); + printf(" [y [w*x+b] - 1] >= eps (default 0.001)\n"); + printf(" -h [5..] -> number of iterations a variable needs to be\n"); + printf(" optimal before considered for shrinking (default 100)\n"); + printf(" -f [0,1] -> do final optimality check for variables removed\n"); + printf(" by shrinking. Although this test is usually \n"); + printf(" positive, there is no guarantee that the optimum\n"); + printf(" was found if the test is omitted. (default 1)\n"); + printf(" -y string -> if option is given, reads alphas from file with given\n"); + printf(" name and uses them as starting point. (default 'disabled')\n"); + printf(" -# int -> terminate optimization, if no progress after this\n"); + printf(" number of iterations. (default 100000)\n"); + printf("Output options:\n"); + printf(" -l string -> file to write predicted labels of unlabeled\n"); + printf(" examples into after transductive learning\n"); + printf(" -a string -> write all alphas to this file after learning\n"); + printf(" (in the same order as in the training set)\n"); + wait_any_key(); + printf("\nMore details in:\n"); + printf("[1] T. Joachims, Making Large-Scale SVM Learning Practical.
Advances in\n"); + printf(" Kernel Methods - Support Vector Learning, B. Schölkopf and C. Burges and\n"); + printf(" A. Smola (ed.), MIT Press, 1999.\n"); + printf("[2] T. Joachims, Estimating the Generalization Performance of an SVM\n"); + printf(" Efficiently. International Conference on Machine Learning (ICML), 2000.\n"); + printf("[3] T. Joachims, Transductive Inference for Text Classification using Support\n"); + printf(" Vector Machines. International Conference on Machine Learning (ICML),\n"); + printf(" 1999.\n"); + printf("[4] K. Morik, P. Brockhausen, and T. Joachims, Combining statistical learning\n"); + printf(" with a knowledge-based approach - A case study in intensive care \n"); + printf(" monitoring. International Conference on Machine Learning (ICML), 1999.\n"); + printf("[5] T. Joachims, Learning to Classify Text Using Support Vector\n"); + printf(" Machines: Methods, Theory, and Algorithms. Dissertation, Kluwer,\n"); + printf(" 2002.\n\n"); +} + +
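For reference, two invocations consistent with the help text above; train.dat and model are placeholder file names:

    svm_learn -t 0 -c 1.0 train.dat model               (linear kernel, C=1.0)
    svm_learn -z r -t 2 -g 0.5 -m 100 train.dat model   (RBF regression, 100 MB kernel cache)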
diff --git a/thirdparty/svmlight/svm_loqo.c b/thirdparty/svmlight/svm_loqo.c new file mode 100755 index 000000000..ff31a655f --- /dev/null +++ b/thirdparty/svmlight/svm_loqo.c @@ -0,0 +1,211 @@ +/***********************************************************************/ +/* */ +/* svm_loqo.c */ +/* */ +/* Interface to the PR_LOQO optimization package for SVM. */ +/* */ +/* Author: Thorsten Joachims */ +/* Date: 19.07.99 */ +/* */ +/* Copyright (c) 1999 Universitaet Dortmund - All rights reserved */ +/* */ +/* This software is available for non-commercial use only. It must */ +/* not be modified and distributed without prior permission of the */ +/* author. The author is not responsible for implications from the */ +/* use of this software. */ +/* */ +/***********************************************************************/ + +# include <math.h> +# include "pr_loqo/pr_loqo.h" +# include "svm_common.h" + +/* Common Block Declarations */ + +long verbosity; + +/* /////////////////////////////////////////////////////////////// */ + +# define DEF_PRECISION_LINEAR 1E-8 +# define DEF_PRECISION_NONLINEAR 1E-14 + +double *optimize_qp(); +double *primal=0,*dual=0; +double init_margin=0.15; +long init_iter=500,precision_violations=0; +double model_b; +double opt_precision=DEF_PRECISION_LINEAR; + +/* /////////////////////////////////////////////////////////////// */ + +void *my_malloc(); + +double *optimize_qp(qp,epsilon_crit,nx,threshold,learn_parm) +QP *qp; +double *epsilon_crit; +long nx; /* Maximum number of variables in QP */ +double *threshold; +LEARN_PARM *learn_parm; +/* start the optimizer and return the optimal values */ +{ + register long i,j,result; + double margin,obj_before,obj_after; + double sigdig,dist,epsilon_loqo; + int iter; + + if(!primal) { /* allocate memory at first call */ + primal=(double *)my_malloc(sizeof(double)*nx*3); + dual=(double *)my_malloc(sizeof(double)*(nx*2+1)); + } + + if(verbosity>=4) { /* really verbose */ + printf("\n\n"); + for(i=0;i<qp->opt_n;i++) { + printf("%f: ",qp->opt_g0[i]); + for(j=0;j<qp->opt_n;j++) { + printf("%f ",qp->opt_g[i*qp->opt_n+j]); + } + printf(": a%ld=%.10f < %f",i,qp->opt_xinit[i],qp->opt_up[i]); + printf(": y=%f\n",qp->opt_ce[i]); + } + for(j=0;j<qp->opt_m;j++) { + printf("EQ-%ld: %f*a0",j,qp->opt_ce[j]); + for(i=1;i<qp->opt_n;i++) { + printf(" + %f*a%ld",qp->opt_ce[i],i); + } + printf(" = %f\n\n",-qp->opt_ce0[0]); + } +} + + obj_before=0; /* calculate objective before optimization */ + for(i=0;i<qp->opt_n;i++) { + obj_before+=(qp->opt_g0[i]*qp->opt_xinit[i]); + obj_before+=(0.5*qp->opt_xinit[i]*qp->opt_xinit[i]*qp->opt_g[i*qp->opt_n+i]); + for(j=0;j<i;j++) { + obj_before+=(qp->opt_xinit[j]*qp->opt_xinit[i]*qp->opt_g[j*qp->opt_n+i]); + } + } + + result=STILL_RUNNING; + qp->opt_ce0[0]*=(-1.0); + /* Run pr_loqo. If a run fails, try again with parameters which lead */ + /* to a slower, but more robust setting. */ + for(margin=init_margin,iter=init_iter; + (margin<=0.9999999) && (result!=OPTIMAL_SOLUTION);) { + sigdig=-log10(opt_precision); + + result=pr_loqo((int)qp->opt_n,(int)qp->opt_m, + (double *)qp->opt_g0,(double *)qp->opt_g, + (double *)qp->opt_ce,(double *)qp->opt_ce0, + (double *)qp->opt_low,(double *)qp->opt_up, + (double *)primal,(double *)dual, + (int)(verbosity-2), + (double)sigdig,(int)iter, + (double)margin,(double)(qp->opt_up[0])/4.0,(int)0); + + if(isnan(dual[0])) { /* check for choldc problem */ + if(verbosity>=2) { + printf("NOTICE: Restarting PR_LOQO with more conservative parameters.\n"); + } + if(init_margin<0.80) { /* become more conservative in general */ + init_margin=(4.0*margin+1.0)/5.0; + } + margin=(margin+1.0)/2.0; + (opt_precision)*=10.0; /* reduce precision */ + if(verbosity>=2) { + printf("NOTICE: Reducing precision of PR_LOQO.\n"); + } + } + else if(result!=OPTIMAL_SOLUTION) { + iter+=2000; + init_iter+=10; + (opt_precision)*=10.0; /* reduce precision */ + if(verbosity>=2) { + printf("NOTICE: Reducing precision of PR_LOQO due to (%ld).\n",result); + } + } + } + + if(qp->opt_m) /* Thanks to Alex Smola for this hint */ + model_b=dual[0]; + else + model_b=0; + + /* Check the precision of the alphas. If results of current optimization */ + /* violate KT-Conditions, relax the epsilon on the bounds on alphas. */ + epsilon_loqo=1E-10; + for(i=0;i<qp->opt_n;i++) { + dist=-model_b*qp->opt_ce[i]; + dist+=(qp->opt_g0[i]+1.0); + for(j=0;j<i;j++) { + dist+=(primal[j]*qp->opt_g[j*qp->opt_n+i]); + } + for(j=i;j<qp->opt_n;j++) { + dist+=(primal[j]*qp->opt_g[i*qp->opt_n+j]); + } + /* printf("LOQO: a[%d]=%f, dist=%f, b=%f\n",i,primal[i],dist,dual[0]); */ + if((primal[i]<(qp->opt_up[i]-epsilon_loqo)) && (dist < (1.0-(*epsilon_crit)))) { + epsilon_loqo=(qp->opt_up[i]-primal[i])*2.0; + } + else if((primal[i]>(0+epsilon_loqo)) && (dist > (1.0+(*epsilon_crit)))) { + epsilon_loqo=primal[i]*2.0; + } + } + + for(i=0;i<qp->opt_n;i++) { /* clip alphas to bounds */ + if(primal[i]<=(0+epsilon_loqo)) { + primal[i]=0; + } + else if(primal[i]>=(qp->opt_up[i]-epsilon_loqo)) { + primal[i]=qp->opt_up[i]; + } + } + + obj_after=0; /* calculate objective after optimization */ + for(i=0;i<qp->opt_n;i++) { + obj_after+=(qp->opt_g0[i]*primal[i]); + obj_after+=(0.5*primal[i]*primal[i]*qp->opt_g[i*qp->opt_n+i]); + for(j=0;j<i;j++) { + obj_after+=(primal[j]*primal[i]*qp->opt_g[j*qp->opt_n+i]); + } + } + + /* if optimizer returned NAN values, reset and retry with smaller */ + /* working set. */ + if(isnan(obj_after) || isnan(model_b)) { + for(i=0;i<qp->opt_n;i++) { + primal[i]=qp->opt_xinit[i]; + } + model_b=0; + if(learn_parm->svm_maxqpsize>2) { + learn_parm->svm_maxqpsize--; /* decrease size of qp-subproblems */ + } + } + + if(obj_after >= obj_before) { /* check whether there was progress */ + (opt_precision)/=100.0; + precision_violations++; + if(verbosity>=2) { + printf("NOTICE: Increasing Precision of PR_LOQO.\n"); + } + } + + if(precision_violations > 500) { + (*epsilon_crit)*=10.0; + precision_violations=0; + if(verbosity>=1) { + printf("\nWARNING: Relaxing epsilon on KT-Conditions.\n"); + } + } + + (*threshold)=model_b; + + if(result!=OPTIMAL_SOLUTION) { + printf("\nERROR: PR_LOQO did not converge. \n"); + return(qp->opt_xinit); + } + else { + return(primal); + } +}
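The obj_before/obj_after loops in optimize_qp() evaluate the quadratic objective g0'x + 0.5*x'Gx by visiting only the diagonal and the strict lower triangle of G, which is valid because the Hessian G is symmetric (G[i][j] = G[j][i]), so each off-diagonal pair contributes twice and the 0.5 cancels. A self-contained sketch of the same accumulation; the function name is illustrative, not part of SVM-Light:

    /* Sketch: evaluate f(x) = g0.x + 0.5*x'Gx for a symmetric n-by-n
       matrix G stored row-major, visiting each off-diagonal pair once,
       exactly as the loops in optimize_qp() do. */
    static double qp_objective(long n, const double *g0, const double *g,
                               const double *x)
    {
      double obj = 0.0;
      long i, j;
      for(i = 0; i < n; i++) {
        obj += g0[i] * x[i];
        obj += 0.5 * x[i] * x[i] * g[i * n + i];   /* diagonal term */
        for(j = 0; j < i; j++)                     /* strict lower triangle */
          obj += x[j] * x[i] * g[j * n + i];       /* covers (i,j) and (j,i) */
      }
      return obj;
    }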