From 8a59b384060adaf1c3379dd36c02cc2e1c55fe67 Mon Sep 17 00:00:00 2001
From: Nicu Tofan <nicu.tofan@gmail.com>
Date: Sun, 25 Aug 2013 23:14:40 +0300
Subject: [PATCH] Covering some parts of the cogutil in doxydocs; reformatted
 cluster.h & .c to make description visible to doxygen

---
 doc/doxydoc/libcogutil.dox    |  26 +-
 doc/doxydoc/progcogserver.dox |   6 +-
 doc/doxygen.conf.in           |   2 +-
 opencog/util/algorithm.h      |  48 +-
 opencog/util/ansi.h           |   9 +-
 opencog/util/cluster.c        | 850 +++++-----------------------------
 opencog/util/cluster.h        | 638 ++++++++++++++++++++++++-
 7 files changed, 797 insertions(+), 782 deletions(-)
diff --git a/doc/doxydoc/libcogutil.dox b/doc/doxydoc/libcogutil.dox
index fe242c608cf..b9d06fcc442 100644
--- a/doc/doxydoc/libcogutil.dox
+++ b/doc/doxydoc/libcogutil.dox
@@ -1,3 +1,4 @@
+namespace opencog {
 /**
 
 \page libcogutil cogutil library
@@ -8,7 +9,30 @@ found no place elsewhere. To build it use:
 make cogutil
 @endcode
 
+\section sect_files Files
 
+\subsection ssect_algorithm_h algorithm.h
+Hosts small templated functions like for_each(),
+accumulate2d(), etc. Sets utilities are also present.
+
+\subsection ssect_ansi ansi (.h & .cc)
+ANSI codes for colored output in terminals that support this feature.
+Instead of directly using the codes, make use of the ansi_code()
+function that checks the \b ANSI_ENABLED configuration variable
+and either appends the code or an empty string.
+
+\subsection ssect_basedvar based_variant.h
+Defines a simple variant with base; seems to be used only in defining
+moses::disc_knob type.
+
+\section sect_libs Libraries
+
+\subsection ssect_clustering The C Clustering Library
+This library (version 1.49) was written at the Laboratory of DNA Information Analysis,
+Human Genome Center, Institute of Medical Science, University of Tokyo
+by Michiel Jan Laurens de Hoon.
+
+Methods such as clusterdistance(), getclustercentroids(), kcluster() are provided.
 
 \if MARKER_TREE_START
 ignored by doxygen; used as markers for update-links.py;
@@ -23,4 +47,4 @@ ignored by doxygen; used as markers for update-links.py;
 ignored by doxygen; used as markers for update-links.py;
 \endif
 */
-
+} //~namespace opencog
diff --git a/doc/doxydoc/progcogserver.dox b/doc/doxydoc/progcogserver.dox
index 981345ab545..2f09545ac37 100644
--- a/doc/doxydoc/progcogserver.dox
+++ b/doc/doxydoc/progcogserver.dox
@@ -1,7 +1,11 @@
 /**
-
 \page progcogserver cogserver program
 
+To build it use:
+@code
+make cogserver
+@endcode
+
 \if MARKER_TREE_START
 ignored by doxygen; used as markers for update-links.py;
 \endif
diff --git a/doc/doxygen.conf.in b/doc/doxygen.conf.in
index 3bcbfb411bd..fc9009cbb8e 100644
--- a/doc/doxygen.conf.in
+++ b/doc/doxygen.conf.in
@@ -251,7 +251,7 @@ IDL_PROPERTY_SUPPORT   = YES
 # member in the group (if any) for the other members of the group. By default 
 # all members of a group must be documented explicitly.
 
-DISTRIBUTE_GROUP_DOC   = NO
+DISTRIBUTE_GROUP_DOC   = YES
 
 # Set the SUBGROUPING tag to YES (the default) to allow class member groups of 
 # the same type (for instance a group of public functions) to be put as a 
diff --git a/opencog/util/algorithm.h b/opencog/util/algorithm.h
index f2966540be1..5b15b871576 100644
--- a/opencog/util/algorithm.h
+++ b/opencog/util/algorithm.h
@@ -13,7 +13,7 @@
 namespace opencog
 {
 
-//these needs to be changed for non-gcc
+/// @todo these needs to be changed for non-gcc
 using __gnu_cxx::copy_n;
 using __gnu_cxx::lexicographical_compare_3way;
 using __gnu_cxx::random_sample_n;
@@ -21,7 +21,7 @@ using __gnu_cxx::random_sample;
 using __gnu_cxx::is_heap;
 using __gnu_cxx::is_sorted;
 
-//binary and ternary and quaternary for_each
+//! binary for_each
 template<typename It1, typename It2, typename F>
 F for_each(It1 from1, It1 to1, It2 from2, F f)
 {
@@ -29,6 +29,7 @@ F for_each(It1 from1, It1 to1, It2 from2, F f)
         f(*from1, *from2);
     return f;
 }
+//! ternary for_each
 template<typename It1, typename It2, typename It3, typename F>
 F for_each(It1 from1, It1 to1, It2 from2, It3 from3, F f)
 {
@@ -36,6 +37,7 @@ F for_each(It1 from1, It1 to1, It2 from2, It3 from3, F f)
         f(*from1, *from2, *from3);
     return f;
 }
+//! quaternary for_each
 template<typename It1, typename It2, typename It3, typename It4, typename F>
 F for_each(It1 from1, It1 to1, It2 from2, It3 from3, It4 from4, F f)
 {
@@ -44,7 +46,7 @@ F for_each(It1 from1, It1 to1, It2 from2, It3 from3, It4 from4, F f)
     return f;
 }
 
-//accumulate over a range of containers
+//! accumulate over a range of containers
 template <class iter, class T>
 T accumulate2d(iter first, iter last, T init)
 {
@@ -53,14 +55,14 @@ T accumulate2d(iter first, iter last, T init)
     return init;
 }
 
-// appends b at the end of a, that is a = a@b
+//! appends b at the end of a, that is a = a@b
 template<class T>
 void append(T& a, const T& b) {
     a.insert(a.end(), b.begin(), b.end());
 }
 
-//erase the intersection of sorted ranges [from1,to1) and [from2,to2) from c,
-//leaving the difference
+//! erase the intersection of sorted ranges [from1,to1) and [from2,to2) from c,
+//! leaving the difference
 template<typename Erase, typename It1, typename It2, typename Comp>
 void erase_set_intersection(Erase erase, It1 from1, It1 to1,
                             It2 from2, It2 to2, Comp comp)
@@ -81,8 +83,8 @@ void erase_set_intersection(Erase erase, It1 from1, It1 to1,
         }
 }
 
-//erase the difference of sorted ranges [from1,to1) and [from2,to2) from c,
-//leaving the intersection
+//! erase the difference of sorted ranges [from1,to1) and [from2,to2) from c,
+//! leaving the intersection
 template<typename Erase, typename It1, typename It2, typename Comp>
 void erase_set_difference(Erase erase, It1 from1, It1 to1,
                           It2 from2, It2 to2, Comp comp)
@@ -105,9 +107,9 @@ void erase_set_difference(Erase erase, It1 from1, It1 to1,
         erase(from1++);
 }
 
-//insert [from2,to2)-[from1,to1) with inserter
-//i.e., if insert inserts into the container holding [from1,to1),
-//it will now hold the union
+//! insert [from2,to2)-[from1,to1) with inserter
+//! i.e., if insert inserts into the container holding [from1,to1),
+//! it will now hold the union
 template<typename Insert, typename It1, typename It2, typename Comp>
 void insert_set_complement(Insert insert, It1 from1, It1 to1,
                            It2 from2, It2 to2, Comp comp)
@@ -133,6 +135,7 @@ void insert_set_complement(Insert insert, It1 from1, It1 to1,
     }
 }
 
+//! determine if the intersection of two sets is the empty set
 template<typename It1, typename It2, typename Comp>
 bool has_empty_intersection(It1 from1, It1 to1,
                             It2 from2, It2 to2, Comp comp)
@@ -152,6 +155,7 @@ bool has_empty_intersection(It1 from1, It1 to1,
     return true;
 }
 
+//! determine if the intersection of two sets is the empty set
 template<typename Set>
 bool has_empty_intersection(const Set& ls, const Set& rs) {
     return has_empty_intersection(ls.begin(), ls.end(),
@@ -192,7 +196,7 @@ bool is_disjoint(const Set1 &set1, const Set2 &set2)
 }
 
 /**
- * return a singleton set with the input given in it
+ * \return a singleton set with the input given in it
  */
 template<typename Set>
 Set make_singleton_set(const typename Set::value_type& v) {
@@ -202,7 +206,7 @@ Set make_singleton_set(const typename Set::value_type& v) {
 }
 
 /**
- * Return s1 union s2
+ * \return s1 union s2
  * s1 and s2 being std::set or similar concept
  */
 template<typename Set>
@@ -213,7 +217,7 @@ Set set_union(const Set& s1, const Set& s2) {
 }
 
 /**
- * Return s1 inter s2
+ * \return s1 inter s2
  * s1 and s2 must be sorted
  */
 template<typename Set>
@@ -225,7 +229,7 @@ Set set_intersection(const Set& s1, const Set& s2) {
 }
 
 /**
- * Returns s1 - s2
+ * \return s1 - s2
  * s1 and s2 must be sorted
  */
 template<typename Set>
@@ -236,8 +240,8 @@ Set set_difference(const Set& s1, const Set& s2) {
     return res;
 }
 
-// Predicate maps to the range [0, n)
-// n-1 values (the pivots) are copied to out
+//! Predicate maps to the range [0, n)
+//! n-1 values (the pivots) are copied to out
 template<typename It, typename Pred, typename Out>
 Out n_way_partition(It begin, It end, const Pred p, int n, Out out)
 {
@@ -248,16 +252,13 @@ Out n_way_partition(It begin, It end, const Pred p, int n, Out out)
 }
 
 /**
- * return the power set ps of s such that all elements of ps are
+ * \return the power set ps of s such that all elements of ps are
  * subsets of size n or below.
  *
  * @param s       the input set
- *
  * @param n       the size of the largest subset of s. If n is larger than
  *                |s| then the size of the largest subset of s will be |s|
- *
  * @param exact   if true then do not include subsets of size below n
- *
  * @return        the power set of s with subsets up to size n
  */
 template<typename Set> std::set<Set> powerset(const Set& s, size_t n,
@@ -280,8 +281,9 @@ template<typename Set> std::set<Set> powerset(const Set& s, size_t n,
         res.insert(Set());
     return res;
 }
+
 /**
- * return the power set of s.
+ * \return the power set of s.
  */
 template<typename Set> std::set<Set> powerset(const Set& s)
 {
@@ -302,7 +304,7 @@ Seq seq_filtered(const Seq& seq, const Indices& indices) {
         res.push_back(seq[idx]);
     return res;
 }
- 
+
     
 } //~namespace opencog
 
diff --git a/opencog/util/ansi.h b/opencog/util/ansi.h
index 337ad3d64c2..d5dec980ab3 100644
--- a/opencog/util/ansi.h
+++ b/opencog/util/ansi.h
@@ -26,6 +26,8 @@
 #include <opencog/util/Config.h>
 namespace opencog {
 
+//!@{
+//! color codes in ANSI
 static const char* const COLOR_OFF = "\033[0m";
 static const char* const BRIGHT = "\033[1m";
 static const char* const RED = "\033[31m";
@@ -35,11 +37,15 @@ static const char* const BLUE = "\033[34m";
 static const char* const MAGENTA = "\033[35m";
 static const char* const CYAN = "\033[36m";
 static const char* const WHITE = "\033[37m";
-  
+//!@}
+
+//! inserts the code only if \b ANSI_ENABLED variable is set
 inline void ansi_code(std::string &s,const std::string &code) {
     if (config().get_bool("ANSI_ENABLED")) s.append(code);
 }
 
+//!@{
+//! appends the code if \b ANSI_ENABLED variable is set
 inline void ansi_off(std::string &s) { ansi_code(s,COLOR_OFF); }
 inline void ansi_bright(std::string &s) { ansi_code(s,BRIGHT); }
 inline void ansi_red(std::string &s) { ansi_code(s,RED); }
@@ -49,5 +55,6 @@ inline void ansi_blue(std::string &s) { ansi_code(s,BLUE); }
 inline void ansi_magenta(std::string &s) { ansi_code(s,MAGENTA); }
 inline void ansi_cyan(std::string &s) { ansi_code(s,CYAN); }
 inline void ansi_white(std::string &s) { ansi_code(s,WHITE); }
+//!@}
 
 }
diff --git a/opencog/util/cluster.c b/opencog/util/cluster.c
index 839d7bdfb1d..c61d4779ffc 100644
--- a/opencog/util/cluster.c
+++ b/opencog/util/cluster.c
@@ -64,13 +64,6 @@ double mean(int n, double x[])
 /* ************************************************************************ */
 
 double median (int n, double x[])
-/*
-Find the median of X(1), ... , X(N), using as much of the quicksort
-algorithm as is needed to isolate it.
-N.B. On exit, the array X is partially ordered.
-Based on Alan J. Miller's median.f90 routine.
-*/
-
 { int i, j;
   int nr = n / 2;
   int nl = nr - 1;
@@ -159,15 +152,20 @@ Based on Alan J. Miller's median.f90 routine.
 
 /* ********************************************************************** */
 
-static const double* sortdata = NULL; /* used in the quicksort algorithm */
+//! used in the quicksort algorithm
+//! 
+static const double* sortdata = NULL; 
 
 /* ---------------------------------------------------------------------- */
 
+/**
+ *  \internal
+ *  Helper function for sort. Previously, this was a nested function under
+ *	sort, which is not allowed under ANSI C.
+ *	
+ */
 static
 int compare(const void* a, const void* b)
-/* Helper function for sort. Previously, this was a nested function under
- * sort, which is not allowed under ANSI C.
- */
 { const int i1 = *(const int*)a;
   const int i2 = *(const int*)b;
   const double term1 = sortdata[i1];
@@ -180,10 +178,6 @@ int compare(const void* a, const void* b)
 /* ---------------------------------------------------------------------- */
 
 void sort(int n, const double data[], int index[])
-/* Sets up an index table given the data, such that data[index[]] is in
- * increasing order. Sorting is done on the indices; the array data
- * is unchanged.
- */
 { int i;
   sortdata = data;
   for (i = 0; i < n; i++) index[i] = i;
@@ -192,13 +186,16 @@ void sort(int n, const double data[], int index[])
 
 /* ********************************************************************** */
 
-static double* getrank (int n, double data[])
-/* Calculates the ranks of the elements in the array data. Two elements with
+/**
+ * \internal
+ * Calculates the ranks of the elements in the array data. Two elements with
  * the same value get the same rank, equal to the average of the ranks had the
  * elements different values. The ranks are returned as a newly allocated
  * array that should be freed by the calling routine. If getrank fails due to
  * a memory allocation error, it returns NULL.
+ * 
  */
+static double* getrank (int n, double data[])
 { int i;
   double* rank;
   int* index;
@@ -284,9 +281,9 @@ freedatamask(int n, double** data, int** mask)
 
 /* ---------------------------------------------------------------------- */
 
-static
-double find_closest_pair(int n, double** distmatrix, int* ip, int* jp)
-/*
+/**
+\internal
+
 This function searches the distance matrix to find the pair with the shortest
 distance between them. The indices of the pair are returned in ip and jp; the
 distance itself is returned by the function.
@@ -305,7 +302,11 @@ the shortest distance.
 jp         (output) int*
 A pointer to the integer that is to receive the second index of the pair with
 the shortest distance.
+
+
 */
+static
+double find_closest_pair(int n, double** distmatrix, int* ip, int* jp)
 { int i, j;
   double temp;
   double distance = distmatrix[1][0];
@@ -326,8 +327,9 @@ the shortest distance.
 
 /* ********************************************************************* */
 
-static int svd(int m, int n, double** u, double w[], double** vt)
-/*
+/**
+ * \internal
+ *
  *   This subroutine is a translation of the Algol procedure svd,
  *   Num. Math. 14, 403-420(1970) by Golub and Reinsch.
  *   Handbook for Auto. Comp., Vol II-Linear Algebra, 134-151(1971).
@@ -390,7 +392,10 @@ static int svd(int m, int n, double** u, double w[], double** vt)
  *       internally in this routine, instead of passing it as an
  *       argument. If the allocation fails, svd returns -1.
  *   2003.06.05
+ *
+ * 
  */
+static int svd(int m, int n, double** u, double w[], double** vt)
 { int i, j, k, i1, k1, l1, its;
   double c,f,h,s,x,y,z;
   int l = 0;
@@ -809,63 +814,7 @@ static int svd(int m, int n, double** u, double w[], double** vt)
 /* ********************************************************************* */
 
 int pca(int nrows, int ncolumns, double** u, double** v, double* w)
-/*
-Purpose
-=======
-
-This subroutine uses the singular value decomposition to perform principal
-components analysis of a real nrows by ncolumns rectangular matrix.
-
-Arguments
-=========
-
-nrows     (input) int
-The number of rows in the matrix u.
 
-ncolumns  (input) int
-The number of columns in the matrix v.
-
-u          (input) double[nrows][ncolumns]
-On input, the array containing the data to which the principal component
-analysis should be applied. The function assumes that the mean has already been
-subtracted of each column, and hence that the mean of each column is zero.
-On output, see below.
-
-v          (input) double[n][n], where n = min(nrows, ncolumns)
-Not used on input.
-
-w          (input) double[n], where n = min(nrows, ncolumns)
-Not used on input.
-
-
-Return value
-============
-
-On output:
-
-If nrows >= ncolumns, then
-
-u contains the coordinates with respect to the principal components;
-v contains the principal component vectors.
-
-The dot product u . v reproduces the data that were passed in u.
-
-
-If nrows < ncolumns, then
-
-u contains the principal component vectors;
-v contains the coordinates with respect to the principal components.
-
-The dot product v . u reproduces the data that were passed in u.
-
-The eigenvalues of the covariance matrix are returned in w.
-
-The arrays u, v, and w are sorted according to eigenvalue, with the largest
-eigenvalues appearing first.
-
-The function returns 0 if successful, -1 if memory allocation fails, and a
-positive integer if the singular value decomposition fails to converge.
-*/
 {
     int i;
     int j;
@@ -932,11 +881,10 @@ positive integer if the singular value decomposition fails to converge.
 
 /* ********************************************************************* */
 
-static
-double euclid (int n, double** data1, double** data2, int** mask1, int** mask2,
-  const double weight[], int index1, int index2, int transpose)
- 
-/*
+
+/**
+\internal
+
 Purpose
 =======
 
@@ -977,9 +925,13 @@ transpose (input) int
 If transpose==0, the distance between two rows in the matrix is calculated.
 Otherwise, the distance between two columns in the matrix is calculated.
 
-============================================================================
+
 */
-{ double result = 0.;
+static
+double euclid (int n, double** data1, double** data2, int** mask1, int** mask2,
+  const double weight[], int index1, int index2, int transpose)
+{
+  double result = 0.;
   double tweight = 0;
   int i;
   if (transpose==0) /* Calculate the distance between two rows */
@@ -1007,13 +959,10 @@ Otherwise, the distance between two columns in the matrix is calculated.
 
 /* ********************************************************************* */
 
-static
-double cityblock (int n, double** data1, double** data2, int** mask1,
-  int** mask2, const double weight[], int index1, int index2, int transpose)
 
-/*
-Purpose
-=======
+/**
+\internal
+
 
 The cityblock routine calculates the weighted "City Block" distance between
 two rows or columns in a matrix. City Block distance is defined as the
@@ -1053,8 +1002,10 @@ Index of the second row or column.
 transpose (input) int
 If transpose==0, the distance between two rows in the matrix is calculated.
 Otherwise, the distance between two columns in the matrix is calculated.
-
-============================================================================ */
+*/
+static
+double cityblock (int n, double** data1, double** data2, int** mask1,
+  int** mask2, const double weight[], int index1, int index2, int transpose)
 { double result = 0.;
   double tweight = 0;
   int i;
@@ -1086,7 +1037,10 @@ Otherwise, the distance between two columns in the matrix is calculated.
 static
 double correlation (int n, double** data1, double** data2, int** mask1,
   int** mask2, const double weight[], int index1, int index2, int transpose)
-/*
+/**
+\internal
+
+
 Purpose
 =======
 
@@ -1130,7 +1084,6 @@ Index of the second row or column.
 transpose (input) int
 If transpose==0, the distance between two rows in the matrix is calculated.
 Otherwise, the distance between two columns in the matrix is calculated.
-============================================================================
 */
 { double result = 0.;
   double sum1 = 0.;
@@ -1186,7 +1139,9 @@ Otherwise, the distance between two columns in the matrix is calculated.
 static
 double acorrelation (int n, double** data1, double** data2, int** mask1,
   int** mask2, const double weight[], int index1, int index2, int transpose)
-/*
+/**
+\internal
+
 Purpose
 =======
 
@@ -1229,7 +1184,7 @@ Index of the second row or column.
 transpose (input) int
 If transpose==0, the distance between two rows in the matrix is calculated.
 Otherwise, the distance between two columns in the matrix is calculated.
-============================================================================
+
 */
 { double result = 0.;
   double sum1 = 0.;
@@ -1285,7 +1240,9 @@ Otherwise, the distance between two columns in the matrix is calculated.
 static
 double ucorrelation (int n, double** data1, double** data2, int** mask1,
   int** mask2, const double weight[], int index1, int index2, int transpose)
-/*
+/**
+\internal
+
 Purpose
 =======
 
@@ -1330,7 +1287,7 @@ Index of the second row or column.
 transpose (input) int
 If transpose==0, the distance between two rows in the matrix is calculated.
 Otherwise, the distance between two columns in the matrix is calculated.
-============================================================================
+
 */
 { double result = 0.;
   double denom1 = 0.;
@@ -1380,7 +1337,9 @@ Otherwise, the distance between two columns in the matrix is calculated.
 static
 double uacorrelation (int n, double** data1, double** data2, int** mask1,
   int** mask2, const double weight[], int index1, int index2, int transpose)
-/*
+/**
+\internal
+
 Purpose
 =======
 
@@ -1425,7 +1384,7 @@ Index of the second row or column.
 transpose (input) int
 If transpose==0, the distance between two rows in the matrix is calculated.
 Otherwise, the distance between two columns in the matrix is calculated.
-============================================================================
+
 */
 { double result = 0.;
   double denom1 = 0.;
@@ -1475,7 +1434,9 @@ Otherwise, the distance between two columns in the matrix is calculated.
 static
 double spearman (int n, double** data1, double** data2, int** mask1,
   int** mask2, const double weight[], int index1, int index2, int transpose)
-/*
+/**
+\internal
+
 Purpose
 =======
 
@@ -1517,7 +1478,7 @@ Index of the second row or column.
 transpose (input) int
 If transpose==0, the distance between two rows in the matrix is calculated.
 Otherwise, the distance between two columns in the matrix is calculated.
-============================================================================
+
 */
 { int i;
   int m = 0;
@@ -1603,7 +1564,9 @@ Otherwise, the distance between two columns in the matrix is calculated.
 static
 double kendall (int n, double** data1, double** data2, int** mask1, int** mask2,
   const double weight[], int index1, int index2, int transpose)
-/*
+/**
+\internal
+
 Purpose
 =======
 
@@ -1644,7 +1607,7 @@ Index of the second row or column.
 transpose (input) int
 If transpose==0, the distance between two rows in the matrix is calculated.
 Otherwise, the distance between two columns in the matrix is calculated.
-============================================================================
+
 */
 { int con = 0;
   int dis = 0;
@@ -1730,7 +1693,9 @@ static double(*setmetric(char dist))
 /* *********************************************************************  */
 
 static double uniform(void)
-/*
+/**
+\internal
+
 Purpose
 =======
 
@@ -1758,7 +1723,7 @@ Return value
 ============
 
 A double-precison number between 0.0 and 1.0.
-============================================================================
+
 */
 { int z;
   static const int m1 = 2147483563;
@@ -1793,7 +1758,9 @@ A double-precison number between 0.0 and 1.0.
 /* ************************************************************************ */
 
 static int binomial(int n, double p)
-/*
+/**
+\internal
+
 Purpose
 =======
 
@@ -1822,7 +1789,7 @@ Return value
 
 An integer drawn from a binomial distribution with parameters (p, n).
 
-============================================================================
+
 */
 { const double q = 1 - p;
   if (n*p < 30.0) /* Algorithm BINV */
@@ -1936,7 +1903,9 @@ An integer drawn from a binomial distribution with parameters (p, n).
 /* ************************************************************************ */
 
 static void randomassign (int nclusters, int nelements, int clusterid[])
-/*
+/**
+\internal
+
 Purpose
 =======
 
@@ -1959,7 +1928,7 @@ to be clustered).
 clusterid  (output) int[nelements]
 The cluster number to which an element was assigned.
 
-============================================================================
+
 */
 { int i, j;
   int k = 0;
@@ -1995,7 +1964,9 @@ The cluster number to which an element was assigned.
 static void getclustermeans(int nclusters, int nrows, int ncolumns,
   double** data, int** mask, int clusterid[], double** cdata, int** cmask,
   int transpose)
-/*
+/**
+\internal
+
 Purpose
 =======
 
@@ -2044,7 +2015,7 @@ transpose  (input) int
 If transpose==0, clusters of rows (genes) are specified. Otherwise, clusters of
 columns (microarrays) are specified.
 
-========================================================================
+
 */
 { int i, j, k;
   if (transpose==0)
@@ -2105,7 +2076,9 @@ static void
 getclustermedians(int nclusters, int nrows, int ncolumns,
   double** data, int** mask, int clusterid[], double** cdata, int** cmask,
   int transpose, double cache[])
-/*
+/**
+\internal
+
 Purpose
 =======
 
@@ -2160,7 +2133,7 @@ This array should be allocated before calling getclustermedians; its contents
 on input is not relevant. This array is used as a temporary storage space when
 calculating the medians.
 
-========================================================================
+
 */
 { int i, j, k;
   if (transpose==0)
@@ -2212,70 +2185,6 @@ calculating the medians.
 int getclustercentroids(int nclusters, int nrows, int ncolumns,
   double** data, int** mask, int clusterid[], double** cdata, int** cmask,
   int transpose, char method)
-/*
-Purpose
-=======
-
-The getclustercentroids routine calculates the cluster centroids, given to
-which cluster each element belongs. Depending on the argument method, the
-centroid is defined as either the mean or the median for each dimension over
-all elements belonging to a cluster.
-
-Arguments
-=========
-
-nclusters  (input) int
-The number of clusters.
-
-nrows     (input) int
-The number of rows in the gene expression data matrix, equal to the number of
-genes.
-
-ncolumns  (input) int
-The number of columns in the gene expression data matrix, equal to the number of
-microarrays.
-
-data       (input) double[nrows][ncolumns]
-The array containing the gene expression data.
-
-mask       (input) int[nrows][ncolumns]
-This array shows which data values are missing. If mask[i][j]==0, then
-data[i][j] is missing.
-
-clusterid  (output) int[nrows] if transpose==0
-                    int[ncolumns] if transpose==1
-The cluster number to which each element belongs. If transpose==0, then the
-dimension of clusterid is equal to nrows (the number of genes). Otherwise, it
-is equal to ncolumns (the number of microarrays).
-
-cdata      (output) double[nclusters][ncolumns] if transpose==0
-                    double[nrows][nclusters] if transpose==1
-On exit of getclustercentroids, this array contains the cluster centroids.
-
-cmask      (output) int[nclusters][ncolumns] if transpose==0
-                    int[nrows][nclusters] if transpose==1
-This array shows which data values of are missing for each centroid. If
-cmask[i][j]==0, then cdata[i][j] is missing. A data value is missing for
-a centroid if all corresponding data values of the cluster members are missing.
-
-transpose  (input) int
-If transpose==0, clusters of rows (genes) are specified. Otherwise, clusters of
-columns (microarrays) are specified.
-
-method     (input) char
-For method=='a', the centroid is defined as the mean over all elements
-belonging to a cluster for each dimension.
-For method=='m', the centroid is defined as the median over all elements
-belonging to a cluster for each dimension.
-
-Return value
-============
-
-The function returns an integer to indicate success or failure. If a
-memory error occurs, or if method is not 'm' or 'a', getclustercentroids
-returns 0. If successful, getclustercentroids returns 1.
-========================================================================
-*/
 { switch(method)
   { case 'm':
     { const int nelements = (transpose==0) ? nrows : ncolumns;
@@ -2299,41 +2208,6 @@ returns 0. If successful, getclustercentroids returns 1.
 
 void getclustermedoids(int nclusters, int nelements, double** distance,
   int clusterid[], int centroids[], double errors[])
-/*
-Purpose
-=======
-
-The getclustermedoids routine calculates the cluster centroids, given to which
-cluster each element belongs. The centroid is defined as the element with the
-smallest sum of distances to the other elements.
-
-Arguments
-=========
-
-nclusters  (input) int
-The number of clusters.
-
-nelements  (input) int
-The total number of elements.
-
-distmatrix (input) double array, ragged
-  (number of rows is nelements, number of columns is equal to the row number)
-The distance matrix. To save space, the distance matrix is given in the
-form of a ragged array. The distance matrix is symmetric and has zeros
-on the diagonal. See distancematrix for a description of the content.
-
-clusterid  (output) int[nelements]
-The cluster number to which each element belongs.
-
-centroid   (output) int[nclusters]
-The index of the element that functions as the centroid for each cluster.
-
-errors     (output) double[nclusters]
-The within-cluster sum of distances between the items and the cluster
-centroid.
-
-========================================================================
-*/
 { int i, j, k;
   for (j = 0; j < nclusters; j++) errors[j] = DBL_MAX;
   for (i = 0; i < nelements; i++)
@@ -2567,87 +2441,6 @@ void kcluster (int nclusters, int nrows, int ncolumns,
   double** data, int** mask, double weight[], int transpose,
   int npass, char method, char dist,
   int clusterid[], double* error, int* ifound)
-/*
-Purpose
-=======
-
-The kcluster routine performs k-means or k-median clustering on a given set of
-elements, using the specified distance measure. The number of clusters is given
-by the user. Multiple passes are being made to find the optimal clustering
-solution, each time starting from a different initial clustering.
-
-
-Arguments
-=========
-
-nclusters  (input) int
-The number of clusters to be found.
-
-data       (input) double[nrows][ncolumns]
-The array containing the data of the elements to be clustered (i.e., the gene
-expression data).
-
-mask       (input) int[nrows][ncolumns]
-This array shows which data values are missing. If
-mask[i][j] == 0, then data[i][j] is missing.
-
-nrows     (input) int
-The number of rows in the data matrix, equal to the number of genes.
-
-ncolumns  (input) int
-The number of columns in the data matrix, equal to the number of microarrays.
-
-weight (input) double[n]
-The weights that are used to calculate the distance.
-
-transpose  (input) int
-If transpose==0, the rows of the matrix are clustered. Otherwise, columns
-of the matrix are clustered.
-
-npass      (input) int
-The number of times clustering is performed. Clustering is performed npass
-times, each time starting from a different (random) initial assignment of 
-genes to clusters. The clustering solution with the lowest within-cluster sum
-of distances is chosen.
-If npass==0, then the clustering algorithm will be run once, where the initial
-assignment of elements to clusters is taken from the clusterid array.
-
-method     (input) char
-Defines whether the arithmetic mean (method=='a') or the median
-(method=='m') is used to calculate the cluster center.
-
-dist       (input) char
-Defines which distance measure is used, as given by the table:
-dist=='e': Euclidean distance
-dist=='b': City-block distance
-dist=='c': correlation
-dist=='a': absolute value of the correlation
-dist=='u': uncentered correlation
-dist=='x': absolute uncentered correlation
-dist=='s': Spearman's rank correlation
-dist=='k': Kendall's tau
-For other values of dist, the default (Euclidean distance) is used.
-
-clusterid  (output; input) int[nrows] if transpose==0
-                           int[ncolumns] if transpose==1
-The cluster number to which a gene or microarray was assigned. If npass==0,
-then on input clusterid contains the initial clustering assignment from which
-the clustering algorithm starts. On output, it contains the clustering solution
-that was found.
-
-error      (output) double*
-The sum of distances to the cluster center of each item in the optimal k-means
-clustering solution that was found.
-
-ifound     (output) int*
-The number of times the optimal clustering solution was
-found. The value of ifound is at least 1; its maximum value is npass. If the
-number of clusters is larger than the number of elements being clustered,
-*ifound is set to 0 as an error code. If a memory allocation error occurs,
-*ifound is set to -1.
-
-========================================================================
-*/
 { const int nelements = (transpose==0) ? nrows : ncolumns;
   const int ndata = (transpose==0) ? ncolumns : nrows;
 
@@ -2731,61 +2524,6 @@ number of clusters is larger than the number of elements being clustered,
 
 void kmedoids (int nclusters, int nelements, double** distmatrix,
   int npass, int clusterid[], double* error, int* ifound)
-/*
-Purpose
-=======
-
-The kmedoids routine performs k-medoids clustering on a given set of elements,
-using the distance matrix and the number of clusters passed by the user.
-Multiple passes are being made to find the optimal clustering solution, each
-time starting from a different initial clustering.
-
-
-Arguments
-=========
-
-nclusters  (input) int
-The number of clusters to be found.
-
-nelements  (input) int
-The number of elements to be clustered.
-
-distmatrix (input) double array, ragged
-  (number of rows is nelements, number of columns is equal to the row number)
-The distance matrix. To save space, the distance matrix is given in the
-form of a ragged array. The distance matrix is symmetric and has zeros
-on the diagonal. See distancematrix for a description of the content.
-
-npass      (input) int
-The number of times clustering is performed. Clustering is performed npass
-times, each time starting from a different (random) initial assignment of genes
-to clusters. The clustering solution with the lowest within-cluster sum of
-distances is chosen.
-If npass==0, then the clustering algorithm will be run once, where the initial
-assignment of elements to clusters is taken from the clusterid array.
-
-clusterid  (output; input) int[nelements]
-On input, if npass==0, then clusterid contains the initial clustering assignment
-from which the clustering algorithm starts; all numbers in clusterid should be
-between zero and nelements-1 inclusive. If npass!=0, clusterid is ignored on
-input.
-On output, clusterid contains the clustering solution that was found: clusterid
-contains the number of the cluster to which each item was assigned. On output,
-the number of a cluster is defined as the item number of the centroid of the
-cluster.
-
-error      (output) double
-The sum of distances to the cluster center of each item in the optimal k-medoids
-clustering solution that was found.
-
-ifound     (output) int
-If kmedoids is successful: the number of times the optimal clustering solution
-was found. The value of ifound is at least 1; its maximum value is npass.
-If the user requested more clusters than elements available, ifound is set
-to 0. If kmedoids fails due to a memory allocation error, ifound is set to -1.
-
-========================================================================
-*/
 { int i, j, icluster;
   int* tclusterid;
   int* saved;
@@ -2907,67 +2645,6 @@ to 0. If kmedoids fails due to a memory allocation error, ifound is set to -1.
 
 double** distancematrix (int nrows, int ncolumns, double** data,
   int** mask, double weights[], char dist, int transpose)
-/*
-Purpose
-=======
-
-The distancematrix routine calculates the distance matrix between genes or
-microarrays using their measured gene expression data. Several distance measures
-can be used. The routine returns a pointer to a ragged array containing the
-distances between the genes. As the distance matrix is symmetric, with zeros on
-the diagonal, only the lower triangular half of the distance matrix is saved.
-The distancematrix routine allocates space for the distance matrix. If the
-parameter transpose is set to a nonzero value, the distances between the columns
-(microarrays) are calculated, otherwise distances between the rows (genes) are
-calculated.
-If sufficient space in memory cannot be allocated to store the distance matrix,
-the routine returns a NULL pointer, and all memory allocated so far for the
-distance matrix is freed.
-
-
-Arguments
-=========
-
-nrows      (input) int
-The number of rows in the gene expression data matrix (i.e., the number of
-genes)
-
-ncolumns   (input) int
-The number of columns in the gene expression data matrix (i.e., the number of
-microarrays)
-
-data       (input) double[nrows][ncolumns]
-The array containing the gene expression data.
-
-mask       (input) int[nrows][ncolumns]
-This array shows which data values are missing. If mask[i][j]==0, then
-data[i][j] is missing.
-
-weight (input) double[n]
-The weights that are used to calculate the distance. The length of this vector
-is equal to the number of columns if the distances between genes are calculated,
-or the number of rows if the distances between microarrays are calculated.
-
-dist       (input) char
-Defines which distance measure is used, as given by the table:
-dist=='e': Euclidean distance
-dist=='b': City-block distance
-dist=='c': correlation
-dist=='a': absolute value of the correlation
-dist=='u': uncentered correlation
-dist=='x': absolute uncentered correlation
-dist=='s': Spearman's rank correlation
-dist=='k': Kendall's tau
-For other values of dist, the default (Euclidean distance) is used.
-
-transpose  (input) int
-If transpose is equal to zero, the distances between the rows is
-calculated. Otherwise, the distances between the columns is calculated.
-The former is needed when genes are being clustered; the latter is used
-when microarrays are being clustered.
-
-========================================================================
-*/
 { /* First determine the size of the distance matrix */
   const int n = (transpose==0) ? nrows : ncolumns;
   const int ndata = (transpose==0) ? ncolumns : nrows;
@@ -3009,73 +2686,6 @@ when microarrays are being clustered.
 double* calculate_weights(int nrows, int ncolumns, double** data, int** mask,
   double weights[], int transpose, char dist, double cutoff, double exponent)
 
-/*
-Purpose
-=======
-
-This function calculates the weights using the weighting scheme proposed by
-Michael Eisen:
-w[i] = 1.0 / sum_{j where d[i][j]<cutoff} (1 - d[i][j]/cutoff)^exponent
-where the cutoff and the exponent are specified by the user.
-
-
-Arguments
-=========
-
-nrows      (input) int
-The number of rows in the gene expression data matrix, equal to the number of
-genes.
-
-ncolumns   (input) int
-The number of columns in the gene expression data matrix, equal to the number of
-microarrays.
-
-data       (input) double[nrows][ncolumns]
-The array containing the gene expression data.
-
-mask       (input) int[nrows][ncolumns]
-This array shows which data values are missing. If mask[i][j]==0, then
-data[i][j] is missing.
-
-weight     (input) int[ncolumns] if transpose==0,
-                   int[nrows]    if transpose==1
-The weights that are used to calculate the distance. The length of this vector
-is ncolumns if gene weights are being clustered, and nrows if microarrays
-weights are being clustered.
-
-transpose (input) int
-If transpose==0, the weights of the rows of the data matrix are calculated.
-Otherwise, the weights of the columns of the data matrix are calculated.
-
-dist      (input) char
-Defines which distance measure is used, as given by the table:
-dist=='e': Euclidean distance
-dist=='b': City-block distance
-dist=='c': correlation
-dist=='a': absolute value of the correlation
-dist=='u': uncentered correlation
-dist=='x': absolute uncentered correlation
-dist=='s': Spearman's rank correlation
-dist=='k': Kendall's tau
-For other values of dist, the default (Euclidean distance) is used.
-
-cutoff    (input) double
-The cutoff to be used to calculate the weights.
-
-exponent  (input) double
-The exponent to be used to calculate the weights.
-
-
-Return value
-============
-
-The function returns a pointer to a newly allocated array containing the
-calculated weights for the rows (if transpose==0) or columns (if
-transpose==1). If not enough memory could be allocated to store the
-weights array, the function returns NULL.
-
-========================================================================
-*/
 { int i,j;
   const int ndata = (transpose==0) ? ncolumns : nrows;
   const int nelements = (transpose==0) ? nrows : ncolumns;
@@ -3109,37 +2719,6 @@ weights array, the function returns NULL.
 /* ******************************************************************** */
 
 void cuttree (int nelements, Node* tree, int nclusters, int clusterid[]) 
-
-/*
-Purpose
-=======
-
-The cuttree routine takes the output of a hierarchical clustering routine, and
-divides the elements in the tree structure into clusters based on the
-hierarchical clustering result. The number of clusters is specified by the user.
-
-Arguments
-=========
-
-nelements      (input) int
-The number of elements that were clustered.
-
-tree           (input) Node[nelements-1]
-The clustering solution. Each node in the array describes one linking event,
-with tree[i].left and tree[i].right representig the elements that were joined.
-The original elements are numbered 0..nelements-1, nodes are numbered
--1..-(nelements-1).
-
-nclusters      (input) int
-The number of clusters to be formed.
-
-clusterid      (output) int[nelements]
-The number of the cluster to which each element was assigned. Space for this
-array should be allocated before calling the cuttree routine. If a memory
-error occured, all elements in clusterid are set to -1.
-
-========================================================================
-*/
 { int i, j, k;
   int icluster = 0;
   const int n = nelements-nclusters; /* number of nodes to join */
@@ -3184,7 +2763,9 @@ static
 Node* pclcluster (int nrows, int ncolumns, double** data, int** mask,
   double weight[], double** distmatrix, char dist, int transpose)
 
-/*
+/**
+\internal
+
 
 Purpose
 =======
@@ -3246,7 +2827,7 @@ whether genes (rows) or microarrays (columns) were clustered, nelements is
 equal to nrows or ncolumns. See src/cluster.h for a description of the Node
 structure.
 If a memory error occurs, pclcluster returns NULL.
-========================================================================
+
 */
 { int i, j;
   const int nelements = (transpose==0) ? nrows : ncolumns;
@@ -3361,7 +2942,9 @@ static
 Node* pslcluster (int nrows, int ncolumns, double** data, int** mask,
   double weight[], double** distmatrix, char dist, int transpose)
 
-/*
+/**
+\internal
+
 
 Purpose
 =======
@@ -3438,7 +3021,7 @@ equal to nrows or ncolumns. See src/cluster.h for a description of the Node
 structure.
 If a memory error occurs, pslcluster returns NULL.
 
-========================================================================
+
 */
 { int i, j, k;
   const int nelements = transpose ? ncolumns : nrows;
@@ -3536,7 +3119,9 @@ If a memory error occurs, pslcluster returns NULL.
 /* ******************************************************************** */
 
 static Node* pmlcluster (int nelements, double** distmatrix)
-/*
+/**
+\internal
+
 
 Purpose
 =======
@@ -3564,7 +3149,7 @@ whether genes (rows) or microarrays (columns) were clustered, nelements is
 equal to nrows or ncolumns. See src/cluster.h for a description of the Node
 structure.
 If a memory error occurs, pmlcluster returns NULL.
-========================================================================
+
 */
 { int j;
   int n;
@@ -3612,7 +3197,9 @@ If a memory error occurs, pmlcluster returns NULL.
 /* ******************************************************************* */
 
 static Node* palcluster (int nelements, double** distmatrix)
-/*
+/**
+\internal
+
 Purpose
 =======
 
@@ -3639,7 +3226,7 @@ whether genes (rows) or microarrays (columns) were clustered, nelements is
 equal to nrows or ncolumns. See src/cluster.h for a description of the Node
 structure.
 If a memory error occurs, palcluster returns NULL.
-========================================================================
+
 */
 { int j;
   int n;
@@ -3718,84 +3305,7 @@ If a memory error occurs, palcluster returns NULL.
 
 Node* treecluster (int nrows, int ncolumns, double** data, int** mask,
   double weight[], int transpose, char dist, char method, double** distmatrix)
-/*
-Purpose
-=======
-
-The treecluster routine performs hierarchical clustering using pairwise
-single-, maximum-, centroid-, or average-linkage, as defined by method, on a
-given set of gene expression data, using the distance metric given by dist.
-If successful, the function returns a pointer to a newly allocated Tree struct
-containing the hierarchical clustering solution, and NULL if a memory error
-occurs. The pointer should be freed by the calling routine to prevent memory
-leaks.
-
-Arguments
-=========
-
-nrows     (input) int
-The number of rows in the data matrix, equal to the number of genes.
-
-ncolumns  (input) int
-The number of columns in the data matrix, equal to the number of microarrays.
-
-data       (input) double[nrows][ncolumns]
-The array containing the data of the vectors to be clustered.
-
-mask       (input) int[nrows][ncolumns]
-This array shows which data values are missing. If mask[i][j]==0, then
-data[i][j] is missing.
-
-weight (input) double array[n]
-The weights that are used to calculate the distance.
 
-transpose  (input) int
-If transpose==0, the rows of the matrix are clustered. Otherwise, columns
-of the matrix are clustered.
-
-dist       (input) char
-Defines which distance measure is used, as given by the table:
-dist=='e': Euclidean distance
-dist=='b': City-block distance
-dist=='c': correlation
-dist=='a': absolute value of the correlation
-dist=='u': uncentered correlation
-dist=='x': absolute uncentered correlation
-dist=='s': Spearman's rank correlation
-dist=='k': Kendall's tau
-For other values of dist, the default (Euclidean distance) is used.
-
-method     (input) char
-Defines which hierarchical clustering method is used:
-method=='s': pairwise single-linkage clustering
-method=='m': pairwise maximum- (or complete-) linkage clustering
-method=='a': pairwise average-linkage clustering
-method=='c': pairwise centroid-linkage clustering
-For the first three, either the distance matrix or the gene expression data is
-sufficient to perform the clustering algorithm. For pairwise centroid-linkage
-clustering, however, the gene expression data are always needed, even if the
-distance matrix itself is available.
-
-distmatrix (input) double**
-The distance matrix. If the distance matrix is zero initially, the distance
-matrix will be allocated and calculated from the data by treecluster, and
-deallocated before treecluster returns. If the distance matrix is passed by the
-calling routine, treecluster will modify the contents of the distance matrix as
-part of the clustering algorithm, but will not deallocate it. The calling
-routine should deallocate the distance matrix after the return from treecluster.
-
-Return value
-============
-
-A pointer to a newly allocated array of Node structs, describing the
-hierarchical clustering solution consisting of nelements-1 nodes. Depending on
-whether genes (rows) or microarrays (columns) were clustered, nelements is
-equal to nrows or ncolumns. See src/cluster.h for a description of the Node
-structure.
-If a memory error occurs, treecluster returns NULL.
-
-========================================================================
-*/
 { Node* result = NULL;
   const int nelements = (transpose==0) ? nrows : ncolumns;
   const int ldistmatrix = (distmatrix==NULL && method!='s') ? 1 : 0;
@@ -4125,82 +3635,6 @@ void somassign (int nrows, int ncolumns, double** data, int** mask,
 void somcluster (int nrows, int ncolumns, double** data, int** mask,
   const double weight[], int transpose, int nxgrid, int nygrid,
   double inittau, int niter, char dist, double*** celldata, int clusterid[][2])
-/*
-
-Purpose
-=======
-
-The somcluster routine implements a self-organizing map (Kohonen) on a
-rectangular grid, using a given set of vectors. The distance measure to be
-used to find the similarity between genes and nodes is given by dist.
-
-Arguments
-=========
-
-nrows     (input) int
-The number of rows in the data matrix, equal to the number of genes.
-
-ncolumns  (input) int
-The number of columns in the data matrix, equal to the number of microarrays.
-
-data       (input) double[nrows][ncolumns]
-The array containing the gene expression data.
-
-mask       (input) int[nrows][ncolumns]
-This array shows which data values are missing. If
-mask[i][j] == 0, then data[i][j] is missing.
-
-weights    (input) double[ncolumns] if transpose==0;
-                   double[nrows]    if transpose==1
-The weights that are used to calculate the distance. The length of this vector
-is ncolumns if genes are being clustered, or nrows if microarrays are being
-clustered.
-
-transpose  (input) int
-If transpose==0, the rows (genes) of the matrix are clustered. Otherwise,
-columns (microarrays) of the matrix are clustered.
-
-nxgrid    (input) int
-The number of grid cells horizontally in the rectangular topology of clusters.
-
-nygrid    (input) int
-The number of grid cells horizontally in the rectangular topology of clusters.
-
-inittau    (input) double
-The initial value of tau, representing the neighborhood function.
-
-niter      (input) int
-The number of iterations to be performed.
-
-dist       (input) char
-Defines which distance measure is used, as given by the table:
-dist=='e': Euclidean distance
-dist=='b': City-block distance
-dist=='c': correlation
-dist=='a': absolute value of the correlation
-dist=='u': uncentered correlation
-dist=='x': absolute uncentered correlation
-dist=='s': Spearman's rank correlation
-dist=='k': Kendall's tau
-For other values of dist, the default (Euclidean distance) is used.
-
-celldata (output) double[nxgrid][nygrid][ncolumns] if transpose==0;
-                  double[nxgrid][nygrid][nrows]    if tranpose==1
-The gene expression data for each node (cell) in the 2D grid. This can be
-interpreted as the centroid for the cluster corresponding to that cell. If
-celldata is NULL, then the centroids are not returned. If celldata is not
-NULL, enough space should be allocated to store the centroid data before callingsomcluster.
-
-clusterid (output), int[nrows][2]    if transpose==0;
-                    int[ncolumns][2] if transpose==1
-For each item (gene or microarray) that is clustered, the coordinates of the
-cell in the 2D grid to which the item was assigned. If clusterid is NULL, the
-cluster assignments are not returned. If clusterid is not NULL, enough memory
-should be allocated to store the clustering information before calling
-somcluster.
-
-========================================================================
-*/
 { const int nobjects = (transpose==0) ? nrows : ncolumns;
   const int ndata = (transpose==0) ? ncolumns : nrows;
   int i,j;
@@ -4238,83 +3672,7 @@ somcluster.
 double clusterdistance (int nrows, int ncolumns, double** data,
   int** mask, double weight[], int n1, int n2, int index1[], int index2[],
   char dist, char method, int transpose)
-              
-/*
-Purpose
-=======
-
-The clusterdistance routine calculates the distance between two clusters
-containing genes or microarrays using the measured gene expression vectors. The
-distance between clusters, given the genes/microarrays in each cluster, can be
-defined in several ways. Several distance measures can be used.
-
-The routine returns the distance in double precision.
-If the parameter transpose is set to a nonzero value, the clusters are
-interpreted as clusters of microarrays, otherwise as clusters of gene.
-
-Arguments
-=========
-
-nrows     (input) int
-The number of rows (i.e., the number of genes) in the gene expression data
-matrix.
-
-ncolumns      (input) int
-The number of columns (i.e., the number of microarrays) in the gene expression
-data matrix.
 
-data       (input) double[nrows][ncolumns]
-The array containing the data of the vectors.
-
-mask       (input) int[nrows][ncolumns]
-This array shows which data values are missing. If mask[i][j]==0, then
-data[i][j] is missing.
-
-weight     (input) double[ncolumns] if transpose==0;
-                   double[nrows]    if transpose==1
-The weights that are used to calculate the distance.
-
-n1         (input) int
-The number of elements in the first cluster.
-
-n2         (input) int
-The number of elements in the second cluster.
-
-index1     (input) int[n1]
-Identifies which genes/microarrays belong to the first cluster.
-
-index2     (input) int[n2]
-Identifies which genes/microarrays belong to the second cluster.
-
-dist       (input) char
-Defines which distance measure is used, as given by the table:
-dist=='e': Euclidean distance
-dist=='b': City-block distance
-dist=='c': correlation
-dist=='a': absolute value of the correlation
-dist=='u': uncentered correlation
-dist=='x': absolute uncentered correlation
-dist=='s': Spearman's rank correlation
-dist=='k': Kendall's tau
-For other values of dist, the default (Euclidean distance) is used.
-
-method     (input) char
-Defines how the distance between two clusters is defined, given which genes
-belong to which cluster:
-method=='a': the distance between the arithmetic means of the two clusters
-method=='m': the distance between the medians of the two clusters
-method=='s': the smallest pairwise distance between members of the two clusters
-method=='x': the largest pairwise distance between members of the two clusters
-method=='v': average of the pairwise distances between members of the clusters
-
-transpose  (input) int
-If transpose is equal to zero, the distances between the rows is
-calculated. Otherwise, the distances between the columns is calculated.
-The former is needed when genes are being clustered; the latter is used
-when microarrays are being clustered.
-
-========================================================================
-*/
 { /* Set the metric function as indicated by dist */
   double (*metric)
     (int, double**, double**, int**, int**, const double[], int, int, int) =
diff --git a/opencog/util/cluster.h b/opencog/util/cluster.h
index 2c429d1caa4..4b3722b709e 100644
--- a/opencog/util/cluster.h
+++ b/opencog/util/cluster.h
@@ -27,6 +27,16 @@
  * 
  */
 
+#ifndef	_C_CLUSTERING_LIBRARY_H
+#define	_C_CLUSTERING_LIBRARY_H
+
+/** @name C Clustering Library
+ * This library was written at the Laboratory of DNA Information Analysis,
+ * Human Genome Center, Institute of Medical Science, University of Tokyo,
+ * by Michiel Jan Laurens de Hoon
+ */
+///@{
+
 #ifndef min
 #define min(x, y)	((x) < (y) ? (x) : (y))
 #endif
@@ -39,31 +49,356 @@
 #endif
 
 #define CLUSTERVERSION "1.49"
-//#ifdef __cplusplus
-//extern "C" {
-    //#endif
+
 /* Chapter 2 */
+
+/**
+The clusterdistance routine calculates the distance between two clusters
+containing genes or microarrays using the measured gene expression vectors. The
+distance between clusters, given the genes/microarrays in each cluster, can be
+defined in several ways. Several distance measures can be used.
+
+The routine returns the distance in double precision.
+If the parameter transpose is set to a nonzero value, the clusters are
+interpreted as clusters of microarrays, otherwise as clusters of gene.
+
+\param nrows     (input) int
+The number of rows (i.e., the number of genes) in the gene expression data
+matrix.
+
+\param ncolumns      (input) int
+The number of columns (i.e., the number of microarrays) in the gene expression
+data matrix.
+
+\param data       (input) double[nrows][ncolumns]
+The array containing the data of the vectors.
+
+\param mask       (input) int[nrows][ncolumns]
+This array shows which data values are missing. If mask[i][j]==0, then
+data[i][j] is missing.
+
+\param weight     (input) double[ncolumns] if transpose==0;
+                   double[nrows]    if transpose==1
+The weights that are used to calculate the distance.
+
+\param n1         (input) int
+The number of elements in the first cluster.
+
+\param n2         (input) int
+The number of elements in the second cluster.
+
+\param index1     (input) int[n1]
+Identifies which genes/microarrays belong to the first cluster.
+
+\param index2     (input) int[n2]
+Identifies which genes/microarrays belong to the second cluster.
+
+\param dist       (input) char
+Defines which distance measure is used, as given by the table:
+dist=='e': Euclidean distance
+dist=='b': City-block distance
+dist=='c': correlation
+dist=='a': absolute value of the correlation
+dist=='u': uncentered correlation
+dist=='x': absolute uncentered correlation
+dist=='s': Spearman's rank correlation
+dist=='k': Kendall's tau
+For other values of dist, the default (Euclidean distance) is used.
+
+\param method     (input) char
+Defines how the distance between two clusters is defined, given which genes
+belong to which cluster:
+method=='a': the distance between the arithmetic means of the two clusters
+method=='m': the distance between the medians of the two clusters
+method=='s': the smallest pairwise distance between members of the two clusters
+method=='x': the largest pairwise distance between members of the two clusters
+method=='v': average of the pairwise distances between members of the clusters
+
+\param transpose  (input) int
+If transpose is equal to zero, the distances between the rows is
+calculated. Otherwise, the distances between the columns is calculated.
+The former is needed when genes are being clustered; the latter is used
+when microarrays are being clustered.
+*/
 double clusterdistance (int nrows, int ncolumns, double** data, int** mask,
   double weight[], int n1, int n2, int index1[], int index2[], char dist,
   char method, int transpose);
+
+/**
+The distancematrix routine calculates the distance matrix between genes or
+microarrays using their measured gene expression data. Several distance measures
+can be used. The routine returns a pointer to a ragged array containing the
+distances between the genes. As the distance matrix is symmetric, with zeros on
+the diagonal, only the lower triangular half of the distance matrix is saved.
+The distancematrix routine allocates space for the distance matrix. If the
+parameter transpose is set to a nonzero value, the distances between the columns
+(microarrays) are calculated, otherwise distances between the rows (genes) are
+calculated.
+If sufficient space in memory cannot be allocated to store the distance matrix,
+the routine returns a NULL pointer, and all memory allocated so far for the
+distance matrix is freed.
+
+
+\param nrows      (input) int
+The number of rows in the gene expression data matrix (i.e., the number of
+genes)
+
+\param ncolumns   (input) int
+The number of columns in the gene expression data matrix (i.e., the number of
+microarrays)
+
+\param data       (input) double[nrows][ncolumns]
+The array containing the gene expression data.
+
+\param mask       (input) int[nrows][ncolumns]
+This array shows which data values are missing. If mask[i][j]==0, then
+data[i][j] is missing.
+
+\param weight (input) double[n]
+The weights that are used to calculate the distance. The length of this vector
+is equal to the number of columns if the distances between genes are calculated,
+or the number of rows if the distances between microarrays are calculated.
+
+\param dist       (input) char
+Defines which distance measure is used, as given by the table:
+dist=='e': Euclidean distance
+dist=='b': City-block distance
+dist=='c': correlation
+dist=='a': absolute value of the correlation
+dist=='u': uncentered correlation
+dist=='x': absolute uncentered correlation
+dist=='s': Spearman's rank correlation
+dist=='k': Kendall's tau
+For other values of dist, the default (Euclidean distance) is used.
+
+\param transpose  (input) int
+If transpose is equal to zero, the distances between the rows is
+calculated. Otherwise, the distances between the columns is calculated.
+The former is needed when genes are being clustered; the latter is used
+when microarrays are being clustered.
+*/
 double** distancematrix (int ngenes, int ndata, double** data,
   int** mask, double* weight, char dist, int transpose);
 
 /* Chapter 3 */
+/**
+The getclustercentroids routine calculates the cluster centroids, given to
+which cluster each element belongs. Depending on the argument method, the
+centroid is defined as either the mean or the median for each dimension over
+all elements belonging to a cluster.
+
+\param nclusters  (input) int
+The number of clusters.
+
+\param nrows     (input) int
+The number of rows in the gene expression data matrix, equal to the number of
+genes.
+
+\param ncolumns  (input) int
+The number of columns in the gene expression data matrix, equal to the number of
+microarrays.
+
+\param data       (input) double[nrows][ncolumns]
+The array containing the gene expression data.
+
+\param mask       (input) int[nrows][ncolumns]
+This array shows which data values are missing. If mask[i][j]==0, then
+data[i][j] is missing.
+
+\param clusterid  (output) int[nrows] if transpose==0
+                    int[ncolumns] if transpose==1
+The cluster number to which each element belongs. If transpose==0, then the
+dimension of clusterid is equal to nrows (the number of genes). Otherwise, it
+is equal to ncolumns (the number of microarrays).
+
+\param cdata      (output) double[nclusters][ncolumns] if transpose==0
+                    double[nrows][nclusters] if transpose==1
+On exit of getclustercentroids, this array contains the cluster centroids.
+
+\param cmask      (output) int[nclusters][ncolumns] if transpose==0
+                    int[nrows][nclusters] if transpose==1
+This array shows which data values of are missing for each centroid. If
+cmask[i][j]==0, then cdata[i][j] is missing. A data value is missing for
+a centroid if all corresponding data values of the cluster members are missing.
+
+\param transpose  (input) int
+If transpose==0, clusters of rows (genes) are specified. Otherwise, clusters of
+columns (microarrays) are specified.
+
+\param method     (input) char
+For method=='a', the centroid is defined as the mean over all elements
+belonging to a cluster for each dimension.
+For method=='m', the centroid is defined as the median over all elements
+belonging to a cluster for each dimension.
+
+Return value
+============
+
+The function returns an integer to indicate success or failure. If a
+memory error occurs, or if method is not 'm' or 'a', getclustercentroids
+returns 0. If successful, getclustercentroids returns 1.
+*/
 int getclustercentroids(int nclusters, int nrows, int ncolumns,
   double** data, int** mask, int clusterid[], double** cdata, int** cmask,
   int transpose, char method);
+  
+/**
+The getclustermedoids routine calculates the cluster centroids, given to which
+cluster each element belongs. The centroid is defined as the element with the
+smallest sum of distances to the other elements.
+
+\param nclusters  (input) int
+The number of clusters.
+
+\param nelements  (input) int
+The total number of elements.
+
+\param distmatrix (input) double array, ragged
+  (number of rows is nelements, number of columns is equal to the row number)
+The distance matrix. To save space, the distance matrix is given in the
+form of a ragged array. The distance matrix is symmetric and has zeros
+on the diagonal. See distancematrix for a description of the content.
+
+\param clusterid  (output) int[nelements]
+The cluster number to which each element belongs.
+
+\param centroid   (output) int[nclusters]
+The index of the element that functions as the centroid for each cluster.
+
+\param errors     (output) double[nclusters]
+The within-cluster sum of distances between the items and the cluster
+centroid.
+*/
 void getclustermedoids(int nclusters, int nelements, double** distance,
   int clusterid[], int centroids[], double errors[]);
+
+/**
+The kcluster routine performs k-means or k-median clustering on a given set of
+elements, using the specified distance measure. The number of clusters is given
+by the user. Multiple passes are being made to find the optimal clustering
+solution, each time starting from a different initial clustering.
+
+nclusters  (input) int
+\param The number of clusters to be found.
+
+\param data       (input) double[nrows][ncolumns]
+The array containing the data of the elements to be clustered (i.e., the gene
+expression data).
+
+\param mask       (input) int[nrows][ncolumns]
+This array shows which data values are missing. If
+mask[i][j] == 0, then data[i][j] is missing.
+
+\param nrows     (input) int
+The number of rows in the data matrix, equal to the number of genes.
+
+\param ncolumns  (input) int
+The number of columns in the data matrix, equal to the number of microarrays.
+
+\param weight (input) double[n]
+The weights that are used to calculate the distance.
+
+\param transpose  (input) int
+If transpose==0, the rows of the matrix are clustered. Otherwise, columns
+of the matrix are clustered.
+
+\param npass      (input) int
+The number of times clustering is performed. Clustering is performed npass
+times, each time starting from a different (random) initial assignment of 
+genes to clusters. The clustering solution with the lowest within-cluster sum
+of distances is chosen.
+If npass==0, then the clustering algorithm will be run once, where the initial
+assignment of elements to clusters is taken from the clusterid array.
+
+\param method     (input) char
+Defines whether the arithmetic mean (method=='a') or the median
+(method=='m') is used to calculate the cluster center.
+
+\param dist       (input) char
+Defines which distance measure is used, as given by the table:
+dist=='e': Euclidean distance
+dist=='b': City-block distance
+dist=='c': correlation
+dist=='a': absolute value of the correlation
+dist=='u': uncentered correlation
+dist=='x': absolute uncentered correlation
+dist=='s': Spearman's rank correlation
+dist=='k': Kendall's tau
+For other values of dist, the default (Euclidean distance) is used.
+
+\param clusterid  (output; input) int[nrows] if transpose==0
+                           int[ncolumns] if transpose==1
+The cluster number to which a gene or microarray was assigned. If npass==0,
+then on input clusterid contains the initial clustering assignment from which
+the clustering algorithm starts. On output, it contains the clustering solution
+that was found.
+
+\param error      (output) double*
+The sum of distances to the cluster center of each item in the optimal k-means
+clustering solution that was found.
+
+\param ifound     (output) int*
+The number of times the optimal clustering solution was
+found. The value of ifound is at least 1; its maximum value is npass. If the
+number of clusters is larger than the number of elements being clustered,
+*ifound is set to 0 as an error code. If a memory allocation error occurs,
+*ifound is set to -1.
+*/
 void kcluster (int nclusters, int ngenes, int ndata, double** data,
   int** mask, double weight[], int transpose, int npass, char method, char dist,
   int clusterid[], double* error, int* ifound);
+
+/**
+The kmedoids routine performs k-medoids clustering on a given set of elements,
+using the distance matrix and the number of clusters passed by the user.
+Multiple passes are being made to find the optimal clustering solution, each
+time starting from a different initial clustering.
+
+\param nclusters  (input) int
+The number of clusters to be found.
+
+\param nelements  (input) int
+The number of elements to be clustered.
+
+\param distmatrix (input) double array, ragged
+  (number of rows is nelements, number of columns is equal to the row number)
+The distance matrix. To save space, the distance matrix is given in the
+form of a ragged array. The distance matrix is symmetric and has zeros
+on the diagonal. See distancematrix for a description of the content.
+
+\param npass      (input) int
+The number of times clustering is performed. Clustering is performed npass
+times, each time starting from a different (random) initial assignment of genes
+to clusters. The clustering solution with the lowest within-cluster sum of
+distances is chosen.
+If npass==0, then the clustering algorithm will be run once, where the initial
+assignment of elements to clusters is taken from the clusterid array.
+
+\param clusterid  (output; input) int[nelements]
+On input, if npass==0, then clusterid contains the initial clustering assignment
+from which the clustering algorithm starts; all numbers in clusterid should be
+between zero and nelements-1 inclusive. If npass!=0, clusterid is ignored on
+input.
+On output, clusterid contains the clustering solution that was found: clusterid
+contains the number of the cluster to which each item was assigned. On output,
+the number of a cluster is defined as the item number of the centroid of the
+cluster.
+
+\param error      (output) double
+The sum of distances to the cluster center of each item in the optimal k-medoids
+clustering solution that was found.
+
+\param ifound     (output) int
+If kmedoids is successful: the number of times the optimal clustering solution
+was found. The value of ifound is at least 1; its maximum value is npass.
+If the user requested more clusters than elements available, ifound is set
+to 0. If kmedoids fails due to a memory allocation error, ifound is set to -1.
+*/
 void kmedoids (int nclusters, int nelements, double** distance,
   int npass, int clusterid[], double* error, int* ifound);
 
 /* Chapter 4 */
-typedef struct {int left; int right; double distance;} Node;
-/*
+/**
  * A Node struct describes a single node in a tree created by hierarchical
  * clustering. The tree can be represented by an array of n Node structs,
  * where n is the number of elements minus one. The integers left and right
@@ -72,27 +407,312 @@ typedef struct {int left; int right; double distance;} Node;
  * nodes -1..-(nelements-1). For each node, distance contains the distance
  * between the two subnodes that were joined.
  */
+typedef struct {int left; int right; double distance;} Node;
+
+/**
+The treecluster routine performs hierarchical clustering using pairwise
+single-, maximum-, centroid-, or average-linkage, as defined by method, on a
+given set of gene expression data, using the distance metric given by dist.
+If successful, the function returns a pointer to a newly allocated Tree struct
+containing the hierarchical clustering solution, and NULL if a memory error
+occurs. The pointer should be freed by the calling routine to prevent memory
+leaks.
+
+\param nrows     (input) int
+The number of rows in the data matrix, equal to the number of genes.
+
+\param ncolumns  (input) int
+The number of columns in the data matrix, equal to the number of microarrays.
+
+\param data       (input) double[nrows][ncolumns]
+The array containing the data of the vectors to be clustered.
+
+\param mask       (input) int[nrows][ncolumns]
+This array shows which data values are missing. If mask[i][j]==0, then
+data[i][j] is missing.
+
+\param weight (input) double array[n]
+The weights that are used to calculate the distance.
+
+\param transpose  (input) int
+If transpose==0, the rows of the matrix are clustered. Otherwise, columns
+of the matrix are clustered.
 
+\param dist       (input) char
+Defines which distance measure is used, as given by the table:
+dist=='e': Euclidean distance
+dist=='b': City-block distance
+dist=='c': correlation
+dist=='a': absolute value of the correlation
+dist=='u': uncentered correlation
+dist=='x': absolute uncentered correlation
+dist=='s': Spearman's rank correlation
+dist=='k': Kendall's tau
+For other values of dist, the default (Euclidean distance) is used.
+
+\param method     (input) char
+Defines which hierarchical clustering method is used:
+method=='s': pairwise single-linkage clustering
+method=='m': pairwise maximum- (or complete-) linkage clustering
+method=='a': pairwise average-linkage clustering
+method=='c': pairwise centroid-linkage clustering
+For the first three, either the distance matrix or the gene expression data is
+sufficient to perform the clustering algorithm. For pairwise centroid-linkage
+clustering, however, the gene expression data are always needed, even if the
+distance matrix itself is available.
+
+\param distmatrix (input) double**
+The distance matrix. If the distance matrix is zero initially, the distance
+matrix will be allocated and calculated from the data by treecluster, and
+deallocated before treecluster returns. If the distance matrix is passed by the
+calling routine, treecluster will modify the contents of the distance matrix as
+part of the clustering algorithm, but will not deallocate it. The calling
+routine should deallocate the distance matrix after the return from treecluster.
+
+Return value
+============
+
+A pointer to a newly allocated array of Node structs, describing the
+hierarchical clustering solution consisting of nelements-1 nodes. Depending on
+whether genes (rows) or microarrays (columns) were clustered, nelements is
+equal to nrows or ncolumns. See src/cluster.h for a description of the Node
+structure.
+If a memory error occurs, treecluster returns NULL.
+*/
 Node* treecluster (int nrows, int ncolumns, double** data, int** mask,
   double weight[], int transpose, char dist, char method, double** distmatrix);
+
+
+/**
+The cuttree routine takes the output of a hierarchical clustering routine, and
+divides the elements in the tree structure into clusters based on the
+hierarchical clustering result. The number of clusters is specified by the user.
+
+\param nelements      (input) int
+The number of elements that were clustered.
+
+\param tree           (input) Node[nelements-1]
+The clustering solution. Each node in the array describes one linking event,
+with tree[i].left and tree[i].right representig the elements that were joined.
+The original elements are numbered 0..nelements-1, nodes are numbered
+-1..-(nelements-1).
+
+\param nclusters      (input) int
+The number of clusters to be formed.
+
+\param clusterid      (output) int[nelements]
+The number of the cluster to which each element was assigned. Space for this
+array should be allocated before calling the cuttree routine. If a memory
+error occured, all elements in clusterid are set to -1.
+*/
 void cuttree (int nelements, Node* tree, int nclusters, int clusterid[]);
 
 /* Chapter 5 */
+/**
+The somcluster routine implements a self-organizing map (Kohonen) on a
+rectangular grid, using a given set of vectors. The distance measure to be
+used to find the similarity between genes and nodes is given by dist.
+
+\param nrows     (input) int
+The number of rows in the data matrix, equal to the number of genes.
+
+\param ncolumns  (input) int
+The number of columns in the data matrix, equal to the number of microarrays.
+
+\param data       (input) double[nrows][ncolumns]
+The array containing the gene expression data.
+
+\param mask       (input) int[nrows][ncolumns]
+This array shows which data values are missing. If
+mask[i][j] == 0, then data[i][j] is missing.
+
+\param weights    (input) double[ncolumns] if transpose==0;
+                   double[nrows]    if transpose==1
+The weights that are used to calculate the distance. The length of this vector
+is ncolumns if genes are being clustered, or nrows if microarrays are being
+clustered.
+
+\param transpose  (input) int
+If transpose==0, the rows (genes) of the matrix are clustered. Otherwise,
+columns (microarrays) of the matrix are clustered.
+
+\param nxgrid    (input) int
+The number of grid cells horizontally in the rectangular topology of clusters.
+
+\param nygrid    (input) int
+The number of grid cells horizontally in the rectangular topology of clusters.
+
+\param inittau    (input) double
+The initial value of tau, representing the neighborhood function.
+
+\param niter      (input) int
+The number of iterations to be performed.
+
+\param dist       (input) char
+Defines which distance measure is used, as given by the table:
+dist=='e': Euclidean distance
+dist=='b': City-block distance
+dist=='c': correlation
+dist=='a': absolute value of the correlation
+dist=='u': uncentered correlation
+dist=='x': absolute uncentered correlation
+dist=='s': Spearman's rank correlation
+dist=='k': Kendall's tau
+For other values of dist, the default (Euclidean distance) is used.
+
+\param celldata (output) double[nxgrid][nygrid][ncolumns] if transpose==0;
+                  double[nxgrid][nygrid][nrows]    if tranpose==1
+The gene expression data for each node (cell) in the 2D grid. This can be
+interpreted as the centroid for the cluster corresponding to that cell. If
+celldata is NULL, then the centroids are not returned. If celldata is not
+NULL, enough space should be allocated to store the centroid data before callingsomcluster.
+
+\param clusterid (output), int[nrows][2]    if transpose==0;
+                    int[ncolumns][2] if transpose==1
+For each item (gene or microarray) that is clustered, the coordinates of the
+cell in the 2D grid to which the item was assigned. If clusterid is NULL, the
+cluster assignments are not returned. If clusterid is not NULL, enough memory
+should be allocated to store the clustering information before calling
+somcluster.
+*/
 void somcluster (int nrows, int ncolumns, double** data, int** mask,
   const double weight[], int transpose, int nxnodes, int nynodes,
   double inittau, int niter, char dist, double*** celldata,
   int clusterid[][2]);
 
 /* Chapter 6 */
+/**
+This subroutine uses the singular value decomposition to perform principal
+components analysis of a real nrows by ncolumns rectangular matrix.
+
+\param nrows     (input) int
+The number of rows in the matrix u.
+
+\param ncolumns  (input) int
+The number of columns in the matrix v.
+
+\param u          (input) double[nrows][ncolumns]
+On input, the array containing the data to which the principal component
+analysis should be applied. The function assumes that the mean has already been
+subtracted of each column, and hence that the mean of each column is zero.
+On output, see below.
+
+\param v          (input) double[n][n], where n = min(nrows, ncolumns)
+Not used on input.
+
+\param w          (input) double[n], where n = min(nrows, ncolumns)
+Not used on input.
+
+
+Return value
+============
+
+On output:
+
+If nrows >= ncolumns, then
+
+u contains the coordinates with respect to the principal components;
+v contains the principal component vectors.
+
+The dot product u . v reproduces the data that were passed in u.
+
+
+If nrows < ncolumns, then
+
+u contains the principal component vectors;
+v contains the coordinates with respect to the principal components.
+
+The dot product v . u reproduces the data that were passed in u.
+
+The eigenvalues of the covariance matrix are returned in w.
+
+The arrays u, v, and w are sorted according to eigenvalue, with the largest
+eigenvalues appearing first.
+
+The function returns 0 if successful, -1 if memory allocation fails, and a
+positive integer if the singular value decomposition fails to converge.
+*/
 int pca(int m, int n, double** u, double** v, double* w);
 
-/* Utility routines, currently undocumented */
+/**	Sets up an index table given the data, such that data[index[]] is in
+ *	increasing order. Sorting is done on the indices; the array data
+ *	is unchanged.
+ */
 void sort(int n, const double data[], int index[]);
+
+//! compute the mean of the numbers in list
 double mean(int n, double x[]);
+
+/**
+Find the median of X(1), ... , X(N), using as much of the quicksort
+algorithm as is needed to isolate it.
+N.B. On exit, the array X is partially ordered.
+Based on Alan J. Miller's median.f90 routine.
+*/
 double median (int n, double x[]);
 
+/**
+This function calculates the weights using the weighting scheme proposed by
+Michael Eisen:
+w[i] = 1.0 / sum_{j where d[i][j]<cutoff} (1 - d[i][j]/cutoff)^exponent
+where the cutoff and the exponent are specified by the user.
+
+\param nrows      (input) int
+The number of rows in the gene expression data matrix, equal to the number of
+genes.
+
+\param ncolumns   (input) int
+The number of columns in the gene expression data matrix, equal to the number of
+microarrays.
+
+\param data       (input) double[nrows][ncolumns]
+The array containing the gene expression data.
+
+\param mask       (input) int[nrows][ncolumns]
+This array shows which data values are missing. If mask[i][j]==0, then
+data[i][j] is missing.
+
+\param weight     (input) int[ncolumns] if transpose==0,
+                   int[nrows]    if transpose==1
+The weights that are used to calculate the distance. The length of this vector
+is ncolumns if gene weights are being clustered, and nrows if microarrays
+weights are being clustered.
+
+\param transpose (input) int
+If transpose==0, the weights of the rows of the data matrix are calculated.
+Otherwise, the weights of the columns of the data matrix are calculated.
+
+\param dist      (input) char
+Defines which distance measure is used, as given by the table:
+dist=='e': Euclidean distance
+dist=='b': City-block distance
+dist=='c': correlation
+dist=='a': absolute value of the correlation
+dist=='u': uncentered correlation
+dist=='x': absolute uncentered correlation
+dist=='s': Spearman's rank correlation
+dist=='k': Kendall's tau
+For other values of dist, the default (Euclidean distance) is used.
+
+\param cutoff    (input) double
+The cutoff to be used to calculate the weights.
+
+\param exponent  (input) double
+The exponent to be used to calculate the weights.
+
+
+Return value
+============
+
+The function returns a pointer to a newly allocated array containing the
+calculated weights for the rows (if transpose==0) or columns (if
+transpose==1). If not enough memory could be allocated to store the
+weights array, the function returns NULL.
+*/
 double* calculate_weights(int nrows, int ncolumns, double** data, int** mask,
   double weights[], int transpose, char dist, double cutoff, double exponent);
-    //#ifdef __cplusplus
-    //}
-//#endif
+
+
+///@}
+
+#endif /* _C_CLUSTERING_LIBRARY_H */