Skip to content

Commit

Permalink
Merge pull request opencv#2016 from pemmanuelviel:kmeansppSquareDist
Browse files Browse the repository at this point in the history
  • Loading branch information
vpisarev authored and OpenCV Buildbot committed Apr 15, 2014
2 parents df8e282 + 0d19685 commit 6a5a0fe
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 4 deletions.
60 changes: 60 additions & 0 deletions modules/flann/include/opencv2/flann/dist.h
Original file line number Diff line number Diff line change
Expand Up @@ -812,6 +812,66 @@ struct ZeroIterator

};


/*
* Depending on the distance type, some results are already squared (e.g. L2)
* and some are not (e.g. Hamming). In KMeans++, for instance, we want to be sure
* we are working on squared distances, hence the following templates to ensure that.
*/
/*
 * Generic case: the value produced by Distance is taken to be a plain
 * (non-squared) distance, so it is multiplied by itself.
 *
 * ElementType is not used by this primary template; it only takes part in
 * selecting a partial specialization.
 *
 * dist: a value of type Distance::ResultType.
 * Returns dist * dist.
 */
template <typename Distance, typename ElementType>
struct squareDistance
{
    typedef typename Distance::ResultType ResultType;

    // const-qualified: the functor holds no state, so it should be callable
    // through const objects and const references as well.
    ResultType operator()( ResultType dist ) const { return dist*dist; }
};


/*
 * L2_Simple specialization: the result is treated as already squared and is
 * returned unchanged.
 *
 * dist: a value of type L2_Simple<ElementType>::ResultType.
 * Returns dist as is.
 */
template <typename ElementType>
struct squareDistance<L2_Simple<ElementType>, ElementType>
{
    typedef typename L2_Simple<ElementType>::ResultType ResultType;

    // const-qualified: the functor holds no state, so it should be callable
    // through const objects and const references as well.
    ResultType operator()( ResultType dist ) const { return dist; }
};

/*
 * L2 specialization: the result is treated as already squared and is
 * returned unchanged.
 *
 * dist: a value of type L2<ElementType>::ResultType.
 * Returns dist as is.
 */
template <typename ElementType>
struct squareDistance<L2<ElementType>, ElementType>
{
    typedef typename L2<ElementType>::ResultType ResultType;

    // const-qualified: the functor holds no state, so it should be callable
    // through const objects and const references as well.
    ResultType operator()( ResultType dist ) const { return dist; }
};


/*
 * MinkowskiDistance specialization: the result is returned unchanged, i.e.
 * it is treated as if it were already a squared distance.
 *
 * NOTE(review): this is presumably only exact when the Minkowski order is 2;
 * confirm against the MinkowskiDistance definition for other orders.
 *
 * dist: a value of type MinkowskiDistance<ElementType>::ResultType.
 * Returns dist as is.
 */
template <typename ElementType>
struct squareDistance<MinkowskiDistance<ElementType>, ElementType>
{
    typedef typename MinkowskiDistance<ElementType>::ResultType ResultType;

    // const-qualified: the functor holds no state, so it should be callable
    // through const objects and const references as well.
    ResultType operator()( ResultType dist ) const { return dist; }
};

/*
 * HellingerDistance specialization: the result is treated as already squared
 * and is returned unchanged.
 *
 * dist: a value of type HellingerDistance<ElementType>::ResultType.
 * Returns dist as is.
 */
template <typename ElementType>
struct squareDistance<HellingerDistance<ElementType>, ElementType>
{
    typedef typename HellingerDistance<ElementType>::ResultType ResultType;

    // const-qualified: the functor holds no state, so it should be callable
    // through const objects and const references as well.
    ResultType operator()( ResultType dist ) const { return dist; }
};

/*
 * ChiSquareDistance specialization: the result is treated as already squared
 * and is returned unchanged.
 *
 * dist: a value of type ChiSquareDistance<ElementType>::ResultType.
 * Returns dist as is.
 */
template <typename ElementType>
struct squareDistance<ChiSquareDistance<ElementType>, ElementType>
{
    typedef typename ChiSquareDistance<ElementType>::ResultType ResultType;

    // const-qualified: the functor holds no state, so it should be callable
    // through const objects and const references as well.
    ResultType operator()( ResultType dist ) const { return dist; }
};


template <typename Distance>
typename Distance::ResultType ensureSquareDistance( typename Distance::ResultType dist )
{
typedef typename Distance::ElementType ElementType;

squareDistance<Distance, ElementType> dummy;
return dummy( dist );
}

}

#endif //OPENCV_FLANN_DIST_H_
Original file line number Diff line number Diff line change
Expand Up @@ -210,8 +210,11 @@ class HierarchicalClusteringIndex : public NNIndex<Distance>
assert(index >=0 && index < n);
centers[0] = dsindices[index];

// Using distance^2 gives an even higher probability of picking new centers
// far from the previous centers (and this complies with the "k-means++: the advantages of careful seeding" article)
for (int i = 0; i < n; i++) {
closestDistSq[i] = distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols);
closestDistSq[i] = ensureSquareDistance<Distance>( closestDistSq[i] );
currentPot += closestDistSq[i];
}

Expand All @@ -237,7 +240,10 @@ class HierarchicalClusteringIndex : public NNIndex<Distance>

// Compute the new potential
double newPot = 0;
for (int i = 0; i < n; i++) newPot += std::min( distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols), closestDistSq[i] );
for (int i = 0; i < n; i++) {
DistanceType dist = distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols);
newPot += std::min( ensureSquareDistance<Distance>(dist), closestDistSq[i] );
}

// Store the best result
if ((bestNewPot < 0)||(newPot < bestNewPot)) {
Expand All @@ -249,7 +255,10 @@ class HierarchicalClusteringIndex : public NNIndex<Distance>
// Add the appropriate center
centers[centerCount] = dsindices[bestNewIndex];
currentPot = bestNewPot;
for (int i = 0; i < n; i++) closestDistSq[i] = std::min( distance(dataset[dsindices[i]], dataset[dsindices[bestNewIndex]], dataset.cols), closestDistSq[i] );
for (int i = 0; i < n; i++) {
DistanceType dist = distance(dataset[dsindices[i]], dataset[dsindices[bestNewIndex]], dataset.cols);
closestDistSq[i] = std::min( ensureSquareDistance<Distance>(dist), closestDistSq[i] );
}
}

centers_length = centerCount;
Expand Down
11 changes: 9 additions & 2 deletions modules/flann/include/opencv2/flann/kmeans_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ class KMeansIndex : public NNIndex<Distance>

for (int i = 0; i < n; i++) {
closestDistSq[i] = distance_(dataset_[indices[i]], dataset_[indices[index]], dataset_.cols);
closestDistSq[i] = ensureSquareDistance<Distance>( closestDistSq[i] );
currentPot += closestDistSq[i];
}

Expand All @@ -236,7 +237,10 @@ class KMeansIndex : public NNIndex<Distance>

// Compute the new potential
double newPot = 0;
for (int i = 0; i < n; i++) newPot += std::min( distance_(dataset_[indices[i]], dataset_[indices[index]], dataset_.cols), closestDistSq[i] );
for (int i = 0; i < n; i++) {
DistanceType dist = distance_(dataset_[indices[i]], dataset_[indices[index]], dataset_.cols);
newPot += std::min( ensureSquareDistance<Distance>(dist), closestDistSq[i] );
}

// Store the best result
if ((bestNewPot < 0)||(newPot < bestNewPot)) {
Expand All @@ -248,7 +252,10 @@ class KMeansIndex : public NNIndex<Distance>
// Add the appropriate center
centers[centerCount] = indices[bestNewIndex];
currentPot = bestNewPot;
for (int i = 0; i < n; i++) closestDistSq[i] = std::min( distance_(dataset_[indices[i]], dataset_[indices[bestNewIndex]], dataset_.cols), closestDistSq[i] );
for (int i = 0; i < n; i++) {
DistanceType dist = distance_(dataset_[indices[i]], dataset_[indices[bestNewIndex]], dataset_.cols);
closestDistSq[i] = std::min( ensureSquareDistance<Distance>(dist), closestDistSq[i] );
}
}

centers_length = centerCount;
Expand Down

0 comments on commit 6a5a0fe

Please sign in to comment.