forked from motiwari/BanditPAM
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbanditpam_orig.hpp
152 lines (142 loc) · 5.6 KB
/
banditpam_orig.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#ifndef HEADERS_ALGORITHMS_BANDITPAM_ORIG_HPP_
#define HEADERS_ALGORITHMS_BANDITPAM_ORIG_HPP_
#include <omp.h>
#include <armadillo>
#include <vector>
#include <fstream>
#include <iostream>
#include "kmedoids_algorithm.hpp"
namespace km {
/**
* @brief Contains all necessary BanditPAM_orig functions
*/
class BanditPAM_orig : public km::KMedoids {
public:
/**
* @brief Runs BanditPAM_orig to identify a dataset's medoids.
*
* @param inputData Input data to cluster
*/
void fitBanditPAM_orig(
const arma::fmat& inputData,
std::optional<std::reference_wrapper<const arma::fmat>> distMat);
/**
* @brief Empirical estimation of standard deviation of arm returns
* in the BUILD step.
*
* @param data Transposed input data to cluster
* @param bestDistances Contains best distances from each point to medoids
* @param useAbsolute Flag to use the absolute distance to each arm instead
* of improvement over prior loss; necessary for the first BUILD step
*
* @returns Estimate of each arm's standard deviation
*/
arma::frowvec buildSigma(
const arma::fmat& data,
std::optional<std::reference_wrapper<const arma::fmat>> distMat,
const arma::frowvec& bestDistances,
const bool useAbsolute);
/**
* @brief Estimates the mean reward for each arm in the BUILD steps.
*
* @param data Transposed input data to cluster
* @param target Candidate datapoints to consider adding as medoids
* @param bestDistances Contains best distances from each point to medoids
* @param useAbsolute Flag to use the absolute distance to each arm instead
* of improvement over prior loss; necessary for the first BUILD step
* @param exact 0 if using standard batch size; size of dataset otherwise
*
* @returns Estimate of each arm's change in loss
*/
arma::frowvec buildTarget(
const arma::fmat& data,
std::optional<std::reference_wrapper<const arma::fmat>> distMat,
const arma::uvec* target,
const arma::frowvec* bestDistances,
const bool useAbsolute,
const size_t exact);
/**
* @brief Performs the BUILD step of BanditPAM_orig.
*
* Draws batch size reference points with replacement and uses the estimated
* reward of adding candidate medoids to the set of medoids. Constructs
* confidence intervals for expected loss when adding each candidate as a
* medoid, and continues drawing reference points until the best candidate's
* confidence interval is disjoint from all others.
*
* @param data Transposed input data to cluster
* @param medoidIndices Array of medoids that is modified in place
* as medoids are identified
* @param medoids Matrix that contains the coordinates of each medoid
*/
void build(
const arma::fmat& data,
std::optional<std::reference_wrapper<const arma::fmat>> distMat,
arma::urowvec* medoidIndices,
arma::fmat* medoids);
/**
* @brief Empirical estimation of standard deviation of arm returns
* in the SWAP step.
*
* @param data Transposed input data to cluster
* @param bestDistances Contains best distances from each point to medoids
* @param secondBestDistances Contains second best distances from each point to medoids
* @param assignments Assignments of datapoints to their closest medoid
*
* @returns Estimate of each arm's standard deviation
*/
arma::fmat swapSigma(
const arma::fmat& data,
std::optional<std::reference_wrapper<const arma::fmat>> distMat,
const arma::frowvec* bestDistances,
const arma::frowvec* secondBestDistances,
const arma::urowvec* assignments);
/**
* @brief Estimates the mean reward for each arm in SWAP step.
*
* @param data Transposed input data to cluster
* @param medoidIndices Array of medoids that is modified in place
* as medoids are swapped in and out
* @param targets Set of potential swaps to be evaluated
* @param bestDistances Contains best distances from each point to medoids
* @param secondBestDistances Contains second best distances
* from each point to medoids
* @param assignments Assignments of datapoints to their closest medoid
* @param exact 0 if using standard batch size; size of dataset otherwise
*
* @returns Estimate of each arm's change in loss
*/
arma::fvec swapTarget(
const arma::fmat& data,
std::optional<std::reference_wrapper<const arma::fmat>> distMat,
const arma::urowvec* medoidIndices,
const arma::uvec* targets,
const arma::frowvec* bestDistances,
const arma::frowvec* secondBestDistances,
const arma::urowvec* assignments,
const size_t exact);
/**
* @brief Performs the SWAP step of BanditPAM_orig.
*
* Draws batch size reference points with replacement and uses the estimated
* reward of performing a (medoid, non-medoid) swap. Constructs
* confidence intervals for expected loss when performing the swap,
* and continues drawing reference points until the best candidate's
* confidence interval is disjoint from all others.
*
* @param data Transposed input data to cluster
* @param medoidIndices Array of medoid indices created from the BUILD step
* that is modified in place as better medoids are identified
* @param medoids Matrix of possible medoids that is updated as the bandit
* learns which datapoints will be unlikely to be good candidates
* @param assignments Array of containing the medoid each point is closest to
*/
void swap(
const arma::fmat& data,
std::optional<std::reference_wrapper<const arma::fmat>> distMat,
arma::urowvec* medoidIndices,
arma::fmat* medoids,
arma::urowvec* assignments);
};
} // namespace km
#endif // HEADERS_ALGORITHMS_BANDITPAM_ORIG_HPP_