forked from imbs-hl/ranger
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTreeProbability.h
112 lines (84 loc) · 4.18 KB
/
TreeProbability.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
/*-------------------------------------------------------------------------------
This file is part of Ranger.
Ranger is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Ranger is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Ranger. If not, see <http://www.gnu.org/licenses/>.
Written by:
Marvin N. Wright
Institut für Medizinische Biometrie und Statistik
Universität zu Lübeck
Ratzeburger Allee 160
23562 Lübeck
Germany
http://www.imbs-luebeck.de
-------------------------------------------------------------------------------*/
#ifndef TREEPROBABILITY_H_
#define TREEPROBABILITY_H_
#include <map>
#include "globals.h"
#include "Tree.h"
// Tree specialization (derived from Tree) whose per-sample prediction is a
// vector of doubles stored per terminal node in terminal_class_counts.
//
// Only the inline members are defined in this header; every other member is
// declared here and defined elsewhere, so the notes on those declarations are
// limited to what the signatures show.
class TreeProbability: public Tree {
public:
  // class_values / response_classIDs: pointers to the classes of the dependent
  // variable and to the class IDs of the responses (see the members of the
  // same name below). NOTE(review): presumably non-owning; nothing in this
  // header frees them - confirm against the caller.
  TreeProbability(std::vector<double>* class_values, std::vector<uint>* response_classIDs);

  // Create from loaded forest
  TreeProbability(std::vector<std::vector<size_t>>& child_nodeIDs, std::vector<size_t>& split_varIDs,
      std::vector<double>& split_values, std::vector<double>* class_values, std::vector<uint>* response_classIDs,
      std::vector<std::vector<double>>& terminal_class_counts);

  virtual ~TreeProbability();

  void initInternal();
  void addToTerminalNodes(size_t nodeID);
  void computePermutationImportanceInternal(std::vector<std::vector<size_t>>* permutations);
  void appendToFileInternal(std::ofstream& file);

  // Returns the terminal_class_counts entry of the terminal node recorded for
  // sampleID in prediction_terminal_nodeIDs (not declared in this class;
  // presumably inherited from Tree). Both lookups use unchecked operator[],
  // so sampleID must be a valid index.
  const std::vector<double>& getPrediction(size_t sampleID) const {
    const size_t terminal_nodeID = prediction_terminal_nodeIDs[sampleID];
    return terminal_class_counts[terminal_nodeID];
  }

  // Returns the terminal node ID recorded for sampleID (unchecked index).
  size_t getPredictionTerminalNodeID(size_t sampleID) const {
    return prediction_terminal_nodeIDs[sampleID];
  }

  // Read-only access to the per-node class count vectors.
  const std::vector<std::vector<double>>& getTerminalClassCounts() const {
    return terminal_class_counts;
  }

private:
  bool splitNodeInternal(size_t nodeID, std::vector<size_t>& possible_split_varIDs);
  void createEmptyNodeInternal();

  double computePredictionAccuracyInternal();

  // Called by splitNodeInternal(). Sets split_varIDs and split_values.
  bool findBestSplit(size_t nodeID, std::vector<size_t>& possible_split_varIDs);

  // The findBestSplitValue* helpers share one signature. best_value,
  // best_varID and best_decrease are non-const references, i.e. in/out
  // parameters. NOTE(review): presumably they are overwritten only when varID
  // yields a better split than the current best - confirm in the .cpp.
  void findBestSplitValueSmallQ(size_t nodeID, size_t varID, size_t num_classes, size_t* class_counts,
      size_t num_samples_node, double& best_value, size_t& best_varID, double& best_decrease);
  void findBestSplitValueLargeQ(size_t nodeID, size_t varID, size_t num_classes, size_t* class_counts,
      size_t num_samples_node, double& best_value, size_t& best_varID, double& best_decrease);
  void findBestSplitValueUnordered(size_t nodeID, size_t varID, size_t num_classes, size_t* class_counts,
      size_t num_samples_node, double& best_value, size_t& best_varID, double& best_decrease);

  bool findBestSplitExtraTrees(size_t nodeID, std::vector<size_t>& possible_split_varIDs);
  void findBestSplitValueExtraTrees(size_t nodeID, size_t varID, size_t num_classes, size_t* class_counts,
      size_t num_samples_node, double& best_value, size_t& best_varID, double& best_decrease);
  void findBestSplitValueExtraTreesUnordered(size_t nodeID, size_t varID, size_t num_classes, size_t* class_counts,
      size_t num_samples_node, double& best_value, size_t& best_varID, double& best_decrease);

  void addImpurityImportance(size_t nodeID, size_t varID, double decrease);

  // Releases the two counter buffers. Safe to call repeatedly: delete[] on a
  // null pointer is a no-op, and both pointers are reset afterwards so that a
  // second call (or any later cleanup path) cannot free the same array twice.
  void cleanUpInternal() {
    delete[] counter;
    counter = nullptr;
    delete[] counter_per_class;
    counter_per_class = nullptr;
  }

  // Classes of the dependent variable and classIDs for responses
  std::vector<double>* class_values;
  std::vector<uint>* response_classIDs;

  // Class counts in terminal nodes. Empty for non-terminal nodes.
  std::vector<std::vector<double>> terminal_class_counts;

  // Buffers released with delete[] in cleanUpInternal(), so they must be null
  // or come from new[]. NOTE(review): presumably allocated in initInternal() -
  // confirm in the .cpp. The nullptr defaults keep cleanUpInternal() well
  // defined even if a constructor does not set these members explicitly.
  size_t* counter = nullptr;
  size_t* counter_per_class = nullptr;

  DISALLOW_COPY_AND_ASSIGN(TreeProbability);
};
#endif /* TREEPROBABILITY_H_ */