Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/rel-3.44.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
CI user committed Jan 10, 2024
2 parents 3416f95 + fa3104c commit 28dc228
Show file tree
Hide file tree
Showing 16 changed files with 186 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
``custom_metric_func``
----------------------

- Available in: GBM, DRF, Deeplearning, Stacked Ensembles, GLM
- Available in: GBM, DRF, Deeplearning, Stacked Ensembles, GLM, XGBoost
- Hyperparameter: no

Description
Expand Down
2 changes: 2 additions & 0 deletions h2o-docs/src/product/data-science/deep-learning.rst
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ Algorithm-specific parameters

- **col_major**: Specify whether to use a column major weight matrix for the input layer. This option can speed up forward propagation but may reduce the speed of back propagation. This option defaults to ``False`` (disabled).

- `custom_metric_func <algo-params/custom_metric_func.html>`__: Specify a custom evaluation function.

- **diagnostics**: Specify whether to compute the variable importances for input features (using the Gedeon method). For large networks, enabling this option can reduce speed. This option defaults to ``True`` (enabled).

- **elastic_averaging**: Specify whether to enable elastic averaging between computing nodes, which can improve distributed model convergence. This option defaults to ``False`` (disabled).
Expand Down
2 changes: 2 additions & 0 deletions h2o-docs/src/product/data-science/stacked-ensembles.rst
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ Algorithm-specific parameters

- **score_training_samples**: Specify the number of training set samples for scoring. The value must be :math:`\geq` 0. To use all training samples, enter ``0``. This option defaults to ``10000``.

- `custom_metric_func <algo-params/custom_metric_func.html>`__: Specify a custom evaluation function.

Common parameters
'''''''''''''''''

Expand Down
2 changes: 2 additions & 0 deletions h2o-docs/src/product/data-science/xgboost.rst
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ Algorithm-specific parameters

- **colsample_bynode**: Specify the column subsampling rate per tree node. This method samples without replacement. Note that it is multiplicative with ``col_sample_rate`` and ``col_sample_rate_per_tree``, so setting all parameters to ``0.8``, for example, results in 51% of columns being considered at any given node to split. This value defaults to ``1.0`` and can be a value from 0.0 to 1.0.

- `custom_metric_func <algo-params/custom_metric_func.html>`__: (Applicable only if ``eval_metric`` is not set) Specify a custom evaluation function.

- **eval_metric**: Specify the `evaluation metric <https://xgboost.readthedocs.io/en/stable/parameter.html#learning-task-parameters>`__ that will be passed to the native XGBoost backend. To use ``eval_metric`` for early stopping, you need to specify ``stopping_metric="custom"``. This option defaults to ``"None"``.

- **dmatrix_type**: Specify the type of DMatrix. Valid options include the following: ``"auto"`` (default), ``"dense"``, and ``"sparse"``. Note that for ``dmatrix_type="sparse"``, NAs and 0 are treated equally.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ public static final class XGBoostParametersV3 extends ModelParametersSchemaV3<XG
"quiet_mode",
"checkpoint",
"export_checkpoints_dir",
"custom_metric_func",

// model specific
"ntrees",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -263,11 +263,20 @@ is being scored (on CPU) the other one is running on GPU and the GPU is never id
error("_tree_method", "exact is not supported in distributed environment, set build_tree_one_node to true to use exact");

CalibrationHelper.initCalibration(this, _parms, expensive);

if (_parms.hasCustomMetricFunc() && _parms._eval_metric != null) {
error("custom_metric_func", "Custom metric is not supported together with eval_metric parameter. Please use only one of them.");
}

if (_parms._score_eval_metric_only && _parms._eval_metric == null) {
warn("score_eval_metric_only", "score_eval_metric_only is set but eval_metric parameter is not defined");
}
}

protected void checkCustomMetricForEarlyStopping() {
if (_parms._eval_metric == null) {
if (_parms._eval_metric == null && !_parms.hasCustomMetricFunc()) {
error("_eval_metric", "Evaluation metric needs to be defined in order to use it for early stopping.");
super.checkCustomMetricForEarlyStopping();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import water.codegen.CodeGeneratorPipeline;
import water.fvec.Frame;
import water.fvec.Vec;
import water.udf.CFuncRef;
import water.util.*;

import java.util.*;
Expand Down Expand Up @@ -599,20 +600,28 @@ public XGBoostMojoWriter getMojo() {

private ModelMetrics makeMetrics(Frame data, Frame originalData, boolean isTrain, String description) {
LOG.debug("Making metrics: " + description);
return new XGBoostModelMetrics(_output, data, originalData, isTrain, this).compute();
return new XGBoostModelMetrics(_output, data, originalData, isTrain, this, CFuncRef.from(_parms._custom_metric_func)).compute();
}

final void doScoring(Frame train, Frame trainOrig, CustomMetric trainCustomMetric,
Frame valid, Frame validOrig, CustomMetric validCustomMetric) {
ModelMetrics mm = makeMetrics(train, trainOrig, true, "Metrics reported on training frame");
_output._training_metrics = mm;
_output._scored_train[_output._ntrees].fillFrom(mm, trainCustomMetric);
if (trainCustomMetric == null) {
_output._scored_train[_output._ntrees].fillFrom(mm, mm._custom_metric);
} else {
_output._scored_train[_output._ntrees].fillFrom(mm, trainCustomMetric);
}
addModelMetrics(mm);
// Optional validation part
if (valid != null) {
mm = makeMetrics(valid, validOrig, false, "Metrics reported on validation frame");
_output._validation_metrics = mm;
_output._scored_valid[_output._ntrees].fillFrom(mm, validCustomMetric);
if (validCustomMetric == null) {
_output._scored_valid[_output._ntrees].fillFrom(mm, mm._custom_metric);
} else {
_output._scored_valid[_output._ntrees].fillFrom(mm, validCustomMetric);
}
addModelMetrics(mm);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import water.Scope;
import water.fvec.Frame;
import water.fvec.Vec;
import water.udf.CFuncRef;

import java.util.Arrays;

Expand All @@ -28,15 +29,16 @@ public XGBoostModelMetrics(
Frame data,
Frame originalData,
boolean isTrain,
XGBoostModel model
XGBoostModel model,
CFuncRef customMetricFunc
) {
_output = output;
_data = data;
_originalData = originalData;
_model = model;

_task = new XGBoostScoreTask(
_output, _data.find(_model._parms._weights_column), isTrain, _model
_output, _data.find(_model._parms._weights_column), isTrain, _model, customMetricFunc
);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
package hex.tree.xgboost.task;

import hex.ModelMetrics;
import hex.ModelMetricsBinomial;
import hex.ModelMetricsMultinomial;
import hex.ModelMetricsRegression;
import hex.*;
import hex.tree.xgboost.XGBoostModel;
import hex.tree.xgboost.XGBoostOutput;
import hex.tree.xgboost.predict.XGBoostBigScorePredict;
Expand All @@ -12,8 +9,9 @@
import water.MemoryManager;
import water.fvec.Chunk;
import water.fvec.NewChunk;
import water.udf.CFuncRef;

public class XGBoostScoreTask extends MRTask<XGBoostScoreTask> { // used to score model metrics
public class XGBoostScoreTask extends CMetricScoringTask<XGBoostScoreTask> { // used to score model metrics

private final XGBoostOutput _output;
private final int _weightsChunkId;
Expand All @@ -29,8 +27,10 @@ public XGBoostScoreTask(
final XGBoostOutput output,
final int weightsChunkId,
final boolean isTrain,
final XGBoostModel model
final XGBoostModel model,
CFuncRef customMetricFunc
) {
super(customMetricFunc);
_output = output;
_weightsChunkId = weightsChunkId;
_model = model;
Expand Down Expand Up @@ -58,6 +58,7 @@ private ModelMetrics.MetricBuilder createMetricsBuilder(final int responseClasse

/**
 * Setup hook for this scoring task: delegates to the parent class's setup first and then
 * obtains the predictor used for scoring from the model via {@code setupBigScorePredict}.
 */
@Override
protected void setupLocal() {
super.setupLocal(); // parent setup runs first; presumably prepares the custom metric function — TODO confirm
_predict = _model.setupBigScorePredict(_isTrain);
}

Expand All @@ -79,6 +80,7 @@ public void map(Chunk[] cs, NewChunk[] ncs) {
yact[0] = (float) responseChunk.atd(j);
double weight = _weightsChunkId != -1 ? cs[_weightsChunkId].atd(j) : 1; // If there is no chunk with weights, the weight is considered to be 1
_metricBuilder.perRow(currentPred, yact, weight, 0, _model);
customMetricPerRow(currentPred, yact, weight, 0, _model);
}
for (int i = 0; i < cs[0]._len; ++i) {
ncs[0].addNum(preds[i][0]);
Expand All @@ -99,6 +101,7 @@ public void map(Chunk[] cs, NewChunk[] ncs) {
double weight = _weightsChunkId != -1 ? cs[_weightsChunkId].atd(i) : 1; // If there is no chunk with weights, the weight is considered to be 1
yact[0] = (float) responseChunk.atd(i);
_metricBuilder.perRow(row, yact, weight, 0, _model);
customMetricPerRow(row, yact, weight, 0, _model);
}
} else {
float[] yact = new float[1];
Expand All @@ -114,6 +117,7 @@ public void map(Chunk[] cs, NewChunk[] ncs) {
yact[0] = (float) responseChunk.atd(i);
double weight = _weightsChunkId != -1 ? cs[_weightsChunkId].atd(i) : 1; // If there is no chunk with weights, the weight is considered to be 1
_metricBuilder.perRow(row, yact, weight, 0, _model);
customMetricPerRow(row, yact, weight, 0, _model);
}
}
}
Expand All @@ -124,4 +128,12 @@ public void reduce(XGBoostScoreTask mrt) {
_metricBuilder.reduce(mrt._metricBuilder);
}

/**
 * Post-global hook: after the parent's post-global step, hands the computed custom metric
 * to the metric builder (when one exists) and, if a custom metric function reference is
 * set, records this task on the builder.
 */
@Override
protected void postGlobal() {
  super.postGlobal();
  if (_metricBuilder == null) {
    // Nothing to finalize without a metric builder.
    return;
  }
  _metricBuilder.postGlobal(getComputedCustomMetric());
  if (cFuncRef != null) {
    _metricBuilder._CMetricScoringTask = this;
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3252,5 +3252,32 @@ public void testConcurrentModelsOnGPU() {
}

}

/**
 * Trains a single-tree XGBoost model on the prostate training data with
 * {@code _score_eval_metric_only} enabled and no eval metric configured, then checks that
 * the builder's validation warnings contain the matching warning text.
 */
@Test
public void testWarnEvalMetricOnlyWithouEvalMetric() {
  Scope.enter();
  try {
    final String responseColumn = "CAPSULE";
    final String expectedWarning = "score_eval_metric_only is set but eval_metric parameter is not defined";

    Frame trainingFrame = parseAndTrackTestFile("./smalldata/logreg/prostate_train.csv");
    trainingFrame.toCategoricalCol(responseColumn);

    XGBoostModel.XGBoostParameters params = new XGBoostModel.XGBoostParameters();
    params._train = trainingFrame._key;
    params._response_column = responseColumn;
    params._ntrees = 1;
    params._score_eval_metric_only = true;

    ModelBuilder builder = new hex.tree.xgboost.XGBoost(params);
    XGBoostModel model = (XGBoostModel) builder.trainModel().get();
    Scope.track_generic(model);
    assertNotNull(model);

    assertTrue("Parameter is not validate", builder.validationWarnings().contains(expectedWarning));
    System.out.println(builder.validationWarnings());
  } finally {
    // Always release everything tracked in this scope, even when an assertion fails.
    Scope.exit();
  }
}

}
19 changes: 19 additions & 0 deletions h2o-py/h2o/estimators/xgboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def __init__(self,
quiet_mode=True, # type: bool
checkpoint=None, # type: Optional[Union[None, str, H2OEstimator]]
export_checkpoints_dir=None, # type: Optional[str]
custom_metric_func=None, # type: Optional[str]
ntrees=50, # type: int
max_depth=6, # type: int
min_rows=1.0, # type: float
Expand Down Expand Up @@ -200,6 +201,9 @@ def __init__(self,
:param export_checkpoints_dir: Automatically export generated models to this directory.
Defaults to ``None``.
:type export_checkpoints_dir: str, optional
:param custom_metric_func: Reference to custom evaluation function, format: `language:keyName=funcName`
Defaults to ``None``.
:type custom_metric_func: str, optional
:param ntrees: (same as n_estimators) Number of trees.
Defaults to ``50``.
:type ntrees: int
Expand Down Expand Up @@ -379,6 +383,7 @@ def __init__(self,
self.quiet_mode = quiet_mode
self.checkpoint = checkpoint
self.export_checkpoints_dir = export_checkpoints_dir
self.custom_metric_func = custom_metric_func
self.ntrees = ntrees
self.max_depth = max_depth
self.min_rows = min_rows
Expand Down Expand Up @@ -1208,6 +1213,20 @@ def export_checkpoints_dir(self, export_checkpoints_dir):
assert_is_type(export_checkpoints_dir, None, str)
self._parms["export_checkpoints_dir"] = export_checkpoints_dir

@property
def custom_metric_func(self):
    """
    Reference to a custom evaluation function, given as `language:keyName=funcName`.

    Type: ``str``.
    """
    current = self._parms.get("custom_metric_func")
    return current

@custom_metric_func.setter
def custom_metric_func(self, custom_metric_func):
    # Only ``None`` or a string reference is accepted.
    assert_is_type(custom_metric_func, None, str)
    self._parms["custom_metric_func"] = custom_metric_func

@property
def ntrees(self):
"""
Expand Down
1 change: 0 additions & 1 deletion h2o-py/tests/pyunit_utils/utils_model_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@ def assert_metrics_equal(metric, metric_name1, metric_name2, msg=None, delta=1e-
m2 = metric._metric_json[metric_name2]
m1 = float(m1) if m1 != "NaN" else 0
m2 = float(m2) if m2 != "NaN" else 0
print("{} == {}".format(m1, m2))
assert abs(m1-m2) <= delta, "{}: {} != {}".format(msg, m1, m2)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def test_custom_metric_computation_multinomial_autoencoder():
# Custom metrics must be rejected when the model is trained as an autoencoder.
params = {"autoencoder": True}
try:
    multinomial_model(H2ODeepLearningEstimator, custom_rmse_mm(), params)
except H2OResponseError as e:
    assert "Custom metric is not supported for Autoencoder." in str(e)
else:
    # Raising a plain string is a TypeError in Python 3; fail with a real exception
    # so the report states that the expected rejection never happened.
    raise AssertionError("Should fail")

Expand Down
63 changes: 63 additions & 0 deletions h2o-py/tests/testdir_algos/xgboost/pyunit_xgboost_custom_metric.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import sys

sys.path.insert(1, "../../../")
import h2o
from tests import pyunit_utils
from tests.pyunit_utils import CustomMaeFunc, CustomRmseFunc, \
assert_correct_custom_metric, regression_model, multinomial_model, binomial_model
from h2o.estimators import H2OXGBoostEstimator
from h2o.exceptions import H2OResponseError


# Custom model metrics fixture
def custom_mae_mm():
    """Upload ``CustomMaeFunc`` under the name "mae-custom" and return the upload result."""
    uploaded = h2o.upload_custom_metric(
        CustomMaeFunc,
        func_name="mae-custom",
        func_file="mm_mae.py",
    )
    return uploaded


def custom_rmse_mm():
    """Upload ``CustomRmseFunc`` under the name "rmse-custom" and return the upload result."""
    uploaded = h2o.upload_custom_metric(
        CustomRmseFunc,
        func_name="rmse-custom",
        func_file="mm_rmse.py",
    )
    return uploaded


# Test that the custom model metric is computed
# and compare it with the implicitly computed (built-in) metric
def test_custom_metric_computation_regression():
    """Build an XGBoost regression model with the custom MAE function and check it against "mae"."""
    metric_ref = custom_mae_mm()
    model, f_test = regression_model(H2OXGBoostEstimator, metric_ref)
    print(model)
    assert_correct_custom_metric(model, f_test, "mae", "Regression on prostate")


def test_custom_metric_computation_binomial():
    """Build an XGBoost binomial model with the custom RMSE function and check it against "rmse"."""
    metric_ref = custom_rmse_mm()
    model, f_test = binomial_model(H2OXGBoostEstimator, metric_ref)
    print(model)
    assert_correct_custom_metric(model, f_test, "rmse", "Binomial on prostate")


def test_custom_metric_computation_together_with_eval_metric():
params = {"eval_metric": "[email protected]"}
try:
binomial_model(H2OXGBoostEstimator, custom_rmse_mm(), params)
raise "Should fail"
except H2OResponseError as e:
assert "Custom metric is not supported together with eval_metric parameter" in str(e)


def test_custom_metric_computation_multinomial():
    """Build an XGBoost multinomial model with the custom RMSE function and check it against "rmse"."""
    metric_ref = custom_rmse_mm()
    model, f_test = multinomial_model(H2OXGBoostEstimator, metric_ref)
    print(model)
    assert_correct_custom_metric(model, f_test, "rmse", "Multinomial on iris")


# Tests to invoke in this suite
__TESTS__ = [
    test_custom_metric_computation_binomial,
    test_custom_metric_computation_regression,
    test_custom_metric_computation_multinomial,
    test_custom_metric_computation_together_with_eval_metric,
]

# Executed as a script: route every test through the standalone harness.
# Loaded any other way: call each test function directly.
for func in __TESTS__:
    if __name__ == "__main__":
        pyunit_utils.standalone_test(func)
    else:
        func()
Loading

0 comments on commit 28dc228

Please sign in to comment.