diff --git a/modules/ml/include/opencv2/ml.hpp b/modules/ml/include/opencv2/ml.hpp
index a8b711467441..eb2d7ed2a723 100644
--- a/modules/ml/include/opencv2/ml.hpp
+++ b/modules/ml/include/opencv2/ml.hpp
@@ -1503,14 +1503,18 @@ class CV_EXPORTS_W ANN_MLP : public StatModel
     enum ActivationFunctions {
         /** Identity function: \f$f(x)=x\f$ */
         IDENTITY = 0,
-        /** Symmetrical sigmoid: \f$f(x)=\beta*(1-e^{-\alpha x})/(1+e^{-\alpha x}\f$
+        /** Symmetrical sigmoid: \f$f(x)=\beta*(1-e^{-\alpha x})/(1+e^{-\alpha x})\f$
         @note
         If you are using the default sigmoid activation function with the default parameter values
         fparam1=0 and fparam2=0 then the function used is y = 1.7159\*tanh(2/3 \* x), so the output
         will range from [-1.7159, 1.7159], instead of [0,1].*/
         SIGMOID_SYM = 1,
         /** Gaussian function: \f$f(x)=\beta e^{-\alpha x*x}\f$ */
-        GAUSSIAN = 2
+        GAUSSIAN = 2,
+        /** ReLU function: \f$f(x)=max(0,x)\f$ */
+        RELU = 3,
+        /** Leaky ReLU function: for \f$x>0\f$, \f$f(x)=x\f$; for \f$x \le 0\f$, \f$f(x)=\alpha x\f$ */
+        LEAKYRELU = 4
     };

     /** Train options */
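
For orientation, here is a minimal sketch of selecting the new activations through the public API; the 2-10-1 topology and the function name are hypothetical placeholders, not part of the patch:

#include <opencv2/ml.hpp>

// Sketch only: pick one of the new activation functions on a fresh network.
// For LEAKYRELU, param1 is the slope applied to negative inputs; passing 0
// (the default) lets setActivationFunction() substitute alpha = 0.01, as
// implemented in ann_mlp.cpp below.
void configureExample()
{
    cv::Ptr<cv::ml::ANN_MLP> nn = cv::ml::ANN_MLP::create();
    cv::Mat_<int> layers = (cv::Mat_<int>(1, 3) << 2, 10, 1);  // hypothetical shape
    nn->setLayerSizes(layers);
    nn->setActivationFunction(cv::ml::ANN_MLP::RELU);
    // or: nn->setActivationFunction(cv::ml::ANN_MLP::LEAKYRELU, 0.05);
}
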
diff --git a/modules/ml/src/ann_mlp.cpp b/modules/ml/src/ann_mlp.cpp
index fdc73d959a47..cbaa6dbf3929 100644
--- a/modules/ml/src/ann_mlp.cpp
+++ b/modules/ml/src/ann_mlp.cpp
@@ -135,7 +135,7 @@ class ANN_MLPImpl : public ANN_MLP

     void setActivationFunction(int _activ_func, double _f_param1, double _f_param2 )
     {
-        if( _activ_func < 0 || _activ_func > GAUSSIAN )
+        if( _activ_func < 0 || _activ_func > LEAKYRELU)
             CV_Error( CV_StsOutOfRange, "Unknown activation function" );

         activ_func = _activ_func;
@@ -153,11 +153,23 @@ class ANN_MLPImpl : public ANN_MLP
         case GAUSSIAN:
             max_val = 1.; min_val = 0.05;
             max_val1 = 1.; min_val1 = 0.02;
-            if( fabs(_f_param1) < FLT_EPSILON )
+            if (fabs(_f_param1) < FLT_EPSILON)
                 _f_param1 = 1.;
-            if( fabs(_f_param2) < FLT_EPSILON )
+            if (fabs(_f_param2) < FLT_EPSILON)
                 _f_param2 = 1.;
             break;
+        case RELU:
+            if (fabs(_f_param1) < FLT_EPSILON)
+                _f_param1 = 1;
+            min_val = max_val = min_val1 = max_val1 = 0.;
+            _f_param2 = 0.;
+            break;
+        case LEAKYRELU:
+            if (fabs(_f_param1) < FLT_EPSILON)
+                _f_param1 = 0.01;
+            min_val = max_val = min_val1 = max_val1 = 0.;
+            _f_param2 = 0.;
+            break;
         default:
             min_val = max_val = min_val1 = max_val1 = 0.;
             _f_param1 = 1.;
@@ -368,47 +380,61 @@ class ANN_MLPImpl : public ANN_MLP
         }
     }

-    void calc_activ_func( Mat& sums, const Mat& w ) const
+    void calc_activ_func(Mat& sums, const Mat& w) const
     {
-        const double* bias = w.ptr<double>(w.rows-1);
+        const double* bias = w.ptr<double>(w.rows - 1);
         int i, j, n = sums.rows, cols = sums.cols;
         double scale = 0, scale2 = f_param2;

-        switch( activ_func )
+        switch (activ_func)
         {
-        case IDENTITY:
-            scale = 1.;
-            break;
-        case SIGMOID_SYM:
-            scale = -f_param1;
-            break;
-        case GAUSSIAN:
-            scale = -f_param1*f_param1;
-            break;
-        default:
-            ;
+            case IDENTITY:
+                scale = 1.;
+                break;
+            case SIGMOID_SYM:
+                scale = -f_param1;
+                break;
+            case GAUSSIAN:
+                scale = -f_param1*f_param1;
+                break;
+            case RELU:
+                scale = 1;
+                break;
+            case LEAKYRELU:
+                scale = 1;
+                break;
+            default:
+                ;
         }

-        CV_Assert( sums.isContinuous() );
+        CV_Assert(sums.isContinuous());

-        if( activ_func != GAUSSIAN )
+        if (activ_func != GAUSSIAN)
        {
-            for( i = 0; i < n; i++ )
+            for (i = 0; i < n; i++)
             {
                 double* data = sums.ptr<double>(i);
-                for( j = 0; j < cols; j++ )
+                for (j = 0; j < cols; j++)
+                {
                     data[j] = (data[j] + bias[j])*scale;
+                    if (activ_func == RELU)
+                        if (data[j] < 0)
+                            data[j] = 0;
+                    if (activ_func == LEAKYRELU)
+                        if (data[j] < 0)
+                            data[j] *= f_param1;
+                }
             }

-            if( activ_func == IDENTITY )
+            if (activ_func == IDENTITY || activ_func == RELU || activ_func == LEAKYRELU)
                 return;
         }
         else
         {
-            for( i = 0; i < n; i++ )
+            for (i = 0; i < n; i++)
             {
                 double* data = sums.ptr<double>(i);
-                for( j = 0; j < cols; j++ )
+                for (j = 0; j < cols; j++)
                 {
                     double t = data[j] + bias[j];
                     data[j] = t*t*scale;
@@ -416,92 +442,132 @@ class ANN_MLPImpl : public ANN_MLP
                 }
             }
         }

-        exp( sums, sums );
+        exp(sums, sums);

-        if( sums.isContinuous() )
+        if (sums.isContinuous())
         {
             cols *= n;
             n = 1;
         }

-        switch( activ_func )
+        switch (activ_func)
         {
-        case SIGMOID_SYM:
-            for( i = 0; i < n; i++ )
+            case SIGMOID_SYM:
+                for (i = 0; i < n; i++)
+                {
+                    double* data = sums.ptr<double>(i);
+                    for (j = 0; j < cols; j++)
             {
-                double* data = sums.ptr<double>(i);
-                for( j = 0; j < cols; j++ )
+                        if (!cvIsInf(data[j]))
                 {
-                    if(!cvIsInf(data[j]))
-                    {
-                        double t = scale2*(1. - data[j])/(1. + data[j]);
-                        data[j] = t;
-                    }
-                    else
-                    {
-                        data[j] = -scale2;
-                    }
+                            double t = scale2*(1. - data[j]) / (1. + data[j]);
+                            data[j] = t;
+                        }
+                        else
+                        {
+                            data[j] = -scale2;
                     }
                 }
-                break;
+                }
+                break;

-        case GAUSSIAN:
-            for( i = 0; i < n; i++ )
-            {
-                double* data = sums.ptr<double>(i);
-                for( j = 0; j < cols; j++ )
-                    data[j] = scale2*data[j];
-            }
-            break;
+            case GAUSSIAN:
+                for (i = 0; i < n; i++)
+                {
+                    double* data = sums.ptr<double>(i);
+                    for (j = 0; j < cols; j++)
+                        data[j] = scale2*data[j];
+                }
+                break;

-        default:
-            ;
+            default:
+                ;
         }
     }

-    void calc_activ_func_deriv( Mat& _xf, Mat& _df, const Mat& w ) const
+    void calc_activ_func_deriv(Mat& _xf, Mat& _df, const Mat& w) const
     {
-        const double* bias = w.ptr<double>(w.rows-1);
+        const double* bias = w.ptr<double>(w.rows - 1);
         int i, j, n = _xf.rows, cols = _xf.cols;

-        if( activ_func == IDENTITY )
+        if (activ_func == IDENTITY)
         {
-            for( i = 0; i < n; i++ )
+            for (i = 0; i < n; i++)
             {
                 double* xf = _xf.ptr<double>(i);
                 double* df = _df.ptr<double>(i);

-                for( j = 0; j < cols; j++ )
+                for (j = 0; j < cols; j++)
                 {
                     xf[j] += bias[j];
                     df[j] = 1;
                 }
             }
         }
-        else if( activ_func == GAUSSIAN )
+        else if (activ_func == RELU)
+        {
+            for (i = 0; i < n; i++)
+            {
+                double* xf = _xf.ptr<double>(i);
+                double* df = _df.ptr<double>(i);
+
+                for (j = 0; j < cols; j++)
+                {
+                    xf[j] += bias[j];
+                    if (xf[j] < 0)
+                    {
+                        xf[j] = 0;
+                        df[j] = 0;
+                    }
+                    else
+                        df[j] = 1;
+                }
+            }
+        }
+        else if (activ_func == LEAKYRELU)
+        {
+            for (i = 0; i < n; i++)
+            {
+                double* xf = _xf.ptr<double>(i);
+                double* df = _df.ptr<double>(i);
+
+                for (j = 0; j < cols; j++)
+                {
+                    xf[j] += bias[j];
+                    if (xf[j] < 0)
+                    {
+                        xf[j] = f_param1*xf[j];
+                        df[j] = f_param1;
+                    }
+                    else
+                        df[j] = 1;
+                }
+            }
+        }
+        else if (activ_func == GAUSSIAN)
         {
             double scale = -f_param1*f_param1;
             double scale2 = scale*f_param2;
-            for( i = 0; i < n; i++ )
+            for (i = 0; i < n; i++)
             {
                 double* xf = _xf.ptr<double>(i);
                 double* df = _df.ptr<double>(i);

-                for( j = 0; j < cols; j++ )
+                for (j = 0; j < cols; j++)
                 {
                     double t = xf[j] + bias[j];
-                    df[j] = t*2*scale2;
+                    df[j] = t * 2 * scale2;
                     xf[j] = t*t*scale;
                 }
             }
-            exp( _xf, _xf );
+            exp(_xf, _xf);

-            for( i = 0; i < n; i++ )
+            for (i = 0; i < n; i++)
             {
                 double* xf = _xf.ptr<double>(i);
                 double* df = _df.ptr<double>(i);

-                for( j = 0; j < cols; j++ )
+                for (j = 0; j < cols; j++)
                     df[j] *= xf[j];
             }
         }
@@ -510,34 +576,34 @@ class ANN_MLPImpl : public ANN_MLP
         {
             double scale = f_param1;
             double scale2 = f_param2;

-            for( i = 0; i < n; i++ )
+            for (i = 0; i < n; i++)
             {
                 double* xf = _xf.ptr<double>(i);
                 double* df = _df.ptr<double>(i);

-                for( j = 0; j < cols; j++ )
+                for (j = 0; j < cols; j++)
                 {
                     xf[j] = (xf[j] + bias[j])*scale;
                     df[j] = -fabs(xf[j]);
                 }
             }

-            exp( _df, _df );
+            exp(_df, _df);

             // ((1+exp(-ax))^-1)'=a*((1+exp(-ax))^-2)*exp(-ax);
             // ((1-exp(-ax))/(1+exp(-ax)))'=(a*exp(-ax)*(1+exp(-ax)) + a*exp(-ax)*(1-exp(-ax)))/(1+exp(-ax))^2=
             // 2*a*exp(-ax)/(1+exp(-ax))^2
-            scale *= 2*f_param2;
-            for( i = 0; i < n; i++ )
+            scale *= 2 * f_param2;
+            for (i = 0; i < n; i++)
             {
                 double* xf = _xf.ptr<double>(i);
                 double* df = _df.ptr<double>(i);

-                for( j = 0; j < cols; j++ )
+                for (j = 0; j < cols; j++)
                 {
                     int s0 = xf[j] > 0 ? 1 : -1;
-                    double t0 = 1./(1. + df[j]);
-                    double t1 = scale*df[j]*t0*t0;
+                    double t0 = 1. / (1. + df[j]);
+                    double t1 = scale*df[j] * t0*t0;
                     t0 *= scale2*(1. - df[j])*s0;
                     df[j] = t1;
                     xf[j] = t0;
@@ -1110,7 +1176,9 @@ class ANN_MLPImpl : public ANN_MLP
     {
         const char* activ_func_name = activ_func == IDENTITY ? "IDENTITY" :
                                       activ_func == SIGMOID_SYM ? "SIGMOID_SYM" :
-                                      activ_func == GAUSSIAN ? "GAUSSIAN" : 0;
+                                      activ_func == GAUSSIAN ? "GAUSSIAN" :
+                                      activ_func == RELU ? "RELU" :
+                                      activ_func == LEAKYRELU ? "LEAKYRELU" : 0;

         if( activ_func_name )
             fs << "activation_function" << activ_func_name;
@@ -1191,6 +1259,8 @@ class ANN_MLPImpl : public ANN_MLP
         {
             activ_func = activ_func_name == "SIGMOID_SYM" ? SIGMOID_SYM :
                          activ_func_name == "IDENTITY" ? IDENTITY :
+                         activ_func_name == "RELU" ? RELU :
+                         activ_func_name == "LEAKYRELU" ? LEAKYRELU :
                          activ_func_name == "GAUSSIAN" ? GAUSSIAN : -1;
             CV_Assert( activ_func >= 0 );
         }
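
In scalar form, the two new branches above compute the following forward values and derivatives (a standalone restatement for reference, not code from the patch; alpha stands in for f_param1, which setActivationFunction() defaults to 0.01 for LEAKYRELU):

// Reference restatement of the ReLU / Leaky ReLU math implemented above.
static double relu(double x)                         { return x > 0 ? x : 0; }
static double reluDeriv(double x)                    { return x > 0 ? 1 : 0; }
static double leakyRelu(double x, double alpha)      { return x > 0 ? x : alpha * x; }
static double leakyReluDeriv(double x, double alpha) { return x > 0 ? 1 : alpha; }

Because both functions are piecewise linear, calc_activ_func() applies them in its first pass and takes the same early return as IDENTITY, skipping the exp()-based post-processing that the sigmoid and Gaussian paths need; that is the point of the widened early-return condition.
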
diff --git a/modules/ml/test/test_mltests2.cpp b/modules/ml/test/test_mltests2.cpp
index b823b84a0b59..f6b9bb7eb179 100644
--- a/modules/ml/test/test_mltests2.cpp
+++ b/modules/ml/test/test_mltests2.cpp
@@ -85,6 +85,22 @@ int str_to_ann_train_method( String& str )
     return -1;
 }

+int str_to_ann_activation_function(String& str)
+{
+    if (!str.compare("IDENTITY"))
+        return ANN_MLP::IDENTITY;
+    if (!str.compare("SIGMOID_SYM"))
+        return ANN_MLP::SIGMOID_SYM;
+    if (!str.compare("GAUSSIAN"))
+        return ANN_MLP::GAUSSIAN;
+    if (!str.compare("RELU"))
+        return ANN_MLP::RELU;
+    if (!str.compare("LEAKYRELU"))
+        return ANN_MLP::LEAKYRELU;
+    CV_Error(CV_StsBadArg, "incorrect ann activation function string");
+    return -1;
+}
+
 void ann_check_data( Ptr<TrainData> _data )
 {
     CV_TRACE_FUNCTION();
@@ -177,6 +193,62 @@ float ann_calc_error( Ptr<StatModel> ann, Ptr<TrainData> _data, map<int, int>& c
     return err;
 }

+TEST(ML_ANN, ActivationFunction)
+{
+    String folder = string(cvtest::TS::ptr()->get_data_path());
+    String original_path = folder + "waveform.data";
+    String dataname = folder + "waveform";
+
+    Ptr<TrainData> tdata = TrainData::loadFromCSV(original_path, 0);
+
+    ASSERT_FALSE(tdata.empty()) << "Could not find test data file: " << original_path;
+    RNG& rng = theRNG();
+    rng.state = 1027401484159173092;
+    tdata->setTrainTestSplit(500);
+
+    vector<int> activationType;
+    activationType.push_back(ml::ANN_MLP::IDENTITY);
+    activationType.push_back(ml::ANN_MLP::SIGMOID_SYM);
+    activationType.push_back(ml::ANN_MLP::GAUSSIAN);
+    activationType.push_back(ml::ANN_MLP::RELU);
+    activationType.push_back(ml::ANN_MLP::LEAKYRELU);
+    vector<String> activationName;
+    activationName.push_back("_identity");
+    activationName.push_back("_sigmoid_sym");
+    activationName.push_back("_gaussian");
+    activationName.push_back("_relu");
+    activationName.push_back("_leakyrelu");
+    for (size_t i = 0; i < activationType.size(); i++)
+    {
+        Ptr<ml::ANN_MLP> x = ml::ANN_MLP::create();
+        Mat_<int> layerSizes(1, 4);
+        layerSizes(0, 0) = tdata->getNVars();
+        layerSizes(0, 1) = 100;
+        layerSizes(0, 2) = 100;
+        layerSizes(0, 3) = tdata->getResponses().cols;
+        x->setLayerSizes(layerSizes);
+        x->setActivationFunction(activationType[i]);
+        x->setTrainMethod(ml::ANN_MLP::RPROP, 0.01, 0.1);
+        x->setTermCriteria(TermCriteria(TermCriteria::COUNT, 300, 0.01));
+        x->train(tdata, ml::ANN_MLP::NO_OUTPUT_SCALE);
+        ASSERT_TRUE(x->isTrained()) << "Could not train network with " << activationName[i];
+#ifdef GENERATE_TESTDATA
+        x->save(dataname + activationName[i] + ".yml");
+#else
+        Ptr<ml::ANN_MLP> y = Algorithm::load<ml::ANN_MLP>(dataname + activationName[i] + ".yml");
+        ASSERT_TRUE(y != NULL) << "Could not load " << dataname + activationName[i] + ".yml";
+        Mat testSamples = tdata->getTestSamples();
+        Mat rx, ry, dst;
+        x->predict(testSamples, rx);
+        y->predict(testSamples, ry);
+        absdiff(rx, ry, dst);
+        double minVal, maxVal;
+        minMaxLoc(dst, &minVal, &maxVal);
+        ASSERT_TRUE(maxVal < FLT_EPSILON) << "Predictions differ for " << dataname + activationName[i] + ".yml";
+#endif
+    }
+}
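
Since the serialization hunks above record the new activations by name, a model saved after training with them carries that choice in its YAML output. A sketch of checking the field (the file name is hypothetical, and "opencv_ml_ann_mlp" is assumed to be ANN_MLP's default storage node name):

// Sketch: confirm which activation a saved model was trained with.
// Assumes <opencv2/core.hpp> and a model saved via ANN_MLP::save().
cv::FileStorage fs("waveform_relu.yml", cv::FileStorage::READ);
cv::String name;
fs["opencv_ml_ann_mlp"]["activation_function"] >> name;  // expect "RELU"
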