Skip to content

Commit

Permalink
c++ (based on Eigen )implement of logistic regression
Browse files Browse the repository at this point in the history
  • Loading branch information
wepe committed Sep 22, 2015
1 parent d85a7da commit 3df6d4c
Show file tree
Hide file tree
Showing 10 changed files with 383 additions and 1 deletion.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@ MachineLearning

- **logistic regression**

基于python+numpy实现了logistic回归(二类别),详细的介绍:[文章链接](http://blog.csdn.net/u012162613/article/details/41844495)
- 基于C++以及线性代数库Eigen实现的logistic回归,[代码](logistic%20regression/use_cpp_and_eigen)

- 基于python+numpy实现了logistic回归(二类别),详细的介绍:[文章链接](http://blog.csdn.net/u012162613/article/details/41844495)

- **ManifoldLearning**

Expand Down
52 changes: 52 additions & 0 deletions logistic regression/use_cpp_and_eigen/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
### A tiny C++ implementation of logistic regression

- Based on Eigen
- Supports L2 regularization
- Support save/load weights

###Dependency

- Eigen


###Usage
```
//data prepare,10 samples
MatrixXd X(10,2);
X<<1.0,0.8,2.0,1.7,3.0,2.5,4.0,3.6,5.0,4.9,
1.0,1.2,2.0,2.5,3.0,3.4,4.0,4.5,5.0,6.0;
VectorXi y(10);
y<<0,0,0,0,0,1,1,1,1,1;
//train and save the weights
LR clf1 = LR(200,0.01,0.05,0.01); //max_iter=200,alpha=0.01(learning rate),l2_lambda=0.05,tolerance=0.01
clf1.fit(X,y);
cout<<"weights:\n"<<clf1.getW()<<endl;
clf1.saveWeights("test.weights");
//load the weights and predict
LR clf2 = LR();
clf2.loadWeights("test.weights");
cout<<"Predict:\n"<<clf2.predict(X)<<endl;
```

###DirTree

```
.
|-- common_functions.h
|-- common_functions.cc
|-- lr.h
|-- lr.cc
|-- main.cc
|-- makefile
|-- weights [directory where the weights are saved]
| `-- test.weights
|-- eigen_tutorial [show the basic usage of Eigen]
`-- eigen_usage.cc
```


18 changes: 18 additions & 0 deletions logistic regression/use_cpp_and_eigen/common_functions.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#include <cmath>
#include "common_functions.h"


// Logistic sigmoid: maps any real x to 1 / (1 + e^(-x)).
double CommonFunctions::sigmod(double x)
{
    const double decay = std::exp(-x);
    const double denominator = 1.0 + decay;
    return 1.0 / denominator;
}


// Mean cross-entropy (in bits, i.e. using base-2 logarithms) between the
// labels y and the predicted probabilities h.
//
//   y : integer labels, one per sample (binary 0/1 labels are the intended use).
//   h : predicted probability of label 1 for each sample; must have at least
//       as many entries as y.
//
// Returns sum_i -( y_i*log2(h_i) + (1-y_i)*log2(1-h_i) ) / n, or 0.0 when y is
// empty (there is nothing to average over).
double CommonFunctions::crossEntropyLoss(Eigen::VectorXi y, Eigen::VectorXd h)
{
    const Eigen::VectorXd y_d = y.cast<double>();
    const Eigen::VectorXd::Index n = y_d.size();
    if (n == 0) {
        return 0.0;  // avoid 0/0 on an empty batch
    }

    // The accumulator must start at zero; it was previously left uninitialized.
    double loss = 0.0;
    for (Eigen::VectorXd::Index i = 0; i < n; i++) {
        const double label = y_d(i);
        // Skip terms whose coefficient is exactly zero. Evaluating them would
        // compute 0 * log2(0) = NaN whenever a probability saturates to exactly
        // 0 or 1, poisoning the loss even for a perfect prediction.
        if (label != 0.0) {
            loss -= label * log2(h(i));
        }
        if (label != 1.0) {
            loss -= (1.0 - label) * log2(1.0 - h(i));
        }
    }
    return loss / n;
}
17 changes: 17 additions & 0 deletions logistic regression/use_cpp_and_eigen/common_functions.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#ifndef __COMMON_FUNCTIONS_H__
#define __COMMON_FUNCTIONS_H__

#include <eigen3/Eigen/Dense>


// Collection of stateless math helpers, exposed as static member functions.
class CommonFunctions{
public:
// Logistic sigmoid function 1/(1+e^(-x)); the implementation depends on <cmath>.
static double sigmod(double x);
// Cross-entropy between integer labels y and predicted probabilities h,
// averaged over the number of labels (the implementation uses base-2 logarithms).
static double crossEntropyLoss(Eigen::VectorXi y,Eigen::VectorXd h);

};



#endif
110 changes: 110 additions & 0 deletions logistic regression/use_cpp_and_eigen/eigen_tutorial/eigen_usage.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/*
Created on 2015/09/14
Author: wepon, http://2hwp.com
Reference: http://eigen.tuxfamily.org/dox/group__TutorialMatrixArithmetic.html
*/

#include <iostream>
#include <eigen3/Eigen/Dense>
using namespace Eigen;
// Tour of basic Eigen usage: fixed- and dynamic-size matrices, vectors, arrays
// and the comma initializer. Every section prints its results to std::cout.
int main()
{

/* Matrix */
Matrix2d m; //fixed-size 2x2 matrix of double
// Coefficients are accessed as m(row,col).
m(0,0) = 3;
m(1,0) = 2.5;
m(0,1) = -1;
m(1,1) = m(1,0) + m(0,1);
std::cout << "m:\n" << m << std::endl;


// Alternative ways to build a dynamic-size double matrix (only one is active):
//MatrixXd m1 = MatrixXd::Random(3,3); //Dynamic,double
//MatrixXd m1 = MatrixXd::Zero(3,3);
//MatrixXd m1 = MatrixXd::Ones(3,3);
MatrixXd m1 = MatrixXd::Identity(3,3);
std::cout << "m1:\n" << m1 << std::endl;



MatrixXd m2(2,2); //dynamic-size matrix of double, sized 2x2 at runtime
m2<<1,2,3,4; //comma initializer
// Note: despite the "m2:" label, this prints the number of coefficients, not the matrix.
std::cout << "m2:\n" << m2.size() << std::endl; //size: 4

std::cout << "m2*m2:\n" << m2*m2 << std::endl; //matrix product (operator* on two matrices), not a cross product




int rows=5;
int cols=5;
MatrixXf m3(rows,cols);
// The comma initializer also accepts whole blocks. Here the 5x5 matrix is
// assembled from a 3x3 block (top-left), a 3x2 zero block (top-right),
// a 2x3 zero block (bottom-left) and a 2x2 identity block (bottom-right).
m3<<( Matrix3f()<<1,2,3,4,5,6,7,8,9 ).finished(),
MatrixXf::Zero(3,cols-3),MatrixXf::Zero(rows-3,3),
MatrixXf::Identity(rows-3,cols-3);
std::cout << "m3=\n" << m3 << std::endl;
std::cout << "m3.rows: " << m3.rows() << std::endl;
std::cout << "m3.cols: " << m3.cols() << std::endl;

// adjoint() is the conjugate transpose; for a real-valued matrix such as m3
// it gives the same result as transpose().
std::cout << "m3.transpose():\n" << m3.transpose() << std::endl;
std::cout << "m3.adjoint():\n" << m3.adjoint() << std::endl;

Matrix2d m4 = Matrix2d::Constant(3.0); //every coefficient set to 3.0
std::cout << "m4:\n" << m4 << std::endl;





/* Vector */
Vector2f v; //fixed-size vector of 2 floats
//Vector2d v;
//VectorXd v(2);
v(0) = 4.4;
v(1) = v(0) - 1;
std::cout << "v:\n" << v << std::endl;

Vector2i vv; //fixed-size vector of 2 ints
vv<<1,2;
// The line below is kept commented out because it does not compile:
// v holds floats and vv holds ints, and the two scalar types cannot be mixed here.
//std::cout<< "v-vv:\n"<<v-vv<<std::endl; // error

Vector2f v1; //2,float
v1 << 4.0,8.0;
std::cout << "v1:\n" << v1 << std::endl;

std::cout << "v.*v1:\n" << v.dot(v1) << std::endl; //dot product

std::cout << "v1.norm():\n" << v1.norm() << std::endl;
std::cout << "v1.squaredNorm():\n" << v1.squaredNorm() << std::endl;

// m5 is filled but never printed or used afterwards.
Matrix2f m5;
m5<<1.0,2.0,3.0,4.0;

MatrixXf m6(2,3);
m6<<1.0,2.0,3.0,4.0,5.0,6.0;
std::cout<< "m6:\n"<<m6<<std::endl;
// row(0) extracts the first row; the output label is still "m6:".
std::cout<< "m6:\n"<<m6.row(0)<<std::endl; //1 2 3





/* Array */
Array4i v2; //fixed-size array of 4 ints
v2<<1,2,3,4;
std::cout << "v2:\n" << v2 << std::endl;

// m8's coefficients are never assigned; only its dimensions are printed.
MatrixXd m8(2,3);
std::cout<< m8.rows() << " " << m8.cols() << std::endl;



// Append a column of ones to a 10x2 data matrix
// (lr.cc builds its X_new the same way, with the ones column serving as the bias term).
MatrixXd X(10,2);
X<<1.0,0.8,2.0,1.7,3.0,2.5,4.0,3.6,5.0,4.9,
1.0,1.2,2.0,2.5,3.0,3.4,4.0,4.5,5.0,6.0;
MatrixXd X_new(X.rows(),X.cols()+1);
X_new<<X,MatrixXd::Ones(X.rows(),1);
std::cout << "X:\n" << X_new << std::endl << X_new.rows() << " " << X_new.cols() << std::endl;


}
123 changes: 123 additions & 0 deletions logistic regression/use_cpp_and_eigen/lr.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include "lr.h"
#include "common_functions.h"

using namespace Eigen;

// Stores the training hyper-parameters; the weight vector W stays empty until
// fit() or loadWeights() fills it.
//   max_i     : maximum number of gradient-descent iterations
//   alp       : learning rate
//   l2_lambda : L2 regularization strength
//   tolerance : loss threshold at which training stops early
LR::LR(int max_i, double alp, double l2_lambda, double tolerance)
    : max_iter(max_i),
      lambda(l2_lambda),
      tol(tolerance),
      alpha(alp)
{
}

// Empty destructor body: it performs no explicit cleanup.
LR::~LR(){}



// Learns the weight vector W by batch gradient descent with L2 regularization.
//
//   X : one sample per row, one feature per column.
//   y : integer labels (0 or 1 as used by predict()), one per row of X.
//
// W has X.cols()+1 entries; the last one is the bias b, which multiplies a
// constant 1.0 column appended to X. Training runs for at most max_iter
// iterations and stops early once the cross-entropy loss drops to tol or below.
// If X and y are empty or disagree on the number of samples, an error is
// written to std::cerr and W is left untouched.
void LR::fit(MatrixXd X, VectorXi y)
{
    if (X.rows() == 0 || X.rows() != y.size()) {
        std::cerr << "X and y must be non-empty and have the same number of samples when call LR::fit" << std::endl;
        return;
    }

    // Assign to the class member; declaring a local W here would shadow it.
    W = VectorXd::Random(X.cols() + 1);  // the last weight represents b
    MatrixXd X_new(X.rows(), X.cols() + 1);
    X_new << X, MatrixXd::Ones(X.rows(), 1);  // last column is 1.0

    // Loop invariants: the labels as doubles and the L2 shrink factor.
    const VectorXd y_d = y.cast<double>();
    const double shrink = 1.0 - lambda / y.size();

    // Probabilities under the current W. They are refreshed once per iteration
    // and shared between the loss check and the next gradient step, so each
    // iteration needs only one predict_prob() call.
    VectorXd y_pred = predict_prob(X);
    for (int iter = 0; iter < max_iter; iter++) {
        const VectorXd E = y_pred - y_d;

        // W := (1 - lambda/n_samples) * W - alpha * X^T * E
        // reference : http://blog.csdn.net/pakko/article/details/37878837
        W = shrink * W - alpha * X_new.transpose() * E;

        // Stop as soon as the loss is within tolerance.
        y_pred = predict_prob(X);
        const double loss = CommonFunctions::crossEntropyLoss(y, y_pred);
        if (loss <= tol) break;
    }
}


// Returns, for every row of X, the predicted probability of label 1:
// sigmoid of the dot product between the row (extended with a trailing 1.0
// for the bias) and the weight vector W.
VectorXd LR::predict_prob(MatrixXd X)
{
    // Append the constant bias column so that W's last entry acts as b.
    MatrixXd augmented(X.rows(), X.cols() + 1);
    augmented << X, MatrixXd::Ones(X.rows(), 1);

    const int sample_count = augmented.rows();
    VectorXd probabilities(sample_count);
    for (int row = 0; row < sample_count; ++row) {
        const double score = augmented.row(row).dot(W);
        probabilities(row) = CommonFunctions::sigmod(score);
    }
    return probabilities;
}


// Returns the predicted label for every row of X: 1 when the predicted
// probability is strictly greater than 0.5, otherwise 0.
VectorXi LR::predict(MatrixXd X)
{
    const VectorXd probabilities = predict_prob(X);
    // Coefficient-wise threshold; the boolean result is cast to 0/1 integers.
    const VectorXi labels = (probabilities.array() > 0.5).cast<int>().matrix();
    return labels;
}


// Returns a copy of the current weight vector W (the bias is its last entry).
Eigen::VectorXd LR::getW()
{
    return this->W;
}

// Saves the model (the weight vector W) to "./weights/<filename>" as a single
// line of space-separated values.
//
// On failure (no weights to save, or the file cannot be opened) an error is
// written to std::cerr and the function returns without writing anything.
void LR::saveWeights(std::string filename)
{
    // Check before opening so an existing file is not truncated for nothing;
    // this also guards against indexing W(-1) on an empty vector.
    if (W.size() == 0) {
        std::cerr << "No weights to save when call LR::saveWeights" << std::endl;
        return;
    }

    const std::string path = "./weights/" + filename;
    std::ofstream ofile(path.c_str());
    if (!ofile.is_open()) {
        std::cerr << "Can not open the file when call LR::saveWeights" << std::endl;
        return;
    }

    // 17 significant digits are enough to round-trip an IEEE-754 double; the
    // stream default of 6 would make the reloaded model differ from this one.
    ofile.precision(17);

    for (int i = 0; i < W.size(); i++) {
        if (i > 0) {
            ofile << " ";
        }
        ofile << W(i);
    }
    // ofile is flushed and closed by its destructor.
}


// Loads the model (the weight vector W) from "./weights/<filename>".
//
// The file is expected to hold all weights on its first line, separated by
// whitespace (the format written by saveWeights). On failure (the file cannot
// be opened, or the first line contains no number) an error is written to
// std::cerr and W is left unchanged.
void LR::loadWeights(std::string filename)
{
    const std::string path = "./weights/" + filename;
    std::ifstream ifile(path.c_str());
    if (!ifile.is_open()) {
        std::cerr << "Can not open the file when call LR::loadWeights" << std::endl;
        return;
    }

    // Read the weights into a vector<double>; only the first line is used.
    std::string line;
    std::getline(ifile, line);
    std::stringstream ss(line);

    // Loop on the extraction itself rather than on eof(): testing eof() first
    // pushes a bogus value when the line is empty or ends in whitespace.
    std::vector<double> weights;
    double value;
    while (ss >> value) {
        weights.push_back(value);
    }

    if (weights.empty()) {
        std::cerr << "No weights found in the file when call LR::loadWeights" << std::endl;
        return;
    }

    // Copy the parsed values into the Eigen vector.
    W = VectorXd::Map(weights.data(), weights.size());
    // ifile is closed by its destructor.
}


27 changes: 27 additions & 0 deletions logistic regression/use_cpp_and_eigen/lr.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#ifndef __LR_H__
#define __LR_H__

#include <eigen3/Eigen/Dense>
#include <string>

// Binary logistic-regression classifier with L2 regularization, built on Eigen.
class LR{
public:
// max_i: maximum number of training iterations; alp: learning rate;
// l2_lambda: L2 regularization strength; tolerance: loss threshold for early stopping.
// The parameter names differ from the member names so that the assignments in
// the constructor body refer to the members.
LR(int max_i=100,double alp=0.01,double l2_lambda=0.05,double tolerance=0.01);
~LR();
// Trains the weights on samples X (one sample per row) and integer labels y.
void fit(Eigen::MatrixXd X,Eigen::VectorXi y);
// Returns a copy of the weight vector.
Eigen::VectorXd getW();
// Returns the predicted probability of label 1 for each row of X.
Eigen::VectorXd predict_prob(Eigen::MatrixXd X);
// Returns the predicted label (0 or 1) for each row of X.
Eigen::VectorXi predict(Eigen::MatrixXd X);
// Writes the weights to the file "./weights/" + filename.
void saveWeights(std::string filename);
// Reads the weights from the file "./weights/" + filename.
void loadWeights(std::string filename);
private:
Eigen::VectorXd W; // weight vector; fit() makes its last entry the bias
int max_iter; // maximum number of training iterations
double lambda; // L2 regularization strength
double tol; // loss tolerance: training stops once the loss is <= tol
double alpha; // learning rate
};



#endif
29 changes: 29 additions & 0 deletions logistic regression/use_cpp_and_eigen/main.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#include <iostream>
#include <eigen3/Eigen/Dense>
#include "lr.h"

using namespace std;
using namespace Eigen;

// Demo: trains a logistic-regression model on a toy data set, saves its
// weights, then reloads them into a second model and predicts with that one.
int main()
{
    // Toy data set: 10 samples with 2 features each, first five labelled 0
    // and last five labelled 1.
    MatrixXd X(10, 2);
    X << 1.0, 0.8, 2.0, 1.7, 3.0, 2.5, 4.0, 3.6, 5.0, 4.9,
         1.0, 1.2, 2.0, 2.5, 3.0, 3.4, 4.0, 4.5, 5.0, 6.0;
    VectorXi y(10);
    y << 0, 0, 0, 0, 0, 1, 1, 1, 1, 1;

    // Train, then persist the learned weights.
    // max_iter=200, alpha=0.01 (learning rate), l2_lambda=0.05, tolerance=0.01
    LR clf1(200, 0.01, 0.05, 0.01);
    clf1.fit(X, y);
    cout << "weights:\n" << clf1.getW() << endl;
    clf1.saveWeights("test.weights");

    // A fresh model picks the weights up from disk and predicts with them.
    LR clf2;
    clf2.loadWeights("test.weights");
    cout << "Predict:\n" << clf2.predict(X) << endl;

    return 0;
}
Loading

0 comments on commit 3df6d4c

Please sign in to comment.