Skip to content

Commit

Permalink
add skew-opt (#49)
Browse files Browse the repository at this point in the history
  • Loading branch information
ynchuang authored Sep 19, 2022
1 parent 93c38eb commit 4fa4eac
Show file tree
Hide file tree
Showing 7 changed files with 310 additions and 3 deletions.
2 changes: 1 addition & 1 deletion cli/Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
CC= g++
CPPFLAGS= -std=c++11 -fopenmp -lm -Ofast
OBJS= deepwalk line walklets hpe app mf bpr hoprec warp nemf nerank
OBJS= deepwalk line walklets hpe app mf bpr hoprec warp nemf nerank skewopt
LIBS= -L ../bin -lproNet

all: $(OBJS)
Expand Down
77 changes: 77 additions & 0 deletions cli/skewopt.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#define _GLIBCXX_USE_CXX11_ABI 1
#include "../src/model/SkewOPT.h"

/**
 * Find the position of a command-line flag.
 *
 * Scans argv[1..argc-1] for an exact match of `str`. Returns the index of
 * the first match, or -1 when the flag is absent. If the flag is the very
 * last argument (so no value can follow it) an error is printed and the
 * process exits with status 1.
 */
int ArgPos(char *str, int argc, char **argv) {
    int idx = 1;
    while (idx < argc) {
        if (strcmp(str, argv[idx]) == 0) {
            // A flag must be followed by its value.
            if (idx + 1 == argc) {
                printf("Argument missing for %s\n", str);
                exit(1);
            }
            return idx;
        }
        ++idx;
    }
    return -1;
}

int main(int argc, char **argv){

int i;

if (argc == 1) {
printf("[proNet-core]\n");
printf("\tcommand spr interface for proNet-core\n\n");
printf("Options Description:\n");
printf("\t-train <string>\n");
printf("\t\tTrain the Network data\n");
printf("\t-save <string>\n");
printf("\t\tSave the representation data\n");
printf("\t-dimensions <int>\n");
printf("\t\tDimension of vertex representation; default is 64\n");
printf("\t-sample_times <int>\n");
printf("\t\tNumber of training samples *Million; default is 10\n");
printf("\t-threads <int>\n");
printf("\t\tNumber of training threads; default is 1\n");
printf("\t-reg <float>\n");
printf("\t\tThe regularization term; default is 0.01\n");
printf("\t-xi <float>\n");
printf("\t\tThe xi term; default is 10.0\n");
printf("\t-omega <float>\n");
printf("\t\tThe omega term; default is 3.0\n");
printf("\t-eta <float>\n");
printf("\t\tThe eta term; default is 3.0\n");
printf("\t-alpha <float>\n");
printf("\t\tInit learning rate; default is 0.025\n");

printf("Usage:\n");
printf("\n[SkewOPT]\n");
printf("./spr -train net.txt -save rep.txt -dimensions 64 -sample_times 10 -alpha 0.025 -threads 1\n");

return 0;
}

char network_file[100], rep_file[100];
int dimensions=64, negative_samples=5, sample_times=10, threads=1, eta=3;
double init_alpha=0.025, reg=0.01, xi=10.0, omega=3.0;

if ((i = ArgPos((char *)"-train", argc, argv)) > 0) strcpy(network_file, argv[i + 1]);
if ((i = ArgPos((char *)"-save", argc, argv)) > 0) strcpy(rep_file, argv[i + 1]);
if ((i = ArgPos((char *)"-dimensions", argc, argv)) > 0) dimensions = atoi(argv[i + 1]);
if ((i = ArgPos((char *)"-sample_times", argc, argv)) > 0) sample_times = atoi(argv[i + 1]);
if ((i = ArgPos((char *)"-reg", argc, argv)) > 0) reg = atof(argv[i + 1]);
if ((i = ArgPos((char *)"-xi", argc, argv)) > 0) xi = atof(argv[i + 1]);
if ((i = ArgPos((char *)"-omega", argc, argv)) > 0) omega = atof(argv[i + 1]);
if ((i = ArgPos((char *)"-eta", argc, argv)) > 0) eta = atoi(argv[i + 1]);
if ((i = ArgPos((char *)"-alpha", argc, argv)) > 0) init_alpha = atof(argv[i + 1]);
if ((i = ArgPos((char *)"-threads", argc, argv)) > 0) threads = atoi(argv[i + 1]);

SPR *spr;
spr = new SPR();
spr->LoadEdgeList(network_file, 0);
spr->Init(dimensions);
spr->Train(sample_times, negative_samples, init_alpha, reg, xi, omega, eta, threads);
spr->SaveWeights(rep_file);

return 0;


}
2 changes: 1 addition & 1 deletion src/Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
CC = g++
CPPFLAGS = -std=c++11 -fPIC -fopenmp -lm -Ofast
OBJECTS = util.o random.o proNet.o ./model/DeepWalk.o ./model/Walklets.o ./model/LINE.o ./model/HPE.o ./model/APP.o ./model/MF.o ./model/BPR.o ./model/HBPR.o ./model/NEMF.o ./model/WARP.o ./model/NERANK.o ./model/ECO.o ./model/GCN.o ./model/TEXTGCN.o ./model/TEXTGCNdev.o
OBJECTS = util.o random.o proNet.o ./model/DeepWalk.o ./model/Walklets.o ./model/LINE.o ./model/HPE.o ./model/APP.o ./model/MF.o ./model/BPR.o ./model/HBPR.o ./model/NEMF.o ./model/WARP.o ./model/NERANK.o ./model/ECO.o ./model/GCN.o ./model/TEXTGCN.o ./model/TEXTGCNdev.o ./model/SkewOPT.o
all: $(OBJECTS)
mkdir -p ../bin
ar rcs ../bin/libproNet.a $(OBJECTS)
Expand Down
111 changes: 111 additions & 0 deletions src/model/SkewOPT.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
#include "SkewOPT.h"
#include <omp.h>

// Construct the model and select the "no_degrees" negative-sampling method
// on the underlying proNet instance.
SPR::SPR() {
    char sampler_name[15] = "no_degrees";
    pnet.SetNegativeMethod(sampler_name);
}
// Nothing to release explicitly: the body is empty and members clean up
// through their own destructors.
SPR::~SPR() {
}

// Load the training graph by forwarding both arguments unchanged to
// proNet::LoadEdgeList.
//   filename: path of the edge-list file.
//   undirect: passed through as-is; presumably selects undirected loading
//             (TODO confirm against proNet::LoadEdgeList).
void SPR::LoadEdgeList(string filename, bool undirect) {
pnet.LoadEdgeList(filename, undirect);
}

// Write the learned vertex embeddings to `model_name` as text.
//
// Format: a header line "<MAX_vid> <dim>", then one line per vertex holding
// the vertex key followed by its `dim` weights, space separated.
// If the file cannot be opened, a message is printed and nothing is written.
void SPR::SaveWeights(string model_name){

    cout << "Save Model:" << endl;
    ofstream model(model_name);
    if (!model)
    {
        cout << "\tfail to open file" << endl;
        return;
    }

    // '\n' rather than endl: endl would flush the stream once per vertex.
    // The stream is flushed when `model` goes out of scope.
    model << pnet.MAX_vid << " " << dim << '\n';
    // `<` (as in Init) rather than `!=`, so a non-positive MAX_vid writes no rows.
    for (long vid=0; vid<pnet.MAX_vid; ++vid)
    {
        model << pnet.vertex_hash.keys[vid];
        for (int d=0; d<dim; ++d)
            model << " " << w_vertex[vid][d];
        model << '\n';
    }
    cout << "\tSave to <" << model_name << ">" << endl;
}

// Record the embedding dimension and allocate one `dim`-sized weight vector
// per vertex (pnet.MAX_vid rows). Every weight is drawn with rand() as
// (u - 0.5) / dim + 0.01, where u = rand() / RAND_MAX.
void SPR::Init(int dim) {

    this->dim = dim;
    cout << "Model Setting:" << endl;
    cout << "\tdimension:\t\t" << dim << endl;

    w_vertex.resize(pnet.MAX_vid);

    // Rows, then columns: the same rand() consumption order as an index loop.
    for (auto &row : w_vertex)
    {
        row.resize(dim);
        for (auto &weight : row)
        {
            const double unit = rand()/(double)RAND_MAX;
            weight = (unit - 0.5) / dim + 0.01;
        }
    }

}


// Train the embeddings with Skew-OPT updates.
//
//   sample_times     : number of training samples, in millions.
//   negative_samples : accepted for interface compatibility; not used here.
//   alpha            : initial learning rate, decayed linearly with progress
//                      and floored at alpha * 0.0001.
//   reg, xi, omega, eta : forwarded unchanged to proNet::UpdateSBPRPair.
//   workers          : number of OpenMP threads; values below 1 are treated as 1.
//
// The sample budget is split evenly across workers (integer division, so a
// remainder of up to workers-1 samples is dropped). All workers update
// w_vertex concurrently with no locking around the embedding updates.
void SPR::Train(int sample_times, int negative_samples, double alpha, double reg, double xi, double omega, int eta, int workers){

    // A non-positive worker count would divide by zero below and is not a
    // valid argument for omp_set_num_threads.
    if (workers < 1) workers = 1;
    omp_set_num_threads(workers);

    cout << "Model:" << endl;
    cout << "\t[Skew-OPT]" << endl;

    cout << "Learning Parameters:" << endl;
    cout << "\tsample_times:\t\t" << sample_times << endl;
    cout << "\talpha:\t\t\t" << alpha << endl;
    cout << "\tregularization:\t\t" << reg << endl;
    cout << "\txi:\t\t\t" << xi << endl;
    cout << "\tomega:\t\t\t" << omega << endl;
    cout << "\teta:\t\t\t" << eta << endl;
    cout << "\tworkers:\t\t" << workers << endl;

    cout << "Start Training:" << endl;

    unsigned long long total_sample_times = (unsigned long long)sample_times*1000000;
    double alpha_min = alpha * 0.0001;
    // Initialised so the final report is well defined even when no worker
    // ever reaches a MONITOR tick (e.g. a very small sample budget).
    double alpha_last = alpha;

    unsigned long long current_sample = 0;
    unsigned long long jobs = total_sample_times/workers;

    #pragma omp parallel for
    for (int worker=0; worker<workers; ++worker)
    {

        long v1, v2;
        unsigned long long count = 0;
        double _alpha = alpha;

        while (count<jobs)
        {
            v1 = pnet.SourceSample();
            v2 = pnet.TargetSample(v1);

            pnet.UpdateSBPRPair(w_vertex, w_vertex, v1, v2, dim, reg, xi, omega, eta, _alpha);

            count ++;
            if (count % MONITOR == 0)
            {
                // The progress counter is shared by all workers: read the old
                // value and add this worker's batch in one atomic step.
                unsigned long long done_before;
                #pragma omp atomic capture
                { done_before = current_sample; current_sample += MONITOR; }

                _alpha = alpha* ( 1.0 - (double)(done_before)/total_sample_times );
                if (_alpha < alpha_min) _alpha = alpha_min;

                #pragma omp atomic write
                alpha_last = _alpha;

                printf("\tAlpha: %.6f\tProgress: %.3f %%%c", _alpha, (double)(done_before + MONITOR)/total_sample_times * 100, 13);
                fflush(stdout);
            }
        }

    }
    printf("\tAlpha: %.6f\tProgress: 100.00 %%\n", alpha_last);

}

34 changes: 34 additions & 0 deletions src/model/SkewOPT.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#ifndef SPR_H
#define SPR_H

#include "../proNet.h"

/*****
* SkewOPT
* **************************************************************/

// Skew-OPT model wrapper: owns a proNet graph plus one embedding vector per
// vertex, and exposes load / init / train / save entry points.
class SPR {

public:

SPR();
~SPR();

// Underlying network; sampling and SGD updates are delegated to it.
proNet pnet;

// parameters
int dim; // representation dimensions (set by Init)
// One weight vector per vertex; sized to pnet.MAX_vid x dim by Init.
vector< vector<double> > w_vertex;

// data functions
// LoadEdgeList(filename, undirect): forwards to pnet.LoadEdgeList.
void LoadEdgeList(string, bool);
// SaveWeights(model_name): writes "<MAX_vid> <dim>" then one row per vertex.
void SaveWeights(string);

// model functions
// Init(dim): stores dim and randomly initialises w_vertex.
void Init(int);
// Train(sample_times, negative_samples, alpha, reg, xi, omega, eta, workers)
void Train(int, int, double, double, double, double, int, int);

};


#endif
81 changes: 80 additions & 1 deletion src/proNet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1067,6 +1067,35 @@ void proNet::Opt_BPRSGD(vector<double>& w_vertex_ptr, vector<double>& w_context_
loss_context_ptr[d] += g * w_vertex_ptr[d];
}

// One Skew-OPT gradient step for a (vertex, context-difference) pair.
//
// Computes the score f = <w_vertex_ptr, w_context_ptr> and the shifted,
// scaled score g = (f - xi) / omega.
//   - If g > 2.0 nothing is accumulated and 0 is returned.
//   - Otherwise g is clamped below at -2.0, the step size
//       alpha * fastSigmoid(-g^eta) * g^(eta-1) / omega
//     is computed, and step * w_context_ptr is ADDED to loss_vertex_ptr and
//     step * w_vertex_ptr is ADDED to loss_context_ptr; 1 is returned.
// The vector length is taken from w_vertex_ptr; the other three vectors must
// be at least that long.
int proNet::Opt_SBPRSGD(vector<double>& w_vertex_ptr, vector<double>& w_context_ptr, double xi, double omega, int eta, double alpha, vector<double>& loss_vertex_ptr, vector<double>& loss_context_ptr){

    int d = 0;
    double f = 0, g = 0, g_in_sigmoid=1, g_chain_diff=1;
    int dimension = w_vertex_ptr.size();

    for (d=0; d<dimension; ++d) // prediction
        f += w_vertex_ptr[d] * w_context_ptr[d];

    g = (f-xi)/omega;
    if (g > 2.0) return 0;
    if (g < -2.0) g = -2.0;

    if (eta >= 1)
    {
        // Build g^(eta-1) directly, then g^eta from it. The earlier form
        // computed g^eta and divided by g, which is 0/0 (NaN) whenever the
        // score equals xi exactly.
        for (int i=1; i<eta; i++)
            g_chain_diff *= g;
        g_in_sigmoid = g_chain_diff * g;
    }
    else
    {
        // Degenerate eta (<= 0): keep the historical result, g^0 / g.
        g_chain_diff = g_in_sigmoid/g;
    }

    g = fastSigmoid(-1*g_in_sigmoid)*g_chain_diff/omega; // auto-gradient w.r.t eta
    //g = fastSigmoid(-g*g*g)*g*g/omega; // when eta=3
    //g = fastSigmoid(-g*g*g*g*g)*g*g*g*g/omega; // when eta=5

    g *= alpha;

    for (d=0; d<dimension; ++d) // store the back propagation error
        loss_vertex_ptr[d] += g * w_context_ptr[d];
    for (d=0; d<dimension; ++d) // update context
        loss_context_ptr[d] += g * w_vertex_ptr[d];
    return 1;
}

void proNet::Opt_SigmoidSGD1(double* w_vertex_ptr, double* w_context_ptr, double label, int dimension, double alpha, double* loss_vertex_ptr, double* loss_context_ptr){

Expand Down Expand Up @@ -1485,6 +1514,57 @@ void proNet::UpdateFBPRPair(vector< vector<double> >& w_vertex, vector< vector<d

}

// Skew-OPT update for one observed (vertex, context_i) pair.
//
// Draws 16 negative contexts with NegativeSample(). For each one, the
// difference vector w_context[context_i] - w_context[context_j] is passed to
// Opt_SBPRSGD. When that call reports an update (non-zero return):
//   - both context rows are shrunk by alpha * reg (L2 regularisation), and
//   - the context gradient is added to context_i and subtracted from context_j.
// The vertex gradient is accumulated over all samples and applied once at the
// end, averaged over the number of samples that produced an update.
//
// `reg` is the regularisation strength. It used to be ignored in favour of a
// hard-coded 0.01, which is also the command-line default, so default runs
// are unchanged.
void proNet::UpdateSBPRPair(vector< vector<double> >& w_vertex, vector< vector<double> >& w_context, long vertex, long context_i, int dimension, double reg, double xi, double omega, int eta, double alpha){

    long context_j;
    vector< double > vertex_err(dimension, 0.0);   // accumulated across all samples
    vector< double > context_err(dimension, 0.0);  // reset for every sample
    vector< double > context_vec(dimension, 0.0);  // positive minus negative context

    int update = 0; // number of samples for which Opt_SBPRSGD produced a gradient

    for (int n=0; n<16; n++)
    {
        context_j = NegativeSample();

        for (int d=0; d<dimension; d++)
        {
            context_err[d] = 0.0;
            context_vec[d] = w_context[context_i][d] - w_context[context_j][d];
        }

        if (Opt_SBPRSGD(w_vertex[vertex], context_vec, xi, omega, eta, alpha, vertex_err, context_err)!=0)
        {
            for (int d=0; d<dimension; d++)
            {
                w_context[context_i][d] -= alpha*reg*w_context[context_i][d];
                w_context[context_j][d] -= alpha*reg*w_context[context_j][d];

                w_context[context_i][d] += context_err[d];
                w_context[context_j][d] -= context_err[d];
            }
            update += 1;
        }
    }

    // The vertex row is deliberately left untouched inside the loop and
    // updated once here with the averaged gradient.
    if (update!=0)
        for (int d=0; d<dimension; d++)
        {
            w_vertex[vertex][d] -= alpha*reg*w_vertex[vertex][d];
            w_vertex[vertex][d] += vertex_err[d]/update;
        }
}

void proNet::UpdateBPRPairs(vector< vector<double> >& w_vertex, vector< vector<double> >& w_context, vector<long>& vertex, vector<long>& context_i, vector<long>& context_j, int dimension, double reg, double alpha){

vector<long>::iterator it_v = vertex.begin();
Expand All @@ -1501,7 +1581,6 @@ void proNet::UpdateBPRPairs(vector< vector<double> >& w_vertex, vector< vector<d

}


void proNet::UpdateFreezePair(vector< vector<double> >& w_vertex, vector< vector<double> >& w_context, long vertex, long context, int dimension, int negative_samples, double alpha){

vector< double > back_err;
Expand Down
6 changes: 6 additions & 0 deletions src/proNet.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,9 @@ class proNet {
void Opt_PUBPRSGD(vector<double>&, vector<double>&, double, vector<double>&, vector<double>&);
int Opt_FBPRSGD(vector<double>&, vector<double>&, double, vector<double>&, vector<double>&, double);

// vertex representation, context representation, xi, omega, eta, alpha, vertex loss, context loss
// returns 0 when the pair is skipped (no gradient accumulated), 1 otherwise
int Opt_SBPRSGD(vector<double>&, vector<double>&, double, double, int, double, vector<double>&, vector<double>&);

// vertex representation, context representation, label, alpha, vertex loss, context loss, alpha
void Opt_SigmoidSGD(vector<double>&, vector<double>&, double, int, double, vector<double>&, vector<double>&);
void Opt_CosineSGD(vector<double>&, vector<double>&, double, int, double, vector<double>&, vector<double>&);
Expand Down Expand Up @@ -219,6 +222,9 @@ class proNet {
// vertex vector, context vector, vertex, context, dimension, negative samples, alpha
void UpdateBPRPairs(vector< vector<double> >&, vector< vector<double> >&, vector<long>&, vector<long>&, vector<long>&, int, double, double);

// vertex vector, context vector, vertex, context, dimension, reg, xi, omega, eta, alpha
void UpdateSBPRPair(vector< vector<double> >&, vector< vector<double> >&, long, long, int, double, double, double, int, double);

// vertex vector, context vector, vertex, context, dimension, regularization, negative samples, alpha
void UpdateFactorizedPair(vector< vector<double> >&, vector< vector<double> >&, long, long, int, double, int, double);
void UpdateChoice(vector< vector<double> >&, vector< vector<double> >&, long, long, int, double, int, double);
Expand Down

0 comments on commit 4fa4eac

Please sign in to comment.