-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsent2vec.cpp
90 lines (77 loc) · 2.73 KB
/
sent2vec.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#include "sent2vec.h"
#include "cmdline.h"
#include "time.h"
#include <exception>
#include <iostream>
#include <string>
using namespace std;
using namespace sent2vec;
int main(int argc, char *argv[])
{
//assert(1 == 2);
CMDLine cmdline(argc, argv);
costr param_trainset_ph = cmdline.registerParameter("train", "path of the training set");
costr param_window_size = cmdline.registerParameter("ws", "size of window, default is 3");
costr param_nthreads = cmdline.registerParameter("nthreads", "number of threads");
costr param_k = cmdline.registerParameter("k", "number of negative samples, default is 15");
costr param_alpha = cmdline.registerParameter("alpha", "learning rate, default is 0.1");
costr param_convergence = cmdline.registerParameter("c", "convergence rate, default is 0.01");
costr param_model_ph = cmdline.registerParameter("o", "model output path");
costr param_help = cmdline.registerParameter("help", "this scree");
// args
string train_ph;
int windowSize = 3;
int nThreads = 1;
int k = 15;
float alpha = 0.1;
float convergence = 0.01;
string model_ph;
if(cmdline.hasParameter(param_help) || argc == 1) {
cout << "============================" << endl;
cout << "sentence to vector" << endl;
cout << "============================" << endl;
cmdline.print_help();
return 0;
}
if(! cmdline.hasParameter(param_trainset_ph)) {
cout << "missing " << param_trainset_ph << " arg" << endl;
return 0;
}
train_ph = cmdline.getValue(param_trainset_ph);
if (cmdline.hasParameter(param_window_size)) {
windowSize = stoi(cmdline.getValue(param_window_size));
}
if (cmdline.hasParameter(param_nthreads)) {
nThreads = stoi(cmdline.getValue(param_nthreads));
}
if (cmdline.hasParameter(param_k)) {
k = stoi(cmdline.getValue(param_k));
}
if (cmdline.hasParameter(param_alpha)) {
alpha = stof(cmdline.getValue(param_alpha));
}
if (cmdline.hasParameter(param_convergence)) {
convergence = stof(cmdline.getValue(param_convergence));
}
if (cmdline.hasParameter(param_model_ph)) {
model_ph = cmdline.getValue(param_model_ph);
}
srand((unsigned) time(NULL));
Sent2Vec sent2vec(
train_ph,
windowSize, // windowsize
nThreads, // n thread
k, // k
alpha, // alpha
convergence, // convergence
model_ph
);
time_t time_start = time(0);
sent2vec.initData();
sent2vec.run();
//sent.tofile("2.sent");
//vocab.tofile("2.vocab");
sent2vec.tofile();
time_t time_end = time(0);
double time = difftime(time_end, time_start) * 1000.0;
printf("main done!");
cout << "spent time: " << time << endl;
return 0;
}