Skip to content

Commit

Permalink
resolve conflict
Browse files Browse the repository at this point in the history
  • Loading branch information
Oneplus committed Jan 14, 2014
1 parent a55726e commit 306be8c
Show file tree
Hide file tree
Showing 13 changed files with 516 additions and 26 deletions.
3 changes: 2 additions & 1 deletion src/__ltp_dll/Ltp.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ class LTP {
*/
int srl(XML4NLP & xml);

int splitSentence_dummy(XML4NLP & xml);
private:

/*
Expand All @@ -100,7 +101,7 @@ class LTP {
* @param[in/out] xml the xml storing ltp result
* @return int 0 on success, otherwise -1
*/
int splitSentence_dummy(XML4NLP & xml);
//int splitSentence_dummy(XML4NLP & xml);

/*
* parse the config file, and load resource according the config
Expand Down
2 changes: 2 additions & 0 deletions src/parser/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ struct TrainOptions {
string algorithm; /*< the algorithm */
string model_name; /*< the model name */
bool conservative_update; /*< conservative update */
std::string use_update;
int min_update;
};

struct TestOptions {
Expand Down
153 changes: 150 additions & 3 deletions src/parser/parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ void Parser::init_opt() {
train_opt.max_iter = 10;
train_opt.algorithm = "pa";
train_opt.model_name = "";
train_opt.use_update = "false";
train_opt.min_update = 0;

test_opt.test_file = "";
test_opt.model_file = "";
Expand Down Expand Up @@ -72,6 +74,54 @@ void Parser::init_opt() {

}


/*
 * Build a truncated copy of the current model, additionally pruning
 * features by their accumulated update counts (see
 * copy_featurespace_prune).
 *
 *  @param[in]  updates   per-feature update counters gathered in train()
 *  @return Model *       the newly allocated pruned model; caller owns it
 */
Model * Parser::truncate_prune(int * updates) {
  Model * pruned = new Model;

  // The label alphabets are carried over unchanged; only the feature
  // space is pruned.
  for (int i = 0; i < pruned == NULL ? 0 : model->deprels.size(); ++i) {}
  return pruned;
}

Model * Parser::truncate() {
// main process of performing model truncation
// this process can mainly be divied into 3 steps:
Expand Down Expand Up @@ -127,6 +177,33 @@ Model * Parser::truncate() {
TRACE_LOG("Building new model is done");
return new_model;
}
void Parser::copy_featurespace_prune(Model * new_model,int gid,int *updates) {
for (FeatureSpaceIterator itx = model->space.begin(gid); !itx.end(); ++itx) {
const char * key = itx.key();
//std::cout<<"countDEP : "<<countDEP<<" model_count : "<<itx.getDicts()->dim()<<std::endl;
//std::cout<<"_i : "<<itx.getI()<<std::endl;
int tid = itx.tid();
int id = model->space.index(gid,tid,key);
bool flag = false;
int L = model-> num_deprels();
for (int l=0;l<L;++l) {
double p = model -> param.dot(id+l);
if(p!=0.) {
flag=true;
}
}
if(!flag) {
continue;
}

int idx = id/L;
if(updates[idx]<train_opt.min_update){
continue;
}
new_model->space.retrieve(gid,tid,key,true);
}
}


void Parser::copy_featurespace(Model * new_model, int gid) {
// perform the feature space truncation
Expand Down Expand Up @@ -225,11 +302,24 @@ bool Parser::parse_cfg(utility::ConfigParser & cfg) {
train_opt.model_name.c_str());
}

if (cfg.get("train", "use_update", strbuf)) {
train_opt.use_update = strbuf;
} else {
WARNING_LOG("use_update is not configed, false is set as default");
}

if (cfg.get_integer("train", "max-iter", intbuf)) {
train_opt.max_iter = intbuf;
} else {
WARNING_LOG("max-iter is not configed, [10] is set as default.");
}

if (cfg.get_integer("train", "min_update", intbuf)) {
train_opt.min_update = intbuf;
std::cout<<"min_update:"<<train_opt.min_update<<std::endl;
} else {
WARNING_LOG("min_update is not configed, [0] is set as default.");
}
} // end for cfg.has_section("train")

__TEST__ = false;
Expand Down Expand Up @@ -875,8 +965,22 @@ void Parser::train(void) {
model->param.realloc(model->dim());
TRACE_LOG("Allocate a parameter vector of [%d] dimension.", model->dim());

int feature_count = model->num_features();
//int offset_count = model->dim();
int num_l = model->num_deprels();
//int *updates = new int [offset_count];
int *updates_group = new int [feature_count];

for(int i = 0;i<feature_count;i++){
updates_group[i]=0;
}

decoder = build_decoder();

int best_result = -1;
double best_las = -1;
double best_uas = -1;

for (int iter = 0; iter < train_opt.max_iter; ++ iter) {
TRACE_LOG("Start training epoch #%d.", (iter + 1));

Expand All @@ -897,6 +1001,7 @@ void Parser::train(void) {
update_features.add(train_dat[i]->features, 1.);
update_features.add(train_dat[i]->predicted_features, -1.);

update_features.update_counter(updates_group,feature_count,num_l);
double error = train_dat[i]->num_errors();
double score = model->param.dot(update_features, false);
double norm = update_features.L2();
Expand All @@ -917,6 +1022,7 @@ void Parser::train(void) {
update_features.add(train_dat[i]->features, 1.);
update_features.add(train_dat[i]->predicted_features, -1.);

update_features.update_counter(updates_group,feature_count,num_l);
model->param.add(update_features,
iter * train_dat.size() + i + 1,
1.);
Expand All @@ -930,27 +1036,65 @@ void Parser::train(void) {
}

model->param.flush( train_dat.size() * (iter + 1) );
Model * new_model = truncate();

Model * new_model;

// for(int m = 0; m<feature_count; m++) {
// int sum = 0;
// for(int n = 0;n<num_l;n++) {
// sum+=updates[m*num_l+n];
// }
// updates_group[m]=sum;
// }

if(train_opt.use_update=="true") {
new_model = truncate_prune(updates_group);
} else {
new_model = truncate();
}

swap(model,new_model);
evaluate();

double las, uas;
evaluate(las, uas);

if(las > best_las) {
best_las = las;
best_uas = uas;
best_result = iter;
}

string saved_model_file = (train_opt.model_name + "." + to_str(iter) + ".model");
ofstream fout(saved_model_file.c_str(), std::ofstream::binary);

swap(model,new_model);
new_model->save(fout);
delete new_model;

TRACE_LOG("Model for iteration [%d] is saved to [%s]",
iter + 1,
saved_model_file.c_str());

}

delete updates_group;
TRACE_LOG("Best result is:");
TRACE_LOG("las: %lf ;uas: %lf ;iter: %d", best_las,
best_uas,
best_result);
delete model;
model = 0;
}

void Parser::evaluate(void) {
void Parser::optimise_model() {
Model *new_model = truncate();
std::string saved_model_file=("small.model");
std::ofstream ofs(saved_model_file.c_str(),std::ofstream::binary);
new_model->save(ofs);
delete(new_model);
}

void Parser::evaluate(double &las, double &uas) {
const char * holdout_file = train_opt.holdout_file.c_str();

int head_correct = 0;
Expand Down Expand Up @@ -984,12 +1128,15 @@ void Parser::evaluate(void) {
delete inst;
}

uas=(double)head_correct / total_rels;
TRACE_LOG("UAS: %.4lf ( %d / %d )",
(double)head_correct / total_rels,
head_correct,
total_rels);

las = 0;
if (model_opt.labeled) {
las = (double)label_correct / total_rels;
TRACE_LOG("LAS: %.4lf ( %d / %d )",
(double)label_correct / total_rels,
label_correct,
Expand Down
9 changes: 8 additions & 1 deletion src/parser/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ class Parser{

void train(void);

void evaluate(void);
void evaluate(double &las,double &uas);

void test(void);

Expand All @@ -93,6 +93,12 @@ class Parser{
void collect_features_of_one_instance(Instance * inst,
bool gold = false);

/*
*
*
*/
void copy_featurespace_prune(Model * new_model, int gid, int * updates);

/*
* perform the feature space truncation
*
Expand All @@ -115,6 +121,7 @@ class Parser{
* @return Model * pointer to the new model
*/
Model * truncate();
Model * truncate_prune(int * updates);

protected:
Decoder * build_decoder(void);
Expand Down
4 changes: 4 additions & 0 deletions src/postagger/featurespace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ int FeatureSpace::index(int prev_lid, int lid) {
return _offset * _num_labels + prev_lid * _num_labels + lid;
}

int FeatureSpace::get_offset() {
return _offset;
}

int FeatureSpace::num_features() {
return _offset;
}
Expand Down
3 changes: 3 additions & 0 deletions src/postagger/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,11 @@ struct TrainOptions {
std::string holdout_file;
std::string model_name;
std::string algorithm;
std::string use_update;
int max_iter;
int display_interval;
int min_update;
double min_f;
};

struct TestOptions {
Expand Down
Loading

0 comments on commit 306be8c

Please sign in to comment.