Skip to content

Commit

Permalink
Merge pull request HIT-SCIR#132 from endyul/hotfix
Browse files: browse the repository at this point in the history
Hotfix
  • Loading branch information
Oneplus committed Aug 18, 2015
2 parents 0214ea5 + 3014c08 commit 4c3ae7d
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 19 deletions.
4 changes: 2 additions & 2 deletions src/ner/instance.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,15 +66,15 @@ class Instance {
return ret;
}

size_t num_recalled_entites() const {
size_t num_recalled_entities() const {
size_t len = 0;
size_t ret = 0;
size_t gold_len = 0, predict_len = 0;

for (size_t i = 0; i < entities.size(); ++ i) { len += entities[i].size(); }

for (size_t i = 0, j = 0; i < entities.size() && j < predict_entities.size(); ) {
if ((entities[i] == predict_entities[j]) &&
if ((entities[i] == predict_entities[j]) &&
(entities_tags[i] == predict_entities_tags[j])) {
if (entities_tags[i] != OTHER) {
++ ret;
Expand Down
31 changes: 28 additions & 3 deletions src/ner/ner_frontend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ NamedEntityRecognizerFrontend::NamedEntityRecognizerFrontend(

NamedEntityRecognizerFrontend::~NamedEntityRecognizerFrontend() {
if (glob_con) { delete glob_con; glob_con = 0; }

for (size_t i = 0; i < train_dat.size(); ++ i) {
if (train_dat[i]) { delete train_dat[i]; train_dat[i] = 0; }
}
Expand Down Expand Up @@ -230,7 +230,7 @@ void NamedEntityRecognizerFrontend::train(void) {
updated_features.add(ctx.correct_features, 1.);
updated_features.add(ctx.predict_features, -1.);

learn(train_opt.algorithm, updated_features,
learn(train_opt.algorithm, updated_features,
iter*train_dat.size() + i + 1, inst->num_errors(), model);

if (train_opt.rare_feature_threshold > 0) {
Expand Down Expand Up @@ -305,7 +305,7 @@ void NamedEntityRecognizerFrontend::evaluate(double& f_score) {
build_entities(inst, inst->predict_tagsidx, inst->predict_entities,
inst->predict_entities_tags);

num_recalled_entities += inst->num_recalled_entites();
num_recalled_entities += inst->num_recalled_entities();
num_predict_entities += inst->num_predict_entities();
num_gold_entities += inst->num_gold_entities();

Expand Down Expand Up @@ -350,6 +350,10 @@ void NamedEntityRecognizerFrontend::test(void) {
INFO_LOG("report: number of features %d", model->space.num_features());
INFO_LOG("report: number of dimension %d", model->space.dim());

size_t num_recalled_entities = 0;
size_t num_predict_entities = 0;
size_t num_gold_entities = 0;

const char* test_file = test_opt.test_file.c_str();
std::ifstream ifs(test_file);

Expand Down Expand Up @@ -382,10 +386,31 @@ void NamedEntityRecognizerFrontend::test(void) {
for(size_t i = 0; i < len; ++i) {
inst->predict_tags[i] = model->labels.at(inst->predict_tagsidx[i]);
}

if (test_opt.evaluate) {
build_entities(inst, inst->tagsidx, inst->entities,
inst->entities_tags);
build_entities(inst, inst->predict_tagsidx, inst->predict_entities,
inst->predict_entities_tags);
num_recalled_entities += inst->num_recalled_entities();
num_predict_entities += inst->num_predict_entities();
num_gold_entities += inst->num_gold_entities();
}

writer.write(inst);
delete inst;
}

if (test_opt.evaluate) {
double p = (double)num_recalled_entities / num_predict_entities;
double r = (double)num_recalled_entities / num_gold_entities;
double f_score = 2*p*r / (p + r);

INFO_LOG("P: %lf ( %d / %d )", p, num_recalled_entities, num_predict_entities);
INFO_LOG("R: %lf ( %d / %d )", r, num_recalled_entities, num_gold_entities);
INFO_LOG("F: %lf" , f_score);
}

INFO_LOG("Elapsed time %lf", t.elapsed());
return;
}
Expand Down
3 changes: 2 additions & 1 deletion src/postagger/postagger_frontend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -331,10 +331,11 @@ void PostaggerFrontend::test(void) {
delete inst;
}

double p = (double)num_recalled_tags / num_tags;
if (test_opt.evaluate) {
double p = (double)num_recalled_tags / num_tags;
INFO_LOG("P: %lf ( %d / %d )", p, num_recalled_tags, num_tags);
}

INFO_LOG("Elapsed time %lf", t.elapsed());

//sleep(1000000);
Expand Down
2 changes: 1 addition & 1 deletion src/segmentor/instance.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class Instance {
std::vector< int > lexicon_match_state;
std::vector< std::string > tags; // tags of characters, {B I E S}
std::vector< int > tagsidx; // int tags
std::vector< std::string > predict_tags;
std::vector< std::string > predict_tags;
std::vector< int > predict_tagsidx;
std::vector< std::string > words; // words of the input
std::vector< std::string > predict_words;
Expand Down
4 changes: 2 additions & 2 deletions src/segmentor/otcws.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ int learn(int argc, const char* argv[]) {

int test(int argc, const char* argv[]) {
std::string usage = EXECUTABLE "(test) in LTP " LTP_VERSION " - (C) 2012-2015 HIT-SCIR\n";
usage += "Testing suite for " DESCRIPTION "\n\n";
usage += "Testing suite for " DESCRIPTION "\n\n";
usage += "usage: ./" EXECUTABLE " test <options>\n\n";
usage += "options";

Expand Down Expand Up @@ -144,7 +144,7 @@ int test(int argc, const char* argv[]) {

int dump(int argc, const char* argv[]) {
std::string usage = EXECUTABLE "(dump) in LTP " LTP_VERSION " - (C) 2012-2015 HIT-SCIR\n";
usage += "Model visualization suite for " DESCRIPTION "\n\n";
usage += "Model visualization suite for " DESCRIPTION "\n\n";
usage += "usage: ./" EXECUTABLE " dump <options>\n\n";
usage += "options";

Expand Down
21 changes: 11 additions & 10 deletions src/segmentor/segmentor_frontend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -519,28 +519,29 @@ void SegmentorFrontend::test(void) {
decoder.decode(scm, con, inst->predict_tagsidx);
ctx.clear();

std::vector<std::string> predict_words;
build_words(inst->raw_forms, inst->predict_tagsidx, predict_words);
build_words(inst->raw_forms, inst->predict_tagsidx, inst->predict_words);

if (test_opt.evaluate) {
std::vector<std::string> answer_words;
build_words(inst->raw_forms, inst->tagsidx, answer_words);

num_recalled_words += InstanceUtils::num_recalled_words(answer_words, predict_words);
num_predicted_words += predict_words.size();
num_recalled_words += InstanceUtils::num_recalled_words(answer_words, inst->predict_words);
num_predicted_words += inst->predict_words.size();
num_gold_words += answer_words.size();
}
writer.write(inst);
delete inst;
}

double p = (double)num_recalled_words / num_predicted_words;
double r = (double)num_recalled_words / num_gold_words;
double f = 2 * p * r / (p + r);
if (test_opt.evaluate) {
double p = (double)num_recalled_words / num_predicted_words;
double r = (double)num_recalled_words / num_gold_words;
double f = 2 * p * r / (p + r);

INFO_LOG("P: %lf ( %d / %d )", p, num_recalled_words, num_predicted_words);
INFO_LOG("R: %lf ( %d / %d )", r, num_recalled_words, num_gold_words);
INFO_LOG("F: %lf" , f);
INFO_LOG("P: %lf ( %d / %d )", p, num_recalled_words, num_predicted_words);
INFO_LOG("R: %lf ( %d / %d )", r, num_recalled_words, num_gold_words);
INFO_LOG("F: %lf" , f);
}
INFO_LOG("Elapsed time %lf", t.elapsed());
return;
}
Expand Down

0 comments on commit 4c3ae7d

Please sign in to comment.