Skip to content

Commit

Permalink
cws, pos, ne and par pass test.
Browse files Browse the repository at this point in the history
  • Loading branch information
Oneplus committed May 18, 2015
1 parent ecf1542 commit 7652db0
Show file tree
Hide file tree
Showing 43 changed files with 621 additions and 251 deletions.
10 changes: 7 additions & 3 deletions src/console/cws_cmdline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ using boost::program_options::variables_map;
using boost::program_options::store;
using boost::program_options::parse_command_line;
using ltp::strutils::join;
using ltp::utility::timer;
using ltp::utility::WallClockTimer;

void multithreaded_segment( void * args) {
std::string sentence;
Expand Down Expand Up @@ -64,7 +64,10 @@ int main(int argc, char ** argv) {
"The path to the external lexicon in segmentor [optional].")
("help,h", "Show help information");

void* engine = 0;
if (argc == 1) {
std::cerr << optparser << std::endl;
return 1;
}

variables_map vm;
store(parse_command_line(argc, argv, optparser), vm);
Expand Down Expand Up @@ -96,6 +99,7 @@ int main(int argc, char ** argv) {
segmentor_lexicon= vm["segmentor-lexicon"].as<std::string>();
}

void* engine = 0;
if (segmentor_lexicon == "") {
engine = segmentor_create_segmentor(segmentor_model.c_str());
} else {
Expand All @@ -119,7 +123,7 @@ int main(int argc, char ** argv) {
}

Dispatcher* dispatcher = new Dispatcher( engine, (*is), std::cout );
timer t;
WallClockTimer t;
std::list<tthread::thread *> thread_list;
for (int i = 0; i < threads; ++ i) {
tthread::thread* t = new tthread::thread( multithreaded_segment, (void *)dispatcher );
Expand Down
35 changes: 22 additions & 13 deletions src/console/ltp_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,17 +35,17 @@ void multithreaded_ltp( void * args) {
XML4NLP xml4nlp;
xml4nlp.CreateDOMFromString(sentence);

if (type == "sp"){
if (type == "sp") {
engine->splitSentence_dummy(xml4nlp);
} else if(type == "ws"){
} else if(type == LTP_SERVICE_NAME_SEGMENT) {
engine->wordseg(xml4nlp);
} else if(type == "pos"){
} else if(type == LTP_SERVICE_NAME_POSTAG) {
engine->postag(xml4nlp);
} else if(type == "ner"){
} else if(type == LTP_SERVICE_NAME_NER) {
engine->ner(xml4nlp);
} else if(type == "dp"){
} else if(type == LTP_SERVICE_NAME_DEPPARSE) {
engine->parser(xml4nlp);
} else if(type == "srl"){
} else if(type == LTP_SERVICE_NAME_SRL) {
engine->srl(xml4nlp);
} else {
engine->srl(xml4nlp);
Expand All @@ -72,11 +72,11 @@ int main(int argc, char *argv[]) {
"The last stage of analysis. This option can be used when the user only"
"wants to perform early stage analysis, like only segment without postagging."
"value includes:\n"
"- ws: Chinese word segmentation\n"
"- pos: Part of speech tagging\n"
"- ne: Named entity recognization\n"
"- dp: Dependency parsing\n"
"- srl: Semantic role labeling (equals to all)\n"
"- " LTP_SERVICE_NAME_SEGMENT ": Chinese word segmentation\n"
"- " LTP_SERVICE_NAME_POSTAG ": Part of speech tagging\n"
"- " LTP_SERVICE_NAME_NER ": Named entity recognization\n"
"- " LTP_SERVICE_NAME_DEPPARSE ": Dependency parsing\n"
"- " LTP_SERVICE_NAME_SRL ": Semantic role labeling (equals to all)\n"
"- all: The whole pipeline [default]")
("input", value<std::string>(), "The path to the input file.")
("segmentor-model", value<std::string>(),
Expand All @@ -96,6 +96,11 @@ int main(int argc, char *argv[]) {
("debug-level", value<int>(), "The debug level.")
("help,h", "Show help information");

if (argc == 1) {
std::cerr << optparser << std::endl;
return 1;
}

variables_map vm;
store(parse_command_line(argc, argv, optparser), vm);

Expand All @@ -116,8 +121,12 @@ int main(int argc, char *argv[]) {
std::string last_stage = "all";
if (vm.count("last-stage")) {
last_stage = vm["last-stage"].as<std::string>();
if (last_stage != "ws" && last_stage != "pos" && last_stage != "dp"
&& last_stage != "ne" && last_stage != "srl" && last_stage != "all") {
if (last_stage != LTP_SERVICE_NAME_SEGMENT
&& last_stage != LTP_SERVICE_NAME_POSTAG
&& last_stage != LTP_SERVICE_NAME_NER
&& last_stage != LTP_SERVICE_NAME_DEPPARSE
&& last_stage != LTP_SERVICE_NAME_SRL
&& last_stage != "all") {
std::cerr << "Unknown stage name:" << last_stage << ", reset to 'all'" << std::endl;
last_stage = "all";
}
Expand Down
20 changes: 12 additions & 8 deletions src/console/par_cmdline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ using boost::program_options::value;
using boost::program_options::variables_map;
using boost::program_options::store;
using boost::program_options::parse_command_line;
using ltp::utility::timer;
using ltp::utility::WallClockTimer;
using ltp::strutils::split;

void multithreaded_parse( void * args) {
Expand Down Expand Up @@ -75,6 +75,11 @@ int main(int argc, char * argv[]) {
"The path to the postag model [default=ltp_data/parser.model].")
("help,h", "Show help information");

if (argc == 1) {
std::cerr << optparser << std::endl;
return 1;
}

variables_map vm;
store(parse_command_line(argc, argv, optparser), vm);

Expand All @@ -95,13 +100,12 @@ int main(int argc, char * argv[]) {
std::string input = "";
if (vm.count("input")) { input = vm["input"].as<std::string>(); }

std::string postagger_model = "ltp_data/parser.model";
std::string parser_model = "ltp_data/parser.model";
if (vm.count("parser-model")) {
postagger_model= vm["parser-model"].as<std::string>();
parser_model= vm["parser-model"].as<std::string>();
}

void *engine = parser_create_parser(postagger_model.c_str());

void *engine = parser_create_parser(parser_model.c_str());
if (!engine) {
return 1;
}
Expand All @@ -120,16 +124,16 @@ int main(int argc, char * argv[]) {
}

Dispatcher * dispatcher = new Dispatcher( engine, (*is), std::cout );
timer t;
WallClockTimer t;
std::list<tthread::thread *> thread_list;
for (int i = 0; i < threads; ++ i) {
tthread::thread * t = new tthread::thread( multithreaded_parse, (void *)dispatcher );
tthread::thread * t = new tthread::thread( multithreaded_parse, (void *)dispatcher );
thread_list.push_back( t );
}

for (std::list<tthread::thread *>::iterator i = thread_list.begin();
i != thread_list.end(); ++ i) {
tthread::thread * t = *i;
tthread::thread * t = *i;
t->join();
delete t;
}
Expand Down
13 changes: 9 additions & 4 deletions src/console/pos_cmdline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ using boost::program_options::value;
using boost::program_options::variables_map;
using boost::program_options::store;
using boost::program_options::parse_command_line;
using ltp::utility::timer;
using ltp::utility::WallClockTimer;
using ltp::strutils::split;

void multithreaded_postag( void * args) {
Expand Down Expand Up @@ -73,6 +73,11 @@ int main(int argc, char ** argv) {
"The path to the external lexicon in postagger [optional].")
("help,h", "Show help information");

if (argc == 1) {
std::cerr << optparser << std::endl;
return 1;
}

variables_map vm;
store(parse_command_line(argc, argv, optparser), vm);

Expand Down Expand Up @@ -128,16 +133,16 @@ int main(int argc, char ** argv) {
}

Dispatcher * dispatcher = new Dispatcher( engine, (*is), std::cout );
timer t;
WallClockTimer t;
std::list<tthread::thread *> thread_list;
for (int i = 0; i < threads; ++ i) {
tthread::thread * t = new tthread::thread( multithreaded_postag, (void *)dispatcher );
tthread::thread * t = new tthread::thread( multithreaded_postag, (void *)dispatcher );
thread_list.push_back( t );
}

for (std::list<tthread::thread *>::iterator i = thread_list.begin();
i != thread_list.end(); ++ i) {
tthread::thread * t = *i;
tthread::thread * t = *i;
t->join();
delete t;
}
Expand Down
21 changes: 11 additions & 10 deletions src/ltp/LTPResource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ void LTPResource::ReleaseSegmentorResource() {
if (!m_isSegmentorResourceLoaded) { return; }

segmentor_release_segmentor(m_segmentor);
TRACE_LOG("segmentor model is released.");
INFO_LOG("segmentor model is released.");
m_segmentor = 0;
m_isSegmentorResourceLoaded = false;
}
Expand Down Expand Up @@ -139,6 +139,7 @@ void LTPResource::ReleasePostaggerResource() {
postagger_release_postagger(m_postagger);
m_postagger = 0;
m_isPostaggerResourceLoaded = false;
INFO_LOG("postagger resource is released");
}

// Accessor for the postagger engine handle held by this resource object.
// Hands back m_postagger exactly as stored; no loaded-state check is done here.
void * LTPResource::GetPostagger() {
  return m_postagger;
}
Expand All @@ -151,7 +152,7 @@ int LTPResource::LoadNEResource(const char * model_file) {
return 0;
}

TRACE_LOG("Loading NER resource from \"%s\"", model_file);
INFO_LOG("Loading NER resource from \"%s\"", model_file);

m_ner = ner_create_recognizer(model_file);

Expand All @@ -161,7 +162,7 @@ int LTPResource::LoadNEResource(const char * model_file) {
}

m_isNEResourceLoaded = true;
TRACE_LOG("NER resource is loaded.");
INFO_LOG("NER resource is loaded.");
return 0;
}

Expand All @@ -178,7 +179,7 @@ void LTPResource::ReleaseNEResource() {

m_ner = NULL;
m_isNEResourceLoaded = false;
TRACE_LOG("NER resource is released");
INFO_LOG("NER resource is released");
}

void * LTPResource::GetNER() {
Expand All @@ -193,15 +194,15 @@ int LTPResource::LoadParserResource(const char * model_file) {
return 0;
}

TRACE_LOG("Loading parser resource from \"%s\"", model_file);
INFO_LOG("Loading parser resource from \"%s\"", model_file);

m_parser = parser_create_parser(model_file);
if (!m_parser) {
ERROR_LOG("Failed to create parser");
return -1;
}

TRACE_LOG("Parser is loaded.");
INFO_LOG("parser is loaded.");

m_isParserResourceLoaded = true;
return 0;
Expand All @@ -217,7 +218,7 @@ void LTPResource::ReleaseParserResource() {
}

parser_release_parser(m_parser);
TRACE_LOG("Parser is released");
INFO_LOG("Parser is released");

m_parser = NULL;
m_isParserResourceLoaded = false;
Expand All @@ -235,14 +236,14 @@ int LTPResource::LoadSRLResource(const char *data_folder) {
return 0;
}

TRACE_LOG("Loading SRL resource from \"%s\"", data_folder);
INFO_LOG("Loading SRL resource from \"%s\"", data_folder);

if (0 != SRL_LoadResource(string(data_folder))) {
ERROR_LOG("Failed to load SRL resource.");
return -1;
}

TRACE_LOG("SRL resource is loaded.");
INFO_LOG("SRL resource is loaded.");
m_isSRLResourceLoaded = true;
return 0;
}
Expand All @@ -261,7 +262,7 @@ void LTPResource::ReleaseSRLResource() {
return;
}

TRACE_LOG("SRL is released");
INFO_LOG("SRL is released");

m_isSRLResourceLoaded = false;
return;
Expand Down
12 changes: 6 additions & 6 deletions src/ltp/Ltp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,15 +50,15 @@ bool LTP::load(const std::string& last_stage,
const std::string& srl_model_dir) {

size_t target_mask = 0;
if (last_stage == "ws") {
if (last_stage == LTP_SERVICE_NAME_SEGMENT) {
target_mask = kActiveSegmentor;
} else if (last_stage == "pos") {
} else if (last_stage == LTP_SERVICE_NAME_POSTAG) {
target_mask = (kActiveSegmentor|kActivePostagger);
} else if (last_stage == "ner") {
} else if (last_stage == LTP_SERVICE_NAME_NER) {
target_mask = (kActiveSegmentor|kActivePostagger|kActiveNER);
} else if (last_stage == "dp") {
} else if (last_stage == LTP_SERVICE_NAME_DEPPARSE) {
target_mask = (kActiveSegmentor|kActivePostagger|kActiveParser);
} else if ((last_stage == "srl") || (last_stage == "all")) {
} else if ((last_stage == LTP_SERVICE_NAME_SRL) || (last_stage == "all")) {
target_mask =
(kActiveSegmentor|kActivePostagger|kActiveNER|kActiveParser|kActiveSRL);
}
Expand Down Expand Up @@ -87,7 +87,7 @@ bool LTP::load(const std::string& last_stage,
ret = _resource.LoadPostaggerResource(postagger_model_file, postagger_lexicon_file);
}
if (0 != ret) {
ERROR_LOG("in LTP::wordseg, failed to load postagger resource");
ERROR_LOG("in LTP::wordseg, failed to load postagger resource");
return false;
}
loaded_mask |= kActivePostagger;
Expand Down
10 changes: 8 additions & 2 deletions src/ltp/Ltp.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,14 @@
#include <cstring>
#include <cassert>

#define MAX_SENTENCE_LEN 300
#define MAX_WORDS_NUM 100
#define MAX_SENTENCE_LEN 1024
#define MAX_WORDS_NUM 256

#define LTP_SERVICE_NAME_SEGMENT "ws"
#define LTP_SERVICE_NAME_POSTAG "pos"
#define LTP_SERVICE_NAME_NER "ne"
#define LTP_SERVICE_NAME_DEPPARSE "dp"
#define LTP_SERVICE_NAME_SRL "srl"

enum ErrorCodes {
kEmptyStringError = 1, /*< The input sentence is empty */
Expand Down
7 changes: 4 additions & 3 deletions src/ner/decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,11 @@ NERTransitionConstrain::NERTransitionConstrain(const utility::IndexableSmartMap&
int from = alphabet.index(trim_copy(tokens[0]));
int to = alphabet.index(trim_copy(tokens[1]));
if (-1 == from || -1 == to) {
WARNING_LOG("label in constrain text \"%s\" is not in alphabet.", include.c_str());
WARNING_LOG("label in constrain text \"%s,%s\" is not in alphabet.",
trim_copy(tokens[0]).c_str(), trim_copy(tokens[1]).c_str());
} else {
rep.insert(from * T + to);
}

rep.insert(from * T + to);
}
}

Expand Down
1 change: 1 addition & 0 deletions src/ner/ner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ void NamedEntityRecognizer::build_glob_tran_cons(
}

INFO_LOG("build-config: add %d constrains.", includes.size());
INFO_LOG("report: number of labels %d", model->num_labels());
glob_con = new NERTransitionConstrain(model->labels, includes);
}

Expand Down
Loading

0 comments on commit 7652db0

Please sign in to comment.