Skip to content

Commit

Permalink
add lexicon strategy in segment dll
Browse files Browse the repository at this point in the history
  • Loading branch information
Oneplus committed Jul 23, 2013
1 parent 33ab169 commit d5959a2
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 12 deletions.
22 changes: 19 additions & 3 deletions src/segmentor/segment_dll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class SegmentorWrapper : public ltp::segmentor::Segmentor {

~SegmentorWrapper() {}

bool load(const char * model_file) {
bool load(const char * model_file, const char * lexicon_file = NULL) {
std::ifstream mfs(model_file, std::ifstream::binary);

if (!mfs) {
Expand All @@ -30,6 +30,22 @@ class SegmentorWrapper : public ltp::segmentor::Segmentor {
return false;
}

if (NULL != lexicon_file) {
std::ifstream lfs(lexicon_file);

if (lfs) {
std::string buffer;
while (std::getline(lfs, buffer)) {
buffer = ltp::strutils::chomp(buffer);
if (buffer.size() == 0) {
continue;
}
model->external_lexicon.set(buffer.c_str(), true);
}
}
}


ltp::segmentor::rulebase::RuleBase base(model->labels);
decoder = new ltp::segmentor::Decoder(model->num_labels(), base);

Expand Down Expand Up @@ -67,10 +83,10 @@ class SegmentorWrapper : public ltp::segmentor::Segmentor {
int beg_tag1;
};

void * segmentor_create_segmentor(const char * path) {
void * segmentor_create_segmentor(const char * path, const char * lexicon_file) {
SegmentorWrapper * wrapper = new SegmentorWrapper();

if (!wrapper->load(path)) {
if (!wrapper->load(path, lexicon_file)) {
return 0;
}

Expand Down
2 changes: 1 addition & 1 deletion src/segmentor/segment_dll.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
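* @param[in] path the path of the model
* @param[in] lexicon_file the path of an optional external lexicon file, one entry per line; may be NULL (the default), in which case no external lexicon is loaded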
* @return void * the pointer to the segmentor
*/
SEGMENTOR_DLL_API void * segmentor_create_segmentor(const char * path);
SEGMENTOR_DLL_API void * segmentor_create_segmentor(const char * path, const char * lexicon_file = NULL);

/*
* release the segmentor resources
Expand Down
21 changes: 13 additions & 8 deletions src/segmentor/segmentor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -593,15 +593,20 @@ void Segmentor::test(void) {

// load external lexicon
const char * lexicon_file =test_opt.lexicon_file.c_str();
ifstream lfs(lexicon_file);
std::string buffer;
while (std::getline(lfs, buffer)) {
buffer = strutils::chomp(buffer);
if (buffer.size() == 0) {
continue;
}

model->external_lexicon.set(buffer.c_str(), true);
if (NULL != lexicon_file) {
ifstream lfs(lexicon_file);

if (lfs) {
std::string buffer;
while (std::getline(lfs, buffer)) {
buffer = strutils::chomp(buffer);
if (buffer.size() == 0) {
continue;
}
model->external_lexicon.set(buffer.c_str(), true);
}
}
}

const char * test_file = test_opt.test_file.c_str();
Expand Down

0 comments on commit d5959a2

Please sign in to comment.