Skip to content

Commit

Permalink
update ner to new framework
Browse files Browse the repository at this point in the history
  • Loading branch information
Oneplus committed Aug 3, 2013
1 parent 04564e7 commit e4ee49c
Show file tree
Hide file tree
Showing 28 changed files with 2,538 additions and 95 deletions.
4 changes: 2 additions & 2 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set (segmentor_DIR ${SOURCE_DIR}/segmentor)
set (postagger_DIR ${SOURCE_DIR}/postagger)
set (parser_DIR ${SOURCE_DIR}/parser)
set (splitsnt_DIR ${SOURCE_DIR}/_split_sentence/)
set (ner_DIR ${SOURCE_DIR}/_ner/)
set (ner_DIR ${SOURCE_DIR}/ner/)
set (srl_DIR ${SOURCE_DIR}/_srl/)
set (ltp_dll_DIR ${SOURCE_DIR}/__ltp_dll/)
set (server_DIR ${SOURCE_DIR}/server/)
Expand All @@ -15,8 +15,8 @@ add_subdirectory ("__xml4nlp")
add_subdirectory ("_split_sentence")
add_subdirectory ("segmentor")
add_subdirectory ("postagger")
add_subdirectory ("ner")
add_subdirectory ("parser")
add_subdirectory ("_ner")
add_subdirectory ("_srl")
add_subdirectory ("__ltp_dll")

Expand Down
2 changes: 1 addition & 1 deletion src/__ltp_dll/LTPOption.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ struct tagLTPOption {
std::string segmentor_model_path;
std::string postagger_model_path;
std::string parser_model_path;
std::string ner_data_dir;
std::string ner_model_path;
std::string srl_data_dir;

NEOption neOpt;
Expand Down
23 changes: 10 additions & 13 deletions src/__ltp_dll/LTPResource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include "segment_dll.h"
#include "postag_dll.h"
#include "parser_dll.h"
#include "NER_DLL.h"
#include "ner_dll.h"
#include "SRL_DLL.h"

#if _WIN32
Expand Down Expand Up @@ -132,38 +132,35 @@ void * LTPResource::GetPostagger() {
/* ======================================================== *
* NER related resource management *
* ======================================================== */
int LTPResource::LoadNEResource(const char *data_folder) {
int LTPResource::LoadNEResource(const char * model_file) {
if (m_isNEResourceLoaded) {
return 0;
}

TRACE_LOG("Loading NER resource from \"%s\"", data_folder);
TRACE_LOG("Loading NER resource from \"%s\"", model_file);

if ( !NER_LoadResource(const_cast<char *>(data_folder)) ) {
//加载资源
ERROR_LOG("Failed to load NER resource");
m_ner = ner_create_recognizer(model_file);

if (0 == m_ner) {
ERROR_LOG("Failed to load ner model");
return -1;
}

m_ner = NER_CreateNErecoger();
//创建NE识别器

m_isNEResourceLoaded = true;
TRACE_LOG("NER resource is loaded.");
return 0;
}

int LTPResource::LoadNEResource(const std::string & data_folder) {
return LoadNEResource(data_folder.c_str());
int LTPResource::LoadNEResource(const std::string & model_file) {
return LoadNEResource(model_file.c_str());
}

void LTPResource::ReleaseNEResource() {
if (!m_isNEResourceLoaded) {
return;
}

NER_ReleaseNErecoger(m_ner); //销毁NE识别器
NER_ReleaseResource(); //释放资源
ner_release_recognizer(m_ner);

m_ner = NULL;
m_isNEResourceLoaded = false;
Expand Down
4 changes: 2 additions & 2 deletions src/__ltp_dll/LTPResource.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,12 @@ class LTPResource {
* @param[in] model_file
* @return int 0 on success, otherwise -1
*/
int LoadNEResource(const char * data_folder);
int LoadNEResource(const char * model_file);

/*
* std::string wrapper for LoadNEResource(const char *)
*/
int LoadNEResource(const std::string & data_folder);
int LoadNEResource(const std::string & model_file);

/*
* load parser resource from model file. Return 0 on success,
Expand Down
18 changes: 7 additions & 11 deletions src/__ltp_dll/Ltp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#include "segment_dll.h"
#include "postag_dll.h"
#include "parser_dll.h"
#include "NER_DLL.h"
#include "ner_dll.h"
#include "SRL_DLL.h"

#if _WIN32
Expand Down Expand Up @@ -74,7 +74,7 @@ int LTP::ReadConfFile(const char * config_file) {
m_ltpOption.segmentor_model_path = "";
m_ltpOption.postagger_model_path = "";
m_ltpOption.parser_model_path = "";
m_ltpOption.ner_data_dir = "";
m_ltpOption.ner_model_path = "";
m_ltpOption.srl_data_dir = "";

string buffer;
Expand All @@ -97,10 +97,10 @@ int LTP::ReadConfFile(const char * config_file) {
WARNING_LOG("No \"parser-model\" config is found");
}

if (cfg.get("ner-data", buffer)) {
m_ltpOption.ner_data_dir = buffer;
if (cfg.get("ner-model", buffer)) {
m_ltpOption.ner_model_path = buffer;
} else {
WARNING_LOG("No \"ner-data\" config is found");
WARNING_LOG("No \"ner-model\" config is found");
}

if (cfg.get("srl-data", buffer)) {
Expand Down Expand Up @@ -285,7 +285,7 @@ int LTP::ner() {
return -1;
}

if (0 != m_ltpResource.LoadNEResource(m_ltpOption.ner_data_dir)) {
if (0 != m_ltpResource.LoadNEResource(m_ltpOption.ner_model_path)) {
ERROR_LOG("in LTP::ner, failed to load ner resource");
return -1;
}
Expand All @@ -297,10 +297,6 @@ int LTP::ner() {
return -1;
}

NER_SetOption(m_ltpOption.neOpt.isEntity,
m_ltpOption.neOpt.isTime,
m_ltpOption.neOpt.isNum);

int stnsNum = m_xml4nlp.CountSentenceInDocument();

if (stnsNum == 0) {
Expand All @@ -323,7 +319,7 @@ int LTP::ner() {
return -1;
}

if (0 != NER(ner, vecWord, vecPOS, vecNETag)) {
if (0 != ner_recognize(ner, vecWord, vecPOS, vecNETag)) {
ERROR_LOG("in LTP::ner, failed to perform ner on sent. #%d", i+1);
return -1;
}
Expand Down
26 changes: 26 additions & 0 deletions src/ner/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Build rules for the NER module: the `ner` library (used by the LTP dll)
# and the `otner` command line tool.
include_directories (
    ${SOURCE_DIR}/ner
    ${SOURCE_DIR}/utils
    ${SOURCE_DIR}/utils/math)
# ${THIRDPARTY_DIR}/boost/include)

# link_directories() only affects targets created AFTER it is called, so it
# has to come before add_library()/add_executable() to have any effect.
link_directories ( ${LIBRARY_OUTPUT_PATH} )

# sources shared by the library and the executable
set (ner_SRC
    options.cpp
    decoder.cpp
    featurespace.cpp
    model.cpp
    extractor.cpp
    ner.cpp)

add_library (ner ner_dll.cpp ${ner_SRC})

add_executable (otner otner.cpp ${ner_SRC})
#target_link_libraries (otner boost_regex)

# redirect the output binary to tools/train
set_target_properties (otner
    PROPERTIES
    OUTPUT_NAME otner
    RUNTIME_OUTPUT_DIRECTORY ${TOOLS_DIR}/train/)

103 changes: 103 additions & 0 deletions src/ner/decoder.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#include "decoder.h"

namespace ltp {
namespace ner {

// Decode one instance: build the lattice, run the Viterbi search, write the
// best label sequence into inst->predicted_tagsidx and release the lattice
// items again. The four steps must run in exactly this order.
void Decoder::decode(Instance * inst) {
// size the lattice to (inst->size() x L)
init_lattice(inst);
// fill the lattice with the best-scoring item per (position, label)
viterbi_decode(inst);
// back-trace from the best final item into inst->predicted_tagsidx
get_result(inst);
// delete every LatticeItem allocated during the search
free_lattice();
}

// Prepare the lattice for a new instance: one row per position of the
// instance and one column per label (L columns).
void Decoder::init_lattice(const Instance * inst) {
  int num_rows = inst->size();

  lattice.resize(num_rows, L);

  // NOTE(review): presumably Mat's value assignment resets every cell to
  // NULL (the search and lattice_insert rely on empty cells being NULL) --
  // confirm against math::Mat::operator=.
  lattice = NULL;
}

void Decoder::viterbi_decode(const Instance * inst) {
int len = inst->size();
for (int i = 0; i < len; ++ i) {
for (int l = 0; l < L; ++ l) {
if (i == 0) {
LatticeItem * item = new LatticeItem(i, l, inst->uni_scores[i][l], NULL);
lattice_insert(lattice[i][l], item);
} else {
for (int pl = 0; pl < L; ++ pl) {
if (false == base.legal_trans(pl, l)) {
continue;
}

double score = 0.;
const LatticeItem * prev = lattice[i-1][pl];

if (!prev) {
continue;
}

// std::cout << i << " " << pl << " " << l << std::endl;
score = inst->uni_scores[i][l] + inst->bi_scores[pl][l] + prev->score;
const LatticeItem * item = new LatticeItem(i, l, score, prev);
lattice_insert(lattice[i][l], item);
}
} // end for if i == 0
}
}
}

// Write the best label sequence found by viterbi_decode into
// inst->predicted_tagsidx (resized to the instance length).
//
// The best item of the last lattice row is selected and its `prev` chain is
// followed back to the first position. For an empty instance the result is
// an empty sequence. If no cell of the last row is reachable, the sequence
// is only resized and no element is assigned.
void Decoder::get_result(Instance * inst) {
  const int len = inst->size();

  // An empty instance has no last row; indexing lattice[len-1] would read
  // row -1, so bail out before touching the lattice.
  if (len <= 0) {
    inst->predicted_tagsidx.resize(0);
    return;
  }

  // pick the highest scoring item among the cells of the last position
  const LatticeItem * best_item = NULL;
  for (int l = 0; l < L; ++ l) {
    const LatticeItem * candidate = lattice[len-1][l];
    if (!candidate) {
      continue;
    }
    if (best_item == NULL || (candidate->score > best_item->score)) {
      best_item = candidate;
    }
  }

  inst->predicted_tagsidx.resize(len);

  // walk the back pointers; each item records its own position and label
  for (const LatticeItem * item = best_item; item; item = item->prev) {
    inst->predicted_tagsidx[item->i] = item->l;
  }
}

void Decoder::free_lattice() {
for (int i = 0; i < lattice.nrows(); ++ i) {
for (int j = 0; j < lattice.ncols(); ++ j) {
if (lattice[i][j]) delete lattice[i][j];
}
}
}

/*void KBestDecoder::decode(Instance * inst, KBestDecodeResult & result) {
init_lattice(inst);
kbest_viterbi_decode(inst);
get_result(result);
free_lattice();
}
void KBestDecoder::init_lattice(const Instance * inst) {
int len = inst->len();
lattice.resize(len, L);
for (int i = 0; i < len; ++ i) {
for (int l = 0; l < L; ++ l) {
lattice[i][l] = new KHeap<LatticeItem>(k);
}
}
}
void KBestDecoder::kbest_viterbi_decode(const Instance * inst) {
}*/


} // end for namespace ner
} // end for namespace ltp

87 changes: 87 additions & 0 deletions src/ner/decoder.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#ifndef __LTP_NER_DECODER_H__
#define __LTP_NER_DECODER_H__

#include <iostream>
#include <vector>
#include "instance.h"
#include "mat.h"
#include "rulebase.h"

namespace ltp {
namespace ner {

// One entry of the decoding lattice: a partial path that ends at position
// `i` with label `l`, its accumulated score, and a back pointer to the item
// it was extended from.
class LatticeItem {
public:
  // Item at position _i with label _l, extending _prev (may be NULL).
  LatticeItem (int _i, int _l, double _score, const LatticeItem * _prev)
    : i(_i), l(_l), score(_score), prev(_prev) {}

  // Item without predecessor; the position is fixed to 0.
  LatticeItem (int _l, double _score)
    : i(0), l(_l), score(_score), prev(0) {}

public:
  int i;                      // position index
  int l;                      // label index
  double score;               // score of the path ending in this item
  const LatticeItem * prev;   // previous item on the path, 0 if none
};

// First-order Viterbi decoder over L labels. Transitions are filtered with
// a rule base; the decoder stores its own copy of the rule base passed to
// the constructor.
class Decoder {
public:
  // _L    number of labels (columns of the lattice)
  // _base rule base consulted for legal label transitions (copied)
  Decoder (int _L, rulebase::RuleBase & _base) : L(_L), base(_base) {}

  // Decode `inst` and store the predicted label indices in the instance.
  void decode(Instance * inst);

private:
  void init_lattice(const Instance * inst);
  void viterbi_decode(const Instance * inst);
  void get_result(Instance * inst);
  void free_lattice();

private:
  int L;

  math::Mat< const LatticeItem * > lattice;
  rulebase::RuleBase base;

  // Offer `item` to the lattice cell `position`. The cell keeps whichever
  // item has the strictly higher score (the incumbent wins ties); the
  // losing item is deleted. Ownership of `item` passes to this function.
  void lattice_insert(const LatticeItem * &position, const LatticeItem * const item) {
    // incumbent exists and is not beaten: drop the newcomer
    if (position != NULL && !(position->score < item->score)) {
      delete item;
      return;
    }

    // cell is empty (delete of NULL is a no-op) or the newcomer is better
    delete position;
    position = item;
  }
};

// maintains a k-best list of lattice items per cell (currently disabled)
/*class KBestDecoder {
public:
typedef std::vector< std::vector<int> > KBestDecodeResult;
public:
KBestDecoder (int _L) : L(_L) {}
void decode(Instance * inst, KBestDecodeResult & result);
private:
void init_lattice(const Instance * inst);
void kbest_viterbi_decode(const Instance * inst);
void get_result(KBestDecodeResult & result);
void free_lattice();
private:
int L;
Mat< KHeap< const LatticeItem * > > lattice;
};*/

} // end for namespace ner
} // end for namespace ltp
#endif // end for __LTP_NER_DECODER_H__
Loading

0 comments on commit e4ee49c

Please sign in to comment.