forked from HIT-SCIR/ltp
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
28 changed files
with
2,538 additions
and
95 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# Build script for the NER module: a `ner` library plus the `otner`
# command line executable.

# Header search paths used by every target declared below.
include_directories (
    ${SOURCE_DIR}/ner
    ${SOURCE_DIR}/utils
    ${SOURCE_DIR}/utils/math)
#   ${THIRDPARTY_DIR}/boost/include)

# link_directories() only applies to targets created after it is called,
# so it has to come before add_library()/add_executable().
link_directories ( ${LIBRARY_OUTPUT_PATH} )

# Sources shared by the library and the executable.
set (ner_SRC
    options.cpp
    decoder.cpp
    featurespace.cpp
    model.cpp
    extractor.cpp
    ner.cpp)

add_library (ner ner_dll.cpp ${ner_SRC})

add_executable (otner otner.cpp ${ner_SRC})
#target_link_libraries (otner boost_regex)

# redirect the output binary to tools/train
set_target_properties (otner
    PROPERTIES
    OUTPUT_NAME otner
    RUNTIME_OUTPUT_DIRECTORY ${TOOLS_DIR}/train/)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
#include "decoder.h" | ||
|
||
namespace ltp { | ||
namespace ner { | ||
|
||
// Run one complete decoding pass over `inst`: set up the lattice, fill it
// with the viterbi search, copy the best label sequence into the instance
// and release every lattice item again.
void Decoder::decode(Instance * inst) {
  this->init_lattice(inst);
  this->viterbi_decode(inst);
  this->get_result(inst);
  this->free_lattice();
}
|
||
void Decoder::init_lattice(const Instance * inst) { | ||
int len = inst->size(); | ||
lattice.resize(len, L); | ||
lattice = NULL; | ||
} | ||
|
||
void Decoder::viterbi_decode(const Instance * inst) { | ||
int len = inst->size(); | ||
for (int i = 0; i < len; ++ i) { | ||
for (int l = 0; l < L; ++ l) { | ||
if (i == 0) { | ||
LatticeItem * item = new LatticeItem(i, l, inst->uni_scores[i][l], NULL); | ||
lattice_insert(lattice[i][l], item); | ||
} else { | ||
for (int pl = 0; pl < L; ++ pl) { | ||
if (false == base.legal_trans(pl, l)) { | ||
continue; | ||
} | ||
|
||
double score = 0.; | ||
const LatticeItem * prev = lattice[i-1][pl]; | ||
|
||
if (!prev) { | ||
continue; | ||
} | ||
|
||
// std::cout << i << " " << pl << " " << l << std::endl; | ||
score = inst->uni_scores[i][l] + inst->bi_scores[pl][l] + prev->score; | ||
const LatticeItem * item = new LatticeItem(i, l, score, prev); | ||
lattice_insert(lattice[i][l], item); | ||
} | ||
} // end for if i == 0 | ||
} | ||
} | ||
} | ||
|
||
// Pick the highest scoring item in the last lattice row and follow its
// `prev` links to write the label index of every position into
// inst->predicted_tagsidx.
void Decoder::get_result(Instance * inst) {
  const int len = inst->size();
  inst->predicted_tagsidx.resize(len);

  // An empty instance has no last row; indexing lattice[len - 1] would
  // read out of bounds, so there is nothing to do.
  if (len <= 0) {
    return;
  }

  const int last = len - 1;
  const LatticeItem * best_item = NULL;
  for (int l = 0; l < L; ++ l) {
    const LatticeItem * candidate = lattice[last][l];
    if (candidate == NULL) {
      continue;
    }
    // strict '>' keeps the first label encountered when scores tie
    if (best_item == NULL || candidate->score > best_item->score) {
      best_item = candidate;
    }
  }

  // If no cell of the last row is filled, best_item stays NULL and the
  // entries of predicted_tagsidx are left as resize() produced them.
  for (const LatticeItem * item = best_item; item != NULL; item = item->prev) {
    inst->predicted_tagsidx[item->i] = item->l;
  }
}
|
||
// Delete every item stored in the lattice and reset the cells to NULL, so
// that no dangling pointer remains and calling this twice is harmless.
void Decoder::free_lattice() {
  for (int i = 0; i < lattice.nrows(); ++ i) {
    for (int j = 0; j < lattice.ncols(); ++ j) {
      delete lattice[i][j];   // deleting a NULL pointer is a no-op
      lattice[i][j] = NULL;
    }
  }
}
|
||
/*void KBestDecoder::decode(Instance * inst, KBestDecodeResult & result) { | ||
init_lattice(inst); | ||
kbest_viterbi_decode(inst); | ||
get_result(result); | ||
free_lattice(); | ||
} | ||
void KBestDecoder::init_lattice(const Instance * inst) { | ||
int len = inst->len(); | ||
lattice.resize(len, L); | ||
for (int i = 0; i < len; ++ i) { | ||
for (int l = 0; l < L; ++ l) { | ||
lattice[i][l] = new KHeap<LatticeItem>(k); | ||
} | ||
} | ||
} | ||
void KBestDecoder::kbest_viterbi_decode(const Instance * inst) { | ||
}*/ | ||
|
||
|
||
} // end for namespace ner | ||
} // end for namespace ltp | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
#ifndef __LTP_NER_DECODER_H__ | ||
#define __LTP_NER_DECODER_H__ | ||
|
||
#include <iostream> | ||
#include <vector> | ||
#include "instance.h" | ||
#include "mat.h" | ||
#include "rulebase.h" | ||
|
||
namespace ltp { | ||
namespace ner { | ||
|
||
// One entry of the decoding lattice: a scored (position, label) pair
// together with a back pointer to the entry it was extended from.
class LatticeItem {
public:
  // Item at position _i with label _l, score _score and predecessor _prev.
  LatticeItem (int _i, int _l, double _score, const LatticeItem * _prev)
    : i(_i), l(_l), score(_score), prev(_prev) {}

  // Item at position 0 without a predecessor.
  LatticeItem (int _l, double _score)
    : i(0), l(_l), score(_score), prev(NULL) {}

public:
  int i;                      // position index
  int l;                      // label index
  double score;               // score stored for this item
  const LatticeItem * prev;   // predecessor item, NULL if there is none
};
|
||
class Decoder { | ||
public: | ||
Decoder (int _L, rulebase::RuleBase & _base) : L(_L), base(_base) {} | ||
void decode(Instance * inst); | ||
|
||
private: | ||
void init_lattice(const Instance * inst); | ||
void viterbi_decode(const Instance * inst); | ||
void get_result(Instance * inst); | ||
void free_lattice(); | ||
|
||
private: | ||
int L; | ||
|
||
math::Mat< const LatticeItem * > lattice; | ||
rulebase::RuleBase base; | ||
|
||
void lattice_insert(const LatticeItem * &position, const LatticeItem * const item) { | ||
if (position == NULL) { | ||
position = item; | ||
} else if (position->score < item->score) { | ||
delete position; | ||
position = item; | ||
} else { | ||
delete item; | ||
} | ||
} | ||
}; | ||
|
||
// maintain the k-best list of lattice items | ||
/*class KBestDecoder { | ||
public: | ||
typedef std::vector< std::vector<int> > KBestDecodeResult; | ||
public: | ||
KBestDecoder (int _L) : L(_L) {} | ||
void decode(Instance * inst, KBestDecodeResult & result); | ||
private: | ||
void init_lattice(const Instance * inst); | ||
void kbest_viterbi_decode(const Instance * inst); | ||
void get_result(KBestDecodeResult & result); | ||
void free_lattice(); | ||
private: | ||
int L; | ||
Mat< KHeap< const LatticeItem * > > lattice; | ||
};*/ | ||
|
||
} // end for namespace ner | ||
} // end for namespace ltp | ||
#endif // end for __LTP_NER_DECODER_H__ |
Oops, something went wrong.