From 8a60cb9d83095ef7d58ad0f98f259d0e5908a297 Mon Sep 17 00:00:00 2001 From: Oneplus Date: Tue, 14 Jan 2014 20:49:59 +0800 Subject: [PATCH] revision segmentor --- src/segmentor/decoder.cpp | 136 +-- src/segmentor/decoder.h | 103 +-- src/segmentor/extractor.cpp | 124 +-- src/segmentor/extractor.h | 18 +- src/segmentor/featurespace.cpp | 142 ++-- src/segmentor/featurespace.h | 184 ++-- src/segmentor/model.cpp | 126 +-- src/segmentor/model.h | 84 +- src/segmentor/options.h | 31 +- src/segmentor/parameter.h | 256 +++--- src/segmentor/rulebase.h | 340 ++++---- src/segmentor/segmentor.cpp | 1456 +++++++++++++++----------------- src/segmentor/segmentor.h | 206 ++--- src/segmentor/segmentreader.h | 184 ++-- src/segmentor/segmentwriter.h | 116 +-- 15 files changed, 1741 insertions(+), 1765 deletions(-) diff --git a/src/segmentor/decoder.cpp b/src/segmentor/decoder.cpp index 07b3c5d1b..369e8218a 100644 --- a/src/segmentor/decoder.cpp +++ b/src/segmentor/decoder.cpp @@ -5,104 +5,104 @@ namespace segmentor { void Decoder::decode(Instance * inst) { - init_lattice(inst); - viterbi_decode(inst); - get_result(inst); - free_lattice(); + init_lattice(inst); + viterbi_decode(inst); + get_result(inst); + free_lattice(); } void Decoder::init_lattice(const Instance * inst) { - int len = inst->size(); - lattice.resize(len, L); - lattice = NULL; + int len = inst->size(); + lattice.resize(len, L); + lattice = NULL; } void Decoder::viterbi_decode(const Instance * inst) { - int len = inst->size(); - for (int i = 0; i < len; ++ i) { - for (int l = 0; l < L; ++ l) { - if (false == base.legal_emit(inst->chartypes[i], l)) { - continue; - } - - if (i == 0) { - LatticeItem * item = new LatticeItem(i, l, inst->uni_scores[i][l], NULL); - lattice_insert(lattice[i][l], item); - } else { - for (int pl = 0; pl < L; ++ pl) { - if (false == base.legal_trans(pl, l)) { - continue; - } - - double score = 0.; - const LatticeItem * prev = lattice[i-1][pl]; - - if (!prev) { - continue; - } - - // std::cout << i << " " << pl << " " << l << std::endl; - score = inst->uni_scores[i][l] + inst->bi_scores[pl][l] + prev->score; - const LatticeItem * item = new LatticeItem(i, l, score, prev); - lattice_insert(lattice[i][l], item); - } - } // end for if i == 0 + int len = inst->size(); + for (int i = 0; i < len; ++ i) { + for (int l = 0; l < L; ++ l) { + if (false == base.legal_emit(inst->chartypes[i], l)) { + continue; + } + + if (i == 0) { + LatticeItem * item = new LatticeItem(i, l, inst->uni_scores[i][l], NULL); + lattice_insert(lattice[i][l], item); + } else { + for (int pl = 0; pl < L; ++ pl) { + if (false == base.legal_trans(pl, l)) { + continue; + } + + double score = 0.; + const LatticeItem * prev = lattice[i-1][pl]; + + if (!prev) { + continue; + } + + // std::cout << i << " " << pl << " " << l << std::endl; + score = inst->uni_scores[i][l] + inst->bi_scores[pl][l] + prev->score; + const LatticeItem * item = new LatticeItem(i, l, score, prev); + lattice_insert(lattice[i][l], item); } + } // end for if i == 0 } + } } void Decoder::get_result(Instance * inst) { - int len = inst->size(); - const LatticeItem * best_item = NULL; - for (int l = 0; l < L; ++ l) { - if (!lattice[len-1][l]) { - continue; - } - if (best_item == NULL || (lattice[len-1][l]->score > best_item->score)) { - best_item = lattice[len - 1][l]; - } + int len = inst->size(); + const LatticeItem * best_item = NULL; + for (int l = 0; l < L; ++ l) { + if (!lattice[len-1][l]) { + continue; } + if (best_item == NULL || (lattice[len-1][l]->score > best_item->score)) { + best_item = lattice[len - 1][l]; + } + } - const LatticeItem * item = best_item; - inst->predicted_tagsidx.resize(len); + const LatticeItem * item = best_item; + inst->predicted_tagsidx.resize(len); - while (item) { - inst->predicted_tagsidx[item->i] = item->l; - // std::cout << item->i << " " << item->l << std::endl; - item = item->prev; - } + while (item) { + inst->predicted_tagsidx[item->i] = item->l; + // std::cout << item->i << " " << item->l << std::endl; + item = item->prev; + } } void Decoder::free_lattice() { - for (int i = 0; i < lattice.nrows(); ++ i) { - for (int j = 0; j < lattice.ncols(); ++ j) { - if (lattice[i][j]) delete lattice[i][j]; - } + for (int i = 0; i < lattice.nrows(); ++ i) { + for (int j = 0; j < lattice.ncols(); ++ j) { + if (lattice[i][j]) delete lattice[i][j]; } + } } /*void KBestDecoder::decode(Instance * inst, KBestDecodeResult & result) { - init_lattice(inst); - kbest_viterbi_decode(inst); - get_result(result); - free_lattice(); + init_lattice(inst); + kbest_viterbi_decode(inst); + get_result(result); + free_lattice(); } void KBestDecoder::init_lattice(const Instance * inst) { - int len = inst->len(); - lattice.resize(len, L); + int len = inst->len(); + lattice.resize(len, L); - for (int i = 0; i < len; ++ i) { - for (int l = 0; l < L; ++ l) { - lattice[i][l] = new KHeap(k); - } + for (int i = 0; i < len; ++ i) { + for (int l = 0; l < L; ++ l) { + lattice[i][l] = new KHeap(k); } + } } void KBestDecoder::kbest_viterbi_decode(const Instance * inst) { }*/ -} // end for namespace segmentor -} // end for namespace ltp +} // end for namespace segmentor +} // end for namespace ltp diff --git a/src/segmentor/decoder.h b/src/segmentor/decoder.h index 747f6bd59..c5a920322 100644 --- a/src/segmentor/decoder.h +++ b/src/segmentor/decoder.h @@ -13,75 +13,78 @@ namespace segmentor { // data structure for lattice item class LatticeItem { public: - LatticeItem (int _i, int _l, double _score, const LatticeItem * _prev) : - i(_i), - l(_l), - score(_score), - prev(_prev) {} - - LatticeItem (int _l, double _score) : - i(0), - l(_l), - score(_score), - prev(0) {} + LatticeItem (int _i, int _l, double _score, const LatticeItem * _prev) : + i(_i), + l(_l), + score(_score), + prev(_prev) {} + + LatticeItem (int _l, double _score) : + i(0), + l(_l), + score(_score), + prev(0) {} public: - int i; - int l; - double score; - const LatticeItem * prev; + int i; + int l; + double score; + const LatticeItem * prev; }; class Decoder { public: - Decoder (int _L, rulebase::RuleBase & _base) : L(_L), base(_base) {} - void decode(Instance * inst); + Decoder (int _L, rulebase::RuleBase & _base) : L(_L), base(_base) {} + void decode(Instance * inst); private: - void init_lattice(const Instance * inst); - void viterbi_decode(const Instance * inst); - void get_result(Instance * inst); - void free_lattice(); + void init_lattice(const Instance * inst); + void viterbi_decode(const Instance * inst); + void get_result(Instance * inst); + void free_lattice(); private: - int L; - - math::Mat< const LatticeItem * > lattice; - rulebase::RuleBase base; - - void lattice_insert(const LatticeItem * &position, const LatticeItem * const item) { - if (position == NULL) { - position = item; - } else if (position->score < item->score) { - delete position; - position = item; - } else { - delete item; - } + int L; + + math::Mat< const LatticeItem * > lattice; + rulebase::RuleBase base; + + void lattice_insert(const LatticeItem * &position, + const LatticeItem * const item) { + if (position == NULL) { + position = item; + } else if (position->score < item->score) { + delete position; + position = item; + } else { + delete item; } + } }; -// maintain kest best list of -/*class KBestDecoder { +// Source code for k-best decoding, it's not implemented. +/* +class KBestDecoder { public: - typedef std::vector< std::vector > KBestDecodeResult; + typedef std::vector< std::vector > KBestDecodeResult; public: - KBestDecoder (int _L) : L(_L) {} + KBestDecoder (int _L) : L(_L) {} - void decode(Instance * inst, KBestDecodeResult & result); + void decode(Instance * inst, KBestDecodeResult & result); private: - void init_lattice(const Instance * inst); - void kbest_viterbi_decode(const Instance * inst); - void get_result(KBestDecodeResult & result); - void free_lattice(); + void init_lattice(const Instance * inst); + void kbest_viterbi_decode(const Instance * inst); + void get_result(KBestDecodeResult & result); + void free_lattice(); private: - int L; + int L; - Mat< KHeap< const LatticeItem * > > lattice; -};*/ + Mat< KHeap< const LatticeItem * > > lattice; +}; +*/ -} // end for namespace segmentor -} // end for namespace ltp -#endif // end for __LTP_SEGMENTOR_DECODER_H__ +} // end for namespace segmentor +} // end for namespace ltp +#endif // end for __LTP_SEGMENTOR_DECODER_H__ diff --git a/src/segmentor/extractor.cpp b/src/segmentor/extractor.cpp index eef00e893..6fc7ee054 100644 --- a/src/segmentor/extractor.cpp +++ b/src/segmentor/extractor.cpp @@ -11,90 +11,90 @@ Extractor * Extractor::instance_ = 0; std::vector