forked from HIT-SCIR/ltp
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsegmentor.h
107 lines (94 loc) · 3.07 KB
/
segmentor.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#ifndef __LTP_SEGMENTOR_SEGMENTOR_H__
#define __LTP_SEGMENTOR_SEGMENTOR_H__
#include "framework/decoder.h"
#include "segmentor/model.h"
#include "segmentor/decoder.h"
#include "segmentor/preprocessor.h"
#include "segmentor/options.h"
#include "segmentor/instance.h"
namespace ltp {
namespace segmentor {
class Segmentor {
protected:
Model* model;
Preprocessor preprocessor;
SegmentationConstrain con;
static const std::string model_header;
public:
Segmentor();
~Segmentor();
protected:
/**
* Extract features from one instance, store the extracted features in a
* DecodeContext class.
*
* @param[in] inst The instance.
* @param[out] model The model.
* @param[out] ctx The decode context result.
* @param[in] create If create is true, create feature for new feature
* in the model otherwise not create.
*/
void extract_features(const Instance& inst, Model* mdl,
framework::ViterbiFeatureContext* ctx,
bool create = false) const;
/**
* Build lexicon match state of the instance
*
* @param[in/out] inst The instance.
*/
virtual void build_lexicon_match_state(
const std::vector<const Model::lexicon_t*>& lexicons,
Instance* inst) const;
/**
* Cache all the score for the certain instance. The cached results are
* stored in a ScoreMatrix.
*
* @param[in] inst The instance
* @param[in] mdl The model.
* @param[in] ctx The decode context.
* @param[in] avg use to specify use average parameter
* @param[out] scm The score matrix.
*/
void calculate_scores(const Instance& inst,
const Model& mdl,
const framework::ViterbiFeatureContext& ctx,
bool avg,
framework::ViterbiScoreMatrix* scm);
void calculate_scores(const Instance& inst,
const Model& bs_mdl,
const Model& mdl,
const framework::ViterbiFeatureContext& bs_ctx,
const framework::ViterbiFeatureContext& ctx,
bool avg,
framework::ViterbiScoreMatrix* scm);
/**
* build words from tags for certain instance
*
* @param[in/out] inst the instance
* @param[in] tagsidx the index of tags
* @param[out] words the output words
* @param[in] begtag0 first of the word begin tag
* @param[in] begtag1 second of the word begin tag
*/
void build_words(const std::vector<std::string>& chars,
const std::vector<int>& tagsidx,
std::vector<std::string>& words);
/**
* Load lexicon from file.
*
* @param[in] filename The filename
* @param[out] lexicon The pointer to the lexicon.
*/
void load_lexicon(const char* filename, Model::lexicon_t* lexicon) const;
/**
* Load lexicon from string vector.
*
* @param[in] filename The filename
* @param[out] lexicon The pointer to the lexicon.
*/
void load_lexicon(const std::vector<std::string>& texts,
Model::lexicon_t* lexicon) const;
};
} // end for namespace segmentor
} // end for namespace ltp
#endif // end for __LTP_SEGMENTOR_SEGMENTOR_H__