forked from tesseract-ocr/tesseract
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrainingsample.h
250 lines (230 loc) · 8.41 KB
/
trainingsample.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
// Copyright 2010 Google Inc. All Rights Reserved.
// Author: [email protected] (Ray Smith)
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
#ifndef TESSERACT_TRAINING_TRAININGSAMPLE_H_
#define TESSERACT_TRAINING_TRAININGSAMPLE_H_
#include "elst.h"
#include "featdefs.h"
#include "intfx.h"
#include "intmatcher.h"
#include "matrix.h"
#include "mf.h"
#include "picofeat.h"
#include "shapetable.h"
#include "unicharset.h"
struct Pix;
namespace tesseract {
class IntFeatureMap;
class IntFeatureSpace;
class ShapeTable;
// Number of elements in TrainingSample::cn_feature_.
static const int kNumCNParams = 4;
// Number of ways to shift the features in y when randomizing. This is the
// size of TrainingSample::kYShiftValues.
static const int kSampleYShiftSize = 5;
// Number of ways to scale the features when randomizing. This is the size of
// TrainingSample::kScaleValues.
static const int kSampleScaleSize = 3;
// Total number of different ways to manipulate the features when randomizing.
// The first and last combinations are removed to avoid an excessive
// top movement (first) and an identity transformation (last), hence the
// "- 2" below.
// WARNING: To avoid patterned duplication of samples, be sure to keep
// kSampleRandomSize prime!
// E.g. with the current values (kSampleYShiftSize = 5 and
// kSampleScaleSize = 3) kSampleRandomSize is 5 * 3 - 2 = 13, which is prime.
static const int kSampleRandomSize = kSampleYShiftSize * kSampleScaleSize - 2;
// ASSERT_IS_PRIME(kSampleRandomSize) !!
// (The line above is only a reminder comment, not an executed check, so
// re-verify primality by hand whenever either size above is changed.)
// Holds everything known about one training sample: its int features, micro
// features, the CN feature and the geometric feature, together with some
// non-serialized cache data (weight, mapped/indexed features, error flag).
// Derives from ELIST_LINK.
// NOTE(review): the class stores raw features_/micro_features_ pointers and
// declares a destructor, but declares no copy constructor or assignment
// operator. Presumably an implicit copy would alias those arrays, so prefer
// Copy()/RandomizedCopy() to duplicate a sample - confirm against the .cpp.
class TrainingSample : public ELIST_LINK {
 public:
  // Constructs an empty sample: invalid class id, no features, weight 1.0,
  // all flags false and the CN/geometric feature arrays zero-filled.
  TrainingSample()
    : class_id_(INVALID_UNICHAR_ID), font_id_(0), page_num_(0),
      num_features_(0), num_micro_features_(0), outline_length_(0),
      features_(NULL), micro_features_(NULL), weight_(1.0),
      max_dist_(0.0), sample_index_(0),
      features_are_indexed_(false), features_are_mapped_(false),
      is_error_(false) {
    // cn_feature() and geo_feature() can be called on a freshly constructed
    // sample, so the fixed-size arrays must not hold indeterminate values.
    for (int i = 0; i < kNumCNParams; ++i)
      cn_feature_[i] = 0.0f;
    for (int i = 0; i < GeoCount; ++i)
      geo_feature_[i] = 0;
  }
  ~TrainingSample();

  // Saves the given features into a TrainingSample. The features are copied,
  // so may be deleted afterwards. Delete the return value after use.
  static TrainingSample* CopyFromFeatures(const INT_FX_RESULT_STRUCT& fx_info,
                                          const TBOX& bounding_box,
                                          const INT_FEATURE_STRUCT* features,
                                          int num_features);

  // Returns the cn_feature as a FEATURE_STRUCT* needed by cntraining.
  FEATURE_STRUCT* GetCNFeature() const;

  // Constructs and returns a copy "randomized" by the method given by
  // the randomizer index. If index is out of [0, kSampleRandomSize) then
  // an exact copy is returned.
  TrainingSample* RandomizedCopy(int index) const;

  // Constructs and returns an exact copy.
  TrainingSample* Copy() const;

  // WARNING! Serialize/DeSerialize do not save/restore the "cache" data
  // members, which is mostly the mapped features, and the weight.
  // It is assumed these can all be reconstructed from what is saved.

  // Writes to the given file. Returns false in case of error.
  bool Serialize(FILE* fp) const;
  // Creates from the given file. Returns NULL in case of error.
  // If swap is true, assumes a big/little-endian swap is needed.
  static TrainingSample* DeSerializeCreate(bool swap, FILE* fp);
  // Reads from the given file. Returns false in case of error.
  // If swap is true, assumes a big/little-endian swap is needed.
  bool DeSerialize(bool swap, FILE* fp);

  // Extracts the needed information from the CHAR_DESC_STRUCT.
  void ExtractCharDesc(int feature_type, int micro_type,
                       int cn_type, int geo_type,
                       CHAR_DESC_STRUCT* char_desc);

  // Sets the mapped_features_ from the features_ using the provided
  // feature_space to the indexed versions of the features.
  void IndexFeatures(const IntFeatureSpace& feature_space);
  // Sets the mapped_features_ from the features_ using the provided
  // feature_map.
  void MapFeatures(const IntFeatureMap& feature_map);

  // Returns a pix representing the sample. (Int features only.)
  Pix* RenderToPix(const UNICHARSET* unicharset) const;
  // Displays the features in the given window with the given color.
  void DisplayFeatures(ScrollView::Color color, ScrollView* window) const;

  // Returns a pix of the original sample image. The pix is padded all round
  // by padding wherever possible.
  // The returned Pix must be pixDestroyed after use.
  // If the input page_pix is NULL, NULL is returned.
  Pix* GetSamplePix(int padding, Pix* page_pix) const;

  // Accessors.
  // Unichar id that this sample represents.
  UNICHAR_ID class_id() const {
    return class_id_;
  }
  void set_class_id(int id) {
    class_id_ = id;
  }
  // Font id in which this sample was printed.
  int font_id() const {
    return font_id_;
  }
  void set_font_id(int id) {
    font_id_ = id;
  }
  // Number of the page that the sample came from.
  int page_num() const {
    return page_num_;
  }
  void set_page_num(int page) {
    page_num_ = page;
  }
  // Bounding box of the sample in the original image.
  const TBOX& bounding_box() const {
    return bounding_box_;
  }
  void set_bounding_box(const TBOX& box) {
    bounding_box_ = box;
  }
  // Number of entries in the array returned by features().
  int num_features() const {
    return num_features_;
  }
  // Returns the stored pointer to the int features; it is NULL on a freshly
  // constructed sample.
  const INT_FEATURE_STRUCT* features() const {
    return features_;
  }
  // Number of entries in the array returned by micro_features().
  int num_micro_features() const {
    return num_micro_features_;
  }
  // Returns the stored pointer to the micro features; it is NULL on a freshly
  // constructed sample.
  const MicroFeature* micro_features() const {
    return micro_features_;
  }
  int outline_length() const {
    return outline_length_;
  }
  // Returns one element of the CN feature. No bounds check is done here, so
  // index must be in [0, kNumCNParams).
  float cn_feature(int index) const {
    return cn_feature_[index];
  }
  // Returns one element of the geometric feature. No bounds check is done
  // here, so index must be in [0, GeoCount).
  int geo_feature(int index) const {
    return geo_feature_[index];
  }
  double weight() const {
    return weight_;
  }
  void set_weight(double value) {
    weight_ = value;
  }
  double max_dist() const {
    return max_dist_;
  }
  void set_max_dist(double value) {
    max_dist_ = value;
  }
  int sample_index() const {
    return sample_index_;
  }
  void set_sample_index(int value) {
    sample_index_ = value;
  }
  bool features_are_mapped() const {
    return features_are_mapped_;
  }
  // mapped_features() and indexed_features() both return the same
  // mapped_features_ vector; each one asserts (ASSERT_HOST) that the vector
  // currently holds the corresponding kind of data.
  const GenericVector<int>& mapped_features() const {
    ASSERT_HOST(features_are_mapped_);
    return mapped_features_;
  }
  const GenericVector<int>& indexed_features() const {
    ASSERT_HOST(features_are_indexed_);
    return mapped_features_;
  }
  bool is_error() const {
    return is_error_;
  }
  void set_is_error(bool value) {
    is_error_ = value;
  }

 private:
  // Unichar id that this sample represents. There obviously must be a
  // reference UNICHARSET somewhere. Usually in TrainingSampleSet.
  UNICHAR_ID class_id_;
  // Font id in which this sample was printed. Refers to a fontinfo_table_ in
  // MasterTrainer.
  int font_id_;
  // Number of page that the sample came from.
  int page_num_;
  // Bounding box of sample in original image.
  TBOX bounding_box_;
  // Number of INT_FEATURE_STRUCT in features_ array.
  int num_features_;
  // Number of MicroFeature in micro_features_ array.
  int num_micro_features_;
  // Total length of outline in the baseline normalized coordinate space.
  // See comment in WERD_RES class definition for a discussion of coordinate
  // spaces.
  int outline_length_;
  // Array of int features.
  INT_FEATURE_STRUCT* features_;
  // Array of micro features.
  MicroFeature* micro_features_;
  // The one and only CN feature. Indexed by NORM_PARAM_NAME enum.
  float cn_feature_[kNumCNParams];
  // The one and only geometric feature. (Aims at replacing cn_feature_).
  // Indexed by GeoParams enum in picofeat.h
  int geo_feature_[GeoCount];

  // Non-serialized cache data.
  // Weight used for boosting training.
  double weight_;
  // Maximum distance to other samples of same class/font used in computing
  // the canonical sample.
  double max_dist_;
  // Global index of this sample.
  int sample_index_;
  // Indexed/mapped features, as indicated by the bools below.
  GenericVector<int> mapped_features_;
  bool features_are_indexed_;
  bool features_are_mapped_;
  // True if the last classification was an error by the current definition.
  bool is_error_;

  // Randomizing factors.
  static const int kYShiftValues[kSampleYShiftSize];
  static const double kScaleValues[kSampleScaleSize];
};
// Invokes the ELISTIZEH macro for TrainingSample. Presumably this declares
// the matching ELIST list/iterator types for the class (macro assumed to come
// from elst.h - confirm there).
ELISTIZEH(TrainingSample)
} // namespace tesseract
#endif // TESSERACT_TRAINING_TRAININGSAMPLE_H_