forked from tesseract-ocr/tesseract
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchar_samp_set.cpp
182 lines (171 loc) · 4.52 KB
/
char_samp_set.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
/**********************************************************************
* File: char_samp_set.cpp
* Description: Implementation of a Character Sample Set Class
* Author: Ahmad Abdulkader
* Created: 2007
*
* (C) Copyright 2008, Google Inc.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <new>
#include <string>
#include "char_samp_set.h"
#include "cached_file.h"
namespace tesseract {
// Builds an empty set: zero samples, no pointer buffer, and no ownership of
// any samples.
CharSampSet::CharSampSet()
    : cnt_(0),
      samp_buff_(NULL),
      own_samples_(false) {
}
// Destructor: all teardown is delegated to Cleanup(), which releases the
// pointer buffer and, when own_samples_ is set, the samples it points to.
CharSampSet::~CharSampSet() {
  this->Cleanup();
}
// Releases the pointer buffer and resets the set to empty.
// The samples themselves are destroyed only when own_samples_ is set;
// own_samples_ itself is left unchanged by this call.
void CharSampSet::Cleanup() {
  const bool destroy_samples = (samp_buff_ != NULL) && own_samples_;
  if (destroy_samples) {
    // deleting a NULL entry is a no-op, so no per-slot check is needed
    for (int idx = 0; idx < cnt_; idx++) {
      delete samp_buff_[idx];
    }
  }
  // delete[] on a NULL buffer is likewise a no-op
  delete []samp_buff_;
  samp_buff_ = NULL;
  cnt_ = 0;
}
// Appends a sample pointer to the set.
// The pointer buffer is grown by SAMP_ALLOC_BLOCK entries whenever the current
// count is a multiple of SAMP_ALLOC_BLOCK (this includes the empty set).
// Only the pointer is stored; whether the sample is later deleted by the set
// is decided by own_samples_ in Cleanup().
// Returns false, leaving the set unchanged, if the buffer cannot be grown.
bool CharSampSet::Add(CharSamp *char_samp) {
  if ((cnt_ % SAMP_ALLOC_BLOCK) == 0) {
    // Plain new[] reports failure by throwing std::bad_alloc, which would make
    // the NULL check below unreachable. The nothrow form returns NULL instead,
    // so the documented "return false" path actually works.
    CharSamp **new_samp_buff =
        new (std::nothrow) CharSamp *[cnt_ + SAMP_ALLOC_BLOCK];
    if (new_samp_buff == NULL) {
      return false;
    }
    // carry the existing pointers over, then release the old buffer
    if (cnt_ > 0) {
      memcpy(new_samp_buff, samp_buff_, cnt_ * sizeof(*samp_buff_));
      delete []samp_buff_;
    }
    samp_buff_ = new_samp_buff;
  }
  samp_buff_[cnt_++] = char_samp;
  return true;
}
// load char samples from file
bool CharSampSet::LoadCharSamples(FILE *fp) {
// free existing
Cleanup();
// samples are created here and owned by the class
own_samples_ = true;
// start loading char samples
while (feof(fp) == 0) {
CharSamp *new_samp = CharSamp::FromCharDumpFile(fp);
if (new_samp != NULL) {
if (Add(new_samp) == false) {
return false;
}
}
}
return true;
}
// Creates a CharSampSet object from a char dump file.
// The file must begin with the 32-bit marker 0xfefeabd0, read directly into an
// unsigned int (i.e. in the host's native byte layout); the rest of the file
// is read by LoadCharSamples().
// Returns a set allocated with new and handed to the caller, or NULL if the
// file cannot be opened, the marker is missing or wrong, the set cannot be
// allocated, or loading the samples fails. The file is closed on every path.
CharSampSet * CharSampSet::FromCharDumpFile(string file_name) {
  // open the file
  FILE *fp = fopen(file_name.c_str(), "rb");
  if (fp == NULL) {
    return NULL;
  }
  // read and verify marker
  unsigned int val32;
  if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32) ||
      val32 != 0xfefeabd0) {
    fclose(fp);
    return NULL;
  }
  // Plain new throws on failure, which would skip the fclose() below and make
  // the NULL check dead code; the nothrow form keeps the error path reachable.
  CharSampSet *samp_set = new (std::nothrow) CharSampSet();
  if (samp_set == NULL) {
    fclose(fp);
    return NULL;
  }
  if (samp_set->LoadCharSamples(fp) == false) {
    delete samp_set;
    samp_set = NULL;
  }
  fclose(fp);
  return samp_set;
}
// Creates a new char dump file and stamps it with the format marker.
// Returns the stream, still open for binary writing with the marker already
// written, or NULL if the file cannot be created or the marker cannot be
// written (the stream is closed in the latter case).
FILE *CharSampSet::CreateCharDumpFile(string file_name) {
  // create the file
  FILE *dump_fp = fopen(file_name.c_str(), "wb");
  if (dump_fp == NULL) {
    return NULL;
  }
  // write the marker that readers of the dump file verify
  unsigned int marker = 0xfefeabd0;
  const size_t written = fwrite(&marker, 1, sizeof(marker), dump_fp);
  if (written == sizeof(marker)) {
    return dump_fp;
  }
  fclose(dump_fp);
  return NULL;
}
// Enumerates the samples stored in a char dump file one by one, calling the
// enumerator's EnumCharSamp method for each sample together with the
// percentage of the file consumed so far (100 * position / size).
// Each sample is deleted right after the callback returns. Enumeration stops
// early when EnumCharSamp returns false; that still counts as success.
// Returns false if the CachedFile cannot be allocated, its reported size is
// less than 1, or the leading 0xfefeabd0 marker is missing or wrong.
bool CharSampSet::EnumSamples(string file_name, CharSampEnum *enum_obj) {
  // Open the file. The nothrow form is used so the NULL check is reachable;
  // plain new would throw instead of returning NULL.
  CachedFile *fp_in = new (std::nothrow) CachedFile(file_name);
  if (fp_in == NULL) {
    return false;
  }
  // From here on every exit path must delete fp_in, otherwise the CachedFile
  // object is leaked.
  long i64_size = fp_in->Size();
  if (i64_size < 1) {
    delete fp_in;
    return false;
  }
  // read and verify marker
  unsigned int val32;
  if (fp_in->Read(&val32, sizeof(val32)) != sizeof(val32)) {
    delete fp_in;
    return false;
  }
  if (val32 != 0xfefeabd0) {
    delete fp_in;
    return false;
  }
  // start loading char samples
  while (fp_in->eof() == false) {
    CharSamp *new_samp = CharSamp::FromCharDumpFile(fp_in);
    long i64_pos = fp_in->Tell();
    if (new_samp != NULL) {
      bool ret_flag = (enum_obj)->EnumCharSamp(new_samp,
                                               (100.0f * i64_pos / i64_size));
      delete new_samp;
      if (ret_flag == false) {
        break;
      }
    }
  }
  delete fp_in;
  return true;
}
} // namespace tesseract