Skip to content

Commit

Permalink
Fixed issues 899/1220/1246 (mixed eng+ara)
Browse files Browse the repository at this point in the history
  • Loading branch information
theraysmith committed Sep 18, 2014
1 parent e46b605 commit 2f197cd
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 6 deletions.
8 changes: 5 additions & 3 deletions ccmain/control.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,

// ****************** Pass 2 *******************
if (tessedit_tess_adaption_mode != 0x0 && !tessedit_test_adaption &&
tessedit_ocr_engine_mode != OEM_CUBE_ONLY ) {
AnyTessLang()) {
page_res_it.restart_page();
GenericVector<WordData> words;
SetupAllWordsPassN(2, target_word_box, word_config, page_res, &words);
Expand All @@ -371,8 +371,7 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,

// The next passes can only be run if tesseract has been used, as cube
// doesn't set all the necessary outputs in WERD_RES.
if (tessedit_ocr_engine_mode == OEM_TESSERACT_ONLY ||
tessedit_ocr_engine_mode == OEM_TESSERACT_CUBE_COMBINED) {
if (AnyTessLang()) {
// ****************** Pass 3 *******************
// Fix fuzzy spaces.
set_global_loc_code(LOC_FUZZY_SPACE);
Expand Down Expand Up @@ -1098,6 +1097,9 @@ void Tesseract::classify_word_pass2(const WordData& word_data,
tessedit_ocr_engine_mode != OEM_TESSERACT_CUBE_COMBINED &&
word_data.word->best_choice != NULL)
return;
if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY) {
return;
}
ROW* row = word_data.row;
BLOCK* block = word_data.block;
WERD_RES* word = *in_word;
Expand Down
4 changes: 2 additions & 2 deletions ccmain/cube_control.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -383,8 +383,8 @@ bool Tesseract::cube_recognize(CubeObject *cube_obj, BLOCK* block,
for (int i = 0; i < num_chars; ++i) {
UNICHAR_ID uch_id =
cube_cntxt_->CharacterSet()->UnicharID(char_samples[i]->StrLabel());
choices[i] = new BLOB_CHOICE(uch_id, 0.0, cube_certainty, -1, -1,
0, 0, 0, 0, BCC_STATIC_CLASSIFIER);
choices[i] = new BLOB_CHOICE(uch_id, -cube_certainty, cube_certainty,
-1, -1, 0, 0, 0, 0, BCC_STATIC_CLASSIFIER);
}
word->FakeClassifyWord(num_chars, choices);
// within a word, cube recognizes the word in reading order.
Expand Down
9 changes: 9 additions & 0 deletions ccmain/tesseractclass.h
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,15 @@ class Tesseract : public Wordrec {
// Returns the sub-language recognizer stored at position index in
// sub_langs_. This accessor itself performs no range check on index.
Tesseract* get_sub_lang(int index) const {
  Tesseract* const sub_lang = sub_langs_[index];
  return sub_lang;
}
// Reports whether at least one language -- this one or any entry of
// sub_langs_ -- has an OCR engine mode other than OEM_CUBE_ONLY, i.e. uses
// Tesseract rather than relying on cube alone.
bool AnyTessLang() const {
  // Start with the primary language; only consult the sub-languages while
  // no Tesseract-using language has been found yet.
  bool uses_tess = tessedit_ocr_engine_mode != OEM_CUBE_ONLY;
  for (int lang = 0; !uses_tess && lang < sub_langs_.size(); ++lang) {
    uses_tess = sub_langs_[lang]->tessedit_ocr_engine_mode != OEM_CUBE_ONLY;
  }
  return uses_tess;
}

void SetBlackAndWhitelist();

Expand Down
5 changes: 4 additions & 1 deletion ccstruct/pageres.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,10 @@ void WERD_RES::DebugWordChoices(bool debug, const char* word_to_debug) {
// Prints the accepted/adaptable/done flags of this word followed by its best
// choice, labelled with msg. If there is no best choice (best_choice is NULL)
// a "<Null choice>" placeholder is printed instead.
void WERD_RES::DebugTopChoice(const char* msg) const {
  tprintf("Best choice: accepted=%d, adaptable=%d, done=%d : ",
          tess_accepted, tess_would_adapt, done);
  // best_choice must only be dereferenced behind the null check; printing it
  // unconditionally first would crash on a null choice and print a valid one
  // twice.
  if (best_choice == NULL) {
    tprintf("<Null choice>\n");
  } else {
    best_choice->print(msg);
  }
}

// Removes from best_choices all choices which are not within a reasonable
Expand Down

0 comments on commit 2f197cd

Please sign in to comment.