Skip to content

Commit

Permalink
Remove parameters tessedit_pdf_jpg_quality and tessedit_pdf_compression …
Browse files Browse the repository at this point in the history
… (reasons are in issues 1300 and 1285)
  • Loading branch information
zdenop committed Oct 7, 2014
1 parent 55d11ad commit d0cb107
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 47 deletions.
46 changes: 19 additions & 27 deletions api/pdfrenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -431,8 +431,7 @@ bool TessPDFRenderer::BeginDocumentHandler() {
return true;
}

bool TessPDFRenderer::imageToPDFObj(TessBaseAPI* api,
Pix *pix,
bool TessPDFRenderer::imageToPDFObj(Pix *pix,
char *filename,
long int objnum,
char **pdf_object,
Expand All @@ -449,32 +448,25 @@ bool TessPDFRenderer::imageToPDFObj(TessBaseAPI* api,
return false;

L_COMP_DATA *cid = NULL;
int kJpegQuality;
int encoding_type;
api->GetIntVariable("tessedit_pdf_jpg_quality", &kJpegQuality);
api->GetIntVariable("tessedit_pdf_compression", &encoding_type);
if (encoding_type > 0 && encoding_type < 4) {
if (pixGenerateCIData(pix, encoding_type, kJpegQuality, 0, &cid) != 0)
return false;
const int kJpegQuality = 85;
l_generateCIDataForPdf(filename, pix, kJpegQuality, &cid);
// TODO(jbreiden) Leptonica 1.71 doesn't correctly handle certain
// types of PNG files, especially if there are 2 samples per pixel.
// We can get rid of this logic after Leptonica 1.72 is released and
// has propagated everywhere. Bug discussion as follows.
// https://code.google.com/p/tesseract-ocr/issues/detail?id=1300
int format, sad;
findFileFormat(filename, &format);
if (pixGetSpp(pix) == 4 && format == IFF_PNG) {
pixSetSpp(pix, 3);
sad = pixGenerateCIData(pix, L_FLATE_ENCODE, 0, 0, &cid);
} else {
// TODO(jbreiden) Leptonica 1.71 doesn't correctly handle certain
// types of PNG files, especially if there are 2 samples per pixel.
// We can get rid of this logic after Leptonica 1.72 is released and
// has propagated everywhere. Bug discussion as follows.
// https://code.google.com/p/tesseract-ocr/issues/detail?id=1300
int format, sad;
findFileFormat(filename, &format);
if (pixGetSpp(pix) == 4 && format == IFF_PNG) {
pixSetSpp(pix, 3);
sad = pixGenerateCIData(pix, L_FLATE_ENCODE, 0, 0, &cid);
} else {
sad = l_generateCIDataForPdf(filename, pix, kJpegQuality, &cid);
}
sad = l_generateCIDataForPdf(filename, pix, kJpegQuality, &cid);
}

if (sad || !cid) {
l_CIDataDestroy(&cid);
return false;
}
if (sad || !cid) {
l_CIDataDestroy(&cid);
return false;
}

const char *group4 = "";
Expand Down Expand Up @@ -665,7 +657,7 @@ bool TessPDFRenderer::AddImageHandler(TessBaseAPI* api) {
AppendPDFObjectDIY(objsize);

char *pdf_object;
if (!imageToPDFObj(api, pix, filename, obj_, &pdf_object, &objsize)) {
if (!imageToPDFObj(pix, filename, obj_, &pdf_object, &objsize)) {
return false;
}
AppendData(pdf_object, objsize);
Expand Down
5 changes: 2 additions & 3 deletions api/renderer.h
Original file line number Diff line number Diff line change
Expand Up @@ -194,9 +194,8 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
static char* GetPDFTextObjects(TessBaseAPI* api,
double width, double height);
// Turn an image into a PDF object. Only transcode if we have to.
static bool imageToPDFObj(tesseract::TessBaseAPI *api, Pix *pix,
char *filename, long int objnum, char **pdf_object,
long int *pdf_object_size);
static bool imageToPDFObj(Pix *pix, char *filename, long int objnum,
char **pdf_object, long int *pdf_object_size);
};


Expand Down
10 changes: 0 additions & 10 deletions ccmain/tesseractclass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -318,16 +318,6 @@ Tesseract::Tesseract()
"Write .html hOCR output file", this->params()),
BOOL_MEMBER(tessedit_create_pdf, false,
"Write .pdf output file", this->params()),
INT_MEMBER(tessedit_pdf_compression, 0,
"Type of image compression in pdf output: "
"0 - autoselection (default); "
"1 - jpeg; "
"2 - G4; "
"3 - flate",
this->params()),
INT_MEMBER(tessedit_pdf_jpg_quality, 85,
"Quality level of jpeg image compression in pdf output",
this->params()),
STRING_MEMBER(unrecognised_char, "|",
"Output char for unidentified blobs", this->params()),
INT_MEMBER(suspect_level, 99, "Suspect marker level", this->params()),
Expand Down
7 changes: 0 additions & 7 deletions ccmain/tesseractclass.h
Original file line number Diff line number Diff line change
Expand Up @@ -930,13 +930,6 @@ class Tesseract : public Wordrec {
BOOL_VAR_H(tessedit_create_txt, true, "Write .txt output file");
BOOL_VAR_H(tessedit_create_hocr, false, "Write .html hOCR output file");
BOOL_VAR_H(tessedit_create_pdf, false, "Write .pdf output file");
INT_VAR_H(tessedit_pdf_compression, 0, "Type of image encoding in pdf output:"
"0 - autoselection (default); "
"1 - jpeg; "
"2 - G4; "
"3 - flate");
INT_VAR_H(tessedit_pdf_jpg_quality, 85, "Quality level of jpeg image "
"compression in pdf output");
STRING_VAR_H(unrecognised_char, "|",
"Output char for unidentified blobs");
INT_VAR_H(suspect_level, 99, "Suspect marker level");
Expand Down

0 comments on commit d0cb107

Please sign in to comment.