Skip to content

Commit

Permalink
Add API to render a PDF to a data buffer
Browse files Browse the repository at this point in the history
  • Loading branch information
sebastato committed Jan 25, 2021
1 parent 95e879f commit 83a1bd6
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 7 deletions.
19 changes: 14 additions & 5 deletions src/api/pdfrenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,11 +178,20 @@ static const int kMaxBytesPerCodepoint = 20;
**********************************************************************/
/**
 * Creates a PDF renderer whose base class is constructed from an output
 * base name and the "pdf" extension.
 *
 * @param outputbase  Output name forwarded to the TessResultRenderer
 *                    constructor.
 * @param datadir     Copied into datadir_.
 * @param textonly    Stored in textonly_.
 */
TessPDFRenderer::TessPDFRenderer(const char *outputbase, const char *datadir,
                                 bool textonly)
    : TessResultRenderer(outputbase, "pdf"), datadir_(datadir) {
  obj_ = 0;
  textonly_ = textonly;
  // Seed the offset table with a single 0 entry.
  offsets_.push_back(0);
  // This is the outputbase-driven constructor, so buffer mode stays off.
  writeToBuffer_ = false;
}

/**
 * Creates a PDF renderer in buffer mode: the base class is built through its
 * extension-only constructor and writeToBuffer_ is switched on.
 *
 * @param datadir   Copied into datadir_.
 * @param textonly  Stored in textonly_.
 */
TessPDFRenderer::TessPDFRenderer(const char* datadir, bool textonly)
    : TessResultRenderer("pdf"), datadir_(datadir) {
  obj_ = 0;
  textonly_ = textonly;
  // Seed the offset table with a single 0 entry.
  offsets_.push_back(0);
  // NOTE(review): TessPDFRenderer declares its own writeToBuffer_ and
  // outputBuffer_ in addition to the members of the same name in
  // TessResultRenderer; confirm which copy this assignment is meant to reach.
  writeToBuffer_ = true;
  // outputBuffer_ is default-constructed (empty) before this body runs, so it
  // needs no explicit reset here.
}

void TessPDFRenderer::AppendPDFObjectDIY(size_t objectsize) {
Expand Down
21 changes: 19 additions & 2 deletions src/api/renderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ TessResultRenderer::TessResultRenderer(const char *outputbase,
title_(""), imagenum_(-1),
fout_(stdout),
next_(nullptr),
happy_(true) {
happy_(true),
writeToBuffer_(false) {
if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) {
STRING outfile = STRING(outputbase) + STRING(".") + STRING(file_extension_);
fout_ = fopen(outfile.string(), "wb");
Expand All @@ -46,6 +47,15 @@ TessResultRenderer::TessResultRenderer(const char *outputbase,
}
}

/**
 * Buffer-mode constructor: takes only the file extension and starts with
 * writeToBuffer_ set, so no output file is opened here.
 *
 * @param extension  Stored in file_extension_.
 */
TessResultRenderer::TessResultRenderer(const char* extension)
    : file_extension_(extension),
      title_(""),
      imagenum_(-1),
      fout_(stdout),  // same initial value the outputbase constructor uses
      next_(nullptr),
      happy_(true),
      writeToBuffer_(true) {}

TessResultRenderer::~TessResultRenderer() {
if (fout_ != nullptr) {
if (fout_ != stdout)
Expand Down Expand Up @@ -104,7 +114,14 @@ void TessResultRenderer::AppendString(const char* s) {
}

/**
 * Appends len bytes starting at s to the renderer's output.
 *
 * In buffer mode (writeToBuffer_ set) the bytes are added to outputBuffer_
 * and nothing is written to fout_. Otherwise the bytes are passed to
 * tesseract::Serialize on fout_, and happy_ is cleared if that call fails.
 *
 * @param s    First byte to append.
 * @param len  Number of bytes to append; values <= 0 append nothing in
 *             buffer mode.
 */
void TessResultRenderer::AppendData(const char* s, int len) {
  if (writeToBuffer_) {
    // A single range insert lets the vector grow geometrically; reserving the
    // exact new size on every call would force a reallocation per append.
    if (len > 0) {
      outputBuffer_.insert(outputBuffer_.end(), s, s + len);
    }
    return;
  }
  if (!tesseract::Serialize(fout_, s, len)) happy_ = false;
}

bool TessResultRenderer::BeginDocumentHandler() {
Expand Down
15 changes: 15 additions & 0 deletions src/api/renderer.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ class TESS_API TessResultRenderer {
// Reports the current value of happy_, which AppendData clears when a write
// to fout_ fails.
bool happy() { return happy_; }

// Returns a copy of outputBuffer_, the bytes AppendData has collected while
// writeToBuffer_ is set. The caller owns the returned vector.
std::vector<char> outputBuffer() {
  std::vector<char> snapshot(outputBuffer_);
  return snapshot;
}

/**
* Returns the index of the last image given to AddImage
Expand All @@ -118,6 +122,8 @@ class TESS_API TessResultRenderer {
* will produce .hocr files.
*/
TessResultRenderer(const char* outputbase, const char* extension);

/**
 * Buffer-mode constructor: stores extension as the file extension and
 * starts the renderer with writeToBuffer_ set, so AppendData collects
 * bytes in outputBuffer_ instead of writing them to a file.
 */
TessResultRenderer(const char* extension);

// Hook for specialized handling in BeginDocument()
virtual bool BeginDocumentHandler();
Expand Down Expand Up @@ -147,6 +153,8 @@ class TESS_API TessResultRenderer {
FILE* fout_; // output file pointer
TessResultRenderer* next_; // Can link multiple renderers together
bool happy_; // I get grumpy when the disk fills up, etc.
bool writeToBuffer_;              // when set, AppendData stores into outputBuffer_ instead of fout_
std::vector<char> outputBuffer_;  // bytes collected by AppendData in buffer mode
};

/**
Expand Down Expand Up @@ -217,6 +225,11 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
TessPDFRenderer(const char* outputbase, const char* datadir,
bool textonly = false);

/**
 * Buffer-mode constructor: builds the base class through its
 * extension-only constructor (extension "pdf") and turns writeToBuffer_ on.
 * datadir is copied into datadir_; textonly is stored in textonly_.
 */
TessPDFRenderer(const char* datadir, bool textonly = false);


// NOTE(review): the definition is not visible here; presumably returns the
// PDF bytes collected in buffer mode -- confirm which outputBuffer_ it reads.
std::vector<char> getOutputBuffer();

protected:
bool BeginDocumentHandler() override;
bool AddImageHandler(TessBaseAPI* api) override;
Expand All @@ -232,6 +245,8 @@ class TESS_API TessPDFRenderer : public TessResultRenderer {
GenericVector<long int> offsets_; // offset of every PDF object in bytes
GenericVector<long int> pages_; // object number for every /Page object
std::string datadir_; // where to find the custom font
// NOTE(review): TessResultRenderer declares members named outputBuffer_ and
// writeToBuffer_ as well, and TessResultRenderer::AppendData fills the
// base-class buffer. Confirm these two copies are intended and kept in sync.
std::vector<char> outputBuffer_;
bool writeToBuffer_;               // assigned in both TessPDFRenderer constructors
bool textonly_; // skip images if set
// Bookkeeping only. DIY = Do It Yourself.
void AppendPDFObjectDIY(size_t objectsize);
Expand Down

0 comments on commit 83a1bd6

Please sign in to comment.