linux下暂时禁用tesseract-ocr

This commit is contained in:
luoliangyi 2022-10-22 15:15:41 +08:00
parent 080915ddd2
commit 6f9d5de778
86 changed files with 17 additions and 22959 deletions

View File

@ -29,8 +29,6 @@
<Add option="../../../third_party/opencv/uos/amd64/lib/libittnotify.a" />
<Add option="../../../third_party/opencv/uos/amd64/lib/libzlib.a" />
<Add option="../../../third_party/freetype/uos/amd64/lib/libfreetype.a" />
<Add option="../../../third_party/ocr/tesseract-ocr/uos/amd64/lib/libtesseract.a" />
<Add option="../../../third_party/leptonica/uos/amd64/lib/libleptonica.a" />
<Add option="-L../HGBase/bin/uos_x86_64_Debug -lHGBase" />
<Add option="-L../HGImgFmt/bin/uos_x86_64_Debug -lHGImgFmt" />
<Add option="-ldl" />
@ -61,8 +59,6 @@
<Add option="../../../third_party/opencv/uos/amd64/lib/libittnotify.a" />
<Add option="../../../third_party/opencv/uos/amd64/lib/libzlib.a" />
<Add option="../../../third_party/freetype/uos/amd64/lib/libfreetype.a" />
<Add option="../../../third_party/ocr/tesseract-ocr/uos/amd64/lib/libtesseract.a" />
<Add option="../../../third_party/leptonica/uos/amd64/lib/libleptonica.a" />
<Add option="-L../../../../release/uos/x86_64 -lHGBase -lHGImgFmt" />
<Add option="-ldl" />
<Add option="-lpthread" />
@ -495,8 +491,6 @@
<Unit filename="../../../modules/imgproc/HGOCRHanvon.hpp" />
<Unit filename="../../../modules/imgproc/HGOCRRetImpl.cpp" />
<Unit filename="../../../modules/imgproc/HGOCRRetImpl.hpp" />
<Unit filename="../../../modules/imgproc/HGOCRTesseract.cpp" />
<Unit filename="../../../modules/imgproc/HGOCRTesseract.hpp" />
<Unit filename="../../../modules/imgproc/ImageProcess/ImageApply.cpp" />
<Unit filename="../../../modules/imgproc/ImageProcess/ImageApply.h" />
<Unit filename="../../../modules/imgproc/ImageProcess/ImageApplyAdjustColors.cpp" />

View File

@ -1,7 +1,9 @@
#include "HGOCR.h"
#include "HGOCRBase.hpp"
#include "HGOCRHanvon.hpp"
#if defined(HG_CMP_MSC)
#include "HGOCRTesseract.hpp"
#endif
#include "HGOCRRetImpl.hpp"
HGResult HGAPI HGImgProc_CreateOCRMgr(HGUInt algo, HGOCRMgr* ocrMgr)
@ -13,6 +15,7 @@ HGResult HGAPI HGImgProc_CreateOCRMgr(HGUInt algo, HGOCRMgr* ocrMgr)
if (HGIMGPROC_OCRALGO_DEFAULT == algo)
{
#if defined(HG_CMP_MSC)
HGOCRBase* ocrMgrImpl = new HGOCRHanvon;
HGResult ret = ocrMgrImpl->Init();
if (HGBASE_ERR_OK != ret)
@ -26,6 +29,15 @@ HGResult HGAPI HGImgProc_CreateOCRMgr(HGUInt algo, HGOCRMgr* ocrMgr)
return ret;
}
}
#else
HGOCRBase* ocrMgrImpl = new HGOCRHanvon;
HGResult ret = ocrMgrImpl->Init();
if (HGBASE_ERR_OK != ret)
{
delete ocrMgrImpl;
return ret;
}
#endif
*ocrMgr = (HGOCRMgr)ocrMgrImpl;
return HGBASE_ERR_OK;
@ -45,6 +57,7 @@ HGResult HGAPI HGImgProc_CreateOCRMgr(HGUInt algo, HGOCRMgr* ocrMgr)
}
else if (HGIMGPROC_OCRALGO_TESSERACT == algo)
{
#if defined(HG_CMP_MSC)
HGOCRBase* ocrMgrImpl = new HGOCRTesseract;
HGResult ret = ocrMgrImpl->Init();
if (HGBASE_ERR_OK != ret)
@ -55,6 +68,10 @@ HGResult HGAPI HGImgProc_CreateOCRMgr(HGUInt algo, HGOCRMgr* ocrMgr)
*ocrMgr = (HGOCRMgr)ocrMgrImpl;
return HGBASE_ERR_OK;
#else
return HGBASE_ERR_INVALIDARG;
#endif
}
return HGBASE_ERR_INVALIDARG;

View File

@ -1,812 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: baseapi.h
// Description: Simple API for calling tesseract.
// Author: Ray Smith
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_API_BASEAPI_H_
#define TESSERACT_API_BASEAPI_H_
#ifdef HAVE_CONFIG_H
# include "config_auto.h" // DISABLED_LEGACY_ENGINE
#endif
#include "export.h"
#include "pageiterator.h"
#include "publictypes.h"
#include "resultiterator.h"
#include "unichar.h"
#include "version.h"
#include <cstdio>
#include <vector> // for std::vector
struct Pix;
struct Pixa;
struct Boxa;
namespace tesseract {
class PAGE_RES;
class ParagraphModel;
class BLOCK_LIST;
class ETEXT_DESC;
struct OSResults;
class UNICHARSET;
class Dawg;
class Dict;
class EquationDetect;
class PageIterator;
class ImageThresholder;
class LTRResultIterator;
class ResultIterator;
class MutableIterator;
class TessResultRenderer;
class Tesseract;
// Function to read a std::vector<char> from a whole file.
// Returns false on failure.
using FileReader = bool (*)(const char *filename, std::vector<char> *data);
// Pointer to a const member function of Dict returning int. This is the type
// accepted by TessBaseAPI::SetDictFunc, which points Dict::letter_is_okay_
// at the given function.
using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID,
bool) const;
// Pointer to a member function of Dict returning double. This is the type
// accepted by TessBaseAPI::SetProbabilityInContextFunc, which points
// Dict::probability_in_context_ at the given function.
using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *,
int, const char *, int);
/**
* Base class for all tesseract APIs.
* Specific classes can add ability to work on different inputs or produce
* different outputs.
* This class is mostly an interface layer on top of the Tesseract instance
* class to hide the data types so that users of this class don't have to
* include any other Tesseract headers.
*/
class TESS_API TessBaseAPI {
public:
TessBaseAPI();
virtual ~TessBaseAPI();
// Copy constructor and assignment operator are currently unsupported.
TessBaseAPI(TessBaseAPI const &) = delete;
TessBaseAPI &operator=(TessBaseAPI const &) = delete;
/**
* Returns the version identifier as a static string. Do not delete.
*/
static const char *Version();
/**
* If compiled with OpenCL AND an available OpenCL
* device is deemed faster than serial code, then
* "device" is populated with the cl_device_id
* and returns sizeof(cl_device_id)
* otherwise *device=nullptr and returns 0.
*/
static size_t getOpenCLDevice(void **device);
/**
* Set the name of the input file. Needed for training and
* reading a UNLV zone file, and for searchable PDF output.
*/
void SetInputName(const char *name);
/**
* These functions are required for searchable PDF output.
* We need our hands on the input file so that we can include
* it in the PDF without transcoding. If that is not possible,
* we need the original image. Finally, resolution metadata
* is stored in the PDF so we need that as well.
*/
const char *GetInputName();
// Takes ownership of the input pix.
void SetInputImage(Pix *pix);
Pix *GetInputImage();
int GetSourceYResolution();
const char *GetDatapath();
/** Set the name of the bonus output files. Needed only for debugging. */
void SetOutputName(const char *name);
/**
* Set the value of an internal "parameter."
* Supply the name of the parameter and the value as a string, just as
* you would in a config file.
* Returns false if the name lookup failed.
* Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
* Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.
* SetVariable may be used before Init, but settings will revert to
* defaults on End().
*
* Note: Must be called after Init(). Only works for non-init variables
* (init variables should be passed to Init()).
*
* NOTE(review): the two statements above about calling order ("may be used
* before Init" vs. "must be called after Init()") conflict as written;
* confirm which one holds for this version of the library.
*/
bool SetVariable(const char *name, const char *value);
// NOTE(review): undocumented here; presumably the SetVariable counterpart
// for debug parameters - confirm against the implementation.
bool SetDebugVariable(const char *name, const char *value);
/**
* Returns true if the parameter was found among Tesseract parameters.
* Fills in value with the value of the parameter.
*/
bool GetIntVariable(const char *name, int *value) const;
bool GetBoolVariable(const char *name, bool *value) const;
bool GetDoubleVariable(const char *name, double *value) const;
/**
* Returns the pointer to the string that represents the value of the
* parameter if it was found among Tesseract parameters.
*/
const char *GetStringVariable(const char *name) const;
#ifndef DISABLED_LEGACY_ENGINE
/**
* Print Tesseract fonts table to the given file.
*/
void PrintFontsTable(FILE *fp) const;
#endif
/**
* Print Tesseract parameters to the given file.
*/
void PrintVariables(FILE *fp) const;
/**
* Get value of named variable as a string, if it exists.
*/
bool GetVariableAsString(const char *name, std::string *val) const;
/**
* Instances are now mostly thread-safe and totally independent,
* but some global parameters remain. Basically it is safe to use multiple
* TessBaseAPIs in different threads in parallel, UNLESS:
* you use SetVariable on some of the Params in classify and textord.
* If you do, then the effect will be to change it for all your instances.
*
* Start tesseract. Returns zero on success and -1 on failure.
* NOTE that the only members that may be called before Init are those
* listed above here in the class definition.
*
* The datapath must be the name of the tessdata directory.
* The language is (usually) an ISO 639-3 string or nullptr will default to
* eng. It is entirely safe (and eventually will be efficient too) to call
* Init multiple times on the same instance to change language, or just
* to reset the classifier.
* The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
* that multiple languages are to be loaded. Eg hin+eng will load Hindi and
* English. Languages may specify internally that they want to be loaded
* with one or more other languages, so the ~ sign is available to override
* that. Eg if hin were set to load eng by default, then hin+~eng would force
* loading only hin. The number of loaded languages is limited only by
* memory, with the caveat that loading additional languages will impact
* both speed and accuracy, as there is more work to do to decide on the
* applicable language, and there is more chance of hallucinating incorrect
* words.
* WARNING: On changing languages, all Tesseract parameters are reset
* back to their default values. (Which may vary between languages.)
* If you have a rare need to set a Variable that controls
* initialization for a second call to Init you should explicitly
* call End() and then use SetVariable before Init. This is only a very
* rare use case, since there are very few uses that require any parameters
* to be set before Init.
*
* If set_only_non_debug_params is true, only params that do not contain
* "debug" in the name will be set.
*/
int Init(const char *datapath, const char *language, OcrEngineMode mode,
char **configs, int configs_size,
const std::vector<std::string> *vars_vec,
const std::vector<std::string> *vars_values,
bool set_only_non_debug_params);
// Convenience overload: forwards to the full Init above with no configs, no
// variable vectors and set_only_non_debug_params = false.
int Init(const char *datapath, const char *language, OcrEngineMode oem) {
return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
}
// Convenience overload: as above, additionally using OEM_DEFAULT as the
// engine mode.
int Init(const char *datapath, const char *language) {
return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr,
false);
}
// In-memory version reads the traineddata file directly from the given
// data[data_size] array, and/or reads data via a FileReader.
int Init(const char *data, int data_size, const char *language,
OcrEngineMode mode, char **configs, int configs_size,
const std::vector<std::string> *vars_vec,
const std::vector<std::string> *vars_values,
bool set_only_non_debug_params, FileReader reader);
/**
* Returns the languages string used in the last valid initialization.
* If the last initialization specified "deu+hin" then that will be
* returned. If hin loaded eng automatically as well, then that will
* not be included in this list. To find the languages actually
* loaded use GetLoadedLanguagesAsVector.
* The returned string should NOT be deleted.
*/
const char *GetInitLanguagesAsString() const;
/**
* Returns the loaded languages in the vector of std::string.
* Includes all languages loaded by the last Init, including those loaded
* as dependencies of other loaded languages.
*/
void GetLoadedLanguagesAsVector(std::vector<std::string> *langs) const;
/**
* Returns the available languages in the sorted vector of std::string.
*/
void GetAvailableLanguagesAsVector(std::vector<std::string> *langs) const;
/**
* Init only for page layout analysis. Use only for calls to SetImage and
* AnalysePage. Calls that attempt recognition will generate an error.
*/
void InitForAnalysePage();
/**
* Read a "config" file containing a set of param, value pairs.
* Searches the standard places: tessdata/configs, tessdata/tessconfigs
* and also accepts a relative or absolute path name.
* Note: only non-init params will be set (init params are set by Init()).
*/
void ReadConfigFile(const char *filename);
/** Same as above, but only set debug params from the given config file. */
void ReadDebugConfigFile(const char *filename);
/**
* Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
* The mode is stored as an IntParam so it can also be modified by
* ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
*/
void SetPageSegMode(PageSegMode mode);
/** Return the current page segmentation mode. */
PageSegMode GetPageSegMode() const;
/**
* Recognize a rectangle from an image and return the result as a string.
* May be called many times for a single Init.
* Currently has no error checking.
* Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
* Palette color images will not work properly and must be converted to
* 24 bit.
* Binary images of 1 bit per pixel may also be given but they must be
* byte packed with the MSB of the first byte being the first pixel, and a
* 1 represents WHITE. For binary images set bytes_per_pixel=0.
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
*
* Note that TesseractRect is the simplified convenience interface.
* For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
* and one or more of the Get*Text functions below.
*/
char *TesseractRect(const unsigned char *imagedata, int bytes_per_pixel,
int bytes_per_line, int left, int top, int width,
int height);
/**
* Call between pages or documents etc to free up memory and forget
* adaptive data.
*/
void ClearAdaptiveClassifier();
/**
* @defgroup AdvancedAPI Advanced API
* The following methods break TesseractRect into pieces, so you can
* get hold of the thresholded image, get the text in different formats,
* get bounding boxes, confidences etc.
*/
/* @{ */
/**
* Provide an image for Tesseract to recognize. Format is as
* TesseractRect above. Copies the image buffer and converts to Pix.
* SetImage clears all recognition results, and sets the rectangle to the
* full image, so it may be followed immediately by a GetUTF8Text, and it
* will automatically perform recognition.
*/
void SetImage(const unsigned char *imagedata, int width, int height,
int bytes_per_pixel, int bytes_per_line);
/**
* Provide an image for Tesseract to recognize. As with SetImage above,
* Tesseract takes its own copy of the image, so it need not persist until
* after Recognize.
* Pix vs raw, which to use?
* Use Pix where possible. Tesseract uses Pix as its internal representation
* and it is therefore more efficient to provide a Pix directly.
*/
void SetImage(Pix *pix);
/**
* Set the resolution of the source image in pixels per inch so font size
* information can be calculated in results. Call this after SetImage().
*/
void SetSourceResolution(int ppi);
/**
* Restrict recognition to a sub-rectangle of the image. Call after SetImage.
* Each SetRectangle clears the recognition results so multiple rectangles
* can be recognized with the same image.
*/
void SetRectangle(int left, int top, int width, int height);
/**
* Get a copy of the internal thresholded image from Tesseract.
* Caller takes ownership of the Pix and must pixDestroy it.
* May be called any time after SetImage, or after TesseractRect.
*/
Pix *GetThresholdedImage();
/**
* Get the result of page layout analysis as a leptonica-style
* Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
*/
Boxa *GetRegions(Pixa **pixa);
/**
* Get the textlines as a leptonica-style
* Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
* If raw_image is true, then extract from the original image instead of the
* thresholded image and pad by raw_padding pixels.
* If blockids is not nullptr, the block-id of each line is also returned as
* an array of one element per line. delete [] after use. If paraids is not
* nullptr, the paragraph-id of each line within its block is also returned as
* an array of one element per line. delete [] after use.
*/
Boxa *GetTextlines(bool raw_image, int raw_padding, Pixa **pixa,
int **blockids, int **paraids);
/*
Helper method to extract from the thresholded image (most common usage):
forwards with raw_image = false, raw_padding = 0 and paraids = nullptr.
*/
Boxa *GetTextlines(Pixa **pixa, int **blockids) {
return GetTextlines(false, 0, pixa, blockids, nullptr);
}
/**
* Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
* pair, in reading order. Enables downstream handling of non-rectangular
* regions.
* Can be called before or after Recognize.
* If blockids is not nullptr, the block-id of each line is also returned as
* an array of one element per line. delete [] after use.
*/
Boxa *GetStrips(Pixa **pixa, int **blockids);
/**
* Get the words as a leptonica-style
* Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
*/
Boxa *GetWords(Pixa **pixa);
/**
* Gets the individual connected (text) components (created
* after pages segmentation step, but before recognition)
* as a leptonica-style Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
* Note: the caller is responsible for calling boxaDestroy()
* on the returned Boxa array and pixaDestroy() on cc array.
*/
Boxa *GetConnectedComponents(Pixa **cc);
/**
* Get the given level kind of components (block, textline, word etc.) as a
* leptonica-style Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
* If blockids is not nullptr, the block-id of each component is also returned
* as an array of one element per component. delete [] after use.
* If paraids is not nullptr, the paragraph-id of each component with its
* block is also returned as an array of one element per component. delete []
* after use. If raw_image is true, then portions of the original image are
* extracted instead of the thresholded image and padded with raw_padding. If
* text_only is true, then only text components are returned.
*/
Boxa *GetComponentImages(PageIteratorLevel level, bool text_only,
bool raw_image, int raw_padding, Pixa **pixa,
int **blockids, int **paraids);
// Helper function to get binary images with no padding (most common usage).
Boxa *GetComponentImages(const PageIteratorLevel level, const bool text_only,
Pixa **pixa, int **blockids) {
return GetComponentImages(level, text_only, false, 0, pixa, blockids,
nullptr);
}
/**
* Returns the scale factor of the thresholded image that would be returned by
* GetThresholdedImage() and the various GetX() methods that call
* GetComponentImages().
* Returns 0 if no thresholder has been set.
*/
int GetThresholdedImageScaleFactor() const;
/**
* Runs page layout analysis in the mode set by SetPageSegMode.
* May optionally be called prior to Recognize to get access to just
* the page layout results. Returns an iterator to the results.
* If merge_similar_words is true, words are combined where suitable for use
* with a line recognizer. Use if you want to use AnalyseLayout to find the
* textlines, and then want to process textline fragments with an external
* line recognizer.
* Returns nullptr on error or an empty page.
* The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
PageIterator *AnalyseLayout();
PageIterator *AnalyseLayout(bool merge_similar_words);
/**
* Recognize the image from SetAndThresholdImage, generating Tesseract
* internal structures. Returns 0 on success.
* Optional. The Get*Text functions below will call Recognize if needed.
* After Recognize, the output is kept internally until the next SetImage.
*
* NOTE(review): no SetAndThresholdImage is declared in this class;
* presumably this refers to SetImage - confirm.
*/
int Recognize(ETEXT_DESC *monitor);
/**
* Methods to retrieve information after SetAndThresholdImage(),
* Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
*/
/**
* Turns images into symbolic text.
*
* filename can point to a single image, a multi-page TIFF,
* or a plain text list of image filenames.
*
* retry_config is useful for debugging. If not nullptr, you can fall
* back to an alternate configuration if a page fails for some
* reason.
*
* timeout_millisec terminates processing if any single page
* takes too long. Set to 0 for unlimited time.
*
* renderer is responsible for creating the output. For example,
* use the TessTextRenderer if you want plaintext output, or
* the TessPDFRenderer to produce searchable PDF.
*
* If tessedit_page_number is non-negative, will only process that
* single page. Works for multi-page tiff file, or filelist.
*
* Returns true if successful, false on error.
*/
bool ProcessPages(const char *filename, const char *retry_config,
int timeout_millisec, TessResultRenderer *renderer);
// Does the real work of ProcessPages.
bool ProcessPagesInternal(const char *filename, const char *retry_config,
int timeout_millisec, TessResultRenderer *renderer);
/**
* Turn a single image into symbolic text.
*
* The pix is the image processed. filename and page_index are
* metadata used by side-effect processes, such as reading a box
* file or formatting as hOCR.
*
* See ProcessPages for descriptions of other parameters.
*/
bool ProcessPage(Pix *pix, int page_index, const char *filename,
const char *retry_config, int timeout_millisec,
TessResultRenderer *renderer);
/**
* Get a reading-order iterator to the results of LayoutAnalysis and/or
* Recognize. The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
ResultIterator *GetIterator();
/**
* Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
* The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
MutableIterator *GetMutableIterator();
/**
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
*/
char *GetUTF8Text();
/**
* Make a HTML-formatted string with hOCR markup from the internal
* data structures.
* page_number is 0-based but will appear in the output as 1-based.
* monitor can be used to cancel the recognition and to receive
* progress callbacks.
* Returned string must be freed with the delete [] operator.
*/
char *GetHOCRText(ETEXT_DESC *monitor, int page_number);
/**
* Make a HTML-formatted string with hOCR markup from the internal
* data structures.
* page_number is 0-based but will appear in the output as 1-based.
* Returned string must be freed with the delete [] operator.
*/
char *GetHOCRText(int page_number);
/**
* Make an XML-formatted string with Alto markup from the internal
* data structures.
*/
char *GetAltoText(ETEXT_DESC *monitor, int page_number);
/**
* Make an XML-formatted string with Alto markup from the internal
* data structures.
*/
char *GetAltoText(int page_number);
/**
* Make a TSV-formatted string from the internal data structures.
* page_number is 0-based but will appear in the output as 1-based.
* Returned string must be freed with the delete [] operator.
*/
char *GetTSVText(int page_number);
/**
* Make a box file for LSTM training from the internal data structures.
* Constructs coordinates in the original image - not just the rectangle.
* page_number is a 0-based page index that will appear in the box file.
* Returned string must be freed with the delete [] operator.
*/
char *GetLSTMBoxText(int page_number);
/**
* The recognized text is returned as a char* which is coded in the same
* format as a box file used in training.
* Constructs coordinates in the original image - not just the rectangle.
* page_number is a 0-based page index that will appear in the box file.
* Returned string must be freed with the delete [] operator.
*/
char *GetBoxText(int page_number);
/**
* The recognized text is returned as a char* which is coded in the same
* format as a WordStr box file used in training.
* page_number is a 0-based page index that will appear in the box file.
* Returned string must be freed with the delete [] operator.
*/
char *GetWordStrBoxText(int page_number);
/**
* The recognized text is returned as a char* which is coded
* as UNLV format Latin-1 with specific reject and suspect codes.
* Returned string must be freed with the delete [] operator.
*/
char *GetUNLVText();
/**
* Detect the orientation of the input image and apparent script (alphabet).
* orient_deg is the detected clockwise rotation of the input image in degrees
* (0, 90, 180, 270)
* orient_conf is the confidence (15.0 is reasonably confident)
* script_name is an ASCII string, the name of the script, e.g. "Latin"
* script_conf is confidence level in the script
* Returns true on success and writes values to each parameter as an output
*/
bool DetectOrientationScript(int *orient_deg, float *orient_conf,
const char **script_name, float *script_conf);
/**
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
* page_number is a 0-based page index that will appear in the osd file.
*/
char *GetOsdText(int page_number);
/** Returns the (average) confidence value between 0 and 100. */
int MeanTextConf();
/**
* Returns all word confidences (between 0 and 100) in an array, terminated
* by -1. The calling function must delete [] after use.
* The number of confidences should correspond to the number of space-
* delimited words in GetUTF8Text.
*/
int *AllWordConfidences();
#ifndef DISABLED_LEGACY_ENGINE
/**
* Applies the given word to the adaptive classifier if possible.
* The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can
* tell the boundaries of the graphemes.
* Assumes that SetImage/SetRectangle have been used to set the image
* to the given word. The mode arg should be PSM_SINGLE_WORD or
* PSM_CIRCLE_WORD, as that will be used to control layout analysis.
* The currently set PageSegMode is preserved.
* Returns false if adaption was not possible for some reason.
*/
bool AdaptToWordStr(PageSegMode mode, const char *wordstr);
#endif // ndef DISABLED_LEGACY_ENGINE
/**
* Free up recognition results and any stored image data, without actually
* freeing any recognition data that would be time-consuming to reload.
* Afterwards, you must call SetImage or TesseractRect before doing
* any Recognize or Get* operation.
*/
void Clear();
/**
* Close down tesseract and free up all memory. End() is equivalent to
* destructing and reconstructing your TessBaseAPI.
* Once End() has been used, none of the other API functions may be used
* other than Init and anything declared above it in the class definition.
*/
void End();
/**
* Clear any library-level memory caches.
* There are a variety of expensive-to-load constant data structures (mostly
* language dictionaries) that are cached globally -- surviving the Init()
* and End() of individual TessBaseAPI's. This function allows the clearing
* of these caches.
**/
static void ClearPersistentCache();
/**
* Check whether a word is valid according to Tesseract's language model
* @return 0 if the word is invalid, non-zero if valid.
* @warning temporary! This function will be removed from here and placed
* in a separate API at some future time.
*/
int IsValidWord(const char *word) const;
// Returns true if utf8_character is defined in the UniCharset.
bool IsValidCharacter(const char *utf8_character) const;
bool GetTextDirection(int *out_offset, float *out_slope);
/** Sets Dict::letter_is_okay_ function to point to the given function. */
void SetDictFunc(DictFunc f);
/** Sets Dict::probability_in_context_ function to point to the given
* function.
*/
void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
/**
* Estimates the Orientation And Script of the image.
* @return true if the image was processed successfully.
*/
bool DetectOS(OSResults *);
/**
* Return text orientation of each block as determined by an earlier run
* of layout analysis.
*/
void GetBlockTextOrientations(int **block_orientation,
bool **vertical_writing);
/** This method returns the string form of the specified unichar. */
const char *GetUnichar(int unichar_id) const;
/** Return the pointer to the i-th dawg loaded into tesseract_ object. */
const Dawg *GetDawg(int i) const;
/** Return the number of dawgs loaded into tesseract_ object. */
int NumDawgs() const;
/** Returns the tesseract_ member (the underlying data object). */
Tesseract *tesseract() const {
return tesseract_;
}
/** Returns last_oem_requested_, the last OCR engine mode requested. */
OcrEngineMode oem() const {
return last_oem_requested_;
}
void set_min_orientation_margin(double margin);
/* @} */
protected:
/** Common code for setting the image. Returns true if Init has been called.
*/
bool InternalSetImage();
/**
* Run the thresholder to make the thresholded image. If pix is not nullptr,
* the source is thresholded to pix instead of the internal IMAGE.
*/
virtual bool Threshold(Pix **pix);
/**
* Find lines from the image making the BLOCK_LIST.
* @return 0 on success.
*/
int FindLines();
/** Delete the pageres and block list ready for a new page. */
void ClearResults();
/**
* Return an LTR Result Iterator -- used only for training, as we really want
* to ignore all BiDi smarts at that point.
* delete once you're done with it.
*/
LTRResultIterator *GetLTRIterator();
/**
* Return the length of the output text string, as UTF8, assuming
* one newline per line and one per block, with a terminator,
* and assuming a single character reject marker for each rejected character.
* Also return the number of recognized blobs in blob_count.
*/
int TextLength(int *blob_count) const;
//// paragraphs.cpp ////////////////////////////////////////////////////
void DetectParagraphs(bool after_text_recognition);
/** Returns page_res_, the page-level data (read-only access). */
const PAGE_RES *GetPageRes() const {
return page_res_;
}
protected:
Tesseract *tesseract_; ///< The underlying data object.
Tesseract *osd_tesseract_; ///< For orientation & script detection.
EquationDetect *equ_detect_; ///< The equation detector.
FileReader reader_; ///< Reads files from any filesystem.
ImageThresholder *thresholder_; ///< Image thresholding module.
std::vector<ParagraphModel *> *paragraph_models_;
BLOCK_LIST *block_list_; ///< The page layout.
PAGE_RES *page_res_; ///< The page-level data.
std::string input_file_; ///< Name used by training code.
std::string output_file_; ///< Name used by debug code.
std::string datapath_; ///< Current location of tessdata.
std::string language_; ///< Last initialized language.
OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested.
bool recognition_done_; ///< page_res_ contains recognition data.
/**
* @defgroup ThresholderParams Thresholder Parameters
* Parameters saved from the Thresholder. Needed to rebuild coordinates.
*/
/* @{ */
int rect_left_;
int rect_top_;
int rect_width_;
int rect_height_;
int image_width_;
int image_height_;
/* @} */
private:
// A list of image filenames gets special consideration
bool ProcessPagesFileList(FILE *fp, std::string *buf,
const char *retry_config, int timeout_millisec,
TessResultRenderer *renderer,
int tessedit_page_number);
// TIFF supports multipage so gets special consideration.
bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size,
const char *filename, const char *retry_config,
int timeout_millisec,
TessResultRenderer *renderer,
int tessedit_page_number);
}; // class TessBaseAPI.
/** Escape a char string - replace &<>"' with their HTML codes. */
std::string HOcrEscape(const char *text);
} // namespace tesseract
#endif // TESSERACT_API_BASEAPI_H_

View File

@ -1,484 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: capi.h
// Description: C-API TessBaseAPI
//
// (C) Copyright 2012, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef API_CAPI_H_
#define API_CAPI_H_
#include "export.h"
#ifdef __cplusplus
# include <tesseract/baseapi.h>
# include <tesseract/ocrclass.h>
# include <tesseract/pageiterator.h>
# include <tesseract/renderer.h>
# include <tesseract/resultiterator.h>
#endif
#include <stdbool.h>
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
#endif
/* C-compatible boolean used by the declarations in this header. The three
 * macros are provided together, and only when BOOL itself has not been
 * defined yet (TRUE and FALSE are not tested separately). */
#if !defined(BOOL)
#  define FALSE 0
#  define TRUE 1
#  define BOOL int
#endif
#ifdef __cplusplus
// C++ builds: every C-API type name is a plain alias for the corresponding
// type in namespace tesseract. The same names are declared separately for
// plain C in the #else branch that follows.
typedef tesseract::TessResultRenderer TessResultRenderer;
typedef tesseract::TessBaseAPI TessBaseAPI;
typedef tesseract::PageIterator TessPageIterator;
typedef tesseract::ResultIterator TessResultIterator;
typedef tesseract::MutableIterator TessMutableIterator;
typedef tesseract::ChoiceIterator TessChoiceIterator;
// Enumerations.
typedef tesseract::OcrEngineMode TessOcrEngineMode;
typedef tesseract::PageSegMode TessPageSegMode;
typedef tesseract::PageIteratorLevel TessPageIteratorLevel;
typedef tesseract::Orientation TessOrientation;
typedef tesseract::ParagraphJustification TessParagraphJustification;
typedef tesseract::WritingDirection TessWritingDirection;
typedef tesseract::TextlineOrder TessTextlineOrder;
typedef tesseract::PolyBlockType TessPolyBlockType;
// Progress monitor type; not given a Tess prefix.
typedef tesseract::ETEXT_DESC ETEXT_DESC;
#else
/* Plain C builds: the API object types are only declared in this header,
 * never defined, so C code can pass pointers to them around but cannot
 * access their contents. */
typedef struct TessResultRenderer TessResultRenderer;
typedef struct TessBaseAPI TessBaseAPI;
typedef struct TessPageIterator TessPageIterator;
typedef struct TessResultIterator TessResultIterator;
typedef struct TessMutableIterator TessMutableIterator;
typedef struct TessChoiceIterator TessChoiceIterator;
/* C declaration of the engine-mode enum; C++ builds alias this name to
 * tesseract::OcrEngineMode instead. The values are written out explicitly
 * and are the same ones implicit numbering produces. */
typedef enum TessOcrEngineMode {
  OEM_TESSERACT_ONLY = 0,
  OEM_LSTM_ONLY = 1,
  OEM_TESSERACT_LSTM_COMBINED = 2,
  OEM_DEFAULT = 3
} TessOcrEngineMode;
/* C declaration of the page-segmentation-mode enum; C++ builds alias this
 * name to tesseract::PageSegMode instead. The values are written out
 * explicitly and are the same ones implicit numbering produces. PSM_COUNT
 * comes last and therefore equals the number of modes listed before it. */
typedef enum TessPageSegMode {
  PSM_OSD_ONLY = 0,
  PSM_AUTO_OSD = 1,
  PSM_AUTO_ONLY = 2,
  PSM_AUTO = 3,
  PSM_SINGLE_COLUMN = 4,
  PSM_SINGLE_BLOCK_VERT_TEXT = 5,
  PSM_SINGLE_BLOCK = 6,
  PSM_SINGLE_LINE = 7,
  PSM_SINGLE_WORD = 8,
  PSM_CIRCLE_WORD = 9,
  PSM_SINGLE_CHAR = 10,
  PSM_SPARSE_TEXT = 11,
  PSM_SPARSE_TEXT_OSD = 12,
  PSM_RAW_LINE = 13,
  PSM_COUNT = 14
} TessPageSegMode;
/* C declaration of the iterator-level enum; C++ builds alias this name to
 * tesseract::PageIteratorLevel instead. The values are written out
 * explicitly and are the same ones implicit numbering produces. */
typedef enum TessPageIteratorLevel {
  RIL_BLOCK = 0,
  RIL_PARA = 1,
  RIL_TEXTLINE = 2,
  RIL_WORD = 3,
  RIL_SYMBOL = 4
} TessPageIteratorLevel;
/* C declaration of the block-type enum; C++ builds alias this name to
 * tesseract::PolyBlockType instead. The values are written out explicitly
 * and are the same ones implicit numbering produces. PT_COUNT comes last and
 * therefore equals the number of types listed before it. */
typedef enum TessPolyBlockType {
  PT_UNKNOWN = 0,
  PT_FLOWING_TEXT = 1,
  PT_HEADING_TEXT = 2,
  PT_PULLOUT_TEXT = 3,
  PT_EQUATION = 4,
  PT_INLINE_EQUATION = 5,
  PT_TABLE = 6,
  PT_VERTICAL_TEXT = 7,
  PT_CAPTION_TEXT = 8,
  PT_FLOWING_IMAGE = 9,
  PT_HEADING_IMAGE = 10,
  PT_PULLOUT_IMAGE = 11,
  PT_HORZ_LINE = 12,
  PT_VERT_LINE = 13,
  PT_NOISE = 14,
  PT_COUNT = 15
} TessPolyBlockType;
/* C declaration of the page-orientation enum; C++ builds alias this name to
 * tesseract::Orientation instead. The values are written out explicitly and
 * are the same ones implicit numbering produces. */
typedef enum TessOrientation {
  ORIENTATION_PAGE_UP = 0,
  ORIENTATION_PAGE_RIGHT = 1,
  ORIENTATION_PAGE_DOWN = 2,
  ORIENTATION_PAGE_LEFT = 3
} TessOrientation;
/* C declaration of the paragraph-justification enum; C++ builds alias this
 * name to tesseract::ParagraphJustification instead. The values are written
 * out explicitly and are the same ones implicit numbering produces. */
typedef enum TessParagraphJustification {
  JUSTIFICATION_UNKNOWN = 0,
  JUSTIFICATION_LEFT = 1,
  JUSTIFICATION_CENTER = 2,
  JUSTIFICATION_RIGHT = 3
} TessParagraphJustification;
/* C declaration of the writing-direction enum; C++ builds alias this name to
 * tesseract::WritingDirection instead. The values are written out explicitly
 * and are the same ones implicit numbering produces. */
typedef enum TessWritingDirection {
  WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
  WRITING_DIRECTION_TOP_TO_BOTTOM = 2
} TessWritingDirection;
/* C declaration of the textline-order enum; C++ builds alias this name to
 * tesseract::TextlineOrder instead. The values are written out explicitly
 * and are the same ones implicit numbering produces. */
typedef enum TessTextlineOrder {
  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2
} TessTextlineOrder;
typedef struct ETEXT_DESC ETEXT_DESC;
#endif
/* Cancel callback type, installed with TessMonitorSetCancelFunc.
 * NOTE(review): ocrclass.h describes the cancel function as being called
 * with the number of user words found, with a true return cancelling the
 * operation; cancel_this is presumably the pointer registered through
 * TessMonitorSetCancelThis -- confirm. */
typedef bool (*TessCancelFunc)(void *cancel_this, int words);
/* Progress callback type, installed with TessMonitorSetProgressFunc. It has
 * the same parameter types as PROGRESS_FUNC2 in ocrclass.h. Note that the
 * coordinate arguments are ordered left, right, top, bottom. */
typedef bool (*TessProgressFunc)(ETEXT_DESC *ths, int left, int right, int top,
int bottom);
struct Pix;
struct Boxa;
struct Pixa;
/* General free functions */
TESS_API const char *TessVersion();
TESS_API void TessDeleteText(const char *text);
TESS_API void TessDeleteTextArray(char **arr);
TESS_API void TessDeleteIntArray(const int *arr);
/* Renderer API */
TESS_API TessResultRenderer *TessTextRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessHOcrRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessHOcrRendererCreate2(const char *outputbase,
BOOL font_info);
TESS_API TessResultRenderer *TessAltoRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessTsvRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessPDFRendererCreate(const char *outputbase,
const char *datadir,
BOOL textonly);
TESS_API TessResultRenderer *TessUnlvRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessBoxTextRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessLSTMBoxRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessWordStrBoxRendererCreate(
const char *outputbase);
TESS_API void TessDeleteResultRenderer(TessResultRenderer *renderer);
TESS_API void TessResultRendererInsert(TessResultRenderer *renderer,
TessResultRenderer *next);
TESS_API TessResultRenderer *TessResultRendererNext(
TessResultRenderer *renderer);
TESS_API BOOL TessResultRendererBeginDocument(TessResultRenderer *renderer,
const char *title);
TESS_API BOOL TessResultRendererAddImage(TessResultRenderer *renderer,
TessBaseAPI *api);
TESS_API BOOL TessResultRendererEndDocument(TessResultRenderer *renderer);
TESS_API const char *TessResultRendererExtention(TessResultRenderer *renderer);
TESS_API const char *TessResultRendererTitle(TessResultRenderer *renderer);
TESS_API int TessResultRendererImageNum(TessResultRenderer *renderer);
/* Base API */
TESS_API TessBaseAPI *TessBaseAPICreate();
TESS_API void TessBaseAPIDelete(TessBaseAPI *handle);
TESS_API size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI *handle, void **device);
TESS_API void TessBaseAPISetInputName(TessBaseAPI *handle, const char *name);
TESS_API const char *TessBaseAPIGetInputName(TessBaseAPI *handle);
TESS_API void TessBaseAPISetInputImage(TessBaseAPI *handle, struct Pix *pix);
TESS_API struct Pix *TessBaseAPIGetInputImage(TessBaseAPI *handle);
TESS_API int TessBaseAPIGetSourceYResolution(TessBaseAPI *handle);
TESS_API const char *TessBaseAPIGetDatapath(TessBaseAPI *handle);
TESS_API void TessBaseAPISetOutputName(TessBaseAPI *handle, const char *name);
TESS_API BOOL TessBaseAPISetVariable(TessBaseAPI *handle, const char *name,
const char *value);
TESS_API BOOL TessBaseAPISetDebugVariable(TessBaseAPI *handle, const char *name,
const char *value);
TESS_API BOOL TessBaseAPIGetIntVariable(const TessBaseAPI *handle,
const char *name, int *value);
TESS_API BOOL TessBaseAPIGetBoolVariable(const TessBaseAPI *handle,
const char *name, BOOL *value);
TESS_API BOOL TessBaseAPIGetDoubleVariable(const TessBaseAPI *handle,
const char *name, double *value);
TESS_API const char *TessBaseAPIGetStringVariable(const TessBaseAPI *handle,
const char *name);
TESS_API void TessBaseAPIPrintVariables(const TessBaseAPI *handle, FILE *fp);
TESS_API BOOL TessBaseAPIPrintVariablesToFile(const TessBaseAPI *handle,
const char *filename);
TESS_API int TessBaseAPIInit1(TessBaseAPI *handle, const char *datapath,
const char *language, TessOcrEngineMode oem,
char **configs, int configs_size);
TESS_API int TessBaseAPIInit2(TessBaseAPI *handle, const char *datapath,
const char *language, TessOcrEngineMode oem);
TESS_API int TessBaseAPIInit3(TessBaseAPI *handle, const char *datapath,
const char *language);
TESS_API int TessBaseAPIInit4(TessBaseAPI *handle, const char *datapath,
const char *language, TessOcrEngineMode mode,
char **configs, int configs_size, char **vars_vec,
char **vars_values, size_t vars_vec_size,
BOOL set_only_non_debug_params);
TESS_API int TessBaseAPIInit5(TessBaseAPI *handle, const char *data, int data_size,
const char *language, TessOcrEngineMode mode,
char **configs, int configs_size, char **vars_vec,
char **vars_values, size_t vars_vec_size,
BOOL set_only_non_debug_params);
TESS_API const char *TessBaseAPIGetInitLanguagesAsString(
const TessBaseAPI *handle);
TESS_API char **TessBaseAPIGetLoadedLanguagesAsVector(
const TessBaseAPI *handle);
TESS_API char **TessBaseAPIGetAvailableLanguagesAsVector(
const TessBaseAPI *handle);
TESS_API void TessBaseAPIInitForAnalysePage(TessBaseAPI *handle);
TESS_API void TessBaseAPIReadConfigFile(TessBaseAPI *handle,
const char *filename);
TESS_API void TessBaseAPIReadDebugConfigFile(TessBaseAPI *handle,
const char *filename);
TESS_API void TessBaseAPISetPageSegMode(TessBaseAPI *handle,
TessPageSegMode mode);
TESS_API TessPageSegMode TessBaseAPIGetPageSegMode(const TessBaseAPI *handle);
TESS_API char *TessBaseAPIRect(TessBaseAPI *handle,
const unsigned char *imagedata,
int bytes_per_pixel, int bytes_per_line,
int left, int top, int width, int height);
TESS_API void TessBaseAPIClearAdaptiveClassifier(TessBaseAPI *handle);
TESS_API void TessBaseAPISetImage(TessBaseAPI *handle,
const unsigned char *imagedata, int width,
int height, int bytes_per_pixel,
int bytes_per_line);
TESS_API void TessBaseAPISetImage2(TessBaseAPI *handle, struct Pix *pix);
TESS_API void TessBaseAPISetSourceResolution(TessBaseAPI *handle, int ppi);
TESS_API void TessBaseAPISetRectangle(TessBaseAPI *handle, int left, int top,
int width, int height);
TESS_API struct Pix *TessBaseAPIGetThresholdedImage(TessBaseAPI *handle);
TESS_API struct Boxa *TessBaseAPIGetRegions(TessBaseAPI *handle,
struct Pixa **pixa);
TESS_API struct Boxa *TessBaseAPIGetTextlines(TessBaseAPI *handle,
struct Pixa **pixa,
int **blockids);
TESS_API struct Boxa *TessBaseAPIGetTextlines1(TessBaseAPI *handle,
BOOL raw_image, int raw_padding,
struct Pixa **pixa,
int **blockids, int **paraids);
TESS_API struct Boxa *TessBaseAPIGetStrips(TessBaseAPI *handle,
struct Pixa **pixa, int **blockids);
TESS_API struct Boxa *TessBaseAPIGetWords(TessBaseAPI *handle,
struct Pixa **pixa);
TESS_API struct Boxa *TessBaseAPIGetConnectedComponents(TessBaseAPI *handle,
struct Pixa **cc);
TESS_API struct Boxa *TessBaseAPIGetComponentImages(TessBaseAPI *handle,
TessPageIteratorLevel level,
BOOL text_only,
struct Pixa **pixa,
int **blockids);
TESS_API struct Boxa *TessBaseAPIGetComponentImages1(
TessBaseAPI *handle, TessPageIteratorLevel level, BOOL text_only,
BOOL raw_image, int raw_padding, struct Pixa **pixa, int **blockids,
int **paraids);
TESS_API int TessBaseAPIGetThresholdedImageScaleFactor(
const TessBaseAPI *handle);
TESS_API TessPageIterator *TessBaseAPIAnalyseLayout(TessBaseAPI *handle);
TESS_API int TessBaseAPIRecognize(TessBaseAPI *handle, ETEXT_DESC *monitor);
TESS_API BOOL TessBaseAPIProcessPages(TessBaseAPI *handle, const char *filename,
const char *retry_config,
int timeout_millisec,
TessResultRenderer *renderer);
TESS_API BOOL TessBaseAPIProcessPage(TessBaseAPI *handle, struct Pix *pix,
int page_index, const char *filename,
const char *retry_config,
int timeout_millisec,
TessResultRenderer *renderer);
TESS_API TessResultIterator *TessBaseAPIGetIterator(TessBaseAPI *handle);
TESS_API TessMutableIterator *TessBaseAPIGetMutableIterator(
TessBaseAPI *handle);
TESS_API char *TessBaseAPIGetUTF8Text(TessBaseAPI *handle);
TESS_API char *TessBaseAPIGetHOCRText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetAltoText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetTsvText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetBoxText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetLSTMBoxText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetWordStrBoxText(TessBaseAPI *handle,
int page_number);
TESS_API char *TessBaseAPIGetUNLVText(TessBaseAPI *handle);
TESS_API int TessBaseAPIMeanTextConf(TessBaseAPI *handle);
TESS_API int *TessBaseAPIAllWordConfidences(TessBaseAPI *handle);
#ifndef DISABLED_LEGACY_ENGINE
TESS_API BOOL TessBaseAPIAdaptToWordStr(TessBaseAPI *handle,
TessPageSegMode mode,
const char *wordstr);
#endif // #ifndef DISABLED_LEGACY_ENGINE
TESS_API void TessBaseAPIClear(TessBaseAPI *handle);
TESS_API void TessBaseAPIEnd(TessBaseAPI *handle);
TESS_API int TessBaseAPIIsValidWord(TessBaseAPI *handle, const char *word);
TESS_API BOOL TessBaseAPIGetTextDirection(TessBaseAPI *handle, int *out_offset,
float *out_slope);
TESS_API const char *TessBaseAPIGetUnichar(TessBaseAPI *handle, int unichar_id);
TESS_API void TessBaseAPIClearPersistentCache(TessBaseAPI *handle);
#ifndef DISABLED_LEGACY_ENGINE
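// Call TessDeleteText(*script_name) to free memory allocated by this
// function.
// NOTE(review): confirm against the implementation that *script_name really
// is heap-allocated by this call before freeing it.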
TESS_API BOOL TessBaseAPIDetectOrientationScript(TessBaseAPI *handle,
int *orient_deg,
float *orient_conf,
const char **script_name,
float *script_conf);
#endif // #ifndef DISABLED_LEGACY_ENGINE
TESS_API void TessBaseAPISetMinOrientationMargin(TessBaseAPI *handle,
double margin);
TESS_API int TessBaseAPINumDawgs(const TessBaseAPI *handle);
TESS_API TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI *handle);
TESS_API void TessBaseGetBlockTextOrientations(TessBaseAPI *handle,
int **block_orientation,
bool **vertical_writing);
/* Page iterator */
TESS_API void TessPageIteratorDelete(TessPageIterator *handle);
TESS_API TessPageIterator *TessPageIteratorCopy(const TessPageIterator *handle);
TESS_API void TessPageIteratorBegin(TessPageIterator *handle);
TESS_API BOOL TessPageIteratorNext(TessPageIterator *handle,
TessPageIteratorLevel level);
TESS_API BOOL TessPageIteratorIsAtBeginningOf(const TessPageIterator *handle,
TessPageIteratorLevel level);
TESS_API BOOL TessPageIteratorIsAtFinalElement(const TessPageIterator *handle,
TessPageIteratorLevel level,
TessPageIteratorLevel element);
TESS_API BOOL TessPageIteratorBoundingBox(const TessPageIterator *handle,
TessPageIteratorLevel level,
int *left, int *top, int *right,
int *bottom);
TESS_API TessPolyBlockType
TessPageIteratorBlockType(const TessPageIterator *handle);
TESS_API struct Pix *TessPageIteratorGetBinaryImage(
const TessPageIterator *handle, TessPageIteratorLevel level);
TESS_API struct Pix *TessPageIteratorGetImage(const TessPageIterator *handle,
TessPageIteratorLevel level,
int padding,
struct Pix *original_image,
int *left, int *top);
TESS_API BOOL TessPageIteratorBaseline(const TessPageIterator *handle,
TessPageIteratorLevel level, int *x1,
int *y1, int *x2, int *y2);
TESS_API void TessPageIteratorOrientation(
TessPageIterator *handle, TessOrientation *orientation,
TessWritingDirection *writing_direction, TessTextlineOrder *textline_order,
float *deskew_angle);
TESS_API void TessPageIteratorParagraphInfo(
TessPageIterator *handle, TessParagraphJustification *justification,
BOOL *is_list_item, BOOL *is_crown, int *first_line_indent);
/* Result iterator */
TESS_API void TessResultIteratorDelete(TessResultIterator *handle);
TESS_API TessResultIterator *TessResultIteratorCopy(
const TessResultIterator *handle);
TESS_API TessPageIterator *TessResultIteratorGetPageIterator(
TessResultIterator *handle);
TESS_API const TessPageIterator *TessResultIteratorGetPageIteratorConst(
const TessResultIterator *handle);
TESS_API TessChoiceIterator *TessResultIteratorGetChoiceIterator(
const TessResultIterator *handle);
TESS_API BOOL TessResultIteratorNext(TessResultIterator *handle,
TessPageIteratorLevel level);
TESS_API char *TessResultIteratorGetUTF8Text(const TessResultIterator *handle,
TessPageIteratorLevel level);
TESS_API float TessResultIteratorConfidence(const TessResultIterator *handle,
TessPageIteratorLevel level);
TESS_API const char *TessResultIteratorWordRecognitionLanguage(
const TessResultIterator *handle);
TESS_API const char *TessResultIteratorWordFontAttributes(
const TessResultIterator *handle, BOOL *is_bold, BOOL *is_italic,
BOOL *is_underlined, BOOL *is_monospace, BOOL *is_serif, BOOL *is_smallcaps,
int *pointsize, int *font_id);
TESS_API BOOL
TessResultIteratorWordIsFromDictionary(const TessResultIterator *handle);
TESS_API BOOL TessResultIteratorWordIsNumeric(const TessResultIterator *handle);
TESS_API BOOL
TessResultIteratorSymbolIsSuperscript(const TessResultIterator *handle);
TESS_API BOOL
TessResultIteratorSymbolIsSubscript(const TessResultIterator *handle);
TESS_API BOOL
TessResultIteratorSymbolIsDropcap(const TessResultIterator *handle);
TESS_API void TessChoiceIteratorDelete(TessChoiceIterator *handle);
TESS_API BOOL TessChoiceIteratorNext(TessChoiceIterator *handle);
TESS_API const char *TessChoiceIteratorGetUTF8Text(
const TessChoiceIterator *handle);
TESS_API float TessChoiceIteratorConfidence(const TessChoiceIterator *handle);
/* Progress monitor */
TESS_API ETEXT_DESC *TessMonitorCreate();
TESS_API void TessMonitorDelete(ETEXT_DESC *monitor);
TESS_API void TessMonitorSetCancelFunc(ETEXT_DESC *monitor,
TessCancelFunc cancelFunc);
TESS_API void TessMonitorSetCancelThis(ETEXT_DESC *monitor, void *cancelThis);
TESS_API void *TessMonitorGetCancelThis(ETEXT_DESC *monitor);
TESS_API void TessMonitorSetProgressFunc(ETEXT_DESC *monitor,
TessProgressFunc progressFunc);
TESS_API int TessMonitorGetProgress(ETEXT_DESC *monitor);
TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC *monitor, int deadline);
#ifdef __cplusplus
}
#endif
#endif // API_CAPI_H_

View File

@ -1,37 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: export.h
// Description: Place holder
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_PLATFORM_H_
#define TESSERACT_PLATFORM_H_
// TESS_API decorates the public declarations. A definition supplied by the
// build (already defined before this point) is left untouched. Otherwise:
//   - not Windows/Cygwin: default ELF visibility when either TESS_EXPORTS or
//     TESS_IMPORTS is defined, nothing when neither is;
//   - Windows/Cygwin: dllexport for TESS_EXPORTS, else dllimport for
//     TESS_IMPORTS, else nothing.
#if !defined(TESS_API)
#  if !defined(_WIN32) && !defined(__CYGWIN__)
#    if !defined(TESS_EXPORTS) && !defined(TESS_IMPORTS)
#      define TESS_API
#    else
#      define TESS_API __attribute__((visibility("default")))
#    endif
#  elif defined(TESS_EXPORTS)
#    define TESS_API __declspec(dllexport)
#  elif defined(TESS_IMPORTS)
#    define TESS_API __declspec(dllimport)
#  else
#    define TESS_API
#  endif
#endif
#endif // TESSERACT_PLATFORM_H_

View File

@ -1,235 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: ltrresultiterator.h
// Description: Iterator for tesseract results in strict left-to-right
// order that avoids using tesseract internal data structures.
// Author: Ray Smith
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
#define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
#include "export.h" // for TESS_API
#include "pageiterator.h" // for PageIterator
#include "publictypes.h" // for PageIteratorLevel
#include "unichar.h" // for StrongScriptDirection
namespace tesseract {
class BLOB_CHOICE_IT;
class PAGE_RES;
class WERD_RES;
class Tesseract;
// Class to iterate over tesseract results, providing access to all levels
// of the page hierarchy, without including any tesseract headers or having
// to handle any tesseract structures.
// WARNING! This class points to data held within the TessBaseAPI class, and
// therefore can only be used while the TessBaseAPI class still exists and
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End
// DetectOS, or anything else that changes the internal PAGE_RES.
// See tesseract/publictypes.h for the definition of PageIteratorLevel.
// See also base class PageIterator, which contains the bulk of the interface.
// LTRResultIterator adds text-specific methods for access to OCR output.
// Text-specific extension of PageIterator: adds accessors for the recognized
// text, confidences, and word- and symbol-level attributes.
class TESS_API LTRResultIterator : public PageIterator {
// ChoiceIterator is constructed from an LTRResultIterator (see its
// constructor), hence the friendship.
friend class ChoiceIterator;
public:
// page_res and tesseract come directly from the BaseAPI.
// The rectangle parameters are copied indirectly from the Thresholder,
// via the BaseAPI. They represent the coordinates of some rectangle in an
// original image (in top-left-origin coordinates) and therefore the top-left
// needs to be added to any output boxes in order to specify coordinates
// in the original image. See TessBaseAPI::SetRectangle.
// The scale and scaled_yres are in case the Thresholder scaled the image
// rectangle prior to thresholding. Any coordinates in tesseract's image
// must be divided by scale before adding (rect_left, rect_top).
// The scaled_yres indicates the effective resolution of the binary image
// that tesseract has been given by the Thresholder.
// After the constructor, Begin has already been called.
LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
int scaled_yres, int rect_left, int rect_top,
int rect_width, int rect_height);
~LTRResultIterator() override;
// LTRResultIterators may be copied! This makes it possible to iterate over
// all the objects at a lower level, while maintaining an iterator to
// objects at a higher level. These constructors DO NOT CALL Begin, so
// iterations will continue from the location of src.
// TODO: For now the copy constructor and operator= only need the base class
// versions, but if new data members are added, don't forget to add them!
// ============= Moving around within the page ============.
// See PageIterator.
// ============= Accessing data ==============.
// Returns the null terminated UTF-8 encoded text string for the current
// object at the given level. Use delete [] to free after use.
char *GetUTF8Text(PageIteratorLevel level) const;
// Set the string inserted at the end of each text line. "\n" by default.
void SetLineSeparator(const char *new_line);
// Set the string inserted at the end of each paragraph. "\n" by default.
void SetParagraphSeparator(const char *new_para);
// Returns the mean confidence of the current object at the given level.
// The number should be interpreted as a percent probability. (0.0f-100.0f)
float Confidence(PageIteratorLevel level) const;
// ============= Functions that refer to words only ============.
// Returns the font attributes of the current word. If iterating at a higher
// level object than words, eg textlines, then this will return the
// attributes of the first word in that textline.
// The actual return value is a string representing a font name. It points
// to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as
// the iterator itself, ie rendered invalid by various members of
// TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
// Pointsize is returned in printers points (1/72 inch.)
const char *WordFontAttributes(bool *is_bold, bool *is_italic,
bool *is_underlined, bool *is_monospace,
bool *is_serif, bool *is_smallcaps,
int *pointsize, int *font_id) const;
// Return the name of the language used to recognize this word.
// On error, nullptr. Do not delete this pointer.
const char *WordRecognitionLanguage() const;
// Return the overall directionality of this word.
StrongScriptDirection WordDirection() const;
// Returns true if the current word was found in a dictionary.
bool WordIsFromDictionary() const;
// Returns the number of blanks before the current word.
int BlanksBeforeWord() const;
// Returns true if the current word is numeric.
bool WordIsNumeric() const;
// Returns true if the word contains blamer information.
bool HasBlamerInfo() const;
// Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
// of the current word.
const void *GetParamsTrainingBundle() const;
// Returns a pointer to the string with blamer information for this word.
// Assumes that the word's blamer_bundle is not nullptr.
const char *GetBlamerDebug() const;
// Returns a pointer to the string with misadaption information for this word.
// Assumes that the word's blamer_bundle is not nullptr.
const char *GetBlamerMisadaptionDebug() const;
// Returns true if a truth string was recorded for the current word.
bool HasTruthString() const;
// Returns true if the given string is equivalent to the truth string for
// the current word.
bool EquivalentToTruth(const char *str) const;
// Returns a null terminated UTF-8 encoded truth string for the current word.
// Use delete [] to free after use.
char *WordTruthUTF8Text() const;
// Returns a null terminated UTF-8 encoded normalized OCR string for the
// current word. Use delete [] to free after use.
char *WordNormedUTF8Text() const;
// Returns a pointer to serialized choice lattice.
// Fills lattice_size with the number of bytes in lattice data.
// NOTE(review): ownership of the returned buffer is not stated here --
// confirm before freeing it.
const char *WordLattice(int *lattice_size) const;
// ============= Functions that refer to symbols only ============.
// Returns true if the current symbol is a superscript.
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
bool SymbolIsSuperscript() const;
// Returns true if the current symbol is a subscript.
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
bool SymbolIsSubscript() const;
// Returns true if the current symbol is a dropcap.
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
bool SymbolIsDropcap() const;
protected:
// Separator strings; see SetLineSeparator and SetParagraphSeparator.
// NOTE(review): held as raw const char pointers. Whether the iterator copies
// the caller's string or merely keeps the pointer is not visible from this
// declaration -- confirm before passing a temporary.
const char *line_separator_;
const char *paragraph_separator_;
};
// Class to iterate over the classifier choices for a single RIL_SYMBOL.
class TESS_API ChoiceIterator {
public:
// Construction is from a LTRResultIterator that points to the symbol of
// interest. The ChoiceIterator allows a one-shot iteration over the
// choices for this symbol and after that it is useless.
explicit ChoiceIterator(const LTRResultIterator &result_it);
// NOTE(review): a destructor is declared and the class holds raw pointers,
// but no copy constructor or copy assignment is declared here, so the
// implicit member-wise versions apply -- confirm that copying is safe.
~ChoiceIterator();
// Moves to the next choice for the symbol and returns false if there
// are none left.
bool Next();
// ============= Accessing data ==============.
// Returns the null terminated UTF-8 encoded text string for the current
// choice.
// NOTE: Unlike LTRResultIterator::GetUTF8Text, the return points to an
// internal structure and should NOT be delete[]ed to free after use.
const char *GetUTF8Text() const;
// Returns the confidence of the current choice depending on the used language
// data. If only LSTM traineddata is used the value range is 0.0f - 1.0f. All
// choices for one symbol should roughly add up to 1.0f.
// If only traineddata of the legacy engine is used, the number should be
// interpreted as a percent probability. (0.0f-100.0f) In this case
// probabilities won't add up to 100. Each one stands on its own.
float Confidence() const;
// Returns a vector containing all timesteps, which belong to the currently
// selected symbol. A timestep is a vector containing pairs of symbols and
// floating point numbers. The number states the probability for the
// corresponding symbol.
// NOTE(review): ownership of the returned vector is not stated here --
// confirm before deleting it.
std::vector<std::vector<std::pair<const char *, float>>> *Timesteps() const;
private:
// Clears the remaining spaces out of the results and adapts the
// probabilities.
void filterSpaces();
// Pointer to the WERD_RES object owned by the API.
WERD_RES *word_res_;
// Iterator over the blob choices.
BLOB_CHOICE_IT *choice_it_;
// NOTE(review): presumably the (symbol, probability) choices used instead of
// choice_it_ when oemLSTM_ is true, plus the current position within them --
// confirm in the implementation.
std::vector<std::pair<const char *, float>> *LSTM_choices_ = nullptr;
std::vector<std::pair<const char *, float>>::iterator LSTM_choice_it_;
// NOTE(review): presumably the timestep index for the current symbol; its
// meaning is not visible from this declaration -- confirm.
const int *tstep_index_;
// Regulates the rating granularity.
double rating_coefficient_;
// Leading blanks.
int blanks_before_word_;
// True when there is lstm engine related trained data.
bool oemLSTM_;
};
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_

View File

@ -1,158 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
/**********************************************************************
* File: ocrclass.h
* Description: Class definitions and constants for the OCR API.
* Author: Hewlett-Packard Co
*
* (C) Copyright 1996, Hewlett-Packard Co.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
/**********************************************************************
* This file contains typedefs for all the structures used by
* the HP OCR interface.
* The structures are designed to allow them to be used with any
* structure alignment up to 8.
**********************************************************************/
#ifndef CCUTIL_OCRCLASS_H_
#define CCUTIL_OCRCLASS_H_
#include <chrono>
#include <ctime>
namespace tesseract {
/**********************************************************************
* EANYCODE_CHAR
* Description of a single character. The character code is defined by
* the character set of the current font.
* Output text is sent as an array of these structures.
* Spaces and line endings in the output are represented in the
* structures of the surrounding characters. They are not directly
* represented as characters.
* The first character in a word has a positive value of blanks.
* Missing information should be set to the defaults in the comments.
* If word bounds are known, but not character bounds, then the top and
* bottom of each character should be those of the word. The left of the
* first and right of the last char in each word should be set. All other
* lefts and rights should be set to -1.
* If set, the values of right and bottom are left+width and top+height.
* Most of the members come directly from the parameters to ocr_append_char.
* The formatting member uses the enhancement parameter and combines the
* line direction stuff into the top 3 bits.
* The coding is 0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para,
* 5=UL Para, 6=TB char, 7=BT char. API users do not need to know what
* the coding is, only that it is backwards compatible with the previous
* version.
**********************************************************************/
// NOTE(review): the fixed-width integer types used below come from <cstdint>,
// which this header does not include directly (only <chrono> and <ctime>);
// presumably it relies on a transitive include -- confirm.
struct EANYCODE_CHAR { /*single character */
// It should be noted that the format for char_code for version 2.0 and beyond
// is UTF8 which means that ASCII characters will come out as one structure
// but other characters will be returned in two or more instances of this
// structure with a single byte of the UTF8 code in each, but each will have
// the same bounding box. Programs which want to handle languages with
// different character sets will need to handle extended characters
// appropriately, but *all* code needs to be prepared to receive UTF8 coded
// characters for characters such as bullet and fancy quotes.
// The value in parentheses after each member is its default for missing
// information.
uint16_t char_code; /*character itself */
int16_t left; /*of char (-1) */
int16_t right; /*of char (-1) */
int16_t top; /*of char (-1) */
int16_t bottom; /*of char (-1) */
int16_t font_index; /*what font (0) */
uint8_t confidence; /*0=perfect, 100=reject (0/100) */
uint8_t point_size; /*of char, 72=1 inch, (10) */
int8_t blanks; /*no of spaces before this char (1) */
uint8_t formatting; /*char formatting (0) */
};
/**********************************************************************
* ETEXT_DESC
* Description of the output of the OCR engine.
* This structure is used as both a progress monitor and the final
* output header, since it needs to be a valid progress monitor while
* the OCR engine is storing its output to shared memory.
* During progress, all the buffer info is -1.
* Progress starts at 0 and increases to 100 during OCR. No other constraint.
* Additionally the progress callback contains the bounding box of the word that
* is currently being processed.
* Every progress callback, the OCR engine must set ocr_alive to 1.
* The HP side will set ocr_alive to 0. Repeated failure to reset
* to 1 indicates that the OCR engine is dead.
* If the cancel function is not null then it is called with the number of
* user words found. If it returns true then operation is cancelled.
**********************************************************************/
class ETEXT_DESC;
using CANCEL_FUNC = bool (*)(void *, int);
using PROGRESS_FUNC = bool (*)(int, int, int, int, int);
using PROGRESS_FUNC2 = bool (*)(ETEXT_DESC *, int, int, int, int);
class ETEXT_DESC { // output header
public:
int16_t count{0}; /// chars in this buffer(0)
int16_t progress{0}; /// percent complete increasing (0-100)
/** Progress monitor covers word recognition and it does not cover layout
* analysis.
* See Ray comment in https://github.com/tesseract-ocr/tesseract/pull/27 */
int8_t more_to_come{0}; /// true if not last
volatile int8_t ocr_alive{0}; /// ocr sets to 1, HP 0
int8_t err_code{0}; /// for errcode use
CANCEL_FUNC cancel{nullptr}; /// returns true to cancel
PROGRESS_FUNC progress_callback{
nullptr}; /// called whenever progress increases
PROGRESS_FUNC2 progress_callback2; /// monitor-aware progress callback
void *cancel_this{nullptr}; /// this or other data for cancel
std::chrono::steady_clock::time_point end_time;
/// Time to stop. Expected to be set only
/// by call to set_deadline_msecs().
EANYCODE_CHAR text[1]{}; /// character data
ETEXT_DESC() : progress_callback2(&default_progress_func) {
end_time = std::chrono::time_point<std::chrono::steady_clock,
std::chrono::milliseconds>();
}
// Sets the end time to be deadline_msecs milliseconds from now.
void set_deadline_msecs(int32_t deadline_msecs) {
if (deadline_msecs > 0) {
end_time = std::chrono::steady_clock::now() +
std::chrono::milliseconds(deadline_msecs);
}
}
// Returns false if we've not passed the end_time, or have not set a deadline.
bool deadline_exceeded() const {
if (end_time.time_since_epoch() ==
std::chrono::steady_clock::duration::zero()) {
return false;
}
auto now = std::chrono::steady_clock::now();
return (now > end_time);
}
private:
static bool default_progress_func(ETEXT_DESC *ths, int left, int right,
int top, int bottom) {
if (ths->progress_callback != nullptr) {
return (*(ths->progress_callback))(ths->progress, left, right, top,
bottom);
}
return true;
}
};
} // namespace tesseract
#endif // CCUTIL_OCRCLASS_H_

View File

@ -1,139 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: osdetect.h
// Description: Orientation and script detection.
// Author: Samuel Charron
// Ranjith Unnikrishnan
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCMAIN_OSDETECT_H_
#define TESSERACT_CCMAIN_OSDETECT_H_
#include "export.h" // for TESS_API
#include <vector> // for std::vector
namespace tesseract {
class BLOBNBOX;
class BLOBNBOX_CLIST;
class BLOB_CHOICE_LIST;
class TO_BLOCK_LIST;
class UNICHARSET;
class Tesseract;
// Upper bound on the number of scripts: the scripts in ICU (116), plus
// "NULL" (1), plus Japanese and Korean (2), plus Fraktur (1).
constexpr int kMaxNumberOfScripts = 116 + 1 + 2 + 1;
// Holds one orientation id and one script id together with a confidence
// value for each; every field starts at zero.
struct OSBestResult {
  // Kept as an explicit constructor; the zeroing itself is done by the
  // member initialisers below.
  OSBestResult() {}

  int orientation_id = 0;
  int script_id = 0;
  // By their names, s = script and o = orientation confidence (TODO confirm
  // against the implementation file).
  float sconfidence = 0.0f;
  float oconfidence = 0.0f;
};
// Score tables for orientation and script detection, plus the current best
// result. Only the constructor is defined in this header.
struct OSResults {
  // Starts with no unicharset and with every score set to zero.
  OSResults() : unicharset(nullptr) {
    for (float &orientation_score : orientations) {
      orientation_score = 0;
    }
    for (auto &scores_for_orientation : scripts_na) {
      for (float &script_score : scores_for_orientation) {
        script_score = 0;
      }
    }
  }

  // Defined out of line; by its name it refreshes the orientation part of
  // best_result (TODO confirm against the implementation).
  void update_best_orientation();
  // Set the estimate of the orientation to the given id.
  void set_best_orientation(int orientation_id);
  // Update/compute the best estimate of the script, assuming the given
  // orientation id.
  void update_best_script(int orientation_id);
  // Return the index of the script with the highest score for this
  // orientation.
  TESS_API int get_best_script(int orientation_id) const;
  // Accumulate scores from the given OSResults instance and update the best
  // script.
  void accumulate(const OSResults &osr);
  // Print statistics.
  void print_scores(void) const;
  void print_scores(int orientation_id) const;

  // Scores for each orientation id [0,3].
  // Orientation ids [0..3] map to [0, 270, 180, 90] degree orientations of
  // the page respectively, where the values are the amount of clockwise
  // rotation to apply to the page for the text to be upright and readable.
  float orientations[4];
  // Script confidence scores for each of the 4 possible orientations.
  float scripts_na[4][kMaxNumberOfScripts];

  UNICHARSET *unicharset;
  OSBestResult best_result;
};
// Orientation detector interface. Every member function is defined out of
// line, so behaviour described below is taken from names and signatures only.
class OrientationDetector {
public:
// Takes both arguments as raw pointers. NOTE(review): presumably they are
// stored in allowed_scripts_ / osr_ and must outlive this object - confirm.
OrientationDetector(const std::vector<int> *allowed_scripts,
OSResults *results);
// NOTE(review): by name, processes the classifier choices of one blob; the
// meaning of the bool result is not visible in this header.
bool detect_blob(BLOB_CHOICE_LIST *scores);
// NOTE(review): presumably returns an orientation id as used to index
// OSResults::orientations - confirm against the implementation.
int get_orientation();
private:
OSResults *osr_; // raw pointer; ownership is not visible from this header
const std::vector<int> *allowed_scripts_; // raw pointer to a read-only list
};
// Script detector interface. Every member function is defined out of line,
// so behaviour described below is taken from names and signatures only.
class ScriptDetector {
public:
// Takes all three arguments as raw pointers. NOTE(review): presumably they
// are stored in allowed_scripts_ / osr_ / tess_ and must outlive this
// object - confirm.
ScriptDetector(const std::vector<int> *allowed_scripts, OSResults *osr,
tesseract::Tesseract *tess);
// NOTE(review): by name, processes the classifier choices of one blob.
void detect_blob(BLOB_CHOICE_LIST *scores);
// NOTE(review): by name, reports whether detection can stop for the given
// orientation; the stopping criterion is not visible in this header.
bool must_stop(int orientation) const;
private:
OSResults *osr_; // raw pointer; ownership is not visible from this header
// Class-wide strings, only declared here (their values are defined elsewhere).
static const char *korean_script_;
static const char *japanese_script_;
static const char *fraktur_script_;
// Integer ids, one per script named in the member. NOTE(review): presumably
// script ids looked up at construction time - confirm.
int korean_id_;
int japanese_id_;
int katakana_id_;
int hiragana_id_;
int han_id_;
int hangul_id_;
int latin_id_;
int fraktur_id_;
tesseract::Tesseract *tess_; // raw pointer; ownership not visible here
const std::vector<int> *allowed_scripts_; // raw pointer to a read-only list
};
// Free functions of the orientation/script detection module. All of them are
// only declared here; the meaning of their return values is not visible in
// this header.
//
// NOTE(review): by name, runs orientation and script detection for the input
// named by `filename`, reporting through the OSResults argument - confirm.
int orientation_and_script_detection(const char *filename, OSResults *,
tesseract::Tesseract *);
// NOTE(review): by name, the same detection driven from a block list.
int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr,
tesseract::Tesseract *tess);
// NOTE(review): by name, the same detection driven from a blob list and
// restricted by `allowed_scripts` - confirm how that list is interpreted.
int os_detect_blobs(const std::vector<int> *allowed_scripts,
BLOBNBOX_CLIST *blob_list, OSResults *osr,
tesseract::Tesseract *tess);
// NOTE(review): by name, processes a single blob through the supplied
// orientation and script detectors.
bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s,
OSResults *, tesseract::Tesseract *tess);
// Helper method to convert an orientation index to its value in degrees.
// The value represents the amount of clockwise rotation in degrees that must be
// applied for the text to be upright (readable).
TESS_API int OrientationIdToValue(const int &id);
} // namespace tesseract
#endif // TESSERACT_CCMAIN_OSDETECT_H_

View File

@ -1,364 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: pageiterator.h
// Description: Iterator for tesseract page structure that avoids using
// tesseract internal data structures.
// Author: Ray Smith
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_
#define TESSERACT_CCMAIN_PAGEITERATOR_H_
#include "export.h"
#include "publictypes.h"
struct Pix;
struct Pta;
namespace tesseract {
struct BlamerBundle;
class C_BLOB_IT;
class PAGE_RES;
class PAGE_RES_IT;
class WERD;
class Tesseract;
/**
* Class to iterate over tesseract page structure, providing access to all
* levels of the page hierarchy, without including any tesseract headers or
* having to handle any tesseract structures.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
* DetectOS, or anything else that changes the internal PAGE_RES.
* See tesseract/publictypes.h for the definition of PageIteratorLevel.
* See also ResultIterator, derived from PageIterator, which adds in the
* ability to access OCR output with text-specific methods.
*/
class TESS_API PageIterator {
public:
/**
* page_res and tesseract come directly from the BaseAPI.
* The rectangle parameters are copied indirectly from the Thresholder,
* via the BaseAPI. They represent the coordinates of some rectangle in an
* original image (in top-left-origin coordinates) and therefore the top-left
* needs to be added to any output boxes in order to specify coordinates
* in the original image. See TessBaseAPI::SetRectangle.
* The scale and scaled_yres are in case the Thresholder scaled the image
* rectangle prior to thresholding. Any coordinates in tesseract's image
* must be divided by scale before adding (rect_left, rect_top).
* The scaled_yres indicates the effective resolution of the binary image
* that tesseract has been given by the Thresholder.
* After the constructor, Begin has already been called.
*/
PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
int scaled_yres, int rect_left, int rect_top, int rect_width,
int rect_height);
virtual ~PageIterator();
/**
* Page/ResultIterators may be copied! This makes it possible to iterate over
* all the objects at a lower level, while maintaining an iterator to
* objects at a higher level. These constructors DO NOT CALL Begin, so
* iterations will continue from the location of src.
*/
PageIterator(const PageIterator &src);
/** Copy assignment. Note that it returns a reference to const. */
const PageIterator &operator=(const PageIterator &src);
/** Are we positioned at the same location as other? */
bool PositionedAtSameWord(const PAGE_RES_IT *other) const;
// ============= Moving around within the page ============.
/**
* Moves the iterator to point to the start of the page to begin an
* iteration.
*/
virtual void Begin();
/**
* Moves the iterator to the beginning of the paragraph.
* This class implements this functionality by moving it to the zero indexed
* blob of the first (leftmost) word on the first row of the paragraph.
*/
virtual void RestartParagraph();
/**
* Return whether this iterator points anywhere in the first textline of a
* paragraph.
*/
bool IsWithinFirstTextlineOfParagraph() const;
/**
* Moves the iterator to the beginning of the text line.
* This class implements this functionality by moving it to the zero indexed
* blob of the first (leftmost) word of the row.
*/
virtual void RestartRow();
/**
* Moves to the start of the next object at the given level in the
* page hierarchy, and returns false if the end of the page was reached.
* NOTE that RIL_SYMBOL will skip non-text blocks, but all other
* PageIteratorLevel level values will visit each non-text block once.
* Think of non text blocks as containing a single para, with a single line,
* with a single imaginary word.
* Calls to Next with different levels may be freely intermixed.
* This function iterates words in right-to-left scripts correctly, if
* the appropriate language has been loaded into Tesseract.
*/
virtual bool Next(PageIteratorLevel level);
/**
* Returns true if the iterator is at the start of an object at the given
* level.
*
* For instance, suppose an iterator it is pointed to the first symbol of the
* first word of the third line of the second paragraph of the first block in
* a page, then:
* it.IsAtBeginningOf(RIL_BLOCK) = false
* it.IsAtBeginningOf(RIL_PARA) = false
* it.IsAtBeginningOf(RIL_TEXTLINE) = true
* it.IsAtBeginningOf(RIL_WORD) = true
* it.IsAtBeginningOf(RIL_SYMBOL) = true
*/
virtual bool IsAtBeginningOf(PageIteratorLevel level) const;
/**
* Returns whether the iterator is positioned at the last element in a
* given level. (e.g. the last word in a line, the last line in a block)
*
* Here's some two-paragraph example
* text. It starts off innocuously
* enough but quickly turns bizarre.
* The author inserts a cornucopia
* of words to guard against confused
* references.
*
* Now take an iterator it pointed to the start of "bizarre."
* it.IsAtFinalElement(RIL_PARA, RIL_SYMBOL) = false
* it.IsAtFinalElement(RIL_PARA, RIL_WORD) = true
* it.IsAtFinalElement(RIL_BLOCK, RIL_WORD) = false
*/
virtual bool IsAtFinalElement(PageIteratorLevel level,
PageIteratorLevel element) const;
/**
* Returns whether this iterator is positioned
* before other: -1
* equal to other: 0
* after other: 1
*/
int Cmp(const PageIterator &other) const;
// ============= Accessing data ==============.
// Coordinate system:
// Integer coordinates are at the cracks between the pixels.
// The top-left corner of the top-left pixel in the image is at (0,0).
// The bottom-right corner of the bottom-right pixel in the image is at
// (width, height).
// Every bounding box goes from the top-left of the top-left contained
// pixel to the bottom-right of the bottom-right contained pixel, so
// the bounding box of the single top-left pixel in the image is:
// (0,0)->(1,1).
// If an image rectangle has been set in the API, then returned coordinates
// relate to the original (full) image, rather than the rectangle.
/**
* Controls what to include in a bounding box. Bounding boxes of all levels
* between RIL_WORD and RIL_BLOCK can include or exclude potential diacritics.
* Between layout analysis and recognition, it isn't known where all
* diacritics belong, so this control is used to include or exclude some
* diacritics that are above or below the main body of the word. In most cases
* where the placement is obvious, and after recognition, it doesn't make as
* much difference, as the diacritics will already be included in the word.
*/
void SetBoundingBoxComponents(bool include_upper_dots,
bool include_lower_dots) {
include_upper_dots_ = include_upper_dots;
include_lower_dots_ = include_lower_dots;
}
/**
* Returns the bounding rectangle of the current object at the given level.
* See comment on coordinate system above.
* Returns false if there is no such object at the current position.
* The returned bounding box is guaranteed to match the size and position
* of the image returned by GetBinaryImage, but may clip foreground pixels
* from a grey image. The padding argument to GetImage can be used to expand
* the image to include more foreground pixels. See GetImage below.
*/
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right,
int *bottom) const;
/**
* Overload taking an explicit padding argument.
* NOTE(review): presumably the box is grown by `padding` in the same way as
* for GetImage - confirm against the implementation.
*/
bool BoundingBox(PageIteratorLevel level, int padding, int *left, int *top,
int *right, int *bottom) const;
/**
* Returns the bounding rectangle of the object in a coordinate system of the
* working image rectangle having its origin at (rect_left_, rect_top_) with
* respect to the original image and is scaled by a factor scale_.
*/
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top,
int *right, int *bottom) const;
/** Returns whether there is no object of a given level. */
bool Empty(PageIteratorLevel level) const;
/**
* Returns the type of the current block.
* See tesseract/publictypes.h for PolyBlockType.
*/
PolyBlockType BlockType() const;
/**
* Returns the polygon outline of the current block. The returned Pta must
* be ptaDestroy-ed after use. Note that the returned Pta lists the vertices
* of the polygon, and the last edge is the line segment between the last
* point and the first point. nullptr will be returned if the iterator is
* at the end of the document or layout analysis was not used.
*/
Pta *BlockPolygon() const;
/**
* Returns a binary image of the current object at the given level.
* The position and size match the return from BoundingBoxInternal, and so
* this could be upscaled with respect to the original input image.
* Use pixDestroy to delete the image after use.
*/
Pix *GetBinaryImage(PageIteratorLevel level) const;
/**
* Returns an image of the current object at the given level in greyscale
* if available in the input. To guarantee a binary image use GetBinaryImage.
* NOTE that in order to give the best possible image, the bounds are
* expanded slightly over the binary connected component, by the supplied
* padding, so the top-left position of the returned image is returned
* in (left,top). These will most likely not match the coordinates
* returned by BoundingBox.
* If you do not supply an original image, you will get a binary one.
* Use pixDestroy to delete the image after use.
*/
Pix *GetImage(PageIteratorLevel level, int padding, Pix *original_img,
int *left, int *top) const;
/**
* Returns the baseline of the current object at the given level.
* The baseline is the line that passes through (x1, y1) and (x2, y2).
* WARNING: with vertical text, baselines may be vertical!
* Returns false if there is no baseline at the current position.
*/
bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2,
int *y2) const;
// Returns the attributes of the current row.
void RowAttributes(float *row_height, float *descenders,
float *ascenders) const;
/**
* Returns orientation for the block the iterator points to.
* orientation, writing_direction, textline_order: see publictypes.h
* deskew_angle: after rotating the block so the text orientation is
* upright, how many radians does one have to rotate the
* block anti-clockwise for it to be level?
* -Pi/4 <= deskew_angle <= Pi/4
*/
void Orientation(tesseract::Orientation *orientation,
tesseract::WritingDirection *writing_direction,
tesseract::TextlineOrder *textline_order,
float *deskew_angle) const;
/**
* Returns information about the current paragraph, if available.
*
* justification -
* LEFT if ragged right, or fully justified and script is left-to-right.
* RIGHT if ragged left, or fully justified and script is right-to-left.
* unknown if it looks like source code or we have very few lines.
* is_list_item -
* true if we believe this is a member of an ordered or unordered list.
* is_crown -
* true if the first line of the paragraph is aligned with the other
* lines of the paragraph even though subsequent paragraphs have first
* line indents. This typically indicates that this is the continuation
* of a previous paragraph or that it is the very first paragraph in
* the chapter.
* first_line_indent -
* For LEFT aligned paragraphs, the first text line of paragraphs of
* this kind are indented this many pixels from the left edge of the
* rest of the paragraph.
* for RIGHT aligned paragraphs, the first text line of paragraphs of
* this kind are indented this many pixels from the right edge of the
* rest of the paragraph.
* NOTE 1: This value may be negative.
* NOTE 2: if *is_crown == true, the first line of this paragraph is
* actually flush, and first_line_indent is set to the "common"
* first_line_indent for subsequent paragraphs in this block
* of text.
*/
void ParagraphInfo(tesseract::ParagraphJustification *justification,
bool *is_list_item, bool *is_crown,
int *first_line_indent) const;
// If the current WERD_RES (it_->word()) is not nullptr, sets the BlamerBundle
// of the current word to the given pointer (takes ownership of the pointer)
// and returns true.
// Can only be used when iterating on the word level.
bool SetWordBlamerBundle(BlamerBundle *blamer_bundle);
protected:
/**
* Sets up the internal data for iterating the blobs of a new word, then
* moves the iterator to the given offset.
*/
void BeginWord(int offset);
/** Pointer to the page_res owned by the API. */
PAGE_RES *page_res_;
/** Pointer to the Tesseract object owned by the API. */
Tesseract *tesseract_;
/**
* The iterator to the page_res_. Owned by this ResultIterator.
* A pointer just to avoid dragging in Tesseract includes.
* NOTE(review): the text says "ResultIterator" although this member is
* declared in PageIterator - confirm which object releases it.
*/
PAGE_RES_IT *it_;
/**
* The current input WERD being iterated. If there is an output from OCR,
* then word_ is nullptr. Owned by the API
*/
WERD *word_;
/** The length of the current word_. */
int word_length_;
/** The current blob index within the word. */
int blob_index_;
/**
* Iterator to the blobs within the word. If nullptr, then we are iterating
* OCR results in the box_word.
* Owned by this ResultIterator.
* NOTE(review): same "ResultIterator" wording as on it_ - confirm ownership.
*/
C_BLOB_IT *cblob_it_;
/** Control over what to include in bounding boxes. */
bool include_upper_dots_;
bool include_lower_dots_;
/** Parameters saved from the Thresholder. Needed to rebuild coordinates.*/
int scale_;
int scaled_yres_;
int rect_left_;
int rect_top_;
int rect_width_;
int rect_height_;
};
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_PAGEITERATOR_H_

View File

@ -1,281 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: publictypes.h
// Description: Types used in both the API and internally
// Author: Ray Smith
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
#define TESSERACT_CCSTRUCT_PUBLICTYPES_H_
namespace tesseract {
// This file contains types that are used both by the API and internally
// to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
// dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT.
// Restated: It is OK for low-level Tesseract files to include publictypes.h,
// but not for the low-level tesseract code to include top-level API code.
// This file should not use other Tesseract types, as that would drag
// their includes into the API-level.
/** Number of printers' points in an inch. The unit of the pointsize return. */
constexpr int kPointsPerInch = 72;
/**
* Minimum believable resolution. Used as a default if there is no other
* information, as it is safer to under-estimate than over-estimate.
* NOTE(review): the unit is not stated here; presumably pixels per inch -
* confirm against the callers.
*/
constexpr int kMinCredibleResolution = 70;
/** Maximum believable resolution (same unit as kMinCredibleResolution). */
constexpr int kMaxCredibleResolution = 2400;
/**
* Ratio between median blob size and likely resolution. Used to estimate
* resolution when none is provided. This is basically 1/usual text size in
* inches.
*/
constexpr int kResolutionEstimationFactor = 10;
/**
 * Kinds of region that a POLY_BLOCK or ColPartition can be.
 * Keep in sync with kPBColors in polyblk.cpp, with kPolyBlockNames in
 * layout_test.cc and with the PTIs*Type helpers that follow this enum.
 * Used extensively by ColPartition and POLY_BLOCK.
 */
enum PolyBlockType {
  PT_UNKNOWN,          // Type is not yet known. Keep as the first element.
  PT_FLOWING_TEXT,     // Text that lives inside a column.
  PT_HEADING_TEXT,     // Text that spans more than one column.
  PT_PULLOUT_TEXT,     // Text that is in a cross-column pull-out region.
  PT_EQUATION,         // Partition belonging to an equation region.
  PT_INLINE_EQUATION,  // Partition has inline equation.
  PT_TABLE,            // Partition belonging to a table region.
  PT_VERTICAL_TEXT,    // Text-line runs vertically.
  PT_CAPTION_TEXT,     // Text that belongs to an image.
  PT_FLOWING_IMAGE,    // Image that lives inside a column.
  PT_HEADING_IMAGE,    // Image that spans more than one column.
  PT_PULLOUT_IMAGE,    // Image that is in a cross-column pull-out region.
  PT_HORZ_LINE,        // Horizontal Line.
  PT_VERT_LINE,        // Vertical Line.
  PT_NOISE,            // Lies outside of any column.
  PT_COUNT
};

/** Returns true for the two line types, horizontal and vertical. */
inline bool PTIsLineType(PolyBlockType type) {
  switch (type) {
    case PT_HORZ_LINE:
    case PT_VERT_LINE:
      return true;
    default:
      return false;
  }
}

/** Returns true for the three image types. */
inline bool PTIsImageType(PolyBlockType type) {
  switch (type) {
    case PT_FLOWING_IMAGE:
    case PT_HEADING_IMAGE:
    case PT_PULLOUT_IMAGE:
      return true;
    default:
      return false;
  }
}

/**
 * Returns true for the text types. Tables and inline equations count as
 * text here; PT_EQUATION does not.
 */
inline bool PTIsTextType(PolyBlockType type) {
  switch (type) {
    case PT_FLOWING_TEXT:
    case PT_HEADING_TEXT:
    case PT_PULLOUT_TEXT:
    case PT_TABLE:
    case PT_VERTICAL_TEXT:
    case PT_CAPTION_TEXT:
    case PT_INLINE_EQUATION:
      return true;
    default:
      return false;
  }
}

/** Returns true for the pull-out (inter-column) types, image or text. */
inline bool PTIsPulloutType(PolyBlockType type) {
  switch (type) {
    case PT_PULLOUT_IMAGE:
    case PT_PULLOUT_TEXT:
      return true;
    default:
      return false;
  }
}
/**
* +------------------+ Orientation Example:
* | 1 Aaaa Aaaa Aaaa | ====================
* | Aaa aa aaa aa | To left is a diagram of some (1) English and
* | aaaaaa A aa aaa. | (2) Chinese text and a (3) photo credit.
* | 2 |
* | ####### c c C | Upright Latin characters are represented as A and a.
* | ####### c c c | '<' represents a latin character rotated
* | < ####### c c c | anti-clockwise 90 degrees.
* | < ####### c c |
* | < ####### . c | Upright Chinese characters are represented C and c.
* | 3 ####### c |
* +------------------+ NOTA BENE: enum values here should match goodoc.proto
* If you orient your head so that "up" aligns with Orientation,
* then the characters will appear "right side up" and readable.
*
* In the example above, both the English and Chinese paragraphs are oriented
* so their "up" is the top of the page (page up). The photo credit is read
* with one's head turned leftward ("up" is to page left).
*
* The values of this enum match the convention of Tesseract's osdetect.h
*/
// Each enumerator names the page direction that the text's "up" points to.
enum Orientation {
ORIENTATION_PAGE_UP = 0, // text "up" is the top of the page
ORIENTATION_PAGE_RIGHT = 1,
ORIENTATION_PAGE_DOWN = 2,
ORIENTATION_PAGE_LEFT = 3, // text "up" is page left (head turned leftward)
};
/**
* The grapheme clusters within a line of text are laid out logically
* in this direction, judged when looking at the text line rotated so that
* its Orientation is "page up".
*
* For English text, the writing direction is left-to-right. For the
* Chinese text in the above example, the writing direction is top-to-bottom.
*/
// Direction in which grapheme clusters run within one text line, judged with
// the line rotated to "page up".
enum WritingDirection {
WRITING_DIRECTION_LEFT_TO_RIGHT = 0, // e.g. English text
WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
WRITING_DIRECTION_TOP_TO_BOTTOM = 2, // e.g. vertical Chinese text
};
/**
* The text lines are read in the given sequence.
*
* In English, the order is top-to-bottom.
* In Chinese, vertical text lines are read right-to-left. Mongolian is
* written in vertical columns top to bottom like Chinese, but the lines
* order left-to right.
*
* Note that only some combinations make sense. For example,
* WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM
*/
// Sequence in which successive text lines are read.
enum TextlineOrder {
TEXTLINE_ORDER_LEFT_TO_RIGHT = 0, // e.g. Mongolian vertical columns
TEXTLINE_ORDER_RIGHT_TO_LEFT = 1, // e.g. vertical Chinese text lines
TEXTLINE_ORDER_TOP_TO_BOTTOM = 2, // e.g. English
};
/**
 * Modes for page layout analysis. The enumerators *must* stay ordered by
 * decreasing amount of layout analysis (OSD_ONLY excepted), because the
 * range tests in the PSM_* helpers after this enum rely on that order.
 */
enum PageSegMode {
  PSM_OSD_ONLY = 0,       ///< Orientation and script detection only.
  PSM_AUTO_OSD = 1,       ///< Automatic page segmentation with orientation
                          ///< and script detection. (OSD)
  PSM_AUTO_ONLY = 2,      ///< Automatic page segmentation, but no OSD, or OCR.
  PSM_AUTO = 3,           ///< Fully automatic page segmentation, but no OSD.
  PSM_SINGLE_COLUMN = 4,  ///< Assume a single column of text of variable
                          ///< sizes.
  PSM_SINGLE_BLOCK_VERT_TEXT = 5,  ///< Assume a single uniform block of
                                   ///< vertically aligned text.
  PSM_SINGLE_BLOCK = 6,  ///< Assume a single uniform block of text. (Default.)
  PSM_SINGLE_LINE = 7,   ///< Treat the image as a single text line.
  PSM_SINGLE_WORD = 8,   ///< Treat the image as a single word.
  PSM_CIRCLE_WORD = 9,   ///< Treat the image as a single word in a circle.
  PSM_SINGLE_CHAR = 10,  ///< Treat the image as a single character.
  PSM_SPARSE_TEXT = 11,  ///< Find as much text as possible in no particular
                         ///< order.
  PSM_SPARSE_TEXT_OSD = 12,  ///< Sparse text with orientation and script det.
  PSM_RAW_LINE = 13,  ///< Treat the image as a single text line, bypassing
                      ///< hacks that are Tesseract-specific.
  PSM_COUNT           ///< Number of enum entries.
};

/**
 * Helpers that tell which parts of layout analysis a PageSegMode enables.
 * *They depend critically on the order of the PageSegMode enumerators.*
 * The argument is an int for compatibility with INT_PARAM.
 */

/** True when orientation and script detection is enabled. */
inline bool PSM_OSD_ENABLED(int pageseg_mode) {
  if (pageseg_mode == PSM_SPARSE_TEXT_OSD) {
    return true;
  }
  return pageseg_mode <= PSM_AUTO_OSD;
}

/** True when orientation handling is enabled. */
inline bool PSM_ORIENTATION_ENABLED(int pageseg_mode) {
  if (pageseg_mode == PSM_SPARSE_TEXT_OSD) {
    return true;
  }
  return pageseg_mode <= PSM_AUTO;
}

/** True when column finding is enabled (PSM_AUTO_OSD .. PSM_AUTO). */
inline bool PSM_COL_FIND_ENABLED(int pageseg_mode) {
  const bool below_range = pageseg_mode < PSM_AUTO_OSD;
  const bool above_range = pageseg_mode > PSM_AUTO;
  return !(below_range || above_range);
}

/** True for the two sparse-text modes. */
inline bool PSM_SPARSE(int pageseg_mode) {
  switch (pageseg_mode) {
    case PSM_SPARSE_TEXT:
    case PSM_SPARSE_TEXT_OSD:
      return true;
    default:
      return false;
  }
}

/** True when block finding is enabled (PSM_AUTO_OSD .. PSM_SINGLE_COLUMN). */
inline bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode) {
  const bool below_range = pageseg_mode < PSM_AUTO_OSD;
  const bool above_range = pageseg_mode > PSM_SINGLE_COLUMN;
  return !(below_range || above_range);
}

/** True when line finding is enabled (PSM_AUTO_OSD .. PSM_SINGLE_BLOCK). */
inline bool PSM_LINE_FIND_ENABLED(int pageseg_mode) {
  const bool below_range = pageseg_mode < PSM_AUTO_OSD;
  const bool above_range = pageseg_mode > PSM_SINGLE_BLOCK;
  return !(below_range || above_range);
}

/**
 * True when word finding is enabled: PSM_AUTO_OSD .. PSM_SINGLE_LINE, or
 * either sparse-text mode.
 */
inline bool PSM_WORD_FIND_ENABLED(int pageseg_mode) {
  if (PSM_AUTO_OSD <= pageseg_mode && pageseg_mode <= PSM_SINGLE_LINE) {
    return true;
  }
  return pageseg_mode == PSM_SPARSE_TEXT ||
         pageseg_mode == PSM_SPARSE_TEXT_OSD;
}
/**
* enum of the elements of the page hierarchy, used in ResultIterator
* to provide functions that operate on each level without having to
* have 5x as many functions.
*/
// No explicit values are given, so the enumerators are numbered 0..4 in the
// order written: from the coarsest level (block) down to the finest (symbol).
enum PageIteratorLevel {
RIL_BLOCK, // Block of text/image/separator line.
RIL_PARA, // Paragraph within a block.
RIL_TEXTLINE, // Line within a paragraph.
RIL_WORD, // Word within a textline.
RIL_SYMBOL // Symbol/character within a word.
};
/**
* JUSTIFICATION_UNKNOWN
* The alignment is not clearly one of the other options. This could happen
* for example if there are only one or two lines of text or the text looks
* like source code or poetry.
*
* NOTA BENE: Fully justified paragraphs (text aligned to both left and right
* margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text
* is written with a left-to-right script and with JUSTIFICATION_RIGHT if
* their text is written in a right-to-left script.
*
* Interpretation for text read in vertical lines:
* "Left" is wherever the starting reading position is.
*
* JUSTIFICATION_LEFT
* Each line, except possibly the first, is flush to the same left tab stop.
*
* JUSTIFICATION_CENTER
* The text lines of the paragraph are centered about a line going
* down through their middle of the text lines.
*
* JUSTIFICATION_RIGHT
* Each line, except possibly the first, is flush to the same right tab stop.
*/
// No explicit values are given, so the enumerators are numbered 0..3.
enum ParagraphJustification {
JUSTIFICATION_UNKNOWN, // Alignment is not clearly one of the other options.
JUSTIFICATION_LEFT, // Lines (except possibly the first) share a left tab stop.
JUSTIFICATION_CENTER, // Lines are centered about a common middle line.
JUSTIFICATION_RIGHT, // Lines (except possibly the first) share a right tab stop.
};
/**
* When Tesseract/Cube is initialized we can choose to instantiate/load/run
* only the Tesseract part, only the Cube part or both along with the combiner.
* The preference of which engine to use is stored in tessedit_ocr_engine_mode.
*
* ATTENTION: When modifying this enum, please make sure to make the
* appropriate changes to all the enums mirroring it (e.g. OCREngine in
* cityblock/workflow/detection/detection_storage.proto). Such enums will
* mention the connection to OcrEngineMode in the comments.
*/
enum OcrEngineMode {
  // Run Tesseract only - fastest; deprecated.
  OEM_TESSERACT_ONLY = 0,
  // Run just the LSTM line recognizer.
  OEM_LSTM_ONLY = 1,
  // Run the LSTM recognizer, but allow fallback to Tesseract when things get
  // difficult; deprecated.
  OEM_TESSERACT_LSTM_COMBINED = 2,
  // Specify this mode when calling init_*() to indicate that one of the modes
  // above should be inferred automatically from the variables in the
  // language-specific config or the command-line configs. If none of those
  // specify a mode, the default OEM_TESSERACT_ONLY is used.
  OEM_DEFAULT = 3,
  // Number of OEMs.
  OEM_COUNT = 4
};
} // namespace tesseract.
#endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_

View File

@ -1,311 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: renderer.h
// Description: Rendering interface to inject into TessBaseAPI
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_API_RENDERER_H_
#define TESSERACT_API_RENDERER_H_
#include "export.h"
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
// complexity of includes here. Use forward declarations wherever possible
// and hide includes of complex types in baseapi.cpp.
#include <cstdint>
#include <string> // for std::string
#include <vector> // for std::vector
struct Pix;
namespace tesseract {
class TessBaseAPI;
/**
* Interface for rendering tesseract results into a document, such as text,
* HOCR or pdf. This class is abstract. Specific classes handle individual
* formats. This interface is then used to inject the renderer class into
* tesseract when processing images.
*
* For simplicity implementing this with tesseract version 3.01,
* the renderer contains document state that is cleared from document
* to document just as the TessBaseAPI is. This way the base API can just
* delegate its rendering functionality to injected renderers, and the
* renderers can manage the associated state needed for the specific formats
* in addition to the heuristics for producing it.
*/
class TESS_API TessResultRenderer {
public:
// Virtual destructor: this is an abstract base (AddImageHandler is pure
// virtual), so concrete renderers can be destroyed through a base pointer.
virtual ~TessResultRenderer();
// Takes ownership of pointer so must be new'd instance.
// Renderers aren't ordered, but appends the sequences of next parameter
// and existing next(). The renderers should be unique across both lists.
void insert(TessResultRenderer *next);
// Returns the next renderer or nullptr.
TessResultRenderer *next() {
return next_;
}
/**
* Starts a new document with the given title.
* This clears the contents of the output data.
* Title should use UTF-8 encoding.
*/
bool BeginDocument(const char *title);
/**
* Adds the recognized text from the source image to the current document.
* Invalid if BeginDocument not yet called.
*
* Note that this API is a bit weird but is designed to fit into the
* current TessBaseAPI implementation where the api has lots of state
* information that we might want to add in.
*/
bool AddImage(TessBaseAPI *api);
/**
* Finishes the document and finalizes the output data
* Invalid if BeginDocument not yet called.
*/
bool EndDocument();
// Returns the stored file extension pointer (file_extension_) unchanged.
const char *file_extension() const {
return file_extension_;
}
// Returns the document title as a C string. The pointer refers to the
// internal buffer of title_, so it is only valid while title_ is unchanged.
const char *title() const {
return title_.c_str();
}
// Is everything fine? Otherwise something went wrong.
bool happy() const {
return happy_;
}
/**
* Returns the index of the last image given to AddImage
* (i.e. images are incremented whether the image succeeded or not)
*
* This is always defined. It means either the number of the
* current image, the last image ended, or in the completed document
* depending on when in the document lifecycle you are looking at it.
* Will return -1 if a document was never started.
*/
int imagenum() const {
return imagenum_;
}
protected:
/**
* Called by concrete classes.
*
* outputbase is the name of the output file excluding
* extension. For example, "/path/to/chocolate-chip-cookie-recipe"
*
* extension indicates the file extension to be used for output
* files. For example "pdf" will produce a .pdf file, and "hocr"
* will produce .hocr files.
*/
TessResultRenderer(const char *outputbase, const char *extension);
// Hook for specialized handling in BeginDocument()
virtual bool BeginDocumentHandler();
// This must be overridden to render the OCR'd results
virtual bool AddImageHandler(TessBaseAPI *api) = 0;
// Hook for specialized handling in EndDocument()
virtual bool EndDocumentHandler();
// Renderers can call this to append '\0' terminated strings into
// the output string returned by GetOutput.
// This method will grow the output buffer if needed.
// NOTE(review): no GetOutput member is declared in this class; the comment
// may be stale - confirm where the appended data actually goes.
void AppendString(const char *s);
// Renderers can call this to append binary byte sequences into
// the output string returned by GetOutput. Note that s is not necessarily
// '\0' terminated (and can contain '\0' within it).
// This method will grow the output buffer if needed.
void AppendData(const char *s, int len);
private:
// NOTE(review): FILE is used below, but this header only includes
// <cstdint>, <string>, <vector> and export.h directly; it appears to rely
// on <cstdio> arriving transitively - confirm.
TessResultRenderer *next_; // Can link multiple renderers together
FILE *fout_; // output file pointer
const char *file_extension_; // standard extension for generated output
std::string title_; // title of document being rendered
int imagenum_; // index of last image added
bool happy_; // I get grumpy when the disk fills up, etc.
};
/**
* Renders tesseract output into a plain UTF-8 text string.
* Concrete TessResultRenderer that only overrides the per-image hook.
*/
class TESS_API TessTextRenderer : public TessResultRenderer {
public:
// outputbase: presumably the output file name without extension, as
// documented for the TessResultRenderer constructor - TODO confirm.
explicit TessTextRenderer(const char *outputbase);
protected:
// Overrides the pure virtual TessResultRenderer hook that renders the
// OCR'd results.
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
* Renders tesseract output into an hocr text string.
* Overrides all three TessResultRenderer hooks (begin, per-image, end).
*/
class TESS_API TessHOcrRenderer : public TessResultRenderer {
public:
// outputbase: presumably the output file name without extension, as
// documented for the TessResultRenderer constructor - TODO confirm.
// font_info: presumably stored in font_info_ (whether to print font
// information) - TODO confirm in the implementation.
explicit TessHOcrRenderer(const char *outputbase, bool font_info);
// Overload without font_info; the value used for font_info_ in this case is
// not visible in this header.
explicit TessHOcrRenderer(const char *outputbase);
protected:
// Overrides the TessResultRenderer hook for BeginDocument().
bool BeginDocumentHandler() override;
// Overrides the pure virtual hook that renders the OCR'd results.
bool AddImageHandler(TessBaseAPI *api) override;
// Overrides the TessResultRenderer hook for EndDocument().
bool EndDocumentHandler() override;
private:
bool font_info_; // whether to print font information
};
/**
* Renders tesseract output into an alto text string.
* Overrides all three TessResultRenderer hooks (begin, per-image, end).
*/
class TESS_API TessAltoRenderer : public TessResultRenderer {
public:
// outputbase: presumably the output file name without extension, as
// documented for the TessResultRenderer constructor - TODO confirm.
explicit TessAltoRenderer(const char *outputbase);
protected:
// Overrides the TessResultRenderer hook for BeginDocument().
bool BeginDocumentHandler() override;
// Overrides the pure virtual hook that renders the OCR'd results.
bool AddImageHandler(TessBaseAPI *api) override;
// Overrides the TessResultRenderer hook for EndDocument().
bool EndDocumentHandler() override;
private:
// NOTE(review): undocumented flag; presumably tracks whether the document
// has been begun - confirm in the implementation. Unlike the other members
// in this header it has no trailing underscore.
bool begin_document;
};
/**
* Renders Tesseract output into a TSV string.
* Overrides all three TessResultRenderer hooks (begin, per-image, end).
*/
class TESS_API TessTsvRenderer : public TessResultRenderer {
public:
// outputbase: presumably the output file name without extension, as
// documented for the TessResultRenderer constructor - TODO confirm.
// font_info: presumably stored in font_info_ (whether to print font
// information) - TODO confirm in the implementation.
explicit TessTsvRenderer(const char *outputbase, bool font_info);
// Overload without font_info; the value used for font_info_ in this case is
// not visible in this header.
explicit TessTsvRenderer(const char *outputbase);
protected:
// Overrides the TessResultRenderer hook for BeginDocument().
bool BeginDocumentHandler() override;
// Overrides the pure virtual hook that renders the OCR'd results.
bool AddImageHandler(TessBaseAPI *api) override;
// Overrides the TessResultRenderer hook for EndDocument().
bool EndDocumentHandler() override;
private:
bool font_info_; // whether to print font information
};
/**
* Renders tesseract output into searchable PDF.
* Overrides all three TessResultRenderer hooks and keeps per-document
* bookkeeping (object counter, object offsets, page object numbers).
*/
class TESS_API TessPDFRenderer : public TessResultRenderer {
public:
// datadir is the location of the TESSDATA. We need it because
// we load a custom PDF font from this location.
// textonly defaults to false; presumably it is stored in textonly_
// ("skip images if set") - TODO confirm in the implementation.
TessPDFRenderer(const char *outputbase, const char *datadir,
bool textonly = false);
protected:
// Overrides the TessResultRenderer hook for BeginDocument().
bool BeginDocumentHandler() override;
// Overrides the pure virtual hook that renders the OCR'd results.
bool AddImageHandler(TessBaseAPI *api) override;
// Overrides the TessResultRenderer hook for EndDocument().
bool EndDocumentHandler() override;
private:
// We don't want to have every image in memory at once,
// so we store some metadata as we go along producing
// PDFs one page at a time. At the end, that metadata is
// used to make everything that isn't easily handled in a
// streaming fashion.
long int obj_; // counter for PDF objects
std::vector<uint64_t> offsets_; // offset of every PDF object in bytes
std::vector<long int> pages_; // object number for every /Page object
std::string datadir_; // where to find the custom font
bool textonly_; // skip images if set
// Bookkeeping only. DIY = Do It Yourself.
void AppendPDFObjectDIY(size_t objectsize);
// Bookkeeping + emit data.
void AppendPDFObject(const char *data);
// Create the /Contents object for an entire page.
// NOTE(review): returns a raw char*; ownership and how it must be freed are
// not stated in this header - confirm in the implementation.
char *GetPDFTextObjects(TessBaseAPI *api, double width, double height);
// Turn an image into a PDF object. Only transcode if we have to.
// pdf_object and pdf_object_size are out-parameters (pointer-to-pointer and
// pointer-to-size); presumably they receive the generated object and its
// length, and the bool result reports success - TODO confirm.
static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum,
char **pdf_object, long int *pdf_object_size,
int jpg_quality);
};
/**
* Renders tesseract output into a plain UTF-8 text string
* NOTE(review): this description is identical to the one on
* TessTextRenderer; judging by the class name this renderer presumably
* produces UNLV-format output instead - confirm and reword.
*/
class TESS_API TessUnlvRenderer : public TessResultRenderer {
public:
// outputbase: presumably the output file name without extension, as
// documented for the TessResultRenderer constructor - TODO confirm.
explicit TessUnlvRenderer(const char *outputbase);
protected:
// Overrides the pure virtual TessResultRenderer hook that renders the
// OCR'd results.
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
* Renders tesseract output into a plain UTF-8 text string for LSTMBox.
* Concrete TessResultRenderer that only overrides the per-image hook.
*/
class TESS_API TessLSTMBoxRenderer : public TessResultRenderer {
public:
// outputbase: presumably the output file name without extension, as
// documented for the TessResultRenderer constructor - TODO confirm.
explicit TessLSTMBoxRenderer(const char *outputbase);
protected:
// Overrides the pure virtual TessResultRenderer hook that renders the
// OCR'd results.
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
* Renders tesseract output into a plain UTF-8 text string
* NOTE(review): this description is identical to the one on
* TessTextRenderer; judging by the class name this renderer presumably
* produces box-file text instead - confirm and reword.
*/
class TESS_API TessBoxTextRenderer : public TessResultRenderer {
public:
// outputbase: presumably the output file name without extension, as
// documented for the TessResultRenderer constructor - TODO confirm.
explicit TessBoxTextRenderer(const char *outputbase);
protected:
// Overrides the pure virtual TessResultRenderer hook that renders the
// OCR'd results.
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
* Renders tesseract output into a plain UTF-8 text string in WordStr format.
* Concrete TessResultRenderer that only overrides the per-image hook.
*/
class TESS_API TessWordStrBoxRenderer : public TessResultRenderer {
public:
// outputbase: presumably the output file name without extension, as
// documented for the TessResultRenderer constructor - TODO confirm.
explicit TessWordStrBoxRenderer(const char *outputbase);
protected:
// Overrides the pure virtual TessResultRenderer hook that renders the
// OCR'd results.
bool AddImageHandler(TessBaseAPI *api) override;
};
#ifndef DISABLED_LEGACY_ENGINE
/**
* Renders tesseract output into an osd text string.
* Only declared when DISABLED_LEGACY_ENGINE is not defined (see the
* enclosing #ifndef).
*/
class TESS_API TessOsdRenderer : public TessResultRenderer {
public:
// outputbase: presumably the output file name without extension, as
// documented for the TessResultRenderer constructor - TODO confirm.
explicit TessOsdRenderer(const char *outputbase);
protected:
// Overrides the pure virtual TessResultRenderer hook that renders the
// OCR'd results.
bool AddImageHandler(TessBaseAPI *api) override;
};
#endif // ndef DISABLED_LEGACY_ENGINE
} // namespace tesseract.
#endif // TESSERACT_API_RENDERER_H_

View File

@ -1,250 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: resultiterator.h
// Description: Iterator for tesseract results that is capable of
// iterating in proper reading order over Bi Directional
// (e.g. mixed Hebrew and English) text.
// Author: David Eger
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
#include "export.h" // for TESS_API, TESS_LOCAL
#include "ltrresultiterator.h" // for LTRResultIterator
#include "publictypes.h" // for PageIteratorLevel
#include "unichar.h" // for StrongScriptDirection
#include <set> // for std::pair
#include <vector> // for std::vector
namespace tesseract {
// Iterator over tesseract results, derived from LTRResultIterator, that
// overrides the navigation functions so that iteration follows reading order
// in bidirectional text (see the comments on Next() and IsAtBeginningOf()).
class TESS_API ResultIterator : public LTRResultIterator {
public:
// Factory taking an LTRResultIterator and returning a heap-style pointer to
// a ResultIterator. Presumably it uses the protected constructor below and
// the caller owns the result - TODO confirm in the implementation.
static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);
/**
* ResultIterator is copy constructible!
* The default copy constructor works just fine for us.
*/
~ResultIterator() override = default;
// ============= Moving around within the page ============.
/**
* Moves the iterator to point to the start of the page to begin
* an iteration.
*/
void Begin() override;
/**
* Moves to the start of the next object at the given level in the
* page hierarchy in the appropriate reading order and returns false if
* the end of the page was reached.
* NOTE that RIL_SYMBOL will skip non-text blocks, but all other
* PageIteratorLevel level values will visit each non-text block once.
* Think of non text blocks as containing a single para, with a single line,
* with a single imaginary word.
* Calls to Next with different levels may be freely intermixed.
* This function iterates words in right-to-left scripts correctly, if
* the appropriate language has been loaded into Tesseract.
*/
bool Next(PageIteratorLevel level) override;
/**
* IsAtBeginningOf() returns whether we're at the logical beginning of the
* given level. (as opposed to ResultIterator's left-to-right top-to-bottom
* order). Otherwise, this acts the same as PageIterator::IsAtBeginningOf().
* For a full description, see pageiterator.h
*/
bool IsAtBeginningOf(PageIteratorLevel level) const override;
/**
* Implement PageIterator's IsAtFinalElement correctly in a BiDi context.
* For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we
* point at the last word in a paragraph. See PageIterator for full comment.
*/
bool IsAtFinalElement(PageIteratorLevel level,
PageIteratorLevel element) const override;
// ============= Functions that refer to words only ============.
// Returns the number of blanks before the current word.
int BlanksBeforeWord() const;
// ============= Accessing data ==============.
/**
* Returns the null terminated UTF-8 encoded text string for the current
* object at the given level. Use delete [] to free after use.
*/
virtual char *GetUTF8Text(PageIteratorLevel level) const;
/**
* Returns the LSTM choices for every LSTM timestep for the current word.
* NOTE(review): returns a raw pointer to a nested vector of
* (const char *, float) pairs; ownership and lifetime are not stated here.
*/
virtual std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
*GetRawLSTMTimesteps() const;
// NOTE(review): undocumented. Returns a raw pointer to a vector of vectors
// of (const char *, float) pairs; presumably the best LSTM symbol choices
// for the current word, by analogy with GetRawLSTMTimesteps() - confirm,
// including who owns the returned data.
virtual std::vector<std::vector<std::pair<const char *, float>>>
*GetBestLSTMSymbolChoices() const;
/**
* Return whether the current paragraph's dominant reading direction
* is left-to-right (as opposed to right-to-left).
*/
bool ParagraphIsLtr() const;
// ============= Exposed only for testing =============.
/**
* Yields the reading order as a sequence of indices and (optional)
* meta-marks for a set of words (given left-to-right).
* The meta marks are passed as negative values:
* kMinorRunStart Start of minor direction text.
* kMinorRunEnd End of minor direction text.
* kComplexWord The next indexed word contains both left-to-right and
* right-to-left characters and was treated as neutral.
*
* For example, suppose we have five words in a text line,
* indexed [0,1,2,3,4] from the leftmost side of the text line.
* The following are all believable reading_orders:
*
* Left-to-Right (in ltr paragraph):
* { 0, 1, 2, 3, 4 }
* Left-to-Right (in rtl paragraph):
* { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }
* Right-to-Left (in rtl paragraph):
* { 4, 3, 2, 1, 0 }
* Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph:
* { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }
*/
static void CalculateTextlineOrder(
bool paragraph_is_ltr,
const std::vector<StrongScriptDirection> &word_dirs,
std::vector<int> *reading_order);
// Meta-mark values used in reading-order vectors; per the comment above they
// are negative. Their actual values are defined out of line.
static const int kMinorRunStart;
static const int kMinorRunEnd;
static const int kComplexWord;
protected:
/**
* We presume the data associated with the given iterator will outlive us.
* NB: This is not public because it does something that is non-obvious:
* it resets to the beginning of the paragraph instead of staying wherever
* resit might have pointed.
*/
explicit ResultIterator(const LTRResultIterator &resit);
private:
/**
* Calculates the current paragraph's dominant writing direction.
* Typically, members should use current_paragraph_is_ltr_ instead.
*/
bool CurrentParagraphIsLtr() const;
/**
* Returns word indices as measured from resit->RestartRow() = index 0
* for the reading order of words within a textline given an iterator
* into the middle of the text line.
* In addition to non-negative word indices, the following negative values
* may be inserted:
* kMinorRunStart Start of minor direction text.
* kMinorRunEnd End of minor direction text.
* kComplexWord The previous word contains both left-to-right and
* right-to-left characters and was treated as neutral.
*/
void CalculateTextlineOrder(bool paragraph_is_ltr,
const LTRResultIterator &resit,
std::vector<int> *indices) const;
/** Same as above, but the caller's ssd gets filled in if ssd != nullptr. */
void CalculateTextlineOrder(bool paragraph_is_ltr,
const LTRResultIterator &resit,
std::vector<StrongScriptDirection> *ssd,
std::vector<int> *indices) const;
/**
* What is the index of the current word in a strict left-to-right reading
* of the row?
*/
int LTRWordIndex() const;
/**
* Given an iterator pointing at a word, returns the logical reading order
* of blob indices for the word.
*/
void CalculateBlobOrder(std::vector<int> *blob_indices) const;
/** Precondition: current_paragraph_is_ltr_ is set. */
void MoveToLogicalStartOfTextline();
/**
* Precondition: current_paragraph_is_ltr_ and in_minor_direction_
* are set.
*/
void MoveToLogicalStartOfWord();
/** Are we pointing at the final (reading order) symbol of the word? */
bool IsAtFinalSymbolOfWord() const;
/** Are we pointing at the first (reading order) symbol of the word? */
bool IsAtFirstSymbolOfWord() const;
/**
* Append any extra marks that should be appended to this word when printed.
* Mostly, these are Unicode BiDi control characters.
*/
void AppendSuffixMarks(std::string *text) const;
/** Appends the current word in reading order to the given buffer.*/
void AppendUTF8WordText(std::string *text) const;
/**
* Appends the text of the current text line, *assuming this iterator is
* positioned at the beginning of the text line* This function
* updates the iterator to point to the first position past the text line.
* Each textline is terminated in a single newline character.
* If the textline ends a paragraph, it gets a second terminal newline.
*/
void IterateAndAppendUTF8TextlineText(std::string *text);
/**
* Appends the text of the current paragraph in reading order
* to the given buffer.
* Each textline is terminated in a single newline character, and the
* paragraph gets an extra newline at the end.
*/
void AppendUTF8ParagraphText(std::string *text) const;
/** Returns whether the bidi_debug flag is set to at least min_level. */
bool BidiDebug(int min_level) const;
/**
* Stored paragraph direction flag; the comment on CurrentParagraphIsLtr()
* says members should typically read this instead of recalculating.
*/
bool current_paragraph_is_ltr_;
/**
* Is the currently pointed-at character at the beginning of
* a minor-direction run?
*/
bool at_beginning_of_minor_run_;
/** Is the currently pointed-at character in a minor-direction sequence? */
bool in_minor_direction_;
/**
* Should detected inter-word spaces be preserved, or "compressed" to a single
* space character (default behavior).
*/
bool preserve_interword_spaces_;
};
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_

View File

@ -1,174 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: unichar.h
// Description: Unicode character/ligature class.
// Author: Ray Smith
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCUTIL_UNICHAR_H_
#define TESSERACT_CCUTIL_UNICHAR_H_
#include "export.h"
#include <memory.h>
#include <cstring>
#include <string>
#include <vector>
namespace tesseract {
// Maximum number of characters that can be stored in a UNICHAR. Must be
// at least 4. Must not exceed 31 without changing the coding of length.
// Note: this is a preprocessor macro, so it is not scoped by the enclosing
// namespace and is visible to every file that includes this header.
#define UNICHAR_LEN 30
// A UNICHAR_ID is the unique id of a unichar.
using UNICHAR_ID = int;
// A variable to indicate an invalid or uninitialized unichar id.
// Declared static const at namespace scope, so each translation unit that
// includes this header gets its own internal-linkage copy.
static const int INVALID_UNICHAR_ID = -1;
// A special unichar that corresponds to INVALID_UNICHAR_ID.
static const char INVALID_UNICHAR[] = "__INVALID_UNICHAR__";
enum StrongScriptDirection {
  // Text contains only neutral characters.
  DIR_NEUTRAL = 0,
  // Text contains no Right-to-Left characters.
  DIR_LEFT_TO_RIGHT = 1,
  // Text contains no Left-to-Right characters.
  DIR_RIGHT_TO_LEFT = 2,
  // Text contains a mixture of left-to-right and right-to-left characters.
  DIR_MIX = 3
};
// Signed integer type used as the element type of the UTF-32 vectors
// handled by UNICHAR::UTF8ToUTF32 and UNICHAR::UTF32ToUTF8.
using char32 = int;
// The UNICHAR class holds a single classification result. This may be
// a single Unicode character (stored as between 1 and 4 utf8 bytes) or
// multiple Unicode characters representing the NFKC expansion of a ligature
// such as fi, ffl etc. These are also stored as utf8.
class TESS_API UNICHAR {
public:
// Default constructor: zero-fills the whole chars buffer. (chars also has a
// {} default member initializer, so the memset repeats that zeroing.)
UNICHAR() {
memset(chars, 0, UNICHAR_LEN);
}
// Construct from a utf8 string. If len<0 then the string is null terminated.
// If the string is too long to fit in the UNICHAR then it takes only what
// will fit.
UNICHAR(const char *utf8_str, int len);
// Construct from a single UCS4 character.
explicit UNICHAR(int unicode);
// Default copy constructor and operator= are OK.
// Get the first character as UCS-4.
int first_uni() const;
// Get the length of the UTF8 string.
// The last byte of chars is read as the length; if it is not in
// [0, UNICHAR_LEN) it is treated as a genuine character and the length is
// UNICHAR_LEN (see the comment on the chars member).
int utf8_len() const {
int len = chars[UNICHAR_LEN - 1];
return len >= 0 && len < UNICHAR_LEN ? len : UNICHAR_LEN;
}
// Get a UTF8 string, but NOT null terminated.
// Returns a pointer to the internal chars buffer; use utf8_len() for the
// number of valid bytes.
const char *utf8() const {
return chars;
}
// Get a terminated UTF8 string: Must delete[] it after use.
char *utf8_str() const;
// Get the number of bytes in the first character of the given utf8 string.
static int utf8_step(const char *utf8_str);
// A class to simplify iterating over and accessing elements of a UTF8
// string. Note that unlike the UNICHAR class, const_iterator does NOT COPY or
// take ownership of the underlying byte array. It also does not permit
// modification of the array (as the name suggests).
//
// Example:
// for (UNICHAR::const_iterator it = UNICHAR::begin(str, str_len);
// it != UNICHAR::end(str, str_len);
// ++it) {
// printf("UCS-4 symbol code = %d\n", *it);
// char buf[5];
// int char_len = it.get_utf8(buf); buf[char_len] = '\0';
// printf("Char = %s\n", buf);
// }
class TESS_API const_iterator {
using CI = const_iterator;
public:
// Step to the next UTF8 character.
// If the current position is at an illegal UTF8 character, then print an
// error message and step by one byte. If the current position is at a
// null byte, don't step past it.
const_iterator &operator++();
// Return the UCS-4 value at the current position.
// If the current position is at an illegal UTF8 value, return a single
// space character.
int operator*() const;
// Store the UTF-8 encoding of the current codepoint into buf, which must be
// at least 4 bytes long. Return the number of bytes written.
// If the current position is at an illegal UTF8 value, writes a single
// space character and returns 1.
// Note that this method does not null-terminate the buffer.
int get_utf8(char *buf) const;
// Returns the number of bytes of the current codepoint. Returns 1 if the
// current position is at an illegal UTF8 value.
int utf8_len() const;
// Returns true if the UTF-8 encoding at the current position is legal.
bool is_legal() const;
// Return the pointer into the string at the current position.
const char *utf8_data() const {
return it_;
}
// Iterator equality operators.
// Two iterators compare equal when they hold the same pointer value.
friend bool operator==(const CI &lhs, const CI &rhs) {
return lhs.it_ == rhs.it_;
}
friend bool operator!=(const CI &lhs, const CI &rhs) {
return !(lhs == rhs);
}
private:
// Only UNICHAR (a friend) can construct iterators, via begin()/end().
friend class UNICHAR;
explicit const_iterator(const char *it) : it_(it) {}
const char *it_; // Pointer into the string.
};
// Create a start/end iterator pointing to a string. Note that these methods
// are static and do NOT create a copy or take ownership of the underlying
// array.
static const_iterator begin(const char *utf8_str, int byte_length);
static const_iterator end(const char *utf8_str, int byte_length);
// Converts a utf-8 string to a vector of unicodes.
// Returns an empty vector if the input contains invalid UTF-8.
static std::vector<char32> UTF8ToUTF32(const char *utf8_str);
// Converts a vector of unicodes to a utf8 string.
// Returns an empty string if the input contains an invalid unicode.
static std::string UTF32ToUTF8(const std::vector<char32> &str32);
private:
// A UTF-8 representation of 1 or more Unicode characters.
// The last element (chars[UNICHAR_LEN - 1]) is a length if
// its value < UNICHAR_LEN, otherwise it is a genuine character.
char chars[UNICHAR_LEN]{};
};
} // namespace tesseract
#endif // TESSERACT_CCUTIL_UNICHAR_H_

View File

@ -1,34 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: version.h
// Description: Version information
//
// (C) Copyright 2018, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_API_VERSION_H_
#define TESSERACT_API_VERSION_H_
// clang-format off
// Version components.
// NOTE(review): the @NAME@ tokens below are not valid C++ and look like
// configure-time placeholders that were never substituted; any use of these
// macros will fail to compile as written. Confirm whether this file is meant
// to be a template processed by the build system, or fill in real numbers.
#define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
#define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
#define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@
// Packs the three components into one integer:
// major in bits 16 and up, minor shifted left by 8, micro in the low bits.
#define TESSERACT_VERSION \
(TESSERACT_MAJOR_VERSION << 16 | \
TESSERACT_MINOR_VERSION << 8 | \
TESSERACT_MICRO_VERSION)
// Version as a string literal (same placeholder caveat as above).
#define TESSERACT_VERSION_STR "@PACKAGE_VERSION@"
// clang-format on
#endif // TESSERACT_API_VERSION_H_

View File

@ -1,812 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: baseapi.h
// Description: Simple API for calling tesseract.
// Author: Ray Smith
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_API_BASEAPI_H_
#define TESSERACT_API_BASEAPI_H_
#ifdef HAVE_CONFIG_H
# include "config_auto.h" // DISABLED_LEGACY_ENGINE
#endif
#include "export.h"
#include "pageiterator.h"
#include "publictypes.h"
#include "resultiterator.h"
#include "unichar.h"
#include "version.h"
#include <cstdio>
#include <vector> // for std::vector
struct Pix;
struct Pixa;
struct Boxa;
namespace tesseract {
class PAGE_RES;
class ParagraphModel;
class BLOCK_LIST;
class ETEXT_DESC;
struct OSResults;
class UNICHARSET;
class Dawg;
class Dict;
class EquationDetect;
class PageIterator;
class ImageThresholder;
class LTRResultIterator;
class ResultIterator;
class MutableIterator;
class TessResultRenderer;
class Tesseract;
// Pointer to a function that reads a whole file into a std::vector<char>.
// Returns false on failure.
using FileReader = bool (*)(const char *filename, std::vector<char> *data);
// Pointer to a const member function of Dict taking
// (void *, const UNICHARSET &, UNICHAR_ID, bool) and returning int.
// NOTE(review): the meaning of the arguments and of the result is not
// documented in this header - see the Dict class.
using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID,
bool) const;
// Pointer to a non-const member function of Dict taking
// (const char *, const char *, int, const char *, int) and returning double.
// NOTE(review): presumably a probability, judging by the alias name; the
// argument meanings are not documented in this header - confirm.
using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *,
int, const char *, int);
/**
* Base class for all tesseract APIs.
* Specific classes can add ability to work on different inputs or produce
* different outputs.
* This class is mostly an interface layer on top of the Tesseract instance
* class to hide the data types so that users of this class don't have to
* include any other Tesseract headers.
*/
class TESS_API TessBaseAPI {
public:
TessBaseAPI();
virtual ~TessBaseAPI();
// Copy constructor and assignment operator are currently unsupported.
TessBaseAPI(TessBaseAPI const &) = delete;
TessBaseAPI &operator=(TessBaseAPI const &) = delete;
/**
* Returns the version identifier as a static string. Do not delete.
*/
static const char *Version();
/**
* If compiled with OpenCL AND an available OpenCL
* device is deemed faster than serial code, then
* "device" is populated with the cl_device_id
* and returns sizeof(cl_device_id)
* otherwise *device=nullptr and returns 0.
*/
static size_t getOpenCLDevice(void **device);
/**
* Set the name of the input file. Needed for training and
* reading a UNLV zone file, and for searchable PDF output.
*/
void SetInputName(const char *name);
/**
* These functions are required for searchable PDF output.
* We need our hands on the input file so that we can include
* it in the PDF without transcoding. If that is not possible,
* we need the original image. Finally, resolution metadata
* is stored in the PDF so we need that as well.
*/
const char *GetInputName();
// Takes ownership of the input pix.
void SetInputImage(Pix *pix);
Pix *GetInputImage();
int GetSourceYResolution();
const char *GetDatapath();
/** Set the name of the bonus output files. Needed only for debugging. */
void SetOutputName(const char *name);
/**
* Set the value of an internal "parameter."
* Supply the name of the parameter and the value as a string, just as
* you would in a config file.
* Returns false if the name lookup failed.
* Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
* Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.
* SetVariable may be used before Init, but settings will revert to
* defaults on End().
*
* Note: Must be called after Init(). Only works for non-init variables
* (init variables should be passed to Init()).
*/
bool SetVariable(const char *name, const char *value);
bool SetDebugVariable(const char *name, const char *value);
/**
* Returns true if the parameter was found among Tesseract parameters.
* Fills in value with the value of the parameter.
*/
bool GetIntVariable(const char *name, int *value) const;
bool GetBoolVariable(const char *name, bool *value) const;
bool GetDoubleVariable(const char *name, double *value) const;
/**
* Returns the pointer to the string that represents the value of the
* parameter if it was found among Tesseract parameters.
*/
const char *GetStringVariable(const char *name) const;
#ifndef DISABLED_LEGACY_ENGINE
/**
* Print Tesseract fonts table to the given file.
*/
void PrintFontsTable(FILE *fp) const;
#endif
/**
* Print Tesseract parameters to the given file.
*/
void PrintVariables(FILE *fp) const;
/**
* Get value of named variable as a string, if it exists.
*/
bool GetVariableAsString(const char *name, std::string *val) const;
/**
* Instances are now mostly thread-safe and totally independent,
* but some global parameters remain. Basically it is safe to use multiple
* TessBaseAPIs in different threads in parallel, UNLESS:
* you use SetVariable on some of the Params in classify and textord.
* If you do, then the effect will be to change it for all your instances.
*
* Start tesseract. Returns zero on success and -1 on failure.
* NOTE that the only members that may be called before Init are those
* listed above here in the class definition.
*
* The datapath must be the name of the tessdata directory.
* The language is (usually) an ISO 639-3 string or nullptr will default to
* eng. It is entirely safe (and eventually will be efficient too) to call
* Init multiple times on the same instance to change language, or just
* to reset the classifier.
* The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
* that multiple languages are to be loaded. Eg hin+eng will load Hindi and
* English. Languages may specify internally that they want to be loaded
* with one or more other languages, so the ~ sign is available to override
* that. Eg if hin were set to load eng by default, then hin+~eng would force
* loading only hin. The number of loaded languages is limited only by
* memory, with the caveat that loading additional languages will impact
* both speed and accuracy, as there is more work to do to decide on the
* applicable language, and there is more chance of hallucinating incorrect
* words.
* WARNING: On changing languages, all Tesseract parameters are reset
* back to their default values. (Which may vary between languages.)
* If you have a rare need to set a Variable that controls
* initialization for a second call to Init you should explicitly
* call End() and then use SetVariable before Init. This is only a very
* rare use case, since there are very few uses that require any parameters
* to be set before Init.
*
* If set_only_non_debug_params is true, only params that do not contain
* "debug" in the name will be set.
*/
int Init(const char *datapath, const char *language, OcrEngineMode mode,
char **configs, int configs_size,
const std::vector<std::string> *vars_vec,
const std::vector<std::string> *vars_values,
bool set_only_non_debug_params);
/**
 * Convenience overload: starts tesseract with the given data path, language
 * and engine mode, forwarding to the full Init with no config files, no
 * variable overrides and set_only_non_debug_params = false.
 * Returns whatever the full Init returns.
 */
int Init(const char *datapath, const char *language, OcrEngineMode oem) {
  return Init(datapath, language, oem,
              /*configs=*/nullptr, /*configs_size=*/0,
              /*vars_vec=*/nullptr, /*vars_values=*/nullptr,
              /*set_only_non_debug_params=*/false);
}
/**
 * Convenience overload: starts tesseract with the given data path and
 * language using OEM_DEFAULT as the engine mode; every other option takes
 * the same defaults as the three-argument overload.
 */
int Init(const char *datapath, const char *language) {
  // The three-argument overload supplies the remaining defaults
  // (no configs, no variable overrides, set_only_non_debug_params = false).
  return Init(datapath, language, OEM_DEFAULT);
}
// In-memory version reads the traineddata file directly from the given
// data[data_size] array, and/or reads data via a FileReader.
int Init(const char *data, int data_size, const char *language,
OcrEngineMode mode, char **configs, int configs_size,
const std::vector<std::string> *vars_vec,
const std::vector<std::string> *vars_values,
bool set_only_non_debug_params, FileReader reader);
/**
* Returns the languages string used in the last valid initialization.
* If the last initialization specified "deu+hin" then that will be
* returned. If hin loaded eng automatically as well, then that will
* not be included in this list. To find the languages actually
* loaded use GetLoadedLanguagesAsVector.
* The returned string should NOT be deleted.
*/
const char *GetInitLanguagesAsString() const;
/**
* Returns the loaded languages in the vector of std::string.
* Includes all languages loaded by the last Init, including those loaded
* as dependencies of other loaded languages.
*/
void GetLoadedLanguagesAsVector(std::vector<std::string> *langs) const;
/**
* Returns the available languages in the sorted vector of std::string.
*/
void GetAvailableLanguagesAsVector(std::vector<std::string> *langs) const;
/**
* Init only for page layout analysis. Use only for calls to SetImage and
* AnalysePage. Calls that attempt recognition will generate an error.
*/
void InitForAnalysePage();
/**
* Read a "config" file containing a set of param, value pairs.
* Searches the standard places: tessdata/configs, tessdata/tessconfigs
* and also accepts a relative or absolute path name.
* Note: only non-init params will be set (init params are set by Init()).
*/
void ReadConfigFile(const char *filename);
/** Same as above, but only set debug params from the given config file. */
void ReadDebugConfigFile(const char *filename);
/**
* Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
* The mode is stored as an IntParam so it can also be modified by
* ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
*/
void SetPageSegMode(PageSegMode mode);
/** Return the current page segmentation mode. */
PageSegMode GetPageSegMode() const;
/**
* Recognize a rectangle from an image and return the result as a string.
* May be called many times for a single Init.
* Currently has no error checking.
* Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
* Palette color images will not work properly and must be converted to
* 24 bit.
* Binary images of 1 bit per pixel may also be given but they must be
* byte packed with the MSB of the first byte being the first pixel, and a
* 1 represents WHITE. For binary images set bytes_per_pixel=0.
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
*
* Note that TesseractRect is the simplified convenience interface.
* For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
* and one or more of the Get*Text functions below.
*/
char *TesseractRect(const unsigned char *imagedata, int bytes_per_pixel,
int bytes_per_line, int left, int top, int width,
int height);
/**
* Call between pages or documents etc to free up memory and forget
* adaptive data.
*/
void ClearAdaptiveClassifier();
/**
* @defgroup AdvancedAPI Advanced API
* The following methods break TesseractRect into pieces, so you can
* get hold of the thresholded image, get the text in different formats,
* get bounding boxes, confidences etc.
*/
/* @{ */
/**
* Provide an image for Tesseract to recognize. Format is as
* TesseractRect above. Copies the image buffer and converts to Pix.
* SetImage clears all recognition results, and sets the rectangle to the
* full image, so it may be followed immediately by a GetUTF8Text, and it
* will automatically perform recognition.
*/
void SetImage(const unsigned char *imagedata, int width, int height,
int bytes_per_pixel, int bytes_per_line);
/**
* Provide an image for Tesseract to recognize. As with SetImage above,
* Tesseract takes its own copy of the image, so it need not persist until
* after Recognize.
* Pix vs raw, which to use?
* Use Pix where possible. Tesseract uses Pix as its internal representation
* and it is therefore more efficient to provide a Pix directly.
*/
void SetImage(Pix *pix);
/**
* Set the resolution of the source image in pixels per inch so font size
* information can be calculated in results. Call this after SetImage().
*/
void SetSourceResolution(int ppi);
/**
* Restrict recognition to a sub-rectangle of the image. Call after SetImage.
* Each SetRectangle clears the recognition results so multiple rectangles
* can be recognized with the same image.
*/
void SetRectangle(int left, int top, int width, int height);
/**
* Get a copy of the internal thresholded image from Tesseract.
* Caller takes ownership of the Pix and must pixDestroy it.
* May be called any time after SetImage, or after TesseractRect.
*/
Pix *GetThresholdedImage();
/**
* Get the result of page layout analysis as a leptonica-style
* Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
*/
Boxa *GetRegions(Pixa **pixa);
/**
* Get the textlines as a leptonica-style
* Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
* If raw_image is true, then extract from the original image instead of the
* thresholded image and pad by raw_padding pixels.
* If blockids is not nullptr, the block-id of each line is also returned as
* an array of one element per line. delete [] after use. If paraids is not
* nullptr, the paragraph-id of each line within its block is also returned as
* an array of one element per line. delete [] after use.
*/
Boxa *GetTextlines(bool raw_image, int raw_padding, Pixa **pixa,
int **blockids, int **paraids);
/*
Helper method to extract from the thresholded image. (most common usage)
*/
Boxa *GetTextlines(Pixa **pixa, int **blockids) {
  // Forward to the full overload with raw_image off, zero raw padding and
  // no paragraph-id output.
  const bool raw_image = false;
  const int raw_padding = 0;
  int **const no_paraids = nullptr;
  return GetTextlines(raw_image, raw_padding, pixa, blockids, no_paraids);
}
/**
* Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
* pair, in reading order. Enables downstream handling of non-rectangular
* regions.
* Can be called before or after Recognize.
* If blockids is not nullptr, the block-id of each line is also returned as
* an array of one element per line. delete [] after use.
*/
Boxa *GetStrips(Pixa **pixa, int **blockids);
/**
* Get the words as a leptonica-style
* Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
*/
Boxa *GetWords(Pixa **pixa);
/**
* Gets the individual connected (text) components (created
* after pages segmentation step, but before recognition)
* as a leptonica-style Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
* Note: the caller is responsible for calling boxaDestroy()
* on the returned Boxa array and pixaDestroy() on cc array.
*/
Boxa *GetConnectedComponents(Pixa **cc);
/**
* Get the given level kind of components (block, textline, word etc.) as a
* leptonica-style Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
* If blockids is not nullptr, the block-id of each component is also returned
* as an array of one element per component. delete [] after use.
* If paraids is not nullptr, the paragraph-id of each component within its
* block is also returned as an array of one element per component. delete []
* after use. If raw_image is true, then portions of the original image are
* extracted instead of the thresholded image and padded with raw_padding. If
* text_only is true, then only text components are returned.
*/
Boxa *GetComponentImages(PageIteratorLevel level, bool text_only,
bool raw_image, int raw_padding, Pixa **pixa,
int **blockids, int **paraids);
// Helper function to get binary images with no padding (most common usage).
Boxa *GetComponentImages(const PageIteratorLevel level, const bool text_only,
                         Pixa **pixa, int **blockids) {
  // Forward to the full overload with raw_image off, zero raw padding and
  // no paragraph-id output.
  const bool raw_image = false;
  const int raw_padding = 0;
  int **const no_paraids = nullptr;
  return GetComponentImages(level, text_only, raw_image, raw_padding, pixa,
                            blockids, no_paraids);
}
/**
* Returns the scale factor of the thresholded image that would be returned by
* GetThresholdedImage() and the various GetX() methods that call
* GetComponentImages().
* Returns 0 if no thresholder has been set.
*/
int GetThresholdedImageScaleFactor() const;
/**
* Runs page layout analysis in the mode set by SetPageSegMode.
* May optionally be called prior to Recognize to get access to just
* the page layout results. Returns an iterator to the results.
* If merge_similar_words is true, words are combined where suitable for use
* with a line recognizer. Use if you want to use AnalyseLayout to find the
* textlines, and then want to process textline fragments with an external
* line recognizer.
* Returns nullptr on error or an empty page.
* The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
PageIterator *AnalyseLayout();
PageIterator *AnalyseLayout(bool merge_similar_words);
/**
* Recognize the image from SetAndThresholdImage, generating Tesseract
* internal structures. Returns 0 on success.
* Optional. The Get*Text functions below will call Recognize if needed.
* After Recognize, the output is kept internally until the next SetImage.
*/
int Recognize(ETEXT_DESC *monitor);
/**
* Methods to retrieve information after SetAndThresholdImage(),
* Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
*/
/**
* Turns images into symbolic text.
*
* filename can point to a single image, a multi-page TIFF,
* or a plain text list of image filenames.
*
* retry_config is useful for debugging. If not nullptr, you can fall
* back to an alternate configuration if a page fails for some
* reason.
*
* timeout_millisec terminates processing if any single page
* takes too long. Set to 0 for unlimited time.
*
* renderer is responsible for creating the output. For example,
* use the TessTextRenderer if you want plaintext output, or
* the TessPDFRenderer to produce searchable PDF.
*
* If tessedit_page_number is non-negative, will only process that
* single page. Works for multi-page tiff file, or filelist.
*
* Returns true if successful, false on error.
*/
bool ProcessPages(const char *filename, const char *retry_config,
int timeout_millisec, TessResultRenderer *renderer);
// Does the real work of ProcessPages.
bool ProcessPagesInternal(const char *filename, const char *retry_config,
int timeout_millisec, TessResultRenderer *renderer);
/**
* Turn a single image into symbolic text.
*
* The pix is the image processed. filename and page_index are
* metadata used by side-effect processes, such as reading a box
* file or formatting as hOCR.
*
* See ProcessPages for descriptions of other parameters.
*/
bool ProcessPage(Pix *pix, int page_index, const char *filename,
const char *retry_config, int timeout_millisec,
TessResultRenderer *renderer);
/**
* Get a reading-order iterator to the results of LayoutAnalysis and/or
* Recognize. The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
ResultIterator *GetIterator();
/**
* Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
* The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
MutableIterator *GetMutableIterator();
/**
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
*/
char *GetUTF8Text();
/**
* Make a HTML-formatted string with hOCR markup from the internal
* data structures.
* page_number is 0-based but will appear in the output as 1-based.
* monitor can be used to
* cancel the recognition
* receive progress callbacks
* Returned string must be freed with the delete [] operator.
*/
char *GetHOCRText(ETEXT_DESC *monitor, int page_number);
/**
* Make a HTML-formatted string with hOCR markup from the internal
* data structures.
* page_number is 0-based but will appear in the output as 1-based.
* Returned string must be freed with the delete [] operator.
*/
char *GetHOCRText(int page_number);
/**
* Make an XML-formatted string with Alto markup from the internal
* data structures.
*/
char *GetAltoText(ETEXT_DESC *monitor, int page_number);
/**
* Make an XML-formatted string with Alto markup from the internal
* data structures.
*/
char *GetAltoText(int page_number);
/**
* Make a TSV-formatted string from the internal data structures.
* page_number is 0-based but will appear in the output as 1-based.
* Returned string must be freed with the delete [] operator.
*/
char *GetTSVText(int page_number);
/**
* Make a box file for LSTM training from the internal data structures.
* Constructs coordinates in the original image - not just the rectangle.
* page_number is a 0-based page index that will appear in the box file.
* Returned string must be freed with the delete [] operator.
*/
char *GetLSTMBoxText(int page_number);
/**
* The recognized text is returned as a char* which is coded in the same
* format as a box file used in training.
* Constructs coordinates in the original image - not just the rectangle.
* page_number is a 0-based page index that will appear in the box file.
* Returned string must be freed with the delete [] operator.
*/
char *GetBoxText(int page_number);
/**
* The recognized text is returned as a char* which is coded in the same
* format as a WordStr box file used in training.
* page_number is a 0-based page index that will appear in the box file.
* Returned string must be freed with the delete [] operator.
*/
char *GetWordStrBoxText(int page_number);
/**
* The recognized text is returned as a char* which is coded
* as UNLV format Latin-1 with specific reject and suspect codes.
* Returned string must be freed with the delete [] operator.
*/
char *GetUNLVText();
/**
* Detect the orientation of the input image and apparent script (alphabet).
* orient_deg is the detected clockwise rotation of the input image in degrees
* (0, 90, 180, 270)
* orient_conf is the confidence (15.0 is reasonably confident)
* script_name is an ASCII string, the name of the script, e.g. "Latin"
* script_conf is confidence level in the script
* Returns true on success and writes values to each parameter as an output
*/
bool DetectOrientationScript(int *orient_deg, float *orient_conf,
const char **script_name, float *script_conf);
/**
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
* page_number is a 0-based page index that will appear in the osd file.
*/
char *GetOsdText(int page_number);
/** Returns the (average) confidence value between 0 and 100. */
int MeanTextConf();
/**
* Returns all word confidences (between 0 and 100) in an array, terminated
* by -1. The calling function must delete [] after use.
* The number of confidences should correspond to the number of space-
* delimited words in GetUTF8Text.
*/
int *AllWordConfidences();
#ifndef DISABLED_LEGACY_ENGINE
/**
* Applies the given word to the adaptive classifier if possible.
* The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can
* tell the boundaries of the graphemes.
* Assumes that SetImage/SetRectangle have been used to set the image
* to the given word. The mode arg should be PSM_SINGLE_WORD or
* PSM_CIRCLE_WORD, as that will be used to control layout analysis.
* The currently set PageSegMode is preserved.
* Returns false if adaption was not possible for some reason.
*/
bool AdaptToWordStr(PageSegMode mode, const char *wordstr);
#endif // ndef DISABLED_LEGACY_ENGINE
/**
* Free up recognition results and any stored image data, without actually
* freeing any recognition data that would be time-consuming to reload.
* Afterwards, you must call SetImage or TesseractRect before doing
* any Recognize or Get* operation.
*/
void Clear();
/**
* Close down tesseract and free up all memory. End() is equivalent to
* destructing and reconstructing your TessBaseAPI.
* Once End() has been used, none of the other API functions may be used
* other than Init and anything declared above it in the class definition.
*/
void End();
/**
* Clear any library-level memory caches.
* There are a variety of expensive-to-load constant data structures (mostly
* language dictionaries) that are cached globally -- surviving the Init()
* and End() of individual TessBaseAPI's. This function allows the clearing
* of these caches.
**/
static void ClearPersistentCache();
/**
* Check whether a word is valid according to Tesseract's language model
* @return 0 if the word is invalid, non-zero if valid.
* @warning temporary! This function will be removed from here and placed
* in a separate API at some future time.
*/
int IsValidWord(const char *word) const;
// Returns true if utf8_character is defined in the UniCharset.
bool IsValidCharacter(const char *utf8_character) const;
bool GetTextDirection(int *out_offset, float *out_slope);
/** Sets Dict::letter_is_okay_ function to point to the given function. */
void SetDictFunc(DictFunc f);
/** Sets Dict::probability_in_context_ function to point to the given
* function.
*/
void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
/**
* Estimates the Orientation And Script of the image.
* @return true if the image was processed successfully.
*/
bool DetectOS(OSResults *);
/**
* Return text orientation of each block as determined by an earlier run
* of layout analysis.
*/
void GetBlockTextOrientations(int **block_orientation,
bool **vertical_writing);
/** This method returns the string form of the specified unichar. */
const char *GetUnichar(int unichar_id) const;
/** Return the pointer to the i-th dawg loaded into tesseract_ object. */
const Dawg *GetDawg(int i) const;
/** Return the number of dawgs loaded into tesseract_ object. */
int NumDawgs() const;
/** Accessor for tesseract_, the underlying data object. */
Tesseract *tesseract() const {
return tesseract_;
}
/** Accessor for last_oem_requested_, the last OCR engine mode requested. */
OcrEngineMode oem() const {
return last_oem_requested_;
}
void set_min_orientation_margin(double margin);
/* @} */
protected:
/** Common code for setting the image. Returns true if Init has been called.
*/
bool InternalSetImage();
/**
* Run the thresholder to make the thresholded image. If pix is not nullptr,
* the source is thresholded to pix instead of the internal IMAGE.
*/
virtual bool Threshold(Pix **pix);
/**
* Find lines from the image making the BLOCK_LIST.
* @return 0 on success.
*/
int FindLines();
/** Delete the pageres and block list ready for a new page. */
void ClearResults();
/**
* Return an LTR Result Iterator -- used only for training, as we really want
* to ignore all BiDi smarts at that point.
* delete once you're done with it.
*/
LTRResultIterator *GetLTRIterator();
/**
* Return the length of the output text string, as UTF8, assuming
* one newline per line and one per block, with a terminator,
* and assuming a single character reject marker for each rejected character.
* Also return the number of recognized blobs in blob_count.
*/
int TextLength(int *blob_count) const;
//// paragraphs.cpp ////////////////////////////////////////////////////
void DetectParagraphs(bool after_text_recognition);
/** Read-only accessor for page_res_, the page-level data. */
const PAGE_RES *GetPageRes() const {
return page_res_;
}
protected:
Tesseract *tesseract_; ///< The underlying data object.
Tesseract *osd_tesseract_; ///< For orientation & script detection.
EquationDetect *equ_detect_; ///< The equation detector.
FileReader reader_; ///< Reads files from any filesystem.
ImageThresholder *thresholder_; ///< Image thresholding module.
std::vector<ParagraphModel *> *paragraph_models_;
BLOCK_LIST *block_list_; ///< The page layout.
PAGE_RES *page_res_; ///< The page-level data.
std::string input_file_; ///< Name used by training code.
std::string output_file_; ///< Name used by debug code.
std::string datapath_; ///< Current location of tessdata.
std::string language_; ///< Last initialized language.
OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested.
bool recognition_done_; ///< page_res_ contains recognition data.
/**
* @defgroup ThresholderParams Thresholder Parameters
* Parameters saved from the Thresholder. Needed to rebuild coordinates.
*/
/* @{ */
int rect_left_;
int rect_top_;
int rect_width_;
int rect_height_;
int image_width_;
int image_height_;
/* @} */
private:
// A list of image filenames gets special consideration
bool ProcessPagesFileList(FILE *fp, std::string *buf,
const char *retry_config, int timeout_millisec,
TessResultRenderer *renderer,
int tessedit_page_number);
// TIFF supports multipage so gets special consideration.
bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size,
const char *filename, const char *retry_config,
int timeout_millisec,
TessResultRenderer *renderer,
int tessedit_page_number);
}; // class TessBaseAPI.
/** Escape a char string - replace &<>"' with HTML codes. */
std::string HOcrEscape(const char *text);
} // namespace tesseract
#endif // TESSERACT_API_BASEAPI_H_

View File

@ -1,484 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: capi.h
// Description: C-API TessBaseAPI
//
// (C) Copyright 2012, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef API_CAPI_H_
#define API_CAPI_H_
#include "export.h"
#ifdef __cplusplus
# include <tesseract/baseapi.h>
# include <tesseract/ocrclass.h>
# include <tesseract/pageiterator.h>
# include <tesseract/renderer.h>
# include <tesseract/resultiterator.h>
#endif
#include <stdbool.h>
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
#endif
#ifndef BOOL
# define BOOL int
# define TRUE 1
# define FALSE 0
#endif
#ifdef __cplusplus
typedef tesseract::TessResultRenderer TessResultRenderer;
typedef tesseract::TessBaseAPI TessBaseAPI;
typedef tesseract::PageIterator TessPageIterator;
typedef tesseract::ResultIterator TessResultIterator;
typedef tesseract::MutableIterator TessMutableIterator;
typedef tesseract::ChoiceIterator TessChoiceIterator;
typedef tesseract::OcrEngineMode TessOcrEngineMode;
typedef tesseract::PageSegMode TessPageSegMode;
typedef tesseract::PageIteratorLevel TessPageIteratorLevel;
typedef tesseract::Orientation TessOrientation;
typedef tesseract::ParagraphJustification TessParagraphJustification;
typedef tesseract::WritingDirection TessWritingDirection;
typedef tesseract::TextlineOrder TessTextlineOrder;
typedef tesseract::PolyBlockType TessPolyBlockType;
typedef tesseract::ETEXT_DESC ETEXT_DESC;
#else
typedef struct TessResultRenderer TessResultRenderer;
typedef struct TessBaseAPI TessBaseAPI;
typedef struct TessPageIterator TessPageIterator;
typedef struct TessResultIterator TessResultIterator;
typedef struct TessMutableIterator TessMutableIterator;
typedef struct TessChoiceIterator TessChoiceIterator;
/* C-language definition of TessOcrEngineMode. In C++ builds this name is a
 * typedef of tesseract::OcrEngineMode instead, so the numbering here
 * presumably has to match that enum - verify before reordering.
 * Values are written out to make the numbering explicit. */
typedef enum TessOcrEngineMode {
  OEM_TESSERACT_ONLY = 0,
  OEM_LSTM_ONLY = 1,
  OEM_TESSERACT_LSTM_COMBINED = 2,
  OEM_DEFAULT = 3
} TessOcrEngineMode;
/* C-language definition of TessPageSegMode. In C++ builds this name is a
 * typedef of tesseract::PageSegMode instead, so the numbering here
 * presumably has to match that enum - verify before reordering.
 * Values are written out to make the numbering explicit. */
typedef enum TessPageSegMode {
  PSM_OSD_ONLY = 0,
  PSM_AUTO_OSD = 1,
  PSM_AUTO_ONLY = 2,
  PSM_AUTO = 3,
  PSM_SINGLE_COLUMN = 4,
  PSM_SINGLE_BLOCK_VERT_TEXT = 5,
  PSM_SINGLE_BLOCK = 6,
  PSM_SINGLE_LINE = 7,
  PSM_SINGLE_WORD = 8,
  PSM_CIRCLE_WORD = 9,
  PSM_SINGLE_CHAR = 10,
  PSM_SPARSE_TEXT = 11,
  PSM_SPARSE_TEXT_OSD = 12,
  PSM_RAW_LINE = 13,
  PSM_COUNT = 14 /* last entry; equals the number of modes listed above */
} TessPageSegMode;
/* C-language definition of TessPageIteratorLevel. In C++ builds this name is
 * a typedef of tesseract::PageIteratorLevel instead, so the numbering here
 * presumably has to match that enum - verify before reordering.
 * Values are written out to make the numbering explicit. */
typedef enum TessPageIteratorLevel {
  RIL_BLOCK = 0,
  RIL_PARA = 1,
  RIL_TEXTLINE = 2,
  RIL_WORD = 3,
  RIL_SYMBOL = 4
} TessPageIteratorLevel;
/* C-language definition of TessPolyBlockType. In C++ builds this name is a
 * typedef of tesseract::PolyBlockType instead, so the numbering here
 * presumably has to match that enum - verify before reordering.
 * Values are written out to make the numbering explicit. */
typedef enum TessPolyBlockType {
  PT_UNKNOWN = 0,
  PT_FLOWING_TEXT = 1,
  PT_HEADING_TEXT = 2,
  PT_PULLOUT_TEXT = 3,
  PT_EQUATION = 4,
  PT_INLINE_EQUATION = 5,
  PT_TABLE = 6,
  PT_VERTICAL_TEXT = 7,
  PT_CAPTION_TEXT = 8,
  PT_FLOWING_IMAGE = 9,
  PT_HEADING_IMAGE = 10,
  PT_PULLOUT_IMAGE = 11,
  PT_HORZ_LINE = 12,
  PT_VERT_LINE = 13,
  PT_NOISE = 14,
  PT_COUNT = 15 /* last entry; equals the number of types listed above */
} TessPolyBlockType;
/* C-language definition of TessOrientation. In C++ builds this name is a
 * typedef of tesseract::Orientation instead, so the numbering here
 * presumably has to match that enum - verify before reordering.
 * Values are written out to make the numbering explicit. */
typedef enum TessOrientation {
  ORIENTATION_PAGE_UP = 0,
  ORIENTATION_PAGE_RIGHT = 1,
  ORIENTATION_PAGE_DOWN = 2,
  ORIENTATION_PAGE_LEFT = 3
} TessOrientation;
/* C-language definition of TessParagraphJustification. In C++ builds this
 * name is a typedef of tesseract::ParagraphJustification instead, so the
 * numbering here presumably has to match that enum - verify before
 * reordering. Values are written out to make the numbering explicit. */
typedef enum TessParagraphJustification {
  JUSTIFICATION_UNKNOWN = 0,
  JUSTIFICATION_LEFT = 1,
  JUSTIFICATION_CENTER = 2,
  JUSTIFICATION_RIGHT = 3
} TessParagraphJustification;
/* C-language definition of TessWritingDirection. In C++ builds this name is
 * a typedef of tesseract::WritingDirection instead, so the numbering here
 * presumably has to match that enum - verify before reordering.
 * Values are written out to make the numbering explicit. */
typedef enum TessWritingDirection {
  WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
  WRITING_DIRECTION_TOP_TO_BOTTOM = 2
} TessWritingDirection;
/* C-language definition of TessTextlineOrder. In C++ builds this name is a
 * typedef of tesseract::TextlineOrder instead, so the numbering here
 * presumably has to match that enum - verify before reordering.
 * Values are written out to make the numbering explicit. */
typedef enum TessTextlineOrder {
  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2
} TessTextlineOrder;
typedef struct ETEXT_DESC ETEXT_DESC;
#endif
typedef bool (*TessCancelFunc)(void *cancel_this, int words);
typedef bool (*TessProgressFunc)(ETEXT_DESC *ths, int left, int right, int top,
int bottom);
struct Pix;
struct Boxa;
struct Pixa;
/* General free functions */
TESS_API const char *TessVersion();
TESS_API void TessDeleteText(const char *text);
TESS_API void TessDeleteTextArray(char **arr);
TESS_API void TessDeleteIntArray(const int *arr);
/* Renderer API */
TESS_API TessResultRenderer *TessTextRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessHOcrRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessHOcrRendererCreate2(const char *outputbase,
BOOL font_info);
TESS_API TessResultRenderer *TessAltoRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessTsvRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessPDFRendererCreate(const char *outputbase,
const char *datadir,
BOOL textonly);
TESS_API TessResultRenderer *TessUnlvRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessBoxTextRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessLSTMBoxRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessWordStrBoxRendererCreate(
const char *outputbase);
TESS_API void TessDeleteResultRenderer(TessResultRenderer *renderer);
TESS_API void TessResultRendererInsert(TessResultRenderer *renderer,
TessResultRenderer *next);
TESS_API TessResultRenderer *TessResultRendererNext(
TessResultRenderer *renderer);
TESS_API BOOL TessResultRendererBeginDocument(TessResultRenderer *renderer,
const char *title);
TESS_API BOOL TessResultRendererAddImage(TessResultRenderer *renderer,
TessBaseAPI *api);
TESS_API BOOL TessResultRendererEndDocument(TessResultRenderer *renderer);
TESS_API const char *TessResultRendererExtention(TessResultRenderer *renderer);
TESS_API const char *TessResultRendererTitle(TessResultRenderer *renderer);
TESS_API int TessResultRendererImageNum(TessResultRenderer *renderer);
/* Base API */
TESS_API TessBaseAPI *TessBaseAPICreate();
TESS_API void TessBaseAPIDelete(TessBaseAPI *handle);
TESS_API size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI *handle, void **device);
TESS_API void TessBaseAPISetInputName(TessBaseAPI *handle, const char *name);
TESS_API const char *TessBaseAPIGetInputName(TessBaseAPI *handle);
TESS_API void TessBaseAPISetInputImage(TessBaseAPI *handle, struct Pix *pix);
TESS_API struct Pix *TessBaseAPIGetInputImage(TessBaseAPI *handle);
TESS_API int TessBaseAPIGetSourceYResolution(TessBaseAPI *handle);
TESS_API const char *TessBaseAPIGetDatapath(TessBaseAPI *handle);
TESS_API void TessBaseAPISetOutputName(TessBaseAPI *handle, const char *name);
TESS_API BOOL TessBaseAPISetVariable(TessBaseAPI *handle, const char *name,
const char *value);
TESS_API BOOL TessBaseAPISetDebugVariable(TessBaseAPI *handle, const char *name,
const char *value);
TESS_API BOOL TessBaseAPIGetIntVariable(const TessBaseAPI *handle,
const char *name, int *value);
TESS_API BOOL TessBaseAPIGetBoolVariable(const TessBaseAPI *handle,
const char *name, BOOL *value);
TESS_API BOOL TessBaseAPIGetDoubleVariable(const TessBaseAPI *handle,
const char *name, double *value);
TESS_API const char *TessBaseAPIGetStringVariable(const TessBaseAPI *handle,
const char *name);
TESS_API void TessBaseAPIPrintVariables(const TessBaseAPI *handle, FILE *fp);
TESS_API BOOL TessBaseAPIPrintVariablesToFile(const TessBaseAPI *handle,
const char *filename);
TESS_API int TessBaseAPIInit1(TessBaseAPI *handle, const char *datapath,
const char *language, TessOcrEngineMode oem,
char **configs, int configs_size);
TESS_API int TessBaseAPIInit2(TessBaseAPI *handle, const char *datapath,
const char *language, TessOcrEngineMode oem);
TESS_API int TessBaseAPIInit3(TessBaseAPI *handle, const char *datapath,
const char *language);
TESS_API int TessBaseAPIInit4(TessBaseAPI *handle, const char *datapath,
const char *language, TessOcrEngineMode mode,
char **configs, int configs_size, char **vars_vec,
char **vars_values, size_t vars_vec_size,
BOOL set_only_non_debug_params);
TESS_API int TessBaseAPIInit5(TessBaseAPI *handle, const char *data, int data_size,
const char *language, TessOcrEngineMode mode,
char **configs, int configs_size, char **vars_vec,
char **vars_values, size_t vars_vec_size,
BOOL set_only_non_debug_params);
TESS_API const char *TessBaseAPIGetInitLanguagesAsString(
const TessBaseAPI *handle);
TESS_API char **TessBaseAPIGetLoadedLanguagesAsVector(
const TessBaseAPI *handle);
TESS_API char **TessBaseAPIGetAvailableLanguagesAsVector(
const TessBaseAPI *handle);
TESS_API void TessBaseAPIInitForAnalysePage(TessBaseAPI *handle);
TESS_API void TessBaseAPIReadConfigFile(TessBaseAPI *handle,
const char *filename);
TESS_API void TessBaseAPIReadDebugConfigFile(TessBaseAPI *handle,
const char *filename);
TESS_API void TessBaseAPISetPageSegMode(TessBaseAPI *handle,
TessPageSegMode mode);
TESS_API TessPageSegMode TessBaseAPIGetPageSegMode(const TessBaseAPI *handle);
TESS_API char *TessBaseAPIRect(TessBaseAPI *handle,
const unsigned char *imagedata,
int bytes_per_pixel, int bytes_per_line,
int left, int top, int width, int height);
TESS_API void TessBaseAPIClearAdaptiveClassifier(TessBaseAPI *handle);
TESS_API void TessBaseAPISetImage(TessBaseAPI *handle,
const unsigned char *imagedata, int width,
int height, int bytes_per_pixel,
int bytes_per_line);
TESS_API void TessBaseAPISetImage2(TessBaseAPI *handle, struct Pix *pix);
TESS_API void TessBaseAPISetSourceResolution(TessBaseAPI *handle, int ppi);
TESS_API void TessBaseAPISetRectangle(TessBaseAPI *handle, int left, int top,
int width, int height);
TESS_API struct Pix *TessBaseAPIGetThresholdedImage(TessBaseAPI *handle);
/* Layout component getters. Each returns a struct Boxa * and reports
 * additional data through the pointer-to-pointer arguments (pixa, blockids,
 * paraids). The variants with a "1" suffix take the extra raw_image /
 * raw_padding (and paraids) arguments.
 * NOTE(review): whether the out-arguments may be NULL, and who frees the
 * returned Boxa / Pixa / int arrays, is not visible here -- confirm against
 * the implementation. */
TESS_API struct Boxa *TessBaseAPIGetRegions(TessBaseAPI *handle,
struct Pixa **pixa);
TESS_API struct Boxa *TessBaseAPIGetTextlines(TessBaseAPI *handle,
struct Pixa **pixa,
int **blockids);
TESS_API struct Boxa *TessBaseAPIGetTextlines1(TessBaseAPI *handle,
BOOL raw_image, int raw_padding,
struct Pixa **pixa,
int **blockids, int **paraids);
TESS_API struct Boxa *TessBaseAPIGetStrips(TessBaseAPI *handle,
struct Pixa **pixa, int **blockids);
TESS_API struct Boxa *TessBaseAPIGetWords(TessBaseAPI *handle,
struct Pixa **pixa);
TESS_API struct Boxa *TessBaseAPIGetConnectedComponents(TessBaseAPI *handle,
struct Pixa **cc);
/* Generic form of the getters above: the component kind is selected by
 * level, and text_only presumably filters out non-text components. */
TESS_API struct Boxa *TessBaseAPIGetComponentImages(TessBaseAPI *handle,
TessPageIteratorLevel level,
BOOL text_only,
struct Pixa **pixa,
int **blockids);
TESS_API struct Boxa *TessBaseAPIGetComponentImages1(
TessBaseAPI *handle, TessPageIteratorLevel level, BOOL text_only,
BOOL raw_image, int raw_padding, struct Pixa **pixa, int **blockids,
int **paraids);
TESS_API int TessBaseAPIGetThresholdedImageScaleFactor(
const TessBaseAPI *handle);
/* Layout analysis and recognition entry points.
 * NOTE(review): the meaning of the int returned by TessBaseAPIRecognize and
 * whether monitor may be NULL are not visible here -- confirm before relying
 * on them. */
TESS_API TessPageIterator *TessBaseAPIAnalyseLayout(TessBaseAPI *handle);
TESS_API int TessBaseAPIRecognize(TessBaseAPI *handle, ETEXT_DESC *monitor);
/* Whole-document / single-page processing. Both take an optional-looking
 * retry_config, a timeout in milliseconds and a renderer, and report
 * success as BOOL. */
TESS_API BOOL TessBaseAPIProcessPages(TessBaseAPI *handle, const char *filename,
const char *retry_config,
int timeout_millisec,
TessResultRenderer *renderer);
TESS_API BOOL TessBaseAPIProcessPage(TessBaseAPI *handle, struct Pix *pix,
int page_index, const char *filename,
const char *retry_config,
int timeout_millisec,
TessResultRenderer *renderer);
/* Iterator accessors; the matching *Delete functions are declared in the
 * iterator sections of this header. */
TESS_API TessResultIterator *TessBaseAPIGetIterator(TessBaseAPI *handle);
TESS_API TessMutableIterator *TessBaseAPIGetMutableIterator(
TessBaseAPI *handle);
/* Text output in several formats, selected by function name. All return a
 * non-const char *; presumably the caller owns and must release the buffer
 * -- TODO confirm which deallocation function applies. */
TESS_API char *TessBaseAPIGetUTF8Text(TessBaseAPI *handle);
TESS_API char *TessBaseAPIGetHOCRText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetAltoText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetTsvText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetBoxText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetLSTMBoxText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetWordStrBoxText(TessBaseAPI *handle,
int page_number);
TESS_API char *TessBaseAPIGetUNLVText(TessBaseAPI *handle);
/* Confidence queries. NOTE(review): the termination convention and ownership
 * of the int array returned by AllWordConfidences are not visible here. */
TESS_API int TessBaseAPIMeanTextConf(TessBaseAPI *handle);
TESS_API int *TessBaseAPIAllWordConfidences(TessBaseAPI *handle);
/* Only declared when the legacy engine is compiled in. */
#ifndef DISABLED_LEGACY_ENGINE
TESS_API BOOL TessBaseAPIAdaptToWordStr(TessBaseAPI *handle,
TessPageSegMode mode,
const char *wordstr);
#endif // #ifndef DISABLED_LEGACY_ENGINE
/* Clear and End both take only the handle; see the implementation for what
 * each one releases. */
TESS_API void TessBaseAPIClear(TessBaseAPI *handle);
TESS_API void TessBaseAPIEnd(TessBaseAPI *handle);
/* Returns int rather than BOOL, unlike most predicates in this header. */
TESS_API int TessBaseAPIIsValidWord(TessBaseAPI *handle, const char *word);
TESS_API BOOL TessBaseAPIGetTextDirection(TessBaseAPI *handle, int *out_offset,
float *out_slope);
TESS_API const char *TessBaseAPIGetUnichar(TessBaseAPI *handle, int unichar_id);
TESS_API void TessBaseAPIClearPersistentCache(TessBaseAPI *handle);
#ifndef DISABLED_LEGACY_ENGINE
// NOTE(review): the comment that used to sit here said to call
// TessDeleteText(*best_script_name) to free memory allocated by this
// function. No parameter is named best_script_name; the closest match is
// script_name, which is declared const char **. Confirm ownership of
// *script_name against the implementation before freeing it.
TESS_API BOOL TessBaseAPIDetectOrientationScript(TessBaseAPI *handle,
int *orient_deg,
float *orient_conf,
const char **script_name,
float *script_conf);
#endif // #ifndef DISABLED_LEGACY_ENGINE
TESS_API void TessBaseAPISetMinOrientationMargin(TessBaseAPI *handle,
double margin);
TESS_API int TessBaseAPINumDawgs(const TessBaseAPI *handle);
TESS_API TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI *handle);
/* NOTE: unlike its neighbours this function has no "API" infix in its name
 * and uses bool ** rather than BOOL; callers link against this exact
 * spelling, so do not "normalise" it. */
TESS_API void TessBaseGetBlockTextOrientations(TessBaseAPI *handle,
int **block_orientation,
bool **vertical_writing);
/* Page iterator */
/* Lifetime: Delete releases a handle, Copy returns a new handle.
 * NOTE(review): presumably every handle returned by Copy must be passed to
 * Delete -- confirm. */
TESS_API void TessPageIteratorDelete(TessPageIterator *handle);
TESS_API TessPageIterator *TessPageIteratorCopy(const TessPageIterator *handle);
/* Movement and position predicates; level selects the granularity of the
 * page hierarchy being stepped or tested. */
TESS_API void TessPageIteratorBegin(TessPageIterator *handle);
TESS_API BOOL TessPageIteratorNext(TessPageIterator *handle,
TessPageIteratorLevel level);
TESS_API BOOL TessPageIteratorIsAtBeginningOf(const TessPageIterator *handle,
TessPageIteratorLevel level);
TESS_API BOOL TessPageIteratorIsAtFinalElement(const TessPageIterator *handle,
TessPageIteratorLevel level,
TessPageIteratorLevel element);
/* Geometry of the current element: the box is written through the four int
 * out-pointers and the BOOL result presumably reports whether it is valid. */
TESS_API BOOL TessPageIteratorBoundingBox(const TessPageIterator *handle,
TessPageIteratorLevel level,
int *left, int *top, int *right,
int *bottom);
TESS_API TessPolyBlockType
TessPageIteratorBlockType(const TessPageIterator *handle);
/* Image accessors. NOTE(review): ownership of the returned Pix is not
 * visible from these declarations. */
TESS_API struct Pix *TessPageIteratorGetBinaryImage(
const TessPageIterator *handle, TessPageIteratorLevel level);
TESS_API struct Pix *TessPageIteratorGetImage(const TessPageIterator *handle,
TessPageIteratorLevel level,
int padding,
struct Pix *original_image,
int *left, int *top);
/* Baseline end points (x1, y1)-(x2, y2) are written through out-pointers. */
TESS_API BOOL TessPageIteratorBaseline(const TessPageIterator *handle,
TessPageIteratorLevel level, int *x1,
int *y1, int *x2, int *y2);
/* These two take a non-const handle even though their names suggest pure
 * queries; all results are returned through the out-pointers. */
TESS_API void TessPageIteratorOrientation(
TessPageIterator *handle, TessOrientation *orientation,
TessWritingDirection *writing_direction, TessTextlineOrder *textline_order,
float *deskew_angle);
TESS_API void TessPageIteratorParagraphInfo(
TessPageIterator *handle, TessParagraphJustification *justification,
BOOL *is_list_item, BOOL *is_crown, int *first_line_indent);
/* Result iterator */
/* Lifetime: Delete releases a handle, Copy returns a new handle. */
TESS_API void TessResultIteratorDelete(TessResultIterator *handle);
TESS_API TessResultIterator *TessResultIteratorCopy(
const TessResultIterator *handle);
/* Views of the same iterator as a page iterator (mutable and const), plus a
 * choice iterator derived from it.
 * NOTE(review): whether the returned page iterator is a separate object that
 * needs its own Delete is not visible here -- confirm. */
TESS_API TessPageIterator *TessResultIteratorGetPageIterator(
TessResultIterator *handle);
TESS_API const TessPageIterator *TessResultIteratorGetPageIteratorConst(
const TessResultIterator *handle);
TESS_API TessChoiceIterator *TessResultIteratorGetChoiceIterator(
const TessResultIterator *handle);
TESS_API BOOL TessResultIteratorNext(TessResultIterator *handle,
TessPageIteratorLevel level);
/* Returns a non-const char *; presumably caller-owned -- TODO confirm which
 * deallocation function applies. */
TESS_API char *TessResultIteratorGetUTF8Text(const TessResultIterator *handle,
TessPageIteratorLevel level);
TESS_API float TessResultIteratorConfidence(const TessResultIterator *handle,
TessPageIteratorLevel level);
/* Word-level queries. The const char * results are presumably owned by the
 * library and must not be freed -- TODO confirm. */
TESS_API const char *TessResultIteratorWordRecognitionLanguage(
const TessResultIterator *handle);
TESS_API const char *TessResultIteratorWordFontAttributes(
const TessResultIterator *handle, BOOL *is_bold, BOOL *is_italic,
BOOL *is_underlined, BOOL *is_monospace, BOOL *is_serif, BOOL *is_smallcaps,
int *pointsize, int *font_id);
TESS_API BOOL
TessResultIteratorWordIsFromDictionary(const TessResultIterator *handle);
TESS_API BOOL TessResultIteratorWordIsNumeric(const TessResultIterator *handle);
/* Symbol-level predicates. */
TESS_API BOOL
TessResultIteratorSymbolIsSuperscript(const TessResultIterator *handle);
TESS_API BOOL
TessResultIteratorSymbolIsSubscript(const TessResultIterator *handle);
TESS_API BOOL
TessResultIteratorSymbolIsDropcap(const TessResultIterator *handle);
/* Choice iterator */
TESS_API void TessChoiceIteratorDelete(TessChoiceIterator *handle);
TESS_API BOOL TessChoiceIteratorNext(TessChoiceIterator *handle);
TESS_API const char *TessChoiceIteratorGetUTF8Text(
const TessChoiceIterator *handle);
TESS_API float TessChoiceIteratorConfidence(const TessChoiceIterator *handle);
/* Progress monitor */
/* Create / destroy a progress monitor (ETEXT_DESC). TessMonitorCreate is
 * declared with an explicit (void) parameter list: this header is consumed
 * from C as well as C++, and in C an empty "()" declares a function with
 * unspecified parameters rather than one that takes none.
 * NOTE(review): presumably every monitor returned by TessMonitorCreate must
 * be released with TessMonitorDelete -- confirm. */
TESS_API ETEXT_DESC *TessMonitorCreate(void);
TESS_API void TessMonitorDelete(ETEXT_DESC *monitor);
/* Cancellation hook: cancelFunc is the callback and cancelThis is the opaque
 * pointer stored alongside it (readable back via TessMonitorGetCancelThis). */
TESS_API void TessMonitorSetCancelFunc(ETEXT_DESC *monitor,
TessCancelFunc cancelFunc);
TESS_API void TessMonitorSetCancelThis(ETEXT_DESC *monitor, void *cancelThis);
TESS_API void *TessMonitorGetCancelThis(ETEXT_DESC *monitor);
/* Progress reporting: install a callback, or poll the current value. */
TESS_API void TessMonitorSetProgressFunc(ETEXT_DESC *monitor,
TessProgressFunc progressFunc);
TESS_API int TessMonitorGetProgress(ETEXT_DESC *monitor);
/* deadline: presumably a duration in milliseconds, going by the function
 * name -- TODO confirm whether it is relative to the time of the call. */
TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC *monitor, int deadline);
#ifdef __cplusplus
}
#endif
#endif // API_CAPI_H_

View File

@ -1,37 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: export.h
// Description: Place holder
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_PLATFORM_H_
#define TESSERACT_PLATFORM_H_
// TESS_API decorates every symbol that belongs to the public interface.
// A definition supplied by the build system wins; otherwise it is derived
// from the platform and from TESS_EXPORTS / TESS_IMPORTS:
//   Windows / Cygwin : dllexport when TESS_EXPORTS is defined, dllimport when
//                      TESS_IMPORTS is defined, empty otherwise.
//   everything else  : default symbol visibility when either TESS_EXPORTS or
//                      TESS_IMPORTS is defined, empty otherwise.
#if !defined(TESS_API)
#  if (defined(_WIN32) || defined(__CYGWIN__)) && defined(TESS_EXPORTS)
#    define TESS_API __declspec(dllexport)
#  elif (defined(_WIN32) || defined(__CYGWIN__)) && defined(TESS_IMPORTS)
#    define TESS_API __declspec(dllimport)
#  elif defined(_WIN32) || defined(__CYGWIN__)
#    define TESS_API
#  elif defined(TESS_EXPORTS) || defined(TESS_IMPORTS)
#    define TESS_API __attribute__((visibility("default")))
#  else
#    define TESS_API
#  endif
#endif // !defined(TESS_API)
#endif // TESSERACT_PLATFORM_H_

View File

@ -1,235 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: ltrresultiterator.h
// Description: Iterator for tesseract results in strict left-to-right
// order that avoids using tesseract internal data structures.
// Author: Ray Smith
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
#define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
#include "export.h" // for TESS_API
#include "pageiterator.h" // for PageIterator
#include "publictypes.h" // for PageIteratorLevel
#include "unichar.h" // for StrongScriptDirection
namespace tesseract {
// Forward declarations of engine-internal types. This header only ever uses
// them through pointers, so their full definitions are not needed here.
class BLOB_CHOICE_IT;
class PAGE_RES;
class WERD_RES;
class Tesseract;
// Class to iterate over tesseract results, providing access to all levels
// of the page hierarchy, without including any tesseract headers or having
// to handle any tesseract structures.
// WARNING! This class points to data held within the TessBaseAPI class, and
// therefore can only be used while the TessBaseAPI class still exists and
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End
// DetectOS, or anything else that changes the internal PAGE_RES.
// See tesseract/publictypes.h for the definition of PageIteratorLevel.
// See also base class PageIterator, which contains the bulk of the interface.
// LTRResultIterator adds text-specific methods for access to OCR output.
class TESS_API LTRResultIterator : public PageIterator {
// ChoiceIterator (declared later in this header) is constructed from an
// LTRResultIterator and is granted access to its non-public members.
friend class ChoiceIterator;
public:
// page_res and tesseract come directly from the BaseAPI.
// The rectangle parameters are copied indirectly from the Thresholder,
// via the BaseAPI. They represent the coordinates of some rectangle in an
// original image (in top-left-origin coordinates) and therefore the top-left
// needs to be added to any output boxes in order to specify coordinates
// in the original image. See TessBaseAPI::SetRectangle.
// The scale and scaled_yres are in case the Thresholder scaled the image
// rectangle prior to thresholding. Any coordinates in tesseract's image
// must be divided by scale before adding (rect_left, rect_top).
// The scaled_yres indicates the effective resolution of the binary image
// that tesseract has been given by the Thresholder.
// After the constructor, Begin has already been called.
LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
int scaled_yres, int rect_left, int rect_top,
int rect_width, int rect_height);
// Overrides the virtual destructor of the PageIterator base class.
~LTRResultIterator() override;
// LTRResultIterators may be copied! This makes it possible to iterate over
// all the objects at a lower level, while maintaining an iterator to
// objects at a higher level. These constructors DO NOT CALL Begin, so
// iterations will continue from the location of src.
// TODO: For now the copy constructor and operator= only need the base class
// versions, but if new data members are added, don't forget to add them!
// ============= Moving around within the page ============.
// See PageIterator.
// ============= Accessing data ==============.
// Returns the null terminated UTF-8 encoded text string for the current
// object at the given level. Use delete [] to free after use.
char *GetUTF8Text(PageIteratorLevel level) const;
// Set the string inserted at the end of each text line. "\n" by default.
// NOTE(review): the separator is held in a const char * member (see the
// protected section), so the string is presumably not copied and must
// outlive the iterator -- confirm in the implementation.
void SetLineSeparator(const char *new_line);
// Set the string inserted at the end of each paragraph. "\n" by default.
// The same lifetime caveat as SetLineSeparator presumably applies.
void SetParagraphSeparator(const char *new_para);
// Returns the mean confidence of the current object at the given level.
// The number should be interpreted as a percent probability. (0.0f-100.0f)
float Confidence(PageIteratorLevel level) const;
// ============= Functions that refer to words only ============.
// Returns the font attributes of the current word. If iterating at a higher
// level object than words, eg textlines, then this will return the
// attributes of the first word in that textline.
// The actual return value is a string representing a font name. It points
// to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as
// the iterator itself, ie rendered invalid by various members of
// TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
// Pointsize is returned in printers points (1/72 inch.)
const char *WordFontAttributes(bool *is_bold, bool *is_italic,
bool *is_underlined, bool *is_monospace,
bool *is_serif, bool *is_smallcaps,
int *pointsize, int *font_id) const;
// Return the name of the language used to recognize this word.
// On error, nullptr. Do not delete this pointer.
const char *WordRecognitionLanguage() const;
// Return the overall directionality of this word.
StrongScriptDirection WordDirection() const;
// Returns true if the current word was found in a dictionary.
bool WordIsFromDictionary() const;
// Returns the number of blanks before the current word.
int BlanksBeforeWord() const;
// Returns true if the current word is numeric.
bool WordIsNumeric() const;
// Returns true if the word contains blamer information.
bool HasBlamerInfo() const;
// Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
// of the current word.
const void *GetParamsTrainingBundle() const;
// Returns a pointer to the string with blamer information for this word.
// Assumes that the word's blamer_bundle is not nullptr.
const char *GetBlamerDebug() const;
// Returns a pointer to the string with misadaption information for this word.
// Assumes that the word's blamer_bundle is not nullptr.
const char *GetBlamerMisadaptionDebug() const;
// Returns true if a truth string was recorded for the current word.
bool HasTruthString() const;
// Returns true if the given string is equivalent to the truth string for
// the current word.
bool EquivalentToTruth(const char *str) const;
// Returns a null terminated UTF-8 encoded truth string for the current word.
// Use delete [] to free after use.
char *WordTruthUTF8Text() const;
// Returns a null terminated UTF-8 encoded normalized OCR string for the
// current word. Use delete [] to free after use.
char *WordNormedUTF8Text() const;
// Returns a pointer to serialized choice lattice.
// Fills lattice_size with the number of bytes in lattice data.
const char *WordLattice(int *lattice_size) const;
// ============= Functions that refer to symbols only ============.
// Returns true if the current symbol is a superscript.
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
bool SymbolIsSuperscript() const;
// Returns true if the current symbol is a subscript.
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
bool SymbolIsSubscript() const;
// Returns true if the current symbol is a dropcap.
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
bool SymbolIsDropcap() const;
protected:
// Separator strings configured through SetLineSeparator and
// SetParagraphSeparator. They have no in-class initializer, so the
// constructor is presumably what establishes the "\n" default.
const char *line_separator_;
const char *paragraph_separator_;
};
// Class to iterate over the classifier choices for a single RIL_SYMBOL.
// NOTE(review): std::vector and std::pair are used below, but this header
// includes neither <vector> nor <utility> itself; it relies on one of the
// headers it does include to provide them.
class TESS_API ChoiceIterator {
public:
// Construction is from a LTRResultIterator that points to the symbol of
// interest. The ChoiceIterator allows a one-shot iteration over the
// choices for this symbol and after that it is useless.
explicit ChoiceIterator(const LTRResultIterator &result_it);
~ChoiceIterator();
// Moves to the next choice for the symbol and returns false if there
// are none left.
bool Next();
// ============= Accessing data ==============.
// Returns the null terminated UTF-8 encoded text string for the current
// choice.
// NOTE: Unlike LTRResultIterator::GetUTF8Text, the return points to an
// internal structure and should NOT be delete[]ed to free after use.
const char *GetUTF8Text() const;
// Returns the confidence of the current choice depending on the used language
// data. If only LSTM traineddata is used the value range is 0.0f - 1.0f. All
// choices for one symbol should roughly add up to 1.0f.
// If only traineddata of the legacy engine is used, the number should be
// interpreted as a percent probability. (0.0f-100.0f) In this case
// probabilities won't add up to 100. Each one stands on its own.
float Confidence() const;
// Returns a vector containing all timesteps, which belong to the currently
// selected symbol. A timestep is a vector containing pairs of symbols and
// floating point numbers. The number states the probability for the
// corresponding symbol.
// NOTE(review): a raw pointer is returned; ownership and lifetime of the
// pointed-to vector are not visible from this declaration -- confirm before
// deleting or caching it.
std::vector<std::vector<std::pair<const char *, float>>> *Timesteps() const;
private:
// clears the remaining spaces out of the results and adapt the probabilities
void filterSpaces();
// Pointer to the WERD_RES object owned by the API.
WERD_RES *word_res_;
// Iterator over the blob choices.
BLOB_CHOICE_IT *choice_it_;
// LSTM choice list and the current position within it. Only the list
// pointer has an in-class initializer (nullptr).
std::vector<std::pair<const char *, float>> *LSTM_choices_ = nullptr;
std::vector<std::pair<const char *, float>>::iterator LSTM_choice_it_;
// presumably the timestep index associated with the current symbol --
// TODO confirm against the implementation.
const int *tstep_index_;
// regulates the rating granularity
double rating_coefficient_;
// leading blanks
int blanks_before_word_;
// true when there is lstm engine related trained data
bool oemLSTM_;
};
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_

View File

@ -1,158 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
/**********************************************************************
* File: ocrclass.h
* Description: Class definitions and constants for the OCR API.
* Author: Hewlett-Packard Co
*
* (C) Copyright 1996, Hewlett-Packard Co.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
/**********************************************************************
* This file contains typedefs for all the structures used by
* the HP OCR interface.
* The structures are designed to allow them to be used with any
* structure alignment up to 8.
**********************************************************************/
#ifndef CCUTIL_OCRCLASS_H_
#define CCUTIL_OCRCLASS_H_
#include <chrono>
#include <ctime>
namespace tesseract {
/**********************************************************************
* EANYCODE_CHAR
* Description of a single character. The character code is defined by
* the character set of the current font.
* Output text is sent as an array of these structures.
* Spaces and line endings in the output are represented in the
* structures of the surrounding characters. They are not directly
* represented as characters.
* The first character in a word has a positive value of blanks.
* Missing information should be set to the defaults in the comments.
* If word bounds are known, but not character bounds, then the top and
* bottom of each character should be those of the word. The left of the
* first and right of the last char in each word should be set. All other
* lefts and rights should be set to -1.
* If set, the values of right and bottom are left+width and top+height.
* Most of the members come directly from the parameters to ocr_append_char.
* The formatting member uses the enhancement parameter and combines the
* line direction stuff into the top 3 bits.
* The coding is 0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para,
* 5=UL Para, 6=TB char, 7=BT char. API users do not need to know what
* the coding is, only that it is backwards compatible with the previous
* version.
**********************************************************************/
struct EANYCODE_CHAR { // description of one output character
  // From version 2.0 onwards char_code carries UTF-8. An ASCII character
  // therefore occupies a single structure, while any other character is
  // spread over two or more consecutive structures, each holding one byte of
  // the UTF-8 sequence and all sharing the same bounding box. Programs that
  // want to handle languages with different character sets must deal with
  // extended characters appropriately, and *all* code has to be prepared for
  // UTF-8 coded characters such as bullets and fancy quotes.
  //
  // The value in parentheses after each field is its default.
  uint16_t char_code; // the character itself
  int16_t left;       // left edge of the char (-1)
  int16_t right;      // right edge of the char (-1)
  int16_t top;        // top edge of the char (-1)
  int16_t bottom;     // bottom edge of the char (-1)
  int16_t font_index; // which font (0)
  uint8_t confidence; // 0 = perfect, 100 = reject (0/100)
  uint8_t point_size; // size of the char, 72 = 1 inch (10)
  int8_t blanks;      // number of spaces before this char (1)
  uint8_t formatting; // char formatting (0)
};

/**********************************************************************
 * ETEXT_DESC
 * Header describing the output of the OCR engine. The same structure is
 * both the progress monitor and the final output header, because it has to
 * remain a valid progress monitor while the engine is still storing its
 * output to shared memory.
 *  - While recognition is in progress all buffer info is -1.
 *  - progress starts at 0 and rises to 100 during OCR; there is no other
 *    constraint on it.
 *  - The progress callback additionally receives the bounding box of the
 *    word currently being processed.
 *  - On every progress callback the OCR engine must set ocr_alive to 1. The
 *    HP side sets ocr_alive to 0; repeated failure to reset it to 1 means
 *    the OCR engine is dead.
 *  - If the cancel function is not null it is called with the number of
 *    user words found, and returning true cancels the operation.
 **********************************************************************/
class ETEXT_DESC;
using CANCEL_FUNC = bool (*)(void *, int);
using PROGRESS_FUNC = bool (*)(int, int, int, int, int);
using PROGRESS_FUNC2 = bool (*)(ETEXT_DESC *, int, int, int, int);

// NOTE: the order and types of the data members are part of the binary
// interface shared with the compiled library -- do not reorder them.
class ETEXT_DESC { // output header
public:
  int16_t count{0};    /// number of chars in this buffer (0)
  int16_t progress{0}; /// percent complete, increasing (0-100)
  /** The progress monitor covers word recognition only; it does not cover
   * layout analysis.
   * See Ray comment in https://github.com/tesseract-ocr/tesseract/pull/27 */
  int8_t more_to_come{0};       /// true if this is not the last buffer
  volatile int8_t ocr_alive{0}; /// ocr sets to 1, HP sets to 0
  int8_t err_code{0};           /// for errcode use
  CANCEL_FUNC cancel{nullptr};  /// returns true to cancel
  PROGRESS_FUNC progress_callback{nullptr}; /// called when progress increases
  PROGRESS_FUNC2 progress_callback2;        /// monitor-aware progress callback
  void *cancel_this{nullptr};               /// `this` or other data for cancel
  /// Time to stop. Expected to be changed only by set_deadline_msecs(); the
  /// clock's epoch value means "no deadline".
  std::chrono::steady_clock::time_point end_time;
  EANYCODE_CHAR text[1]{}; /// character data

  // Installs the forwarding progress callback and leaves end_time at the
  // clock's epoch, i.e. with no deadline set.
  ETEXT_DESC() : progress_callback2(&default_progress_func), end_time() {}

  // Moves the deadline to deadline_msecs milliseconds from now. Zero and
  // negative values are ignored and leave the current deadline untouched.
  void set_deadline_msecs(int32_t deadline_msecs) {
    if (deadline_msecs <= 0) {
      return;
    }
    const auto start = std::chrono::steady_clock::now();
    end_time = start + std::chrono::milliseconds(deadline_msecs);
  }

  // True only when a deadline has been set and the clock has moved past it.
  bool deadline_exceeded() const {
    const auto no_deadline = std::chrono::steady_clock::duration::zero();
    if (end_time.time_since_epoch() == no_deadline) {
      return false;
    }
    return std::chrono::steady_clock::now() > end_time;
  }

private:
  // Default for progress_callback2: forwards to the legacy progress_callback
  // (passing the monitor's current progress value) when one is installed,
  // and otherwise reports true.
  static bool default_progress_func(ETEXT_DESC *monitor, int left, int right,
                                    int top, int bottom) {
    const PROGRESS_FUNC legacy = monitor->progress_callback;
    if (legacy == nullptr) {
      return true;
    }
    return legacy(monitor->progress, left, right, top, bottom);
  }
};
} // namespace tesseract
#endif // CCUTIL_OCRCLASS_H_

View File

@ -1,139 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: osdetect.h
// Description: Orientation and script detection.
// Author: Samuel Charron
// Ranjith Unnikrishnan
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCMAIN_OSDETECT_H_
#define TESSERACT_CCMAIN_OSDETECT_H_
#include "export.h" // for TESS_API
#include <vector> // for std::vector
namespace tesseract {
// Forward declarations of engine-internal types. This header only ever uses
// them through pointers, so their full definitions are not needed here.
class BLOBNBOX;
class BLOBNBOX_CLIST;
class BLOB_CHOICE_LIST;
class TO_BLOCK_LIST;
class UNICHARSET;
class Tesseract;
// Upper bound on the number of scripts that can be scored: the scripts known
// to ICU, plus "NULL", plus Japanese and Korean, plus Fraktur.
constexpr int kMaxNumberOfScripts = 116   // ICU
                                    + 1   // "NULL"
                                    + 2   // Japanese and Korean
                                    + 1;  // Fraktur
// Best orientation / script estimate together with a confidence value for
// each. Every field starts out at zero.
struct OSBestResult {
  OSBestResult() {}
  int orientation_id = 0;
  int script_id = 0;
  float sconfidence = 0.0f; // confidence in script_id
  float oconfidence = 0.0f; // confidence in orientation_id
};
// Accumulated orientation and script scores, plus the best estimate derived
// from them.
struct OSResults {
  // Starts with no unicharset attached and every score table zeroed.
  OSResults() : unicharset(nullptr) {
    for (auto &per_orientation : scripts_na) {
      for (float &score : per_orientation) {
        score = 0;
      }
    }
    for (float &score : orientations) {
      score = 0;
    }
  }
  void update_best_orientation();
  // Set the estimate of the orientation to the given id.
  void set_best_orientation(int orientation_id);
  // Update/Compute the best estimate of the script assuming the given
  // orientation id.
  void update_best_script(int orientation_id);
  // Return the index of the script with the highest score for this orientation.
  TESS_API int get_best_script(int orientation_id) const;
  // Accumulate scores with given OSResults instance and update the best script.
  void accumulate(const OSResults &osr);
  // Print statistics.
  void print_scores(void) const;
  void print_scores(int orientation_id) const;
  // Array holding scores for each orientation id [0,3].
  // Orientation ids [0..3] map to [0, 270, 180, 90] degree orientations of the
  // page respectively, where the values refer to the amount of clockwise
  // rotation to be applied to the page for the text to be upright and readable.
  float orientations[4];
  // Script confidence scores for each of 4 possible orientations.
  float scripts_na[4][kMaxNumberOfScripts];
  // Not set by the constructor (stays nullptr until assigned elsewhere).
  UNICHARSET *unicharset;
  OSBestResult best_result;
};
// Orientation half of orientation-and-script detection. Constructed from a
// list of allowed script ids and the OSResults instance it works with.
// NOTE(review): both constructor arguments are kept as raw pointers (see the
// private members), so they presumably must outlive the detector -- confirm.
class OrientationDetector {
public:
OrientationDetector(const std::vector<int> *allowed_scripts,
OSResults *results);
// Takes the classifier scores for one blob. NOTE(review): the meaning of the
// bool result is not visible from this declaration.
bool detect_blob(BLOB_CHOICE_LIST *scores);
int get_orientation();
private:
OSResults *osr_;
const std::vector<int> *allowed_scripts_;
};
// Script half of orientation-and-script detection. Constructed from a list
// of allowed script ids, the OSResults instance it works with and the
// Tesseract instance.
// NOTE(review): all three constructor arguments are kept as raw pointers
// (see the private members), so they presumably must outlive the detector.
class ScriptDetector {
public:
ScriptDetector(const std::vector<int> *allowed_scripts, OSResults *osr,
tesseract::Tesseract *tess);
// Takes the classifier scores for one blob.
void detect_blob(BLOB_CHOICE_LIST *scores);
// presumably reports whether detection can stop early for the given
// orientation -- TODO confirm the exact criterion in the implementation.
bool must_stop(int orientation) const;
private:
OSResults *osr_;
// Script name constants, defined out of line.
static const char *korean_script_;
static const char *japanese_script_;
static const char *fraktur_script_;
// Cached ids for individual scripts; presumably looked up from the
// unicharset at construction time -- TODO confirm.
int korean_id_;
int japanese_id_;
int katakana_id_;
int hiragana_id_;
int han_id_;
int hangul_id_;
int latin_id_;
int fraktur_id_;
tesseract::Tesseract *tess_;
const std::vector<int> *allowed_scripts_;
};
// Free-function entry points for orientation and script detection. Each one
// takes the OSResults instance to work with and the Tesseract instance; they
// differ in their input: a file name, a list of blocks, a list of blobs, or
// a single blob plus the two detector objects.
// NOTE(review): the meaning of the int / bool return values is not visible
// from these declarations -- confirm in the implementation.
int orientation_and_script_detection(const char *filename, OSResults *,
tesseract::Tesseract *);
int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr,
tesseract::Tesseract *tess);
int os_detect_blobs(const std::vector<int> *allowed_scripts,
BLOBNBOX_CLIST *blob_list, OSResults *osr,
tesseract::Tesseract *tess);
bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s,
OSResults *, tesseract::Tesseract *tess);
// Helper method to convert an orientation index to its value in degrees.
// The value represents the amount of clockwise rotation in degrees that must be
// applied for the text to be upright (readable).
// The only function in this group exported with TESS_API.
TESS_API int OrientationIdToValue(const int &id);
} // namespace tesseract
#endif // TESSERACT_CCMAIN_OSDETECT_H_

View File

@ -1,364 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: pageiterator.h
// Description: Iterator for tesseract page structure that avoids using
// tesseract internal data structures.
// Author: Ray Smith
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_
#define TESSERACT_CCMAIN_PAGEITERATOR_H_
#include "export.h"
#include "publictypes.h"
// Forward declarations at global scope (outside namespace tesseract);
// presumably the Leptonica image and point-array types -- TODO confirm.
struct Pix;
struct Pta;
namespace tesseract {
// Forward declarations of engine-internal types referenced by the
// declarations that follow; their full definitions are not needed here.
struct BlamerBundle;
class C_BLOB_IT;
class PAGE_RES;
class PAGE_RES_IT;
class WERD;
class Tesseract;
/**
* Class to iterate over tesseract page structure, providing access to all
* levels of the page hierarchy, without including any tesseract headers or
* having to handle any tesseract structures.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
* DetectOS, or anything else that changes the internal PAGE_RES.
* See tesseract/publictypes.h for the definition of PageIteratorLevel.
* See also ResultIterator, derived from PageIterator, which adds in the
* ability to access OCR output with text-specific methods.
*/
class TESS_API PageIterator {
public:
/**
* page_res and tesseract come directly from the BaseAPI.
* The rectangle parameters are copied indirectly from the Thresholder,
* via the BaseAPI. They represent the coordinates of some rectangle in an
* original image (in top-left-origin coordinates) and therefore the top-left
* needs to be added to any output boxes in order to specify coordinates
* in the original image. See TessBaseAPI::SetRectangle.
* The scale and scaled_yres are in case the Thresholder scaled the image
* rectangle prior to thresholding. Any coordinates in tesseract's image
* must be divided by scale before adding (rect_left, rect_top).
* The scaled_yres indicates the effective resolution of the binary image
* that tesseract has been given by the Thresholder.
* After the constructor, Begin has already been called.
*/
PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
int scaled_yres, int rect_left, int rect_top, int rect_width,
int rect_height);
/**
* Virtual, so that a derived iterator is destroyed correctly when deleted
* through a PageIterator pointer.
*/
virtual ~PageIterator();
/**
* Page/ResultIterators may be copied! This makes it possible to iterate over
* all the objects at a lower level, while maintaining an iterator to
* objects at a higher level. These constructors DO NOT CALL Begin, so
* iterations will continue from the location of src.
*/
PageIterator(const PageIterator &src);
/**
* Copy assignment. Note that it returns a const reference, so the result
* of an assignment cannot itself be modified through the returned reference.
*/
const PageIterator &operator=(const PageIterator &src);
/** Are we positioned at the same location as other? */
bool PositionedAtSameWord(const PAGE_RES_IT *other) const;
// ============= Moving around within the page ============.
/**
* Moves the iterator to point to the start of the page to begin an
* iteration.
*/
virtual void Begin();
/**
* Moves the iterator to the beginning of the paragraph.
* This class implements this functionality by moving it to the zero indexed
* blob of the first (leftmost) word on the first row of the paragraph.
*/
virtual void RestartParagraph();
/**
* Return whether this iterator points anywhere in the first textline of a
* paragraph.
*/
bool IsWithinFirstTextlineOfParagraph() const;
/**
* Moves the iterator to the beginning of the text line.
* This class implements this functionality by moving it to the zero indexed
* blob of the first (leftmost) word of the row.
*/
virtual void RestartRow();
/**
* Moves to the start of the next object at the given level in the
* page hierarchy, and returns false if the end of the page was reached.
* NOTE that RIL_SYMBOL will skip non-text blocks, but all other
* PageIteratorLevel level values will visit each non-text block once.
* Think of non text blocks as containing a single para, with a single line,
* with a single imaginary word.
* Calls to Next with different levels may be freely intermixed.
* This function iterates words in right-to-left scripts correctly, if
* the appropriate language has been loaded into Tesseract.
*/
virtual bool Next(PageIteratorLevel level);
/**
* Returns true if the iterator is at the start of an object at the given
* level.
*
* For instance, suppose an iterator it is pointed to the first symbol of the
* first word of the third line of the second paragraph of the first block in
* a page, then:
* it.IsAtBeginningOf(RIL_BLOCK) = false
* it.IsAtBeginningOf(RIL_PARA) = false
* it.IsAtBeginningOf(RIL_TEXTLINE) = true
* it.IsAtBeginningOf(RIL_WORD) = true
* it.IsAtBeginningOf(RIL_SYMBOL) = true
*/
virtual bool IsAtBeginningOf(PageIteratorLevel level) const;
/**
* Returns whether the iterator is positioned at the last element in a
* given level. (e.g. the last word in a line, the last line in a block)
*
* Here's some two-paragraph example
* text. It starts off innocuously
* enough but quickly turns bizarre.
* The author inserts a cornucopia
* of words to guard against confused
* references.
*
* Now take an iterator it pointed to the start of "bizarre."
* it.IsAtFinalElement(RIL_PARA, RIL_SYMBOL) = false
* it.IsAtFinalElement(RIL_PARA, RIL_WORD) = true
* it.IsAtFinalElement(RIL_BLOCK, RIL_WORD) = false
*/
virtual bool IsAtFinalElement(PageIteratorLevel level,
PageIteratorLevel element) const;
/**
* Returns whether this iterator is positioned
* before other: -1
* equal to other: 0
* after other: 1
*/
int Cmp(const PageIterator &other) const;
// ============= Accessing data ==============.
// Coordinate system:
// Integer coordinates are at the cracks between the pixels.
// The top-left corner of the top-left pixel in the image is at (0,0).
// The bottom-right corner of the bottom-right pixel in the image is at
// (width, height).
// Every bounding box goes from the top-left of the top-left contained
// pixel to the bottom-right of the bottom-right contained pixel, so
// the bounding box of the single top-left pixel in the image is:
// (0,0)->(1,1).
// If an image rectangle has been set in the API, then returned coordinates
// relate to the original (full) image, rather than the rectangle.
/**
* Controls what to include in a bounding box. Bounding boxes of all levels
* between RIL_WORD and RIL_BLOCK can include or exclude potential diacritics.
* Between layout analysis and recognition, it isn't known where all
* diacritics belong, so this control is used to include or exclude some
* diacritics that are above or below the main body of the word. In most cases
* where the placement is obvious, and after recognition, it doesn't make as
* much difference, as the diacritics will already be included in the word.
* The two flags are simply stored in include_upper_dots_ and
* include_lower_dots_; nothing else happens in this call.
*/
void SetBoundingBoxComponents(bool include_upper_dots,
bool include_lower_dots) {
include_upper_dots_ = include_upper_dots;
include_lower_dots_ = include_lower_dots;
}
/**
* Returns the bounding rectangle of the current object at the given level.
* See comment on coordinate system above.
* Returns false if there is no such object at the current position.
* The returned bounding box is guaranteed to match the size and position
* of the image returned by GetBinaryImage, but may clip foreground pixels
* from a grey image. The padding argument to GetImage can be used to expand
* the image to include more foreground pixels. See GetImage below.
*/
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right,
int *bottom) const;
/**
* Overload of BoundingBox taking an extra padding argument.
* NOTE(review): presumably the box is enlarged by padding on each side;
* the implementation is not visible here - confirm.
*/
bool BoundingBox(PageIteratorLevel level, int padding, int *left, int *top,
int *right, int *bottom) const;
/**
* Returns the bounding rectangle of the object in a coordinate system of the
* working image rectangle having its origin at (rect_left_, rect_top_) with
* respect to the original image and is scaled by a factor scale_.
*/
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top,
int *right, int *bottom) const;
/** Returns whether there is no object of a given level. */
bool Empty(PageIteratorLevel level) const;
/**
* Returns the type of the current block.
* See tesseract/publictypes.h for PolyBlockType.
*/
PolyBlockType BlockType() const;
/**
* Returns the polygon outline of the current block. The returned Pta must
* be ptaDestroy-ed after use. Note that the returned Pta lists the vertices
* of the polygon, and the last edge is the line segment between the last
* point and the first point. nullptr will be returned if the iterator is
* at the end of the document or layout analysis was not used.
*/
Pta *BlockPolygon() const;
/**
* Returns a binary image of the current object at the given level.
* The position and size match the return from BoundingBoxInternal, and so
* this could be upscaled with respect to the original input image.
* Use pixDestroy to delete the image after use.
*/
Pix *GetBinaryImage(PageIteratorLevel level) const;
/**
* Returns an image of the current object at the given level in greyscale
* if available in the input. To guarantee a binary image use
* GetBinaryImage.
* NOTE that in order to give the best possible image, the bounds are
* expanded slightly over the binary connected component, by the supplied
* padding, so the top-left position of the returned image is returned
* in (left,top). These will most likely not match the coordinates
* returned by BoundingBox.
* If you do not supply an original image, you will get a binary one.
* Use pixDestroy to delete the image after use.
*/
Pix *GetImage(PageIteratorLevel level, int padding, Pix *original_img,
int *left, int *top) const;
/**
* Returns the baseline of the current object at the given level.
* The baseline is the line that passes through (x1, y1) and (x2, y2).
* WARNING: with vertical text, baselines may be vertical!
* Returns false if there is no baseline at the current position.
*/
bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2,
int *y2) const;
// Returns the attributes of the current row.
// NOTE(review): the three float pointers are presumably output parameters;
// whether nullptr is accepted is not visible here - confirm.
void RowAttributes(float *row_height, float *descenders,
float *ascenders) const;
/**
* Returns orientation for the block the iterator points to.
* orientation, writing_direction, textline_order: see publictypes.h
* deskew_angle: after rotating the block so the text orientation is
* upright, how many radians does one have to rotate the
* block anti-clockwise for it to be level?
* -Pi/4 <= deskew_angle <= Pi/4
*/
void Orientation(tesseract::Orientation *orientation,
tesseract::WritingDirection *writing_direction,
tesseract::TextlineOrder *textline_order,
float *deskew_angle) const;
/**
* Returns information about the current paragraph, if available.
*
* justification -
* LEFT if ragged right, or fully justified and script is left-to-right.
* RIGHT if ragged left, or fully justified and script is right-to-left.
* unknown if it looks like source code or we have very few lines.
* is_list_item -
* true if we believe this is a member of an ordered or unordered list.
* is_crown -
* true if the first line of the paragraph is aligned with the other
* lines of the paragraph even though subsequent paragraphs have first
* line indents. This typically indicates that this is the continuation
* of a previous paragraph or that it is the very first paragraph in
* the chapter.
* first_line_indent -
* For LEFT aligned paragraphs, the first text line of paragraphs of
* this kind are indented this many pixels from the left edge of the
* rest of the paragraph.
* for RIGHT aligned paragraphs, the first text line of paragraphs of
* this kind are indented this many pixels from the right edge of the
* rest of the paragraph.
* NOTE 1: This value may be negative.
* NOTE 2: if *is_crown == true, the first line of this paragraph is
* actually flush, and first_line_indent is set to the "common"
* first_line_indent for subsequent paragraphs in this block
* of text.
*/
void ParagraphInfo(tesseract::ParagraphJustification *justification,
bool *is_list_item, bool *is_crown,
int *first_line_indent) const;
// If the current WERD_RES (it_->word()) is not nullptr, sets the BlamerBundle
// of the current word to the given pointer (takes ownership of the pointer)
// and returns true.
// Can only be used when iterating on the word level.
bool SetWordBlamerBundle(BlamerBundle *blamer_bundle);
protected:
/**
* Sets up the internal data for iterating the blobs of a new word, then
* moves the iterator to the given offset.
*/
void BeginWord(int offset);
/** Pointer to the page_res owned by the API. */
PAGE_RES *page_res_;
/** Pointer to the Tesseract object owned by the API. */
Tesseract *tesseract_;
/**
* The iterator to the page_res_. Owned by this iterator.
* A pointer just to avoid dragging in Tesseract includes.
*/
PAGE_RES_IT *it_;
/**
* The current input WERD being iterated. If there is an output from OCR,
* then word_ is nullptr. Owned by the API.
*/
WERD *word_;
/** The length of the current word_. */
int word_length_;
/** The current blob index within the word. */
int blob_index_;
/**
* Iterator to the blobs within the word. If nullptr, then we are iterating
* OCR results in the box_word.
* Owned by this iterator.
*/
C_BLOB_IT *cblob_it_;
/** Control over what to include in bounding boxes. */
bool include_upper_dots_;
bool include_lower_dots_;
/** Parameters saved from the Thresholder. Needed to rebuild coordinates.*/
int scale_;
int scaled_yres_;
int rect_left_;
int rect_top_;
int rect_width_;
int rect_height_;
};
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_PAGEITERATOR_H_

View File

@ -1,281 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: publictypes.h
// Description: Types used in both the API and internally
// Author: Ray Smith
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
#define TESSERACT_CCSTRUCT_PUBLICTYPES_H_
namespace tesseract {
// This file contains types that are used both by the API and internally
// to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
// dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT.
// Restated: It is OK for low-level Tesseract files to include publictypes.h,
// but not for the low-level tesseract code to include top-level API code.
// This file should not use other Tesseract types, as that would drag
// their includes into the API-level.
/** Printers' points per inch; this is the unit of the returned pointsize. */
constexpr int kPointsPerInch{72};
/**
 * Lowest resolution regarded as believable. Serves as the default when no
 * other information exists, because under-estimating is safer than
 * over-estimating.
 */
constexpr int kMinCredibleResolution{70};
/** Highest resolution regarded as believable. */
constexpr int kMaxCredibleResolution{2400};
/**
 * Ratio of the median blob size to the likely resolution, used to estimate
 * a resolution when none was supplied. It is essentially 1 / (usual text
 * size in inches).
 */
constexpr int kResolutionEstimationFactor{10};
/**
* Possible types for a POLY_BLOCK or ColPartition.
* Must be kept in sync with kPBColors in polyblk.cpp and PTIs*Type functions
* below, as well as kPolyBlockNames in layout_test.cc.
* Used extensively by ColPartition, and POLY_BLOCK.
*/
enum PolyBlockType {
  PT_UNKNOWN = 0,          ///< Type is not yet known. Keep as the first element.
  PT_FLOWING_TEXT = 1,     ///< Text that lives inside a column.
  PT_HEADING_TEXT = 2,     ///< Text that spans more than one column.
  PT_PULLOUT_TEXT = 3,     ///< Text that is in a cross-column pull-out region.
  PT_EQUATION = 4,         ///< Partition belonging to an equation region.
  PT_INLINE_EQUATION = 5,  ///< Partition has inline equation.
  PT_TABLE = 6,            ///< Partition belonging to a table region.
  PT_VERTICAL_TEXT = 7,    ///< Text-line runs vertically.
  PT_CAPTION_TEXT = 8,     ///< Text that belongs to an image.
  PT_FLOWING_IMAGE = 9,    ///< Image that lives inside a column.
  PT_HEADING_IMAGE = 10,   ///< Image that spans more than one column.
  PT_PULLOUT_IMAGE = 11,   ///< Image that is in a cross-column pull-out region.
  PT_HORZ_LINE = 12,       ///< Horizontal line.
  PT_VERT_LINE = 13,       ///< Vertical line.
  PT_NOISE = 14,           ///< Lies outside of any column.
  PT_COUNT                 ///< Number of block types; must remain last.
};
/** Returns true if the block type is a line, either horizontal or vertical. */
inline bool PTIsLineType(PolyBlockType type) {
  switch (type) {
    case PT_HORZ_LINE:
    case PT_VERT_LINE:
      return true;
    default:
      return false;
  }
}
/** Returns true for the flowing, heading and pull-out image block types. */
inline bool PTIsImageType(PolyBlockType type) {
  switch (type) {
    case PT_FLOWING_IMAGE:
    case PT_HEADING_IMAGE:
    case PT_PULLOUT_IMAGE:
      return true;
    default:
      return false;
  }
}
/**
 * Returns true for the block types treated as text: flowing, heading,
 * pull-out, vertical and caption text, plus tables and inline equations.
 * PT_EQUATION itself is not in the list.
 */
inline bool PTIsTextType(PolyBlockType type) {
  switch (type) {
    case PT_FLOWING_TEXT:
    case PT_HEADING_TEXT:
    case PT_PULLOUT_TEXT:
    case PT_TABLE:
    case PT_VERTICAL_TEXT:
    case PT_CAPTION_TEXT:
    case PT_INLINE_EQUATION:
      return true;
    default:
      return false;
  }
}
/** Returns true for the pull-out (inter-column) text and image block types. */
inline bool PTIsPulloutType(PolyBlockType type) {
  switch (type) {
    case PT_PULLOUT_IMAGE:
    case PT_PULLOUT_TEXT:
      return true;
    default:
      return false;
  }
}
/**
* +------------------+ Orientation Example:
* | 1 Aaaa Aaaa Aaaa | ====================
* | Aaa aa aaa aa | To left is a diagram of some (1) English and
* | aaaaaa A aa aaa. | (2) Chinese text and a (3) photo credit.
* | 2 |
* | ####### c c C | Upright Latin characters are represented as A and a.
* | ####### c c c | '<' represents a latin character rotated
* | < ####### c c c | anti-clockwise 90 degrees.
* | < ####### c c |
* | < ####### . c | Upright Chinese characters are represented C and c.
* | 3 ####### c |
* +------------------+ NOTA BENE: enum values here should match goodoc.proto
* If you orient your head so that "up" aligns with Orientation,
* then the characters will appear "right side up" and readable.
*
* In the example above, both the English and Chinese paragraphs are oriented
* so their "up" is the top of the page (page up). The photo credit is read
* with one's head turned leftward ("up" is to page left).
*
* The values of this enum match the convention of Tesseract's osdetect.h
*/
enum Orientation {
  ORIENTATION_PAGE_UP = 0,     ///< "Up" of the text is the top of the page.
  ORIENTATION_PAGE_RIGHT = 1,  ///< "Up" of the text is towards page right.
  ORIENTATION_PAGE_DOWN = 2,   ///< "Up" of the text is the bottom of the page.
  ORIENTATION_PAGE_LEFT = 3    ///< "Up" of the text is towards page left.
};
/**
* The grapheme clusters within a line of text are laid out logically
* in this direction, judged when looking at the text line rotated so that
* its Orientation is "page up".
*
* For English text, the writing direction is left-to-right. For the
* Chinese text in the above example, the writing direction is top-to-bottom.
*/
enum WritingDirection {
  WRITING_DIRECTION_LEFT_TO_RIGHT = 0,  ///< e.g. English text.
  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,  ///< Graphemes run right to left.
  WRITING_DIRECTION_TOP_TO_BOTTOM = 2   ///< e.g. vertically set Chinese text.
};
/**
* The text lines are read in the given sequence.
*
* In English, the order is top-to-bottom.
* In Chinese, vertical text lines are read right-to-left. Mongolian is
* written in vertical columns top to bottom like Chinese, but the lines
* order left-to right.
*
* Note that only some combinations make sense. For example,
* WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM
*/
enum TextlineOrder {
  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,  ///< Lines follow each other left to right.
  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,  ///< Lines follow each other right to left.
  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2   ///< Lines follow each other top to bottom.
};
/**
* Possible modes for page layout analysis. These *must* be kept in order
* of decreasing amount of layout analysis to be done, except for OSD_ONLY,
* so that the inequality tests in the inline PSM_* functions below work.
*/
enum PageSegMode {
  /// Orientation and script detection only.
  PSM_OSD_ONLY = 0,
  /// Automatic page segmentation with orientation and script detection (OSD).
  PSM_AUTO_OSD = 1,
  /// Automatic page segmentation, but no OSD, or OCR.
  PSM_AUTO_ONLY = 2,
  /// Fully automatic page segmentation, but no OSD.
  PSM_AUTO = 3,
  /// Assume a single column of text of variable sizes.
  PSM_SINGLE_COLUMN = 4,
  /// Assume a single uniform block of vertically aligned text.
  PSM_SINGLE_BLOCK_VERT_TEXT = 5,
  /// Assume a single uniform block of text. (Default.)
  PSM_SINGLE_BLOCK = 6,
  /// Treat the image as a single text line.
  PSM_SINGLE_LINE = 7,
  /// Treat the image as a single word.
  PSM_SINGLE_WORD = 8,
  /// Treat the image as a single word in a circle.
  PSM_CIRCLE_WORD = 9,
  /// Treat the image as a single character.
  PSM_SINGLE_CHAR = 10,
  /// Find as much text as possible in no particular order.
  PSM_SPARSE_TEXT = 11,
  /// Sparse text with orientation and script det.
  PSM_SPARSE_TEXT_OSD = 12,
  /// Treat the image as a single text line, bypassing hacks that are
  /// Tesseract-specific.
  PSM_RAW_LINE = 13,
  /// Number of enum entries.
  PSM_COUNT
};
/**
* Inline functions that act on a PageSegMode to determine whether components of
* layout analysis are enabled.
* *Depend critically on the order of elements of PageSegMode.*
* NOTE that arg is an int for compatibility with INT_PARAM.
*/
// True when pageseg_mode is at most PSM_AUTO_OSD, or is PSM_SPARSE_TEXT_OSD.
inline bool PSM_OSD_ENABLED(int pageseg_mode) {
  if (pageseg_mode == PSM_SPARSE_TEXT_OSD) {
    return true;
  }
  return pageseg_mode <= PSM_AUTO_OSD;
}
// True when pageseg_mode is at most PSM_AUTO, or is PSM_SPARSE_TEXT_OSD.
inline bool PSM_ORIENTATION_ENABLED(int pageseg_mode) {
  if (pageseg_mode == PSM_SPARSE_TEXT_OSD) {
    return true;
  }
  return pageseg_mode <= PSM_AUTO;
}
// True when pageseg_mode lies in the closed range [PSM_AUTO_OSD, PSM_AUTO].
inline bool PSM_COL_FIND_ENABLED(int pageseg_mode) {
  const bool below_range = pageseg_mode < PSM_AUTO_OSD;
  const bool above_range = pageseg_mode > PSM_AUTO;
  return !(below_range || above_range);
}
// True for the two sparse-text modes, with or without OSD.
inline bool PSM_SPARSE(int pageseg_mode) {
  switch (pageseg_mode) {
    case PSM_SPARSE_TEXT:
    case PSM_SPARSE_TEXT_OSD:
      return true;
    default:
      return false;
  }
}
// True when pageseg_mode lies in the closed range
// [PSM_AUTO_OSD, PSM_SINGLE_COLUMN].
inline bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode) {
  const bool below_range = pageseg_mode < PSM_AUTO_OSD;
  const bool above_range = pageseg_mode > PSM_SINGLE_COLUMN;
  return !(below_range || above_range);
}
// True when pageseg_mode lies in the closed range
// [PSM_AUTO_OSD, PSM_SINGLE_BLOCK].
inline bool PSM_LINE_FIND_ENABLED(int pageseg_mode) {
  const bool below_range = pageseg_mode < PSM_AUTO_OSD;
  const bool above_range = pageseg_mode > PSM_SINGLE_BLOCK;
  return !(below_range || above_range);
}
// True when pageseg_mode lies in [PSM_AUTO_OSD, PSM_SINGLE_LINE], or is one
// of the two sparse-text modes.
inline bool PSM_WORD_FIND_ENABLED(int pageseg_mode) {
  if (pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD) {
    return true;
  }
  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_LINE;
}
/**
* enum of the elements of the page hierarchy, used in ResultIterator
* to provide functions that operate on each level without having to
* have 5x as many functions.
*/
enum PageIteratorLevel {
  RIL_BLOCK = 0,     ///< Block of text/image/separator line.
  RIL_PARA = 1,      ///< Paragraph within a block.
  RIL_TEXTLINE = 2,  ///< Line within a paragraph.
  RIL_WORD = 3,      ///< Word within a textline.
  RIL_SYMBOL = 4     ///< Symbol/character within a word.
};
/**
* JUSTIFICATION_UNKNOWN
* The alignment is not clearly one of the other options. This could happen
* for example if there are only one or two lines of text or the text looks
* like source code or poetry.
*
* NOTA BENE: Fully justified paragraphs (text aligned to both left and right
* margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text
* is written with a left-to-right script and with JUSTIFICATION_RIGHT if
* their text is written in a right-to-left script.
*
* Interpretation for text read in vertical lines:
* "Left" is wherever the starting reading position is.
*
* JUSTIFICATION_LEFT
* Each line, except possibly the first, is flush to the same left tab stop.
*
* JUSTIFICATION_CENTER
* The text lines of the paragraph are centered about a line running
* down through the middle of the text lines.
*
* JUSTIFICATION_RIGHT
* Each line, except possibly the first, is flush to the same right tab stop.
*/
enum ParagraphJustification {
  JUSTIFICATION_UNKNOWN = 0,  ///< Alignment is not clearly one of the others.
  JUSTIFICATION_LEFT = 1,     ///< Lines are flush to the same left tab stop.
  JUSTIFICATION_CENTER = 2,   ///< Lines are centered.
  JUSTIFICATION_RIGHT = 3     ///< Lines are flush to the same right tab stop.
};
/**
* When Tesseract/Cube is initialized we can choose to instantiate/load/run
* only the Tesseract part, only the Cube part or both along with the combiner.
* The preference of which engine to use is stored in tessedit_ocr_engine_mode.
*
* ATTENTION: When modifying this enum, please make sure to make the
* appropriate changes to all the enums mirroring it (e.g. OCREngine in
* cityblock/workflow/detection/detection_storage.proto). Such enums will
* mention the connection to OcrEngineMode in the comments.
*/
enum OcrEngineMode {
  /// Run Tesseract only - fastest; deprecated.
  OEM_TESSERACT_ONLY = 0,
  /// Run just the LSTM line recognizer.
  OEM_LSTM_ONLY = 1,
  /// Run the LSTM recognizer, but allow fallback to Tesseract when things
  /// get difficult; deprecated.
  OEM_TESSERACT_LSTM_COMBINED = 2,
  /// Specify this mode when calling init_*() to indicate that any of the
  /// above modes should be automatically inferred from the variables in the
  /// language-specific config or the command-line configs; if it is not
  /// specified in any of those, the mode should be set to the default
  /// OEM_TESSERACT_ONLY.
  OEM_DEFAULT = 3,
  /// Number of OEMs.
  OEM_COUNT
};
} // namespace tesseract.
#endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_

View File

@ -1,311 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: renderer.h
// Description: Rendering interface to inject into TessBaseAPI
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_API_RENDERER_H_
#define TESSERACT_API_RENDERER_H_
#include "export.h"
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
// complexity of includes here. Use forward declarations wherever possible
// and hide includes of complex types in baseapi.cpp.
#include <cstdint>
#include <string> // for std::string
#include <vector> // for std::vector
struct Pix;
namespace tesseract {
class TessBaseAPI;
/**
* Interface for rendering tesseract results into a document, such as text,
* HOCR or pdf. This class is abstract. Specific classes handle individual
* formats. This interface is then used to inject the renderer class into
* tesseract when processing images.
*
* For simplicity implementing this with tesseract version 3.01,
* the renderer contains document state that is cleared from document
* to document just as the TessBaseAPI is. This way the base API can just
* delegate its rendering functionality to injected renderers, and the
* renderers can manage the associated state needed for the specific formats
* in addition to the heuristics for producing it.
*
* NOTE(review): the class holds raw pointers (next_, fout_) and declares no
* copy constructor or copy assignment, so the compiler-generated copies would
* duplicate those pointers. Whether copying a renderer is ever safe depends
* on the destructor, which is not visible here - confirm.
*/
class TESS_API TessResultRenderer {
public:
// Virtual so that concrete renderers can be destroyed through a
// TessResultRenderer pointer.
virtual ~TessResultRenderer();
// Takes ownership of pointer so must be new'd instance.
// Renderers aren't ordered, but appends the sequences of next parameter
// and existing next(). The renderers should be unique across both lists.
void insert(TessResultRenderer *next);
// Returns the next renderer or nullptr.
TessResultRenderer *next() {
return next_;
}
/**
* Starts a new document with the given title.
* This clears the contents of the output data.
* Title should use UTF-8 encoding.
*/
bool BeginDocument(const char *title);
/**
* Adds the recognized text from the source image to the current document.
* Invalid if BeginDocument not yet called.
*
* Note that this API is a bit weird but is designed to fit into the
* current TessBaseAPI implementation where the api has lots of state
* information that we might want to add in.
*/
bool AddImage(TessBaseAPI *api);
/**
* Finishes the document and finalizes the output data.
* Invalid if BeginDocument not yet called.
*/
bool EndDocument();
// Returns the stored file_extension_ pointer; the string is not copied.
const char *file_extension() const {
return file_extension_;
}
// Returns the document title as a C string. The pointer refers to the
// internal title_ buffer, so it is only valid while title_ is unchanged.
const char *title() const {
return title_.c_str();
}
// Is everything fine? Otherwise something went wrong.
bool happy() const {
return happy_;
}
/**
* Returns the index of the last image given to AddImage
* (i.e. images are incremented whether the image succeeded or not)
*
* This is always defined. It means either the number of the
* current image, the last image ended, or in the completed document
* depending on when in the document lifecycle you are looking at it.
* Will return -1 if a document was never started.
*/
int imagenum() const {
return imagenum_;
}
protected:
/**
* Called by concrete classes.
*
* outputbase is the name of the output file excluding
* extension. For example, "/path/to/chocolate-chip-cookie-recipe"
*
* extension indicates the file extension to be used for output
* files. For example "pdf" will produce a .pdf file, and "hocr"
* will produce .hocr files.
*/
TessResultRenderer(const char *outputbase, const char *extension);
// Hook for specialized handling in BeginDocument()
virtual bool BeginDocumentHandler();
// This must be overridden to render the OCR'd results
virtual bool AddImageHandler(TessBaseAPI *api) = 0;
// Hook for specialized handling in EndDocument()
virtual bool EndDocumentHandler();
// Renderers can call this to append '\0' terminated strings into
// the output string returned by GetOutput.
// This method will grow the output buffer if needed.
void AppendString(const char *s);
// Renderers can call this to append binary byte sequences into
// the output string returned by GetOutput. Note that s is not necessarily
// '\0' terminated (and can contain '\0' within it).
// This method will grow the output buffer if needed.
void AppendData(const char *s, int len);
private:
TessResultRenderer *next_; // Can link multiple renderers together
// NOTE(review): FILE is declared by <cstdio>, which is not among this
// header's direct includes (<cstdint>, <string>, <vector>); it presumably
// arrives transitively - confirm or include it explicitly.
FILE *fout_; // output file pointer
const char *file_extension_; // standard extension for generated output
std::string title_; // title of document being rendered
int imagenum_; // index of last image added
bool happy_; // I get grumpy when the disk fills up, etc.
};
/**
* Renders tesseract output into a plain UTF-8 text string.
*/
class TESS_API TessTextRenderer : public TessResultRenderer {
public:
// outputbase is the output file name without its extension (see the
// TessResultRenderer constructor).
explicit TessTextRenderer(const char *outputbase);
protected:
// Per-image rendering hook; pure virtual in TessResultRenderer.
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
* Renders tesseract output into an hocr text string.
*/
class TESS_API TessHOcrRenderer : public TessResultRenderer {
public:
// outputbase is the output file name without its extension; font_info
// selects whether font information is printed.
explicit TessHOcrRenderer(const char *outputbase, bool font_info);
// NOTE(review): presumably picks a default for font_info - the value is
// not visible in this header; confirm in the implementation.
explicit TessHOcrRenderer(const char *outputbase);
protected:
// Overrides of the three TessResultRenderer document-lifecycle hooks.
bool BeginDocumentHandler() override;
bool AddImageHandler(TessBaseAPI *api) override;
bool EndDocumentHandler() override;
private:
bool font_info_; // whether to print font information
};
/**
* Renders tesseract output into an alto text string.
*/
class TESS_API TessAltoRenderer : public TessResultRenderer {
public:
// outputbase is the output file name without its extension.
explicit TessAltoRenderer(const char *outputbase);
protected:
// Overrides of the three TessResultRenderer document-lifecycle hooks.
bool BeginDocumentHandler() override;
bool AddImageHandler(TessBaseAPI *api) override;
bool EndDocumentHandler() override;
private:
// NOTE(review): unlike the other renderers' members this name has no
// trailing underscore. It presumably tracks whether the document has been
// begun - confirm against the implementation.
bool begin_document;
};
/**
* Renders Tesseract output into a TSV string.
*/
class TESS_API TessTsvRenderer : public TessResultRenderer {
public:
// outputbase is the output file name without its extension; font_info
// selects whether font information is printed.
explicit TessTsvRenderer(const char *outputbase, bool font_info);
// NOTE(review): presumably picks a default for font_info - the value is
// not visible in this header; confirm in the implementation.
explicit TessTsvRenderer(const char *outputbase);
protected:
// Overrides of the three TessResultRenderer document-lifecycle hooks.
bool BeginDocumentHandler() override;
bool AddImageHandler(TessBaseAPI *api) override;
bool EndDocumentHandler() override;
private:
bool font_info_; // whether to print font information
};
/**
* Renders tesseract output into searchable PDF.
*/
class TESS_API TessPDFRenderer : public TessResultRenderer {
public:
// datadir is the location of the TESSDATA. We need it because
// we load a custom PDF font from this location.
// textonly defaults to false; when set, images are skipped (see textonly_).
TessPDFRenderer(const char *outputbase, const char *datadir,
bool textonly = false);
protected:
// Overrides of the three TessResultRenderer document-lifecycle hooks.
bool BeginDocumentHandler() override;
bool AddImageHandler(TessBaseAPI *api) override;
bool EndDocumentHandler() override;
private:
// We don't want to have every image in memory at once,
// so we store some metadata as we go along producing
// PDFs one page at a time. At the end, that metadata is
// used to make everything that isn't easily handled in a
// streaming fashion.
long int obj_; // counter for PDF objects
std::vector<uint64_t> offsets_; // offset of every PDF object in bytes
std::vector<long int> pages_; // object number for every /Page object
std::string datadir_; // where to find the custom font
bool textonly_; // skip images if set
// Bookkeeping only. DIY = Do It Yourself.
// NOTE(review): size_t is used unqualified; it presumably arrives through
// the standard headers already included - confirm.
void AppendPDFObjectDIY(size_t objectsize);
// Bookkeeping + emit data.
void AppendPDFObject(const char *data);
// Create the /Contents object for an entire page.
// NOTE(review): returns a raw char*; ownership of the returned buffer is
// not stated in this header - confirm who frees it.
char *GetPDFTextObjects(TessBaseAPI *api, double width, double height);
// Turn an image into a PDF object. Only transcode if we have to.
// The result is handed back through pdf_object and pdf_object_size.
static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum,
char **pdf_object, long int *pdf_object_size,
int jpg_quality);
};
/**
* Renders tesseract output into a text string.
* NOTE(review): the original description here was identical to the one on
* TessTextRenderer ("plain UTF-8 text string"); the class name suggests UNLV
* format output instead - confirm against the implementation.
*/
class TESS_API TessUnlvRenderer : public TessResultRenderer {
public:
// outputbase is the output file name without its extension.
explicit TessUnlvRenderer(const char *outputbase);
protected:
// Per-image rendering hook; pure virtual in TessResultRenderer.
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
* Renders tesseract output into a plain UTF-8 text string for LSTMBox.
*/
class TESS_API TessLSTMBoxRenderer : public TessResultRenderer {
public:
// outputbase is the output file name without its extension.
explicit TessLSTMBoxRenderer(const char *outputbase);
protected:
// Per-image rendering hook; pure virtual in TessResultRenderer.
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
* Renders tesseract output into a plain UTF-8 text string.
* NOTE(review): this description is identical to the one on
* TessTextRenderer; the class name suggests box-file style output - confirm
* against the implementation.
*/
class TESS_API TessBoxTextRenderer : public TessResultRenderer {
public:
// outputbase is the output file name without its extension.
explicit TessBoxTextRenderer(const char *outputbase);
protected:
// Per-image rendering hook; pure virtual in TessResultRenderer.
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
* Renders tesseract output into a plain UTF-8 text string in WordStr format.
*/
class TESS_API TessWordStrBoxRenderer : public TessResultRenderer {
public:
// outputbase is the output file name without its extension.
explicit TessWordStrBoxRenderer(const char *outputbase);
protected:
// Per-image rendering hook; pure virtual in TessResultRenderer.
bool AddImageHandler(TessBaseAPI *api) override;
};
#ifndef DISABLED_LEGACY_ENGINE
/**
* Renders tesseract output into an osd text string.
* Only declared when DISABLED_LEGACY_ENGINE is not defined.
*/
class TESS_API TessOsdRenderer : public TessResultRenderer {
public:
// outputbase is the output file name without its extension.
explicit TessOsdRenderer(const char *outputbase);
protected:
// Per-image rendering hook; pure virtual in TessResultRenderer.
bool AddImageHandler(TessBaseAPI *api) override;
};
#endif // ndef DISABLED_LEGACY_ENGINE
} // namespace tesseract.
#endif // TESSERACT_API_RENDERER_H_

View File

@ -1,250 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: resultiterator.h
// Description: Iterator for tesseract results that is capable of
// iterating in proper reading order over Bi Directional
// (e.g. mixed Hebrew and English) text.
// Author: David Eger
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
#include "export.h" // for TESS_API, TESS_LOCAL
#include "ltrresultiterator.h" // for LTRResultIterator
#include "publictypes.h" // for PageIteratorLevel
#include "unichar.h" // for StrongScriptDirection
#include <set> // for std::pair
#include <vector> // for std::vector
namespace tesseract {
/**
* Iterator over tesseract results that walks the page in proper reading
* order, including bi-directional text (e.g. mixed Hebrew and English).
* It extends LTRResultIterator and overrides its navigation methods.
*/
class TESS_API ResultIterator : public LTRResultIterator {
public:
/**
* Factory that builds a ResultIterator from an existing LTRResultIterator.
* NOTE(review): presumably forwards to the protected constructor below, in
* which case the result is positioned at the start of resit's paragraph.
* Ownership of the returned pointer is not shown in this header - confirm
* whether the caller must delete it.
*/
static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);
/**
* ResultIterator is copy constructible!
* The default copy constructor works just fine for us.
*/
~ResultIterator() override = default;
// ============= Moving around within the page ============.
/**
* Moves the iterator to point to the start of the page to begin
* an iteration.
*/
void Begin() override;
/**
* Moves to the start of the next object at the given level in the
* page hierarchy in the appropriate reading order and returns false if
* the end of the page was reached.
* NOTE that RIL_SYMBOL will skip non-text blocks, but all other
* PageIteratorLevel level values will visit each non-text block once.
* Think of non text blocks as containing a single para, with a single line,
* with a single imaginary word.
* Calls to Next with different levels may be freely intermixed.
* This function iterates words in right-to-left scripts correctly, if
* the appropriate language has been loaded into Tesseract.
*/
bool Next(PageIteratorLevel level) override;
/**
* IsAtBeginningOf() returns whether we're at the logical beginning of the
* given level. (as opposed to ResultIterator's left-to-right top-to-bottom
* order). Otherwise, this acts the same as PageIterator::IsAtBeginningOf().
* For a full description, see pageiterator.h
* NOTE(review): "ResultIterator's left-to-right top-to-bottom order" reads
* oddly inside ResultIterator itself; it presumably means the base
* iterator's order - confirm.
*/
bool IsAtBeginningOf(PageIteratorLevel level) const override;
/**
* Implement PageIterator's IsAtFinalElement correctly in a BiDi context.
* For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we
* point at the last word in a paragraph. See PageIterator for full comment.
*/
bool IsAtFinalElement(PageIteratorLevel level,
PageIteratorLevel element) const override;
// ============= Functions that refer to words only ============.
// Returns the number of blanks before the current word.
int BlanksBeforeWord() const;
// ============= Accessing data ==============.
/**
* Returns the null terminated UTF-8 encoded text string for the current
* object at the given level. Use delete [] to free after use.
*/
virtual char *GetUTF8Text(PageIteratorLevel level) const;
/**
* Returns the LSTM choices for every LSTM timestep for the current word.
* Each innermost pair is a (const char *, float) entry.
* NOTE(review): ownership of the returned pointer and of the strings it
* refers to is not shown in this header - confirm before freeing.
*/
virtual std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
*GetRawLSTMTimesteps() const;
/**
* NOTE(review): undocumented in the original. By name and return type this
* presumably returns, per symbol of the current word, the best
* (text, score) choices - confirm. Ownership of the returned pointer is
* not shown in this header.
*/
virtual std::vector<std::vector<std::pair<const char *, float>>>
*GetBestLSTMSymbolChoices() const;
/**
* Return whether the current paragraph's dominant reading direction
* is left-to-right (as opposed to right-to-left).
*/
bool ParagraphIsLtr() const;
// ============= Exposed only for testing =============.
/**
* Yields the reading order as a sequence of indices and (optional)
* meta-marks for a set of words (given left-to-right).
* The meta marks are passed as negative values:
* kMinorRunStart Start of minor direction text.
* kMinorRunEnd End of minor direction text.
* kComplexWord The next indexed word contains both left-to-right and
* right-to-left characters and was treated as neutral.
* NOTE(review): the private overload below documents kComplexWord as
* marking the *previous* word; the two comments disagree - confirm which
* one matches the implementation.
*
* For example, suppose we have five words in a text line,
* indexed [0,1,2,3,4] from the leftmost side of the text line.
* The following are all believable reading_orders:
*
* Left-to-Right (in ltr paragraph):
* { 0, 1, 2, 3, 4 }
* Left-to-Right (in rtl paragraph):
* { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }
* Right-to-Left (in rtl paragraph):
* { 4, 3, 2, 1, 0 }
* Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph:
* { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }
*/
static void CalculateTextlineOrder(
bool paragraph_is_ltr,
const std::vector<StrongScriptDirection> &word_dirs,
std::vector<int> *reading_order);
// Meta-mark values that CalculateTextlineOrder may place in its output.
// They are declared here only; per the comment above they are negative, so
// they cannot collide with (non-negative) word indices.
static const int kMinorRunStart;
static const int kMinorRunEnd;
static const int kComplexWord;
protected:
/**
* We presume the data associated with the given iterator will outlive us.
* NB: This is non-public (declared protected) because it does something
* that is non-obvious:
* it resets to the beginning of the paragraph instead of staying wherever
* resit might have pointed.
*/
explicit ResultIterator(const LTRResultIterator &resit);
private:
/**
* Calculates the current paragraph's dominant writing direction.
* Typically, members should use current_paragraph_is_ltr_ instead.
*/
bool CurrentParagraphIsLtr() const;
/**
* Returns word indices as measured from resit->RestartRow() = index 0
* for the reading order of words within a textline given an iterator
* into the middle of the text line.
* In addition to non-negative word indices, the following negative values
* may be inserted:
* kMinorRunStart Start of minor direction text.
* kMinorRunEnd End of minor direction text.
* kComplexWord The previous word contains both left-to-right and
* right-to-left characters and was treated as neutral.
* NOTE(review): the public static overload documents kComplexWord as
* marking the *next* indexed word - confirm which is correct.
*/
void CalculateTextlineOrder(bool paragraph_is_ltr,
const LTRResultIterator &resit,
std::vector<int> *indices) const;
/** Same as above, but the caller's ssd gets filled in if ssd != nullptr. */
void CalculateTextlineOrder(bool paragraph_is_ltr,
const LTRResultIterator &resit,
std::vector<StrongScriptDirection> *ssd,
std::vector<int> *indices) const;
/**
* What is the index of the current word in a strict left-to-right reading
* of the row?
*/
int LTRWordIndex() const;
/**
* Given an iterator pointing at a word, returns the logical reading order
* of blob indices for the word.
* The result is written to *blob_indices.
*/
void CalculateBlobOrder(std::vector<int> *blob_indices) const;
/** Precondition: current_paragraph_is_ltr_ is set. */
void MoveToLogicalStartOfTextline();
/**
* Precondition: current_paragraph_is_ltr_ and in_minor_direction_
* are set.
*/
void MoveToLogicalStartOfWord();
/** Are we pointing at the final (reading order) symbol of the word? */
bool IsAtFinalSymbolOfWord() const;
/** Are we pointing at the first (reading order) symbol of the word? */
bool IsAtFirstSymbolOfWord() const;
/**
* Append any extra marks that should be appended to this word when printed.
* Mostly, these are Unicode BiDi control characters.
*/
void AppendSuffixMarks(std::string *text) const;
/** Appends the current word in reading order to the given buffer. */
void AppendUTF8WordText(std::string *text) const;
/**
* Appends the text of the current text line, *assuming this iterator is
* positioned at the beginning of the text line*. This function
* updates the iterator to point to the first position past the text line
* (which is why, unlike its siblings, it is not const).
* Each textline is terminated in a single newline character.
* If the textline ends a paragraph, it gets a second terminal newline.
*/
void IterateAndAppendUTF8TextlineText(std::string *text);
/**
* Appends the text of the current paragraph in reading order
* to the given buffer.
* Each textline is terminated in a single newline character, and the
* paragraph gets an extra newline at the end.
*/
void AppendUTF8ParagraphText(std::string *text) const;
/** Returns whether the bidi_debug flag is set to at least min_level. */
bool BidiDebug(int min_level) const;
/**
* Dominant direction of the current paragraph (true = left-to-right).
* NOTE(review): presumably a cached result of CurrentParagraphIsLtr() -
* confirm where it is refreshed.
*/
bool current_paragraph_is_ltr_;
/**
* Is the currently pointed-at character at the beginning of
* a minor-direction run?
*/
bool at_beginning_of_minor_run_;
/** Is the currently pointed-at character in a minor-direction sequence? */
bool in_minor_direction_;
/**
* Should detected inter-word spaces be preserved, or "compressed" to a single
* space character (default behavior).
*/
bool preserve_interword_spaces_;
};
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_

View File

@ -1,174 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: unichar.h
// Description: Unicode character/ligature class.
// Author: Ray Smith
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCUTIL_UNICHAR_H_
#define TESSERACT_CCUTIL_UNICHAR_H_
#include "export.h"
#include <memory.h>
#include <cstring>
#include <string>
#include <vector>
namespace tesseract {
// Size, in bytes (chars), of the UTF-8 buffer held by a UNICHAR. Must be
// at least 4. Must not exceed 31 without changing the coding of length.
// Being a preprocessor macro, this name is NOT scoped by namespace tesseract
// and is visible to every file that includes this header.
#define UNICHAR_LEN 30
// A UNICHAR_ID is the unique id of a unichar.
using UNICHAR_ID = int;
// A variable to indicate an invalid or uninitialized unichar id.
// Namespace-scope `static const` in a header: every translation unit that
// includes this file gets its own copy.
static const int INVALID_UNICHAR_ID = -1;
// A special unichar that corresponds to INVALID_UNICHAR_ID.
static const char INVALID_UNICHAR[] = "__INVALID_UNICHAR__";
// Classifies a piece of text by the strongly-directional characters it
// contains. The numeric values are spelled out explicitly and must stay
// as they are.
enum StrongScriptDirection {
  // Text contains only neutral characters.
  DIR_NEUTRAL = 0,
  // Text contains no Right-to-Left characters.
  DIR_LEFT_TO_RIGHT = 1,
  // Text contains no Left-to-Right characters.
  DIR_RIGHT_TO_LEFT = 2,
  // Text contains a mixture of left-to-right and right-to-left characters.
  DIR_MIX = 3,
};

// Signed integer type used for UTF-32 code points (see UTF8ToUTF32 and
// UTF32ToUTF8 in UNICHAR).
using char32 = signed int;
// A UNICHAR stores one classification result as UTF-8 bytes. That is either
// a single Unicode character (1 to 4 UTF-8 bytes) or several Unicode
// characters forming the NFKC expansion of a ligature such as fi, ffl etc.
class TESS_API UNICHAR {
public:
  // Creates an empty UNICHAR: every byte of the buffer is zero.
  UNICHAR() {
    memset(chars, 0, sizeof(chars));
  }

  // Construct from a utf8 string. If len<0 then the string is null
  // terminated. A string too long to fit in the UNICHAR is truncated to
  // what will fit.
  UNICHAR(const char *utf8_str, int len);

  // Construct from a single UCS4 character.
  explicit UNICHAR(int unicode);

  // The compiler-generated copy constructor and operator= are fine.

  // Get the first character as UCS-4.
  int first_uni() const;

  // Get the length of the UTF8 string.
  // The last buffer byte holds the length whenever its value lies in
  // [0, UNICHAR_LEN); any other value means the buffer is completely full.
  int utf8_len() const {
    const int stored = chars[UNICHAR_LEN - 1];
    if (stored < 0 || stored >= UNICHAR_LEN) {
      return UNICHAR_LEN;
    }
    return stored;
  }

  // Get the UTF8 bytes. The result is NOT null-terminated; pair it with
  // utf8_len().
  const char *utf8() const {
    return chars;
  }

  // Get a terminated UTF8 string: Must delete[] it after use.
  char *utf8_str() const;

  // Get the number of bytes in the first character of the given utf8 string.
  static int utf8_step(const char *utf8_str);

  // Helper for walking over and reading the elements of a UTF8 string.
  // Unlike UNICHAR itself, const_iterator does NOT COPY or take ownership of
  // the underlying byte array, and (as the name suggests) it does not allow
  // the array to be modified.
  //
  // Example:
  //   for (UNICHAR::const_iterator it = UNICHAR::begin(str, str_len);
  //        it != UNICHAR::end(str, str_len);
  //        ++it) {
  //     printf("UCS-4 symbol code = %d\n", *it);
  //     char buf[5];
  //     int char_len = it.get_utf8(buf); buf[char_len] = '\0';
  //     printf("Char = %s\n", buf);
  //   }
  class TESS_API const_iterator {
    using CI = const_iterator;

  public:
    // Step to the next UTF8 character.
    // At an illegal UTF8 character: print an error message and advance by a
    // single byte. At a terminating NUL byte: stay put.
    const_iterator &operator++();

    // Return the UCS-4 value at the current position.
    // An illegal UTF8 value yields a single space character.
    int operator*() const;

    // Store the UTF-8 encoding of the current codepoint into buf, which must
    // be at least 4 bytes long. Return the number of bytes written.
    // An illegal UTF8 value writes a single space character and returns 1.
    // Note that this method does not null-terminate the buffer.
    int get_utf8(char *buf) const;

    // Returns the number of bytes of the current codepoint. Returns 1 if the
    // current position is at an illegal UTF8 value.
    int utf8_len() const;

    // Returns true if the UTF-8 encoding at the current position is legal.
    bool is_legal() const;

    // Return the pointer into the string at the current position.
    const char *utf8_data() const {
      return it_;
    }

    // Two iterators are equal exactly when they point at the same byte.
    friend bool operator==(const CI &lhs, const CI &rhs) {
      return lhs.it_ == rhs.it_;
    }
    friend bool operator!=(const CI &lhs, const CI &rhs) {
      return lhs.it_ != rhs.it_;
    }

  private:
    // Only UNICHAR::begin / UNICHAR::end may create iterators.
    friend class UNICHAR;
    explicit const_iterator(const char *it) : it_(it) {}

    const char *it_; // Pointer into the string.
  };

  // Create a start/end iterator pointing to a string. These methods are
  // static and do NOT create a copy or take ownership of the underlying
  // array.
  static const_iterator begin(const char *utf8_str, int byte_length);
  static const_iterator end(const char *utf8_str, int byte_length);

  // Converts a utf-8 string to a vector of unicodes.
  // Returns an empty vector if the input contains invalid UTF-8.
  static std::vector<char32> UTF8ToUTF32(const char *utf8_str);

  // Converts a vector of unicodes to a utf8 string.
  // Returns an empty string if the input contains an invalid unicode.
  static std::string UTF32ToUTF8(const std::vector<char32> &str32);

private:
  // A UTF-8 representation of 1 or more Unicode characters.
  // The last element (chars[UNICHAR_LEN - 1]) is a length if
  // its value < UNICHAR_LEN, otherwise it is a genuine character.
  char chars[UNICHAR_LEN]{};
};
} // namespace tesseract
#endif // TESSERACT_CCUTIL_UNICHAR_H_

View File

@ -1,34 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: version.h
// Description: Version information
//
// (C) Copyright 2018, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_API_VERSION_H_
#define TESSERACT_API_VERSION_H_
// clang-format off
// The @NAME@ tokens below are unsubstituted template placeholders, so this
// header is not valid C++ as written.
// NOTE(review): presumably a build/configure step is meant to replace them
// with the real version numbers - confirm that the vendored copy was
// generated correctly.
#define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
#define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
#define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@
// Packs the three components into one integer:
// major in bits 16 and up, minor in bits 8-15, micro in the low bits.
#define TESSERACT_VERSION \
(TESSERACT_MAJOR_VERSION << 16 | \
TESSERACT_MINOR_VERSION << 8 | \
TESSERACT_MICRO_VERSION)
// Version as a string literal (also a placeholder until substituted).
#define TESSERACT_VERSION_STR "@PACKAGE_VERSION@"
// clang-format on
#endif // TESSERACT_API_VERSION_H_

View File

@ -1,812 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: baseapi.h
// Description: Simple API for calling tesseract.
// Author: Ray Smith
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_API_BASEAPI_H_
#define TESSERACT_API_BASEAPI_H_
#ifdef HAVE_CONFIG_H
# include "config_auto.h" // DISABLED_LEGACY_ENGINE
#endif
#include "export.h"
#include "pageiterator.h"
#include "publictypes.h"
#include "resultiterator.h"
#include "unichar.h"
#include "version.h"
#include <cstdio>
#include <vector> // for std::vector
struct Pix;
struct Pixa;
struct Boxa;
namespace tesseract {
class PAGE_RES;
class ParagraphModel;
class BLOCK_LIST;
class ETEXT_DESC;
struct OSResults;
class UNICHARSET;
class Dawg;
class Dict;
class EquationDetect;
class PageIterator;
class ImageThresholder;
class LTRResultIterator;
class ResultIterator;
class MutableIterator;
class TessResultRenderer;
class Tesseract;
// Function to read a std::vector<char> from a whole file.
// Returns false on failure.
// This is a plain function pointer type: it takes the file name and a
// pointer to the vector that receives the data.
using FileReader = bool (*)(const char *filename, std::vector<char> *data);
// Pointer to a const member function of Dict that takes
// (void *, const UNICHARSET &, UNICHAR_ID, bool) and returns int.
// The parameters are unnamed here; their meaning is defined by Dict.
using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID,
bool) const;
// Pointer to a (non-const) member function of Dict that takes
// (const char *, const char *, int, const char *, int) and returns double.
// The parameters are unnamed here; their meaning is defined by Dict.
using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *,
int, const char *, int);
/**
* Base class for all tesseract APIs.
* Specific classes can add ability to work on different inputs or produce
* different outputs.
* This class is mostly an interface layer on top of the Tesseract instance
* class to hide the data types so that users of this class don't have to
* include any other Tesseract headers.
*/
class TESS_API TessBaseAPI {
public:
TessBaseAPI();
virtual ~TessBaseAPI();
// Copy constructor and assignment operator are currently unsupported.
TessBaseAPI(TessBaseAPI const &) = delete;
TessBaseAPI &operator=(TessBaseAPI const &) = delete;
/**
* Returns the version identifier as a static string. Do not delete.
*/
static const char *Version();
/**
* If compiled with OpenCL AND an available OpenCL
* device is deemed faster than serial code, then
* "device" is populated with the cl_device_id
* and returns sizeof(cl_device_id)
* otherwise *device=nullptr and returns 0.
*/
static size_t getOpenCLDevice(void **device);
/**
* Set the name of the input file. Needed for training and
* reading a UNLV zone file, and for searchable PDF output.
*/
void SetInputName(const char *name);
/**
* These functions are required for searchable PDF output.
* We need our hands on the input file so that we can include
* it in the PDF without transcoding. If that is not possible,
* we need the original image. Finally, resolution metadata
* is stored in the PDF so we need that as well.
*/
const char *GetInputName();
// Takes ownership of the input pix.
void SetInputImage(Pix *pix);
Pix *GetInputImage();
int GetSourceYResolution();
const char *GetDatapath();
/** Set the name of the bonus output files. Needed only for debugging. */
void SetOutputName(const char *name);
/**
* Set the value of an internal "parameter."
* Supply the name of the parameter and the value as a string, just as
* you would in a config file.
* Returns false if the name lookup failed.
* Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
* Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.
* SetVariable may be used before Init, but settings will revert to
* defaults on End().
*
* Note: Must be called after Init(). Only works for non-init variables
* (init variables should be passed to Init()).
*/
bool SetVariable(const char *name, const char *value);
bool SetDebugVariable(const char *name, const char *value);
/**
* Returns true if the parameter was found among Tesseract parameters.
* Fills in value with the value of the parameter.
*/
bool GetIntVariable(const char *name, int *value) const;
bool GetBoolVariable(const char *name, bool *value) const;
bool GetDoubleVariable(const char *name, double *value) const;
/**
* Returns the pointer to the string that represents the value of the
* parameter if it was found among Tesseract parameters.
*/
const char *GetStringVariable(const char *name) const;
#ifndef DISABLED_LEGACY_ENGINE
/**
* Print Tesseract fonts table to the given file.
*/
void PrintFontsTable(FILE *fp) const;
#endif
/**
* Print Tesseract parameters to the given file.
*/
void PrintVariables(FILE *fp) const;
/**
* Get value of named variable as a string, if it exists.
*/
bool GetVariableAsString(const char *name, std::string *val) const;
/**
* Instances are now mostly thread-safe and totally independent,
* but some global parameters remain. Basically it is safe to use multiple
* TessBaseAPIs in different threads in parallel, UNLESS:
* you use SetVariable on some of the Params in classify and textord.
* If you do, then the effect will be to change it for all your instances.
*
* Start tesseract. Returns zero on success and -1 on failure.
* NOTE that the only members that may be called before Init are those
* listed above here in the class definition.
*
* The datapath must be the name of the tessdata directory.
* The language is (usually) an ISO 639-3 string or nullptr will default to
* eng. It is entirely safe (and eventually will be efficient too) to call
* Init multiple times on the same instance to change language, or just
* to reset the classifier.
* The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
* that multiple languages are to be loaded. Eg hin+eng will load Hindi and
* English. Languages may specify internally that they want to be loaded
* with one or more other languages, so the ~ sign is available to override
* that. Eg if hin were set to load eng by default, then hin+~eng would force
* loading only hin. The number of loaded languages is limited only by
* memory, with the caveat that loading additional languages will impact
* both speed and accuracy, as there is more work to do to decide on the
* applicable language, and there is more chance of hallucinating incorrect
* words.
* WARNING: On changing languages, all Tesseract parameters are reset
* back to their default values. (Which may vary between languages.)
* If you have a rare need to set a Variable that controls
* initialization for a second call to Init you should explicitly
* call End() and then use SetVariable before Init. This is only a very
* rare use case, since there are very few uses that require any parameters
* to be set before Init.
*
* If set_only_non_debug_params is true, only params that do not contain
* "debug" in the name will be set.
*/
int Init(const char *datapath, const char *language, OcrEngineMode mode,
char **configs, int configs_size,
const std::vector<std::string> *vars_vec,
const std::vector<std::string> *vars_values,
bool set_only_non_debug_params);
int Init(const char *datapath, const char *language, OcrEngineMode oem) {
return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
}
int Init(const char *datapath, const char *language) {
return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr,
false);
}
// In-memory version reads the traineddata file directly from the given
// data[data_size] array, and/or reads data via a FileReader.
int Init(const char *data, int data_size, const char *language,
OcrEngineMode mode, char **configs, int configs_size,
const std::vector<std::string> *vars_vec,
const std::vector<std::string> *vars_values,
bool set_only_non_debug_params, FileReader reader);
/**
* Returns the languages string used in the last valid initialization.
* If the last initialization specified "deu+hin" then that will be
* returned. If hin loaded eng automatically as well, then that will
* not be included in this list. To find the languages actually
* loaded use GetLoadedLanguagesAsVector.
* The returned string should NOT be deleted.
*/
const char *GetInitLanguagesAsString() const;
/**
* Returns the loaded languages in the vector of std::string.
* Includes all languages loaded by the last Init, including those loaded
* as dependencies of other loaded languages.
*/
void GetLoadedLanguagesAsVector(std::vector<std::string> *langs) const;
/**
* Returns the available languages in the sorted vector of std::string.
*/
void GetAvailableLanguagesAsVector(std::vector<std::string> *langs) const;
/**
* Init only for page layout analysis. Use only for calls to SetImage and
* AnalysePage. Calls that attempt recognition will generate an error.
*/
void InitForAnalysePage();
/**
* Read a "config" file containing a set of param, value pairs.
* Searches the standard places: tessdata/configs, tessdata/tessconfigs
* and also accepts a relative or absolute path name.
* Note: only non-init params will be set (init params are set by Init()).
*/
void ReadConfigFile(const char *filename);
/** Same as above, but only set debug params from the given config file. */
void ReadDebugConfigFile(const char *filename);
/**
* Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
* The mode is stored as an IntParam so it can also be modified by
* ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
*/
void SetPageSegMode(PageSegMode mode);
/** Return the current page segmentation mode. */
PageSegMode GetPageSegMode() const;
/**
* Recognize a rectangle from an image and return the result as a string.
* May be called many times for a single Init.
* Currently has no error checking.
* Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
* Palette color images will not work properly and must be converted to
* 24 bit.
* Binary images of 1 bit per pixel may also be given but they must be
* byte packed with the MSB of the first byte being the first pixel, and a
* 1 represents WHITE. For binary images set bytes_per_pixel=0.
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
*
* Note that TesseractRect is the simplified convenience interface.
* For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
* and one or more of the Get*Text functions below.
*/
char *TesseractRect(const unsigned char *imagedata, int bytes_per_pixel,
int bytes_per_line, int left, int top, int width,
int height);
/**
* Call between pages or documents etc to free up memory and forget
* adaptive data.
*/
void ClearAdaptiveClassifier();
/**
* @defgroup AdvancedAPI Advanced API
* The following methods break TesseractRect into pieces, so you can
* get hold of the thresholded image, get the text in different formats,
* get bounding boxes, confidences etc.
*/
/* @{ */
/**
* Provide an image for Tesseract to recognize. Format is as
* TesseractRect above. Copies the image buffer and converts to Pix.
* SetImage clears all recognition results, and sets the rectangle to the
* full image, so it may be followed immediately by a GetUTF8Text, and it
* will automatically perform recognition.
*/
void SetImage(const unsigned char *imagedata, int width, int height,
int bytes_per_pixel, int bytes_per_line);
/**
* Provide an image for Tesseract to recognize. As with SetImage above,
* Tesseract takes its own copy of the image, so it need not persist until
* after Recognize.
* Pix vs raw, which to use?
* Use Pix where possible. Tesseract uses Pix as its internal representation
* and it is therefore more efficient to provide a Pix directly.
*/
void SetImage(Pix *pix);
/**
* Set the resolution of the source image in pixels per inch so font size
* information can be calculated in results. Call this after SetImage().
*/
void SetSourceResolution(int ppi);
/**
* Restrict recognition to a sub-rectangle of the image. Call after SetImage.
* Each SetRectangle clears the recogntion results so multiple rectangles
* can be recognized with the same image.
*/
void SetRectangle(int left, int top, int width, int height);
/**
* Get a copy of the internal thresholded image from Tesseract.
* Caller takes ownership of the Pix and must pixDestroy it.
* May be called any time after SetImage, or after TesseractRect.
*/
Pix *GetThresholdedImage();
/**
* Get the result of page layout analysis as a leptonica-style
* Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
*/
Boxa *GetRegions(Pixa **pixa);
/**
* Get the textlines as a leptonica-style
* Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
* If raw_image is true, then extract from the original image instead of the
* thresholded image and pad by raw_padding pixels.
* If blockids is not nullptr, the block-id of each line is also returned as
* an array of one element per line. delete [] after use. If paraids is not
* nullptr, the paragraph-id of each line within its block is also returned as
* an array of one element per line. delete [] after use.
*/
Boxa *GetTextlines(bool raw_image, int raw_padding, Pixa **pixa,
int **blockids, int **paraids);
/*
Helper method to extract from the thresholded image. (most common usage)
*/
Boxa *GetTextlines(Pixa **pixa, int **blockids) {
return GetTextlines(false, 0, pixa, blockids, nullptr);
}
/**
* Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
* pair, in reading order. Enables downstream handling of non-rectangular
* regions.
* Can be called before or after Recognize.
* If blockids is not nullptr, the block-id of each line is also returned as
* an array of one element per line. delete [] after use.
*/
Boxa *GetStrips(Pixa **pixa, int **blockids);
/**
* Get the words as a leptonica-style
* Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
*/
Boxa *GetWords(Pixa **pixa);
/**
* Gets the individual connected (text) components (created
* after pages segmentation step, but before recognition)
* as a leptonica-style Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
* Note: the caller is responsible for calling boxaDestroy()
* on the returned Boxa array and pixaDestroy() on cc array.
*/
Boxa *GetConnectedComponents(Pixa **cc);
/**
* Get the given level kind of components (block, textline, word etc.) as a
* leptonica-style Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
* If blockids is not nullptr, the block-id of each component is also returned
* as an array of one element per component. delete [] after use.
* If blockids is not nullptr, the paragraph-id of each component with its
* block is also returned as an array of one element per component. delete []
* after use. If raw_image is true, then portions of the original image are
* extracted instead of the thresholded image and padded with raw_padding. If
* text_only is true, then only text components are returned.
*/
Boxa *GetComponentImages(PageIteratorLevel level, bool text_only,
bool raw_image, int raw_padding, Pixa **pixa,
int **blockids, int **paraids);
// Helper function to get binary images with no padding (most common usage).
Boxa *GetComponentImages(const PageIteratorLevel level, const bool text_only,
Pixa **pixa, int **blockids) {
return GetComponentImages(level, text_only, false, 0, pixa, blockids,
nullptr);
}
/**
* Returns the scale factor of the thresholded image that would be returned by
* GetThresholdedImage() and the various GetX() methods that call
* GetComponentImages().
* Returns 0 if no thresholder has been set.
*/
int GetThresholdedImageScaleFactor() const;
/**
* Runs page layout analysis in the mode set by SetPageSegMode.
* May optionally be called prior to Recognize to get access to just
* the page layout results. Returns an iterator to the results.
* If merge_similar_words is true, words are combined where suitable for use
* with a line recognizer. Use if you want to use AnalyseLayout to find the
* textlines, and then want to process textline fragments with an external
* line recognizer.
* Returns nullptr on error or an empty page.
* The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
PageIterator *AnalyseLayout();
PageIterator *AnalyseLayout(bool merge_similar_words);
/**
* Recognize the image from SetAndThresholdImage, generating Tesseract
* internal structures. Returns 0 on success.
* Optional. The Get*Text functions below will call Recognize if needed.
* After Recognize, the output is kept internally until the next SetImage.
*/
int Recognize(ETEXT_DESC *monitor);
/**
* Methods to retrieve information after SetAndThresholdImage(),
* Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
*/
/**
* Turns images into symbolic text.
*
* filename can point to a single image, a multi-page TIFF,
* or a plain text list of image filenames.
*
* retry_config is useful for debugging. If not nullptr, you can fall
* back to an alternate configuration if a page fails for some
* reason.
*
* timeout_millisec terminates processing if any single page
* takes too long. Set to 0 for unlimited time.
*
* renderer is responsible for creating the output. For example,
* use the TessTextRenderer if you want plaintext output, or
* the TessPDFRenderer to produce searchable PDF.
*
* If tessedit_page_number is non-negative, will only process that
* single page. Works for multi-page tiff file, or filelist.
*
* Returns true if successful, false on error.
*/
bool ProcessPages(const char *filename, const char *retry_config,
int timeout_millisec, TessResultRenderer *renderer);
// Does the real work of ProcessPages.
bool ProcessPagesInternal(const char *filename, const char *retry_config,
int timeout_millisec, TessResultRenderer *renderer);
/**
* Turn a single image into symbolic text.
*
* The pix is the image processed. filename and page_index are
* metadata used by side-effect processes, such as reading a box
* file or formatting as hOCR.
*
* See ProcessPages for descriptions of other parameters.
*/
bool ProcessPage(Pix *pix, int page_index, const char *filename,
const char *retry_config, int timeout_millisec,
TessResultRenderer *renderer);
/**
* Get a reading-order iterator to the results of LayoutAnalysis and/or
* Recognize. The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
ResultIterator *GetIterator();
/**
* Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
* The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
MutableIterator *GetMutableIterator();
/**
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
*/
char *GetUTF8Text();
/**
* Make a HTML-formatted string with hOCR markup from the internal
* data structures.
* page_number is 0-based but will appear in the output as 1-based.
* monitor can be used to
* cancel the recognition
* receive progress callbacks
* Returned string must be freed with the delete [] operator.
*/
char *GetHOCRText(ETEXT_DESC *monitor, int page_number);
/**
* Make a HTML-formatted string with hOCR markup from the internal
* data structures.
* page_number is 0-based but will appear in the output as 1-based.
* Returned string must be freed with the delete [] operator.
*/
char *GetHOCRText(int page_number);
/**
* Make an XML-formatted string with Alto markup from the internal
* data structures.
*/
char *GetAltoText(ETEXT_DESC *monitor, int page_number);
/**
* Make an XML-formatted string with Alto markup from the internal
* data structures.
*/
char *GetAltoText(int page_number);
/**
* Make a TSV-formatted string from the internal data structures.
* page_number is 0-based but will appear in the output as 1-based.
* Returned string must be freed with the delete [] operator.
*/
char *GetTSVText(int page_number);
/**
* Make a box file for LSTM training from the internal data structures.
* Constructs coordinates in the original image - not just the rectangle.
* page_number is a 0-based page index that will appear in the box file.
* Returned string must be freed with the delete [] operator.
*/
char *GetLSTMBoxText(int page_number);
/**
* The recognized text is returned as a char* which is coded in the same
* format as a box file used in training.
* Constructs coordinates in the original image - not just the rectangle.
* page_number is a 0-based page index that will appear in the box file.
* Returned string must be freed with the delete [] operator.
*/
char *GetBoxText(int page_number);
/**
* The recognized text is returned as a char* which is coded in the same
* format as a WordStr box file used in training.
* page_number is a 0-based page index that will appear in the box file.
* Returned string must be freed with the delete [] operator.
*/
char *GetWordStrBoxText(int page_number);
/**
* The recognized text is returned as a char* which is coded
* as UNLV format Latin-1 with specific reject and suspect codes.
* Returned string must be freed with the delete [] operator.
*/
char *GetUNLVText();
/**
* Detect the orientation of the input image and apparent script (alphabet).
* orient_deg is the detected clockwise rotation of the input image in degrees
* (0, 90, 180, 270)
* orient_conf is the confidence (15.0 is reasonably confident)
* script_name is an ASCII string, the name of the script, e.g. "Latin"
* script_conf is confidence level in the script
* Returns true on success and writes values to each parameter as an output
*/
bool DetectOrientationScript(int *orient_deg, float *orient_conf,
const char **script_name, float *script_conf);
/**
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
* page_number is a 0-based page index that will appear in the osd file.
*/
char *GetOsdText(int page_number);
/** Returns the (average) confidence value between 0 and 100. */
int MeanTextConf();
/**
* Returns all word confidences (between 0 and 100) in an array, terminated
* by -1. The calling function must delete [] after use.
* The number of confidences should correspond to the number of space-
* delimited words in GetUTF8Text.
*/
int *AllWordConfidences();
#ifndef DISABLED_LEGACY_ENGINE
/**
* Applies the given word to the adaptive classifier if possible.
* The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can
* tell the boundaries of the graphemes.
* Assumes that SetImage/SetRectangle have been used to set the image
* to the given word. The mode arg should be PSM_SINGLE_WORD or
* PSM_CIRCLE_WORD, as that will be used to control layout analysis.
* The currently set PageSegMode is preserved.
* Returns false if adaption was not possible for some reason.
*/
bool AdaptToWordStr(PageSegMode mode, const char *wordstr);
#endif // ndef DISABLED_LEGACY_ENGINE
/**
* Free up recognition results and any stored image data, without actually
* freeing any recognition data that would be time-consuming to reload.
* Afterwards, you must call SetImage or TesseractRect before doing
* any Recognize or Get* operation.
*/
void Clear();
/**
* Close down tesseract and free up all memory. End() is equivalent to
* destructing and reconstructing your TessBaseAPI.
* Once End() has been used, none of the other API functions may be used
* other than Init and anything declared above it in the class definition.
*/
void End();
/**
* Clear any library-level memory caches.
* There are a variety of expensive-to-load constant data structures (mostly
* language dictionaries) that are cached globally -- surviving the Init()
* and End() of individual TessBaseAPI's. This function allows the clearing
* of these caches.
**/
static void ClearPersistentCache();
/**
* Check whether a word is valid according to Tesseract's language model
* @return 0 if the word is invalid, non-zero if valid.
* @warning temporary! This function will be removed from here and placed
* in a separate API at some future time.
*/
int IsValidWord(const char *word) const;
// Returns true if utf8_character is defined in the UniCharset.
bool IsValidCharacter(const char *utf8_character) const;
bool GetTextDirection(int *out_offset, float *out_slope);
/** Sets Dict::letter_is_okay_ function to point to the given function. */
void SetDictFunc(DictFunc f);
/** Sets Dict::probability_in_context_ function to point to the given
* function.
*/
void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
/**
* Estimates the Orientation And Script of the image.
* @return true if the image was processed successfully.
*/
bool DetectOS(OSResults *);
/**
* Return text orientation of each block as determined by an earlier run
* of layout analysis.
*/
void GetBlockTextOrientations(int **block_orientation,
bool **vertical_writing);
/** This method returns the string form of the specified unichar. */
const char *GetUnichar(int unichar_id) const;
/** Return the pointer to the i-th dawg loaded into tesseract_ object. */
const Dawg *GetDawg(int i) const;
/** Return the number of dawgs loaded into tesseract_ object. */
int NumDawgs() const;
/** Returns the tesseract_ pointer held by this object (the pointer itself, not a copy of the pointee). */
Tesseract *tesseract() const {
  return this->tesseract_;
}
/** Returns the value stored in last_oem_requested_. */
OcrEngineMode oem() const {
  return this->last_oem_requested_;
}
void set_min_orientation_margin(double margin);
/* @} */
protected:
/** Common code for setting the image. Returns true if Init has been called.
*/
bool InternalSetImage();
/**
* Run the thresholder to make the thresholded image. If pix is not nullptr,
* the source is thresholded to pix instead of the internal IMAGE.
*/
virtual bool Threshold(Pix **pix);
/**
* Find lines from the image making the BLOCK_LIST.
* @return 0 on success.
*/
int FindLines();
/** Delete the pageres and block list ready for a new page. */
void ClearResults();
/**
* Return an LTR Result Iterator -- used only for training, as we really want
* to ignore all BiDi smarts at that point.
* delete once you're done with it.
*/
LTRResultIterator *GetLTRIterator();
/**
* Return the length of the output text string, as UTF8, assuming
* one newline per line and one per block, with a terminator,
* and assuming a single character reject marker for each rejected character.
* Also return the number of recognized blobs in blob_count.
*/
int TextLength(int *blob_count) const;
//// paragraphs.cpp ////////////////////////////////////////////////////
void DetectParagraphs(bool after_text_recognition);
/** Read-only access to the page_res_ pointer held by this object. */
const PAGE_RES *GetPageRes() const {
  return this->page_res_;
}
protected:
Tesseract *tesseract_; ///< The underlying data object.
Tesseract *osd_tesseract_; ///< For orientation & script detection.
EquationDetect *equ_detect_; ///< The equation detector.
FileReader reader_; ///< Reads files from any filesystem.
ImageThresholder *thresholder_; ///< Image thresholding module.
std::vector<ParagraphModel *> *paragraph_models_;
BLOCK_LIST *block_list_; ///< The page layout.
PAGE_RES *page_res_; ///< The page-level data.
std::string input_file_; ///< Name used by training code.
std::string output_file_; ///< Name used by debug code.
std::string datapath_; ///< Current location of tessdata.
std::string language_; ///< Last initialized language.
OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested.
bool recognition_done_; ///< page_res_ contains recognition data.
/**
* @defgroup ThresholderParams Thresholder Parameters
* Parameters saved from the Thresholder. Needed to rebuild coordinates.
*/
/* @{ */
int rect_left_;
int rect_top_;
int rect_width_;
int rect_height_;
int image_width_;
int image_height_;
/* @} */
private:
// A list of image filenames gets special consideration
bool ProcessPagesFileList(FILE *fp, std::string *buf,
const char *retry_config, int timeout_millisec,
TessResultRenderer *renderer,
int tessedit_page_number);
// TIFF supports multipage so gets special consideration.
bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size,
const char *filename, const char *retry_config,
int timeout_millisec,
TessResultRenderer *renderer,
int tessedit_page_number);
}; // class TessBaseAPI.
/** Escape a char string - replace &<>"' with HTML codes. */
std::string HOcrEscape(const char *text);
} // namespace tesseract
#endif // TESSERACT_API_BASEAPI_H_

View File

@ -1,484 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: capi.h
// Description: C-API TessBaseAPI
//
// (C) Copyright 2012, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef API_CAPI_H_
#define API_CAPI_H_
#include "export.h"
#ifdef __cplusplus
# include <tesseract/baseapi.h>
# include <tesseract/ocrclass.h>
# include <tesseract/pageiterator.h>
# include <tesseract/renderer.h>
# include <tesseract/resultiterator.h>
#endif
#include <stdbool.h>
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Fallback boolean vocabulary for this header: BOOL expands to int and
 * TRUE/FALSE to 1/0. All three are skipped together when BOOL is already
 * defined; TRUE and FALSE are not tested individually. */
#ifndef BOOL
# define BOOL int
# define TRUE 1
# define FALSE 0
#endif
#ifdef __cplusplus
typedef tesseract::TessResultRenderer TessResultRenderer;
typedef tesseract::TessBaseAPI TessBaseAPI;
typedef tesseract::PageIterator TessPageIterator;
typedef tesseract::ResultIterator TessResultIterator;
typedef tesseract::MutableIterator TessMutableIterator;
typedef tesseract::ChoiceIterator TessChoiceIterator;
typedef tesseract::OcrEngineMode TessOcrEngineMode;
typedef tesseract::PageSegMode TessPageSegMode;
typedef tesseract::PageIteratorLevel TessPageIteratorLevel;
typedef tesseract::Orientation TessOrientation;
typedef tesseract::ParagraphJustification TessParagraphJustification;
typedef tesseract::WritingDirection TessWritingDirection;
typedef tesseract::TextlineOrder TessTextlineOrder;
typedef tesseract::PolyBlockType TessPolyBlockType;
typedef tesseract::ETEXT_DESC ETEXT_DESC;
#else
typedef struct TessResultRenderer TessResultRenderer;
typedef struct TessBaseAPI TessBaseAPI;
typedef struct TessPageIterator TessPageIterator;
typedef struct TessResultIterator TessResultIterator;
typedef struct TessMutableIterator TessMutableIterator;
typedef struct TessChoiceIterator TessChoiceIterator;
/* Plain-C declaration of TessOcrEngineMode (the C++ branch of this header
 * aliases tesseract::OcrEngineMode instead). Values are written out so the
 * numeric mapping is visible at a glance. */
typedef enum TessOcrEngineMode {
  OEM_TESSERACT_ONLY = 0,
  OEM_LSTM_ONLY = 1,
  OEM_TESSERACT_LSTM_COMBINED = 2,
  OEM_DEFAULT = 3
} TessOcrEngineMode;
/* Plain-C declaration of TessPageSegMode (the C++ branch of this header
 * aliases tesseract::PageSegMode instead). Values are written out so the
 * numeric mapping is visible at a glance. */
typedef enum TessPageSegMode {
  PSM_OSD_ONLY = 0,
  PSM_AUTO_OSD = 1,
  PSM_AUTO_ONLY = 2,
  PSM_AUTO = 3,
  PSM_SINGLE_COLUMN = 4,
  PSM_SINGLE_BLOCK_VERT_TEXT = 5,
  PSM_SINGLE_BLOCK = 6,
  PSM_SINGLE_LINE = 7,
  PSM_SINGLE_WORD = 8,
  PSM_CIRCLE_WORD = 9,
  PSM_SINGLE_CHAR = 10,
  PSM_SPARSE_TEXT = 11,
  PSM_SPARSE_TEXT_OSD = 12,
  PSM_RAW_LINE = 13,
  PSM_COUNT /* left implicit: one past the last enumerator above (14) */
} TessPageSegMode;
/* Plain-C declaration of TessPageIteratorLevel (the C++ branch of this header
 * aliases tesseract::PageIteratorLevel instead). Values are written out so
 * the numeric mapping is visible at a glance. */
typedef enum TessPageIteratorLevel {
  RIL_BLOCK = 0,
  RIL_PARA = 1,
  RIL_TEXTLINE = 2,
  RIL_WORD = 3,
  RIL_SYMBOL = 4
} TessPageIteratorLevel;
/* Plain-C declaration of TessPolyBlockType (the C++ branch of this header
 * aliases tesseract::PolyBlockType instead). Values are written out so the
 * numeric mapping is visible at a glance. */
typedef enum TessPolyBlockType {
  PT_UNKNOWN = 0,
  PT_FLOWING_TEXT = 1,
  PT_HEADING_TEXT = 2,
  PT_PULLOUT_TEXT = 3,
  PT_EQUATION = 4,
  PT_INLINE_EQUATION = 5,
  PT_TABLE = 6,
  PT_VERTICAL_TEXT = 7,
  PT_CAPTION_TEXT = 8,
  PT_FLOWING_IMAGE = 9,
  PT_HEADING_IMAGE = 10,
  PT_PULLOUT_IMAGE = 11,
  PT_HORZ_LINE = 12,
  PT_VERT_LINE = 13,
  PT_NOISE = 14,
  PT_COUNT /* left implicit: one past the last enumerator above (15) */
} TessPolyBlockType;
/* Plain-C declaration of TessOrientation (the C++ branch of this header
 * aliases tesseract::Orientation instead). Values are written out so the
 * numeric mapping is visible at a glance. */
typedef enum TessOrientation {
  ORIENTATION_PAGE_UP = 0,
  ORIENTATION_PAGE_RIGHT = 1,
  ORIENTATION_PAGE_DOWN = 2,
  ORIENTATION_PAGE_LEFT = 3
} TessOrientation;
/* Plain-C declaration of TessParagraphJustification (the C++ branch of this
 * header aliases tesseract::ParagraphJustification instead). Values are
 * written out so the numeric mapping is visible at a glance. */
typedef enum TessParagraphJustification {
  JUSTIFICATION_UNKNOWN = 0,
  JUSTIFICATION_LEFT = 1,
  JUSTIFICATION_CENTER = 2,
  JUSTIFICATION_RIGHT = 3
} TessParagraphJustification;
/* Plain-C declaration of TessWritingDirection (the C++ branch of this header
 * aliases tesseract::WritingDirection instead). Values are written out so the
 * numeric mapping is visible at a glance. */
typedef enum TessWritingDirection {
  WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
  WRITING_DIRECTION_TOP_TO_BOTTOM = 2
} TessWritingDirection;
/* Plain-C declaration of TessTextlineOrder (the C++ branch of this header
 * aliases tesseract::TextlineOrder instead). Values are written out so the
 * numeric mapping is visible at a glance. */
typedef enum TessTextlineOrder {
  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2
} TessTextlineOrder;
typedef struct ETEXT_DESC ETEXT_DESC;
#endif
/* Function-pointer type accepted by TessMonitorSetCancelFunc.
 * NOTE(review): cancel_this is presumably the pointer registered through
 * TessMonitorSetCancelThis, and a true return presumably requests
 * cancellation -- confirm against ocrclass.h. */
typedef bool (*TessCancelFunc)(void *cancel_this, int words);
/* Function-pointer type accepted by TessMonitorSetProgressFunc.
 * NOTE(review): left/right/top/bottom look like a bounding box for the item
 * being reported and the meaning of the bool return is not visible here --
 * confirm against ocrclass.h. */
typedef bool (*TessProgressFunc)(ETEXT_DESC *ths, int left, int right, int top,
int bottom);
struct Pix;
struct Boxa;
struct Pixa;
/* General free functions */
TESS_API const char *TessVersion();
TESS_API void TessDeleteText(const char *text);
TESS_API void TessDeleteTextArray(char **arr);
TESS_API void TessDeleteIntArray(const int *arr);
/* Renderer API */
TESS_API TessResultRenderer *TessTextRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessHOcrRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessHOcrRendererCreate2(const char *outputbase,
BOOL font_info);
TESS_API TessResultRenderer *TessAltoRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessTsvRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessPDFRendererCreate(const char *outputbase,
const char *datadir,
BOOL textonly);
TESS_API TessResultRenderer *TessUnlvRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessBoxTextRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessLSTMBoxRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessWordStrBoxRendererCreate(
const char *outputbase);
TESS_API void TessDeleteResultRenderer(TessResultRenderer *renderer);
TESS_API void TessResultRendererInsert(TessResultRenderer *renderer,
TessResultRenderer *next);
TESS_API TessResultRenderer *TessResultRendererNext(
TessResultRenderer *renderer);
TESS_API BOOL TessResultRendererBeginDocument(TessResultRenderer *renderer,
const char *title);
TESS_API BOOL TessResultRendererAddImage(TessResultRenderer *renderer,
TessBaseAPI *api);
TESS_API BOOL TessResultRendererEndDocument(TessResultRenderer *renderer);
TESS_API const char *TessResultRendererExtention(TessResultRenderer *renderer);
TESS_API const char *TessResultRendererTitle(TessResultRenderer *renderer);
TESS_API int TessResultRendererImageNum(TessResultRenderer *renderer);
/* Base API */
TESS_API TessBaseAPI *TessBaseAPICreate();
TESS_API void TessBaseAPIDelete(TessBaseAPI *handle);
TESS_API size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI *handle, void **device);
TESS_API void TessBaseAPISetInputName(TessBaseAPI *handle, const char *name);
TESS_API const char *TessBaseAPIGetInputName(TessBaseAPI *handle);
TESS_API void TessBaseAPISetInputImage(TessBaseAPI *handle, struct Pix *pix);
TESS_API struct Pix *TessBaseAPIGetInputImage(TessBaseAPI *handle);
TESS_API int TessBaseAPIGetSourceYResolution(TessBaseAPI *handle);
TESS_API const char *TessBaseAPIGetDatapath(TessBaseAPI *handle);
TESS_API void TessBaseAPISetOutputName(TessBaseAPI *handle, const char *name);
TESS_API BOOL TessBaseAPISetVariable(TessBaseAPI *handle, const char *name,
const char *value);
TESS_API BOOL TessBaseAPISetDebugVariable(TessBaseAPI *handle, const char *name,
const char *value);
TESS_API BOOL TessBaseAPIGetIntVariable(const TessBaseAPI *handle,
const char *name, int *value);
TESS_API BOOL TessBaseAPIGetBoolVariable(const TessBaseAPI *handle,
const char *name, BOOL *value);
TESS_API BOOL TessBaseAPIGetDoubleVariable(const TessBaseAPI *handle,
const char *name, double *value);
TESS_API const char *TessBaseAPIGetStringVariable(const TessBaseAPI *handle,
const char *name);
TESS_API void TessBaseAPIPrintVariables(const TessBaseAPI *handle, FILE *fp);
TESS_API BOOL TessBaseAPIPrintVariablesToFile(const TessBaseAPI *handle,
const char *filename);
TESS_API int TessBaseAPIInit1(TessBaseAPI *handle, const char *datapath,
const char *language, TessOcrEngineMode oem,
char **configs, int configs_size);
TESS_API int TessBaseAPIInit2(TessBaseAPI *handle, const char *datapath,
const char *language, TessOcrEngineMode oem);
TESS_API int TessBaseAPIInit3(TessBaseAPI *handle, const char *datapath,
const char *language);
TESS_API int TessBaseAPIInit4(TessBaseAPI *handle, const char *datapath,
const char *language, TessOcrEngineMode mode,
char **configs, int configs_size, char **vars_vec,
char **vars_values, size_t vars_vec_size,
BOOL set_only_non_debug_params);
TESS_API int TessBaseAPIInit5(TessBaseAPI *handle, const char *data, int data_size,
const char *language, TessOcrEngineMode mode,
char **configs, int configs_size, char **vars_vec,
char **vars_values, size_t vars_vec_size,
BOOL set_only_non_debug_params);
TESS_API const char *TessBaseAPIGetInitLanguagesAsString(
const TessBaseAPI *handle);
TESS_API char **TessBaseAPIGetLoadedLanguagesAsVector(
const TessBaseAPI *handle);
TESS_API char **TessBaseAPIGetAvailableLanguagesAsVector(
const TessBaseAPI *handle);
TESS_API void TessBaseAPIInitForAnalysePage(TessBaseAPI *handle);
TESS_API void TessBaseAPIReadConfigFile(TessBaseAPI *handle,
const char *filename);
TESS_API void TessBaseAPIReadDebugConfigFile(TessBaseAPI *handle,
const char *filename);
TESS_API void TessBaseAPISetPageSegMode(TessBaseAPI *handle,
TessPageSegMode mode);
TESS_API TessPageSegMode TessBaseAPIGetPageSegMode(const TessBaseAPI *handle);
TESS_API char *TessBaseAPIRect(TessBaseAPI *handle,
const unsigned char *imagedata,
int bytes_per_pixel, int bytes_per_line,
int left, int top, int width, int height);
TESS_API void TessBaseAPIClearAdaptiveClassifier(TessBaseAPI *handle);
TESS_API void TessBaseAPISetImage(TessBaseAPI *handle,
const unsigned char *imagedata, int width,
int height, int bytes_per_pixel,
int bytes_per_line);
TESS_API void TessBaseAPISetImage2(TessBaseAPI *handle, struct Pix *pix);
TESS_API void TessBaseAPISetSourceResolution(TessBaseAPI *handle, int ppi);
TESS_API void TessBaseAPISetRectangle(TessBaseAPI *handle, int left, int top,
int width, int height);
TESS_API struct Pix *TessBaseAPIGetThresholdedImage(TessBaseAPI *handle);
TESS_API struct Boxa *TessBaseAPIGetRegions(TessBaseAPI *handle,
struct Pixa **pixa);
TESS_API struct Boxa *TessBaseAPIGetTextlines(TessBaseAPI *handle,
struct Pixa **pixa,
int **blockids);
TESS_API struct Boxa *TessBaseAPIGetTextlines1(TessBaseAPI *handle,
BOOL raw_image, int raw_padding,
struct Pixa **pixa,
int **blockids, int **paraids);
TESS_API struct Boxa *TessBaseAPIGetStrips(TessBaseAPI *handle,
struct Pixa **pixa, int **blockids);
TESS_API struct Boxa *TessBaseAPIGetWords(TessBaseAPI *handle,
struct Pixa **pixa);
TESS_API struct Boxa *TessBaseAPIGetConnectedComponents(TessBaseAPI *handle,
struct Pixa **cc);
TESS_API struct Boxa *TessBaseAPIGetComponentImages(TessBaseAPI *handle,
TessPageIteratorLevel level,
BOOL text_only,
struct Pixa **pixa,
int **blockids);
TESS_API struct Boxa *TessBaseAPIGetComponentImages1(
TessBaseAPI *handle, TessPageIteratorLevel level, BOOL text_only,
BOOL raw_image, int raw_padding, struct Pixa **pixa, int **blockids,
int **paraids);
TESS_API int TessBaseAPIGetThresholdedImageScaleFactor(
const TessBaseAPI *handle);
TESS_API TessPageIterator *TessBaseAPIAnalyseLayout(TessBaseAPI *handle);
TESS_API int TessBaseAPIRecognize(TessBaseAPI *handle, ETEXT_DESC *monitor);
TESS_API BOOL TessBaseAPIProcessPages(TessBaseAPI *handle, const char *filename,
const char *retry_config,
int timeout_millisec,
TessResultRenderer *renderer);
TESS_API BOOL TessBaseAPIProcessPage(TessBaseAPI *handle, struct Pix *pix,
int page_index, const char *filename,
const char *retry_config,
int timeout_millisec,
TessResultRenderer *renderer);
TESS_API TessResultIterator *TessBaseAPIGetIterator(TessBaseAPI *handle);
TESS_API TessMutableIterator *TessBaseAPIGetMutableIterator(
TessBaseAPI *handle);
TESS_API char *TessBaseAPIGetUTF8Text(TessBaseAPI *handle);
TESS_API char *TessBaseAPIGetHOCRText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetAltoText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetTsvText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetBoxText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetLSTMBoxText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetWordStrBoxText(TessBaseAPI *handle,
int page_number);
TESS_API char *TessBaseAPIGetUNLVText(TessBaseAPI *handle);
TESS_API int TessBaseAPIMeanTextConf(TessBaseAPI *handle);
TESS_API int *TessBaseAPIAllWordConfidences(TessBaseAPI *handle);
#ifndef DISABLED_LEGACY_ENGINE
TESS_API BOOL TessBaseAPIAdaptToWordStr(TessBaseAPI *handle,
TessPageSegMode mode,
const char *wordstr);
#endif // #ifndef DISABLED_LEGACY_ENGINE
TESS_API void TessBaseAPIClear(TessBaseAPI *handle);
TESS_API void TessBaseAPIEnd(TessBaseAPI *handle);
TESS_API int TessBaseAPIIsValidWord(TessBaseAPI *handle, const char *word);
TESS_API BOOL TessBaseAPIGetTextDirection(TessBaseAPI *handle, int *out_offset,
float *out_slope);
TESS_API const char *TessBaseAPIGetUnichar(TessBaseAPI *handle, int unichar_id);
TESS_API void TessBaseAPIClearPersistentCache(TessBaseAPI *handle);
#ifndef DISABLED_LEGACY_ENGINE
// Call TessDeleteText(*best_script_name) to free memory allocated by this
// function
TESS_API BOOL TessBaseAPIDetectOrientationScript(TessBaseAPI *handle,
int *orient_deg,
float *orient_conf,
const char **script_name,
float *script_conf);
#endif // #ifndef DISABLED_LEGACY_ENGINE
TESS_API void TessBaseAPISetMinOrientationMargin(TessBaseAPI *handle,
double margin);
TESS_API int TessBaseAPINumDawgs(const TessBaseAPI *handle);
TESS_API TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI *handle);
TESS_API void TessBaseGetBlockTextOrientations(TessBaseAPI *handle,
int **block_orientation,
bool **vertical_writing);
/* Page iterator */
TESS_API void TessPageIteratorDelete(TessPageIterator *handle);
TESS_API TessPageIterator *TessPageIteratorCopy(const TessPageIterator *handle);
TESS_API void TessPageIteratorBegin(TessPageIterator *handle);
TESS_API BOOL TessPageIteratorNext(TessPageIterator *handle,
TessPageIteratorLevel level);
TESS_API BOOL TessPageIteratorIsAtBeginningOf(const TessPageIterator *handle,
TessPageIteratorLevel level);
TESS_API BOOL TessPageIteratorIsAtFinalElement(const TessPageIterator *handle,
TessPageIteratorLevel level,
TessPageIteratorLevel element);
TESS_API BOOL TessPageIteratorBoundingBox(const TessPageIterator *handle,
TessPageIteratorLevel level,
int *left, int *top, int *right,
int *bottom);
TESS_API TessPolyBlockType
TessPageIteratorBlockType(const TessPageIterator *handle);
TESS_API struct Pix *TessPageIteratorGetBinaryImage(
const TessPageIterator *handle, TessPageIteratorLevel level);
TESS_API struct Pix *TessPageIteratorGetImage(const TessPageIterator *handle,
TessPageIteratorLevel level,
int padding,
struct Pix *original_image,
int *left, int *top);
TESS_API BOOL TessPageIteratorBaseline(const TessPageIterator *handle,
TessPageIteratorLevel level, int *x1,
int *y1, int *x2, int *y2);
TESS_API void TessPageIteratorOrientation(
TessPageIterator *handle, TessOrientation *orientation,
TessWritingDirection *writing_direction, TessTextlineOrder *textline_order,
float *deskew_angle);
TESS_API void TessPageIteratorParagraphInfo(
TessPageIterator *handle, TessParagraphJustification *justification,
BOOL *is_list_item, BOOL *is_crown, int *first_line_indent);
/* Result iterator */
TESS_API void TessResultIteratorDelete(TessResultIterator *handle);
TESS_API TessResultIterator *TessResultIteratorCopy(
const TessResultIterator *handle);
TESS_API TessPageIterator *TessResultIteratorGetPageIterator(
TessResultIterator *handle);
TESS_API const TessPageIterator *TessResultIteratorGetPageIteratorConst(
const TessResultIterator *handle);
TESS_API TessChoiceIterator *TessResultIteratorGetChoiceIterator(
const TessResultIterator *handle);
TESS_API BOOL TessResultIteratorNext(TessResultIterator *handle,
TessPageIteratorLevel level);
TESS_API char *TessResultIteratorGetUTF8Text(const TessResultIterator *handle,
TessPageIteratorLevel level);
TESS_API float TessResultIteratorConfidence(const TessResultIterator *handle,
TessPageIteratorLevel level);
TESS_API const char *TessResultIteratorWordRecognitionLanguage(
const TessResultIterator *handle);
TESS_API const char *TessResultIteratorWordFontAttributes(
const TessResultIterator *handle, BOOL *is_bold, BOOL *is_italic,
BOOL *is_underlined, BOOL *is_monospace, BOOL *is_serif, BOOL *is_smallcaps,
int *pointsize, int *font_id);
TESS_API BOOL
TessResultIteratorWordIsFromDictionary(const TessResultIterator *handle);
TESS_API BOOL TessResultIteratorWordIsNumeric(const TessResultIterator *handle);
TESS_API BOOL
TessResultIteratorSymbolIsSuperscript(const TessResultIterator *handle);
TESS_API BOOL
TessResultIteratorSymbolIsSubscript(const TessResultIterator *handle);
TESS_API BOOL
TessResultIteratorSymbolIsDropcap(const TessResultIterator *handle);
TESS_API void TessChoiceIteratorDelete(TessChoiceIterator *handle);
TESS_API BOOL TessChoiceIteratorNext(TessChoiceIterator *handle);
TESS_API const char *TessChoiceIteratorGetUTF8Text(
const TessChoiceIterator *handle);
TESS_API float TessChoiceIteratorConfidence(const TessChoiceIterator *handle);
/* Progress monitor */
TESS_API ETEXT_DESC *TessMonitorCreate();
TESS_API void TessMonitorDelete(ETEXT_DESC *monitor);
TESS_API void TessMonitorSetCancelFunc(ETEXT_DESC *monitor,
TessCancelFunc cancelFunc);
TESS_API void TessMonitorSetCancelThis(ETEXT_DESC *monitor, void *cancelThis);
TESS_API void *TessMonitorGetCancelThis(ETEXT_DESC *monitor);
TESS_API void TessMonitorSetProgressFunc(ETEXT_DESC *monitor,
TessProgressFunc progressFunc);
TESS_API int TessMonitorGetProgress(ETEXT_DESC *monitor);
TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC *monitor, int deadline);
#ifdef __cplusplus
}
#endif
#endif // API_CAPI_H_

View File

@ -1,37 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: export.h
// Description: Place holder
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_PLATFORM_H_
#define TESSERACT_PLATFORM_H_
/* TESS_API is the decoration placed on the library's exported declarations.
 * The whole ladder is skipped when TESS_API is already defined. */
#ifndef TESS_API
# if defined(_WIN32) || defined(__CYGWIN__)
/* _WIN32 / __CYGWIN__: dllexport when TESS_EXPORTS is defined, dllimport when
 * TESS_IMPORTS is defined, otherwise TESS_API expands to nothing. */
# if defined(TESS_EXPORTS)
# define TESS_API __declspec(dllexport)
# elif defined(TESS_IMPORTS)
# define TESS_API __declspec(dllimport)
# else
# define TESS_API
# endif
# else
/* Everything else: default symbol visibility when either TESS_EXPORTS or
 * TESS_IMPORTS is defined, otherwise TESS_API expands to nothing. */
# if defined(TESS_EXPORTS) || defined(TESS_IMPORTS)
# define TESS_API __attribute__((visibility("default")))
# else
# define TESS_API
# endif
# endif
#endif
#endif // TESSERACT_PLATFORM_H_

View File

@ -1,235 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: ltrresultiterator.h
// Description: Iterator for tesseract results in strict left-to-right
// order that avoids using tesseract internal data structures.
// Author: Ray Smith
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
#define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
#include "export.h" // for TESS_API
#include "pageiterator.h" // for PageIterator
#include "publictypes.h" // for PageIteratorLevel
#include "unichar.h" // for StrongScriptDirection
namespace tesseract {

// Engine-internal types. They are forward-declared only, so this public header
// never has to include tesseract's internal headers.
class BLOB_CHOICE_IT;
class PAGE_RES;
class WERD_RES;
class Tesseract;

/// Iterator over recognition results in strict left-to-right order.
///
/// Extends PageIterator (which supplies navigation and geometry) with the
/// text-specific accessors needed to read OCR output at every level of the
/// page hierarchy. See publictypes.h for PageIteratorLevel.
///
/// WARNING: the iterator refers to data held inside TessBaseAPI. It is only
/// usable while that TessBaseAPI object exists and has not been subjected to
/// Init, SetImage, Recognize, Clear, End, DetectOS, or any other call that
/// changes the internal PAGE_RES.
class TESS_API LTRResultIterator : public PageIterator {
  friend class ChoiceIterator;

public:
  /// Builds an iterator from state handed over by the BaseAPI.
  ///
  /// page_res and tesseract come directly from the BaseAPI. The rectangle
  /// parameters are copied indirectly from the Thresholder (via the BaseAPI)
  /// and describe a rectangle of the original image in top-left-origin
  /// coordinates; its top-left must therefore be added to any output box to
  /// express it in original-image coordinates (see TessBaseAPI::SetRectangle).
  /// scale and scaled_yres cover the case where the Thresholder scaled the
  /// rectangle before thresholding: coordinates in tesseract's image must be
  /// divided by scale before (rect_left, rect_top) is added, and scaled_yres
  /// is the effective resolution of the binary image given to tesseract.
  /// Begin has already been called when the constructor returns.
  LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
                    int scaled_yres, int rect_left, int rect_top,
                    int rect_width, int rect_height);

  ~LTRResultIterator() override;

  // Copying is supported, which allows iterating over lower-level objects
  // while an iterator to a higher-level object is kept. Copies do NOT call
  // Begin, so iteration continues from the source's position.
  // TODO: the base-class copy constructor and operator= are sufficient for
  // now; if data members are added here they must be copied as well.

  // ---------------- Moving around within the page ----------------
  // Provided by PageIterator.

  // ---------------- Accessing data ----------------

  /// Returns the null-terminated UTF-8 text of the current object at the
  /// given level. The caller owns the buffer and frees it with delete [].
  char *GetUTF8Text(PageIteratorLevel level) const;

  /// Sets the string inserted at the end of each text line ("\n" by default).
  void SetLineSeparator(const char *new_line);

  /// Sets the string inserted at the end of each paragraph ("\n" by default).
  void SetParagraphSeparator(const char *new_para);

  /// Returns the mean confidence of the current object at the given level,
  /// to be read as a percent probability (0.0f-100.0f).
  float Confidence(PageIteratorLevel level) const;

  // ---------------- Functions that refer to words only ----------------

  /// Reports the font attributes of the current word. When iterating above
  /// word level (e.g. text lines) the first word of that object is used.
  ///
  /// The return value is a font-name string that points into an internal
  /// table and MUST NOT be deleted. It lives as long as the iterator itself,
  /// i.e. it is invalidated by TessBaseAPI members such as Init, SetImage,
  /// End, or by deleting the TessBaseAPI. pointsize is given in printer's
  /// points (1/72 inch).
  const char *WordFontAttributes(bool *is_bold, bool *is_italic,
                                 bool *is_underlined, bool *is_monospace,
                                 bool *is_serif, bool *is_smallcaps,
                                 int *pointsize, int *font_id) const;

  /// Returns the name of the language used to recognize this word, or
  /// nullptr on error. The pointer must not be deleted.
  const char *WordRecognitionLanguage() const;

  /// Returns the overall directionality of this word.
  StrongScriptDirection WordDirection() const;

  /// Returns true if the current word was found in a dictionary.
  bool WordIsFromDictionary() const;

  /// Returns the number of blanks before the current word.
  int BlanksBeforeWord() const;

  /// Returns true if the current word is numeric.
  bool WordIsNumeric() const;

  /// Returns true if the word contains blamer information.
  bool HasBlamerInfo() const;

  /// Returns the pointer to the ParamsTrainingBundle stored in the
  /// BlamerBundle of the current word.
  const void *GetParamsTrainingBundle() const;

  /// Returns the blamer information string for this word.
  /// Precondition: the word's blamer_bundle is not nullptr.
  const char *GetBlamerDebug() const;

  /// Returns the misadaption information string for this word.
  /// Precondition: the word's blamer_bundle is not nullptr.
  const char *GetBlamerMisadaptionDebug() const;

  /// Returns true if a truth string was recorded for the current word.
  bool HasTruthString() const;

  /// Returns true if str is equivalent to the truth string of the current
  /// word.
  bool EquivalentToTruth(const char *str) const;

  /// Returns the null-terminated UTF-8 truth string of the current word.
  /// The caller owns the buffer and frees it with delete [].
  char *WordTruthUTF8Text() const;

  /// Returns the null-terminated UTF-8 normalized OCR string of the current
  /// word. The caller owns the buffer and frees it with delete [].
  char *WordNormedUTF8Text() const;

  /// Returns a pointer to the serialized choice lattice and stores the
  /// number of bytes of lattice data in *lattice_size.
  const char *WordLattice(int *lattice_size) const;

  // ---------------- Functions that refer to symbols only ----------------
  // When iterating above symbol level (e.g. words), each of these reports
  // the attribute of the first symbol of the current object.

  /// Returns true if the current symbol is a superscript.
  bool SymbolIsSuperscript() const;

  /// Returns true if the current symbol is a subscript.
  bool SymbolIsSubscript() const;

  /// Returns true if the current symbol is a dropcap.
  bool SymbolIsDropcap() const;

protected:
  // Separator strings configured through SetLineSeparator and
  // SetParagraphSeparator (presumably; the setters' bodies are not visible
  // in this header).
  const char *line_separator_;
  const char *paragraph_separator_;
};

/// Iterates over the classifier choices for a single RIL_SYMBOL.
///
/// NOTE(review): a destructor is declared while the copy operations are left
/// implicit and the class stores raw pointers; confirm against the
/// implementation whether copying a ChoiceIterator is safe.
class TESS_API ChoiceIterator {
public:
  /// Constructs from an LTRResultIterator positioned on the symbol of
  /// interest. The ChoiceIterator permits a single pass over that symbol's
  /// choices and is useless afterwards.
  explicit ChoiceIterator(const LTRResultIterator &result_it);

  ~ChoiceIterator();

  /// Advances to the next choice for the symbol; returns false when no
  /// choices are left.
  bool Next();

  // ---------------- Accessing data ----------------

  /// Returns the null-terminated UTF-8 text of the current choice.
  /// NOTE: unlike LTRResultIterator::GetUTF8Text, the result points into an
  /// internal structure and must NOT be delete[]ed.
  const char *GetUTF8Text() const;

  /// Returns the confidence of the current choice; the scale depends on the
  /// language data in use. With LSTM-only traineddata the range is
  /// 0.0f-1.0f and all choices for one symbol should roughly add up to 1.0f.
  /// With legacy-engine-only traineddata the value is a percent probability
  /// (0.0f-100.0f); such values do not add up to 100, each stands on its own.
  float Confidence() const;

  /// Returns all timesteps that belong to the currently selected symbol.
  /// A timestep is a vector of (symbol, probability) pairs, where the number
  /// states the probability of the corresponding symbol.
  std::vector<std::vector<std::pair<const char *, float>>> *Timesteps() const;

private:
  /// Clears the remaining spaces out of the results and adapts the
  /// probabilities.
  void filterSpaces();

  /// WERD_RES object owned by the API.
  WERD_RES *word_res_;
  /// Iterator over the blob choices.
  BLOB_CHOICE_IT *choice_it_;
  // LSTM choice list and the current position inside it (roles inferred
  // from the member names; confirm against the implementation).
  std::vector<std::pair<const char *, float>> *LSTM_choices_ = nullptr;
  std::vector<std::pair<const char *, float>>::iterator LSTM_choice_it_;
  const int *tstep_index_;
  /// Regulates the rating granularity.
  double rating_coefficient_;
  /// Leading blanks.
  int blanks_before_word_;
  /// True when there is LSTM-engine-related trained data.
  bool oemLSTM_;
};

} // namespace tesseract.
#endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_

View File

@ -1,158 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
/**********************************************************************
* File: ocrclass.h
* Description: Class definitions and constants for the OCR API.
* Author: Hewlett-Packard Co
*
* (C) Copyright 1996, Hewlett-Packard Co.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
/**********************************************************************
* This file contains typedefs for all the structures used by
* the HP OCR interface.
* The structures are designed to allow them to be used with any
* structure alignment up to 8.
**********************************************************************/
#ifndef CCUTIL_OCRCLASS_H_
#define CCUTIL_OCRCLASS_H_
#include <chrono>
#include <ctime>
namespace tesseract {

/**********************************************************************
 * EANYCODE_CHAR
 * One output character. The character code is interpreted in the
 * character set of the current font, and output text is delivered as an
 * array of these structures.
 *
 * Spaces and line endings are not stored as characters of their own;
 * they are encoded in the structures of the surrounding characters. The
 * first character of a word carries a positive value of blanks.
 *
 * Missing information should be set to the defaults given in the member
 * comments. If word bounds are known but character bounds are not, the
 * top and bottom of every character should be those of the word, the
 * left of the first and the right of the last character of the word
 * should be set, and all other lefts and rights should be -1. When set,
 * right and bottom equal left+width and top+height.
 *
 * Most members come straight from the parameters of ocr_append_char.
 * The formatting member holds the enhancement parameter with the line
 * direction packed into its top 3 bits:
 *   0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para, 5=UL Para,
 *   6=TB char, 7=BT char.
 * API users need not know the coding, only that it is backwards
 * compatible with the previous version.
 **********************************************************************/
struct EANYCODE_CHAR {
  // From version 2.0 onwards char_code is UTF-8. An ASCII character occupies
  // one structure; any other character is returned as two or more structures,
  // each holding a single byte of the UTF-8 sequence and all sharing the same
  // bounding box. Programs that handle languages with different character
  // sets must treat extended characters accordingly, and *all* code must be
  // prepared to receive UTF-8 for characters such as bullets and fancy quotes.
  uint16_t char_code;  // the character itself
  int16_t left;        // left edge of the char (default -1)
  int16_t right;       // right edge of the char (default -1)
  int16_t top;         // top edge of the char (default -1)
  int16_t bottom;      // bottom edge of the char (default -1)
  int16_t font_index;  // which font (default 0)
  uint8_t confidence;  // 0 = perfect, 100 = reject (default 0/100)
  uint8_t point_size;  // size of the char, 72 = 1 inch (default 10)
  int8_t blanks;       // number of spaces before this char (default 1)
  uint8_t formatting;  // char formatting (default 0)
};

/**********************************************************************
 * ETEXT_DESC
 * Describes the output of the OCR engine. The same structure serves as
 * the progress monitor and as the final output header, because it has to
 * be a valid progress monitor while the engine is still storing its
 * output to shared memory.
 *
 * While recognition is in progress all buffer info is -1. Progress
 * starts at 0 and rises to 100 with no further constraint, and each
 * progress callback also receives the bounding box of the word that is
 * currently being processed.
 *
 * On every progress callback the OCR engine must set ocr_alive to 1; the
 * HP side sets it back to 0. Repeated failure to reset it to 1 indicates
 * that the OCR engine is dead.
 *
 * A non-null cancel function is called with the number of user words
 * found; if it returns true the operation is cancelled.
 **********************************************************************/
class ETEXT_DESC;

using CANCEL_FUNC = bool (*)(void *, int);
using PROGRESS_FUNC = bool (*)(int, int, int, int, int);
using PROGRESS_FUNC2 = bool (*)(ETEXT_DESC *, int, int, int, int);

class ETEXT_DESC { // output header
public:
  int16_t count = 0;    /// chars in this buffer (0)
  int16_t progress = 0; /// percent complete, increasing (0-100)
  /** The progress monitor covers word recognition; it does not cover layout
   * analysis.
   * See Ray's comment in https://github.com/tesseract-ocr/tesseract/pull/27 */
  int8_t more_to_come = 0;                   /// true if not last
  volatile int8_t ocr_alive = 0;             /// OCR sets to 1, HP sets to 0
  int8_t err_code = 0;                       /// for errcode use
  CANCEL_FUNC cancel = nullptr;              /// returns true to cancel
  PROGRESS_FUNC progress_callback = nullptr; /// called whenever progress
                                             /// increases
  PROGRESS_FUNC2 progress_callback2;         /// monitor-aware progress callback
  void *cancel_this = nullptr;               /// this or other data for cancel
  /// Time to stop. Expected to be set only by set_deadline_msecs(); a
  /// zero time_since_epoch() means that no deadline has been set.
  std::chrono::steady_clock::time_point end_time;
  EANYCODE_CHAR text[1]{}; /// character data

  /// Routes progress_callback2 to the built-in adapter that forwards to the
  /// legacy progress_callback, and starts without a deadline.
  ETEXT_DESC() : progress_callback2(&default_progress_func), end_time() {}

  /// Sets the end time to deadline_msecs milliseconds from now. Values that
  /// are zero or negative leave the current end time untouched.
  void set_deadline_msecs(int32_t deadline_msecs) {
    if (deadline_msecs <= 0) {
      return;
    }
    const auto budget = std::chrono::milliseconds(deadline_msecs);
    end_time = std::chrono::steady_clock::now() + budget;
  }

  /// Returns true only when a deadline has been set and has already passed.
  bool deadline_exceeded() const {
    const bool deadline_set = end_time.time_since_epoch() !=
                              std::chrono::steady_clock::duration::zero();
    // The clock is consulted only when a deadline actually exists.
    return deadline_set && std::chrono::steady_clock::now() > end_time;
  }

private:
  /// Default progress_callback2: forwards the monitor's progress value and
  /// the word box to the legacy progress_callback when one is installed,
  /// and returns true otherwise.
  static bool default_progress_func(ETEXT_DESC *ths, int left, int right,
                                    int top, int bottom) {
    if (ths->progress_callback == nullptr) {
      return true;
    }
    return ths->progress_callback(ths->progress, left, right, top, bottom);
  }
};

} // namespace tesseract
#endif // CCUTIL_OCRCLASS_H_

View File

@ -1,139 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: osdetect.h
// Description: Orientation and script detection.
// Author: Samuel Charron
// Ranjith Unnikrishnan
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCMAIN_OSDETECT_H_
#define TESSERACT_CCMAIN_OSDETECT_H_
#include "export.h" // for TESS_API
#include <vector> // for std::vector
namespace tesseract {

// Engine-internal types, used here only through pointers.
class BLOBNBOX;
class BLOBNBOX_CLIST;
class BLOB_CHOICE_LIST;
class TO_BLOCK_LIST;
class UNICHARSET;
class Tesseract;

// Upper bound on the number of scripts:
// scripts in ICU + "NULL" + Japanese and Korean + Fraktur.
const int kMaxNumberOfScripts = 116 + 1 + 2 + 1;

/// Best orientation/script estimate together with its confidences.
/// Every field starts at zero.
struct OSBestResult {
  OSBestResult()
      : orientation_id{0}, script_id{0}, sconfidence{0.0f}, oconfidence{0.0f} {}

  int orientation_id;
  int script_id;
  float sconfidence;
  float oconfidence;
};

/// Accumulated orientation and script scores plus the current best result.
struct OSResults {
  /// Starts with all orientation and script scores at zero and without a
  /// unicharset.
  OSResults() : orientations{}, scripts_na{}, unicharset(nullptr) {}

  // NOTE(review): no description accompanies this declaration; judging by the
  // name it refreshes best_result's orientation from the scores — confirm.
  void update_best_orientation();

  /// Sets the estimate of the orientation to the given id.
  void set_best_orientation(int orientation_id);

  /// Updates/computes the best estimate of the script, assuming the given
  /// orientation id.
  void update_best_script(int orientation_id);

  /// Returns the index of the script with the highest score for this
  /// orientation.
  TESS_API int get_best_script(int orientation_id) const;

  /// Accumulates the scores of another OSResults instance and updates the
  /// best script.
  void accumulate(const OSResults &osr);

  /// Print statistics (overall, or for a single orientation id).
  void print_scores() const;
  void print_scores(int orientation_id) const;

  /// Scores for each orientation id [0,3]. Ids [0..3] correspond to
  /// [0, 270, 180, 90] degree page orientations respectively, where the value
  /// is the amount of clockwise rotation that must be applied to the page for
  /// the text to be upright and readable.
  float orientations[4];
  /// Script confidence scores for each of the 4 possible orientations.
  float scripts_na[4][kMaxNumberOfScripts];

  UNICHARSET *unicharset;
  OSBestResult best_result;
};

/// Orientation detector that records into a caller-supplied OSResults.
/// NOTE(review): member semantics are not described in this header; the
/// summary is derived from the names only.
class OrientationDetector {
public:
  OrientationDetector(const std::vector<int> *allowed_scripts,
                      OSResults *results);
  bool detect_blob(BLOB_CHOICE_LIST *scores);
  int get_orientation();

private:
  OSResults *osr_;
  const std::vector<int> *allowed_scripts_;
};

/// Script detector that records into a caller-supplied OSResults.
/// NOTE(review): member semantics are not described in this header; the
/// summary is derived from the names only.
class ScriptDetector {
public:
  ScriptDetector(const std::vector<int> *allowed_scripts, OSResults *osr,
                 tesseract::Tesseract *tess);
  void detect_blob(BLOB_CHOICE_LIST *scores);
  bool must_stop(int orientation) const;

private:
  OSResults *osr_;
  static const char *korean_script_;
  static const char *japanese_script_;
  static const char *fraktur_script_;
  int korean_id_;
  int japanese_id_;
  int katakana_id_;
  int hiragana_id_;
  int han_id_;
  int hangul_id_;
  int latin_id_;
  int fraktur_id_;
  tesseract::Tesseract *tess_;
  const std::vector<int> *allowed_scripts_;
};

// Entry points for orientation and script detection. Their exact contracts
// (return codes, ownership of the arguments) are not documented in this
// header; consult the implementation before relying on them.
int orientation_and_script_detection(const char *filename, OSResults *,
                                     tesseract::Tesseract *);

int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr,
              tesseract::Tesseract *tess);

int os_detect_blobs(const std::vector<int> *allowed_scripts,
                    BLOBNBOX_CLIST *blob_list, OSResults *osr,
                    tesseract::Tesseract *tess);

bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s,
                    OSResults *, tesseract::Tesseract *tess);

/// Converts an orientation index to its value in degrees: the amount of
/// clockwise rotation that must be applied for the text to be upright
/// (readable).
TESS_API int OrientationIdToValue(const int &id);

} // namespace tesseract
#endif // TESSERACT_CCMAIN_OSDETECT_H_

View File

@ -1,364 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: pageiterator.h
// Description: Iterator for tesseract page structure that avoids using
// tesseract internal data structures.
// Author: Ray Smith
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_
#define TESSERACT_CCMAIN_PAGEITERATOR_H_
#include "export.h"
#include "publictypes.h"
struct Pix;
struct Pta;
namespace tesseract {
struct BlamerBundle;
class C_BLOB_IT;
class PAGE_RES;
class PAGE_RES_IT;
class WERD;
class Tesseract;
/**
* Class to iterate over tesseract page structure, providing access to all
* levels of the page hierarchy, without including any tesseract headers or
* having to handle any tesseract structures.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
* DetectOS, or anything else that changes the internal PAGE_RES.
* See tesseract/publictypes.h for the definition of PageIteratorLevel.
* See also ResultIterator, derived from PageIterator, which adds in the
* ability to access OCR output with text-specific methods.
*/
class TESS_API PageIterator {
public:
/**
* page_res and tesseract come directly from the BaseAPI.
* The rectangle parameters are copied indirectly from the Thresholder,
* via the BaseAPI. They represent the coordinates of some rectangle in an
* original image (in top-left-origin coordinates) and therefore the top-left
* needs to be added to any output boxes in order to specify coordinates
* in the original image. See TessBaseAPI::SetRectangle.
* The scale and scaled_yres are in case the Thresholder scaled the image
* rectangle prior to thresholding. Any coordinates in tesseract's image
* must be divided by scale before adding (rect_left, rect_top).
* The scaled_yres indicates the effective resolution of the binary image
* that tesseract has been given by the Thresholder.
* After the constructor, Begin has already been called.
*/
PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
int scaled_yres, int rect_left, int rect_top, int rect_width,
int rect_height);
virtual ~PageIterator();
/**
* Page/ResultIterators may be copied! This makes it possible to iterate over
* all the objects at a lower level, while maintaining an iterator to
* objects at a higher level. These constructors DO NOT CALL Begin, so
* iterations will continue from the location of src.
*/
PageIterator(const PageIterator &src);
const PageIterator &operator=(const PageIterator &src);
/** Are we positioned at the same location as other? */
bool PositionedAtSameWord(const PAGE_RES_IT *other) const;
// ============= Moving around within the page ============.
/**
* Moves the iterator to point to the start of the page to begin an
* iteration.
*/
virtual void Begin();
/**
* Moves the iterator to the beginning of the paragraph.
* This class implements this functionality by moving it to the zero indexed
* blob of the first (leftmost) word on the first row of the paragraph.
*/
virtual void RestartParagraph();
/**
* Return whether this iterator points anywhere in the first textline of a
* paragraph.
*/
bool IsWithinFirstTextlineOfParagraph() const;
/**
* Moves the iterator to the beginning of the text line.
* This class implements this functionality by moving it to the zero indexed
* blob of the first (leftmost) word of the row.
*/
virtual void RestartRow();
/**
* Moves to the start of the next object at the given level in the
* page hierarchy, and returns false if the end of the page was reached.
* NOTE that RIL_SYMBOL will skip non-text blocks, but all other
* PageIteratorLevel level values will visit each non-text block once.
* Think of non text blocks as containing a single para, with a single line,
* with a single imaginary word.
* Calls to Next with different levels may be freely intermixed.
* This function iterates words in right-to-left scripts correctly, if
* the appropriate language has been loaded into Tesseract.
*/
virtual bool Next(PageIteratorLevel level);
/**
* Returns true if the iterator is at the start of an object at the given
* level.
*
* For instance, suppose an iterator it is pointed to the first symbol of the
* first word of the third line of the second paragraph of the first block in
* a page, then:
* it.IsAtBeginningOf(RIL_BLOCK) = false
* it.IsAtBeginningOf(RIL_PARA) = false
* it.IsAtBeginningOf(RIL_TEXTLINE) = true
* it.IsAtBeginningOf(RIL_WORD) = true
* it.IsAtBeginningOf(RIL_SYMBOL) = true
*/
virtual bool IsAtBeginningOf(PageIteratorLevel level) const;
/**
* Returns whether the iterator is positioned at the last element in a
* given level. (e.g. the last word in a line, the last line in a block)
*
* Here's some two-paragraph example
* text. It starts off innocuously
* enough but quickly turns bizarre.
* The author inserts a cornucopia
* of words to guard against confused
* references.
*
* Now take an iterator it pointed to the start of "bizarre."
* it.IsAtFinalElement(RIL_PARA, RIL_SYMBOL) = false
* it.IsAtFinalElement(RIL_PARA, RIL_WORD) = true
* it.IsAtFinalElement(RIL_BLOCK, RIL_WORD) = false
*/
virtual bool IsAtFinalElement(PageIteratorLevel level,
PageIteratorLevel element) const;
/**
* Returns whether this iterator is positioned
* before other: -1
* equal to other: 0
* after other: 1
*/
int Cmp(const PageIterator &other) const;
// ============= Accessing data ==============.
// Coordinate system:
// Integer coordinates are at the cracks between the pixels.
// The top-left corner of the top-left pixel in the image is at (0,0).
// The bottom-right corner of the bottom-right pixel in the image is at
// (width, height).
// Every bounding box goes from the top-left of the top-left contained
// pixel to the bottom-right of the bottom-right contained pixel, so
// the bounding box of the single top-left pixel in the image is:
// (0,0)->(1,1).
// If an image rectangle has been set in the API, then returned coordinates
// relate to the original (full) image, rather than the rectangle.
/**
* Controls what to include in a bounding box. Bounding boxes of all levels
* between RIL_WORD and RIL_BLOCK can include or exclude potential diacritics.
* Between layout analysis and recognition, it isn't known where all
* diacritics belong, so this control is used to include or exclude some
* diacritics that are above or below the main body of the word. In most cases
* where the placement is obvious, and after recognition, it doesn't make as
* much difference, as the diacritics will already be included in the word.
*/
void SetBoundingBoxComponents(bool include_upper_dots,
bool include_lower_dots) {
include_upper_dots_ = include_upper_dots;
include_lower_dots_ = include_lower_dots;
}
/**
* Returns the bounding rectangle of the current object at the given level.
* See comment on coordinate system above.
* Returns false if there is no such object at the current position.
* The returned bounding box is guaranteed to match the size and position
* of the image returned by GetBinaryImage, but may clip foreground pixels
* from a grey image. The padding argument to GetImage can be used to expand
* the image to include more foreground pixels. See GetImage below.
*/
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right,
int *bottom) const;
bool BoundingBox(PageIteratorLevel level, int padding, int *left, int *top,
int *right, int *bottom) const;
/**
* Returns the bounding rectangle of the object in a coordinate system of the
* working image rectangle having its origin at (rect_left_, rect_top_) with
* respect to the original image and is scaled by a factor scale_.
*/
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top,
int *right, int *bottom) const;
/** Returns whether there is no object of a given level. */
bool Empty(PageIteratorLevel level) const;
/**
* Returns the type of the current block.
* See tesseract/publictypes.h for PolyBlockType.
*/
PolyBlockType BlockType() const;
/**
* Returns the polygon outline of the current block. The returned Pta must
* be ptaDestroy-ed after use. Note that the returned Pta lists the vertices
* of the polygon, and the last edge is the line segment between the last
* point and the first point. nullptr will be returned if the iterator is
* at the end of the document or layout analysis was not used.
*/
Pta *BlockPolygon() const;
/**
* Returns a binary image of the current object at the given level.
* The position and size match the return from BoundingBoxInternal, and so
* this could be upscaled with respect to the original input image.
* Use pixDestroy to delete the image after use.
*/
Pix *GetBinaryImage(PageIteratorLevel level) const;
/**
* Returns an image of the current object at the given level in greyscale
* if available in the input. To guarantee a binary image use BinaryImage.
* NOTE that in order to give the best possible image, the bounds are
* expanded slightly over the binary connected component, by the supplied
* padding, so the top-left position of the returned image is returned
* in (left,top). These will most likely not match the coordinates
* returned by BoundingBox.
* If you do not supply an original image, you will get a binary one.
* Use pixDestroy to delete the image after use.
*/
Pix *GetImage(PageIteratorLevel level, int padding, Pix *original_img,
int *left, int *top) const;
/**
* Returns the baseline of the current object at the given level.
* The baseline is the line that passes through (x1, y1) and (x2, y2).
* WARNING: with vertical text, baselines may be vertical!
* Returns false if there is no baseline at the current position.
*/
bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2,
int *y2) const;
// Returns the attributes of the current row.
void RowAttributes(float *row_height, float *descenders,
float *ascenders) const;
/**
* Returns orientation for the block the iterator points to.
* orientation, writing_direction, textline_order: see publictypes.h
* deskew_angle: after rotating the block so the text orientation is
* upright, how many radians does one have to rotate the
* block anti-clockwise for it to be level?
* -Pi/4 <= deskew_angle <= Pi/4
*/
void Orientation(tesseract::Orientation *orientation,
tesseract::WritingDirection *writing_direction,
tesseract::TextlineOrder *textline_order,
float *deskew_angle) const;
/**
* Returns information about the current paragraph, if available.
*
* justification -
* LEFT if ragged right, or fully justified and script is left-to-right.
* RIGHT if ragged left, or fully justified and script is right-to-left.
* unknown if it looks like source code or we have very few lines.
* is_list_item -
* true if we believe this is a member of an ordered or unordered list.
* is_crown -
* true if the first line of the paragraph is aligned with the other
* lines of the paragraph even though subsequent paragraphs have first
* line indents. This typically indicates that this is the continuation
* of a previous paragraph or that it is the very first paragraph in
* the chapter.
* first_line_indent -
* For LEFT aligned paragraphs, the first text line of paragraphs of
* this kind are indented this many pixels from the left edge of the
* rest of the paragraph.
* for RIGHT aligned paragraphs, the first text line of paragraphs of
* this kind are indented this many pixels from the right edge of the
* rest of the paragraph.
* NOTE 1: This value may be negative.
* NOTE 2: if *is_crown == true, the first line of this paragraph is
* actually flush, and first_line_indent is set to the "common"
* first_line_indent for subsequent paragraphs in this block
* of text.
*/
void ParagraphInfo(tesseract::ParagraphJustification *justification,
bool *is_list_item, bool *is_crown,
int *first_line_indent) const;
// If the current WERD_RES (it_->word()) is not nullptr, sets the BlamerBundle
// of the current word to the given pointer (takes ownership of the pointer)
// and returns true.
// Can only be used when iterating on the word level.
bool SetWordBlamerBundle(BlamerBundle *blamer_bundle);
protected:
/**
* Sets up the internal data for iterating the blobs of a new word, then
* moves the iterator to the given offset.
*/
void BeginWord(int offset);
/** Pointer to the page_res owned by the API. */
PAGE_RES *page_res_;
/** Pointer to the Tesseract object owned by the API. */
Tesseract *tesseract_;
/**
* The iterator to the page_res_. Owned by this ResultIterator.
* A pointer just to avoid dragging in Tesseract includes.
*/
PAGE_RES_IT *it_;
/**
* The current input WERD being iterated. If there is an output from OCR,
* then word_ is nullptr. Owned by the API
*/
WERD *word_;
/** The length of the current word_. */
int word_length_;
/** The current blob index within the word. */
int blob_index_;
/**
* Iterator to the blobs within the word. If nullptr, then we are iterating
* OCR results in the box_word.
* Owned by this ResultIterator.
*/
C_BLOB_IT *cblob_it_;
/** Control over what to include in bounding boxes. */
bool include_upper_dots_;
bool include_lower_dots_;
/** Parameters saved from the Thresholder. Needed to rebuild coordinates.*/
int scale_;
int scaled_yres_;
int rect_left_;
int rect_top_;
int rect_width_;
int rect_height_;
};
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_PAGEITERATOR_H_

View File

@ -1,281 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: publictypes.h
// Description: Types used in both the API and internally
// Author: Ray Smith
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
#define TESSERACT_CCSTRUCT_PUBLICTYPES_H_
namespace tesseract {
// This file contains types that are used both by the API and internally
// to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
// dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT.
// Restated: It is OK for low-level Tesseract files to include publictypes.h,
// but not for the low-level tesseract code to include top-level API code.
// This file should not use other Tesseract types, as that would drag
// their includes into the API-level.
/**
 * Count of printers' points that make up one inch. Point sizes returned by
 * the API are expressed in this unit.
 */
constexpr int kPointsPerInch{72};

/**
 * Lowest resolution that is considered believable. It doubles as the fallback
 * when nothing better is known, because guessing too low is safer than
 * guessing too high.
 */
constexpr int kMinCredibleResolution{70};

/** Highest resolution that is considered believable. */
constexpr int kMaxCredibleResolution{2400};

/**
 * Ratio of the median blob size to the likely resolution, used to estimate a
 * resolution when none was supplied. Roughly 1 / (usual text size in inches).
 */
constexpr int kResolutionEstimationFactor{10};
/**
 * Possible types for a POLY_BLOCK or ColPartition.
 * Must be kept in sync with kPBColors in polyblk.cpp and PTIs*Type functions
 * below, as well as kPolyBlockNames in layout_test.cc.
 * Used extensively by ColPartition, and POLY_BLOCK.
 */
enum PolyBlockType {
  PT_UNKNOWN,         // Type is not yet known. Keep as the first element.
  PT_FLOWING_TEXT,    // Text that lives inside a column.
  PT_HEADING_TEXT,    // Text that spans more than one column.
  PT_PULLOUT_TEXT,    // Text that is in a cross-column pull-out region.
  PT_EQUATION,        // Partition belonging to an equation region.
  PT_INLINE_EQUATION, // Partition has inline equation.
  PT_TABLE,           // Partition belonging to a table region.
  PT_VERTICAL_TEXT,   // Text-line runs vertically.
  PT_CAPTION_TEXT,    // Text that belongs to an image.
  PT_FLOWING_IMAGE,   // Image that lives inside a column.
  PT_HEADING_IMAGE,   // Image that spans more than one column.
  PT_PULLOUT_IMAGE,   // Image that is in a cross-column pull-out region.
  PT_HORZ_LINE,       // Horizontal Line.
  PT_VERT_LINE,       // Vertical Line.
  PT_NOISE,           // Lies outside of any column.
  PT_COUNT            // Number of types above. Keep as the last element.
};

/**
 * Returns true if the type is a line type, i.e. either PT_HORZ_LINE or
 * PT_VERT_LINE.
 */
constexpr bool PTIsLineType(PolyBlockType type) {
  return type == PT_HORZ_LINE || type == PT_VERT_LINE;
}

/**
 * Returns true if the type is one of the image types: PT_FLOWING_IMAGE,
 * PT_HEADING_IMAGE or PT_PULLOUT_IMAGE.
 */
constexpr bool PTIsImageType(PolyBlockType type) {
  return type == PT_FLOWING_IMAGE || type == PT_HEADING_IMAGE ||
         type == PT_PULLOUT_IMAGE;
}

/**
 * Returns true if the type is treated as text. Besides the *_TEXT types this
 * also covers PT_TABLE and PT_INLINE_EQUATION; PT_EQUATION is NOT included.
 */
constexpr bool PTIsTextType(PolyBlockType type) {
  return type == PT_FLOWING_TEXT || type == PT_HEADING_TEXT ||
         type == PT_PULLOUT_TEXT || type == PT_TABLE ||
         type == PT_VERTICAL_TEXT || type == PT_CAPTION_TEXT ||
         type == PT_INLINE_EQUATION;
}

/**
 * Returns true if the type is a pull-out (inter-column) type, image or text.
 */
constexpr bool PTIsPulloutType(PolyBlockType type) {
  return type == PT_PULLOUT_IMAGE || type == PT_PULLOUT_TEXT;
}
/**
 *  +------------------+  Orientation Example:
 *  | 1 Aaaa Aaaa Aaaa |  ====================
 *  | Aaa aa aaa aa    |  To left is a diagram of some (1) English and
 *  | aaaaaa A aa aaa. |  (2) Chinese text and a (3) photo credit.
 *  |                2 |
 *  |   #######  c c C |  Upright Latin characters are represented as A and a.
 *  |   #######  c c c |  '<' represents a latin character rotated
 *  | < #######  c c c |      anti-clockwise 90 degrees.
 *  | < #######  c   c |
 *  | < #######  .   c |  Upright Chinese characters are represented C and c.
 *  | 3 #######      c |
 *  +------------------+  NOTA BENE: enum values here should match goodoc.proto
 *
 * Turn your head so that "up" lines up with the Orientation value and the
 * characters read "right side up".
 *
 * In the diagram, the English and the Chinese paragraphs both have their "up"
 * at the top of the page (page up), whereas the photo credit is read with the
 * head turned to the left ("up" points to page left).
 *
 * The numeric values follow the convention of Tesseract's osdetect.h.
 */
enum Orientation {
  ORIENTATION_PAGE_UP = 0,    // "Up" of the text is the top of the page.
  ORIENTATION_PAGE_RIGHT = 1, // "Up" of the text is the right of the page.
  ORIENTATION_PAGE_DOWN = 2,  // "Up" of the text is the bottom of the page.
  ORIENTATION_PAGE_LEFT = 3   // "Up" of the text is the left of the page.
};
/**
 * Logical layout direction of the grapheme clusters inside one text line,
 * as seen once the line has been rotated so that its Orientation is
 * "page up".
 *
 * English text is written left-to-right. The Chinese text in the Orientation
 * example is written top-to-bottom.
 */
enum WritingDirection {
  WRITING_DIRECTION_LEFT_TO_RIGHT = 0, // e.g. English.
  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
  WRITING_DIRECTION_TOP_TO_BOTTOM = 2  // e.g. the Chinese text in the example.
};
/**
 * Sequence in which the text lines are read.
 *
 * English lines are read top-to-bottom. Vertical Chinese lines are read
 * right-to-left. Mongolian is also written in vertical top-to-bottom columns,
 * but its lines are ordered left-to-right.
 *
 * Only some combinations with WritingDirection make sense; for example
 * WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM.
 */
enum TextlineOrder {
  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0, // e.g. Mongolian.
  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1, // e.g. vertical Chinese.
  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2  // e.g. English.
};
/**
 * Possible modes for page layout analysis. These *must* be kept in order
 * of decreasing amount of layout analysis to be done, except for OSD_ONLY,
 * so that the inequality tests in the PSM_* helper functions below work.
 */
enum PageSegMode {
  PSM_OSD_ONLY = 0,      ///< Orientation and script detection only.
  PSM_AUTO_OSD = 1,      ///< Automatic page segmentation with orientation and
                         ///< script detection. (OSD)
  PSM_AUTO_ONLY = 2,     ///< Automatic page segmentation, but no OSD, or OCR.
  PSM_AUTO = 3,          ///< Fully automatic page segmentation, but no OSD.
  PSM_SINGLE_COLUMN = 4, ///< Assume a single column of text of variable sizes.
  PSM_SINGLE_BLOCK_VERT_TEXT = 5, ///< Assume a single uniform block of
                                  ///< vertically aligned text.
  PSM_SINGLE_BLOCK = 6, ///< Assume a single uniform block of text. (Default.)
  PSM_SINGLE_LINE = 7,  ///< Treat the image as a single text line.
  PSM_SINGLE_WORD = 8,  ///< Treat the image as a single word.
  PSM_CIRCLE_WORD = 9,  ///< Treat the image as a single word in a circle.
  PSM_SINGLE_CHAR = 10, ///< Treat the image as a single character.
  PSM_SPARSE_TEXT =
      11, ///< Find as much text as possible in no particular order.
  PSM_SPARSE_TEXT_OSD = 12, ///< Sparse text with orientation and script det.
  PSM_RAW_LINE = 13, ///< Treat the image as a single text line, bypassing
                     ///< hacks that are Tesseract-specific.
  PSM_COUNT ///< Number of enum entries.
};

/**
 * Functions that act on a PageSegMode to determine whether components of
 * layout analysis are enabled.
 * *Depend critically on the order of elements of PageSegMode.*
 * NOTE that arg is an int for compatibility with INT_PARAM. Because the
 * argument is a raw int, the "<=" tests also accept values below
 * PSM_OSD_ONLY; callers are expected to pass a valid PageSegMode value.
 */

/** True for PSM_OSD_ONLY, PSM_AUTO_OSD and PSM_SPARSE_TEXT_OSD. */
constexpr bool PSM_OSD_ENABLED(int pageseg_mode) {
  return pageseg_mode <= PSM_AUTO_OSD || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}

/** True for PSM_OSD_ONLY through PSM_AUTO, and for PSM_SPARSE_TEXT_OSD. */
constexpr bool PSM_ORIENTATION_ENABLED(int pageseg_mode) {
  return pageseg_mode <= PSM_AUTO || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}

/** True for PSM_AUTO_OSD through PSM_AUTO. */
constexpr bool PSM_COL_FIND_ENABLED(int pageseg_mode) {
  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_AUTO;
}

/** True for the two sparse modes, PSM_SPARSE_TEXT and PSM_SPARSE_TEXT_OSD. */
constexpr bool PSM_SPARSE(int pageseg_mode) {
  return pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}

/** True for PSM_AUTO_OSD through PSM_SINGLE_COLUMN. */
constexpr bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode) {
  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_COLUMN;
}

/** True for PSM_AUTO_OSD through PSM_SINGLE_BLOCK. */
constexpr bool PSM_LINE_FIND_ENABLED(int pageseg_mode) {
  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_BLOCK;
}

/**
 * True for PSM_AUTO_OSD through PSM_SINGLE_LINE, and for both sparse modes.
 */
constexpr bool PSM_WORD_FIND_ENABLED(int pageseg_mode) {
  return (pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_LINE) ||
         pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}
/**
 * Levels of the page hierarchy, ordered from the coarsest (block) to the
 * finest (symbol). ResultIterator takes a level argument so that one function
 * can serve every level instead of needing 5x as many functions.
 */
enum PageIteratorLevel {
  RIL_BLOCK,    // Block of text/image/separator line.
  RIL_PARA,     // Paragraph within a block.
  RIL_TEXTLINE, // Line within a paragraph.
  RIL_WORD,     // Word within a textline.
  RIL_SYMBOL    // Symbol/character within a word.
};
/**
 * Alignment of the lines of a paragraph.
 *
 * NOTA BENE: Fully justified paragraphs (text aligned to both left and right
 * margins) are reported as JUSTIFICATION_LEFT when written in a left-to-right
 * script and as JUSTIFICATION_RIGHT when written in a right-to-left script.
 *
 * For text read in vertical lines, "left" means wherever the starting reading
 * position is.
 */
enum ParagraphJustification {
  // The alignment is not clearly one of the other options, e.g. there are
  // only one or two lines of text, or the text looks like source code or
  // poetry.
  JUSTIFICATION_UNKNOWN,
  // Each line, except possibly the first, is flush to the same left tab stop.
  JUSTIFICATION_LEFT,
  // The text lines of the paragraph are centered about a line going down
  // through their middle.
  JUSTIFICATION_CENTER,
  // Each line, except possibly the first, is flush to the same right tab
  // stop.
  JUSTIFICATION_RIGHT
};
/**
 * Selects which recognition engine(s) get instantiated/loaded/run when
 * Tesseract/Cube is initialized: only the Tesseract part, only the Cube part,
 * or both together with the combiner. The chosen preference is stored in
 * tessedit_ocr_engine_mode.
 *
 * ATTENTION: When modifying this enum, make the matching changes to every
 * enum that mirrors it (e.g. OCREngine in
 * cityblock/workflow/detection/detection_storage.proto). Such enums mention
 * the connection to OcrEngineMode in their comments.
 */
enum OcrEngineMode {
  // Run Tesseract only - fastest; deprecated
  OEM_TESSERACT_ONLY,
  // Run just the LSTM line recognizer.
  OEM_LSTM_ONLY,
  // Run the LSTM recognizer, but allow fallback to Tesseract when things get
  // difficult. deprecated
  OEM_TESSERACT_LSTM_COMBINED,
  // Specify this mode when calling init_*(), to indicate that any of the
  // above modes should be automatically inferred from the variables in the
  // language-specific config, command-line configs, or if not specified in
  // any of the above should be set to the default OEM_TESSERACT_ONLY.
  OEM_DEFAULT,
  // Number of OEMs
  OEM_COUNT
};
} // namespace tesseract.
#endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_

View File

@ -1,311 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: renderer.h
// Description: Rendering interface to inject into TessBaseAPI
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_API_RENDERER_H_
#define TESSERACT_API_RENDERER_H_
#include "export.h"
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
// complexity of includes here. Use forward declarations wherever possible
// and hide includes of complex types in baseapi.cpp.
#include <cstdint>
#include <string> // for std::string
#include <vector> // for std::vector
struct Pix;
namespace tesseract {
class TessBaseAPI;
/**
* Interface for rendering tesseract results into a document, such as text,
* HOCR or pdf. This class is abstract (AddImageHandler is pure virtual).
* Specific classes handle individual formats. This interface is then used to
* inject the renderer class into tesseract when processing images.
*
* For simplicity implementing this with tesseract version 3.01,
* the renderer contains document state that is cleared from document
* to document just as the TessBaseAPI is. This way the base API can just
* delegate its rendering functionality to injected renderers, and the
* renderers can manage the associated state needed for the specific formats
* in addition to the heuristics for producing it.
*
* Renderers can be linked into a chain with insert() and walked with next().
*/
class TESS_API TessResultRenderer {
public:
virtual ~TessResultRenderer();
// Takes ownership of pointer so must be new'd instance.
// Renderers aren't ordered, but appends the sequences of next parameter
// and existing next(). The renderers should be unique across both lists.
void insert(TessResultRenderer *next);
// Returns the next renderer in the chain, or nullptr if there is none.
TessResultRenderer *next() {
return next_;
}
/**
* Starts a new document with the given title.
* This clears the contents of the output data.
* Title should use UTF-8 encoding.
*/
bool BeginDocument(const char *title);
/**
* Adds the recognized text from the source image to the current document.
* Invalid if BeginDocument not yet called.
*
* Note that this API is a bit weird but is designed to fit into the
* current TessBaseAPI implementation where the api has lots of state
* information that we might want to add in.
*/
bool AddImage(TessBaseAPI *api);
/**
* Finishes the document and finalizes the output data.
* Invalid if BeginDocument not yet called.
*/
bool EndDocument();
// Returns the extension string stored by the constructor. The pointer is
// returned as stored; this class does not copy it.
const char *file_extension() const {
return file_extension_;
}
// Returns the document title as a C string. The pointer refers to the
// internal std::string, so it is only valid while title_ is unmodified.
const char *title() const {
return title_.c_str();
}
// Is everything fine? Otherwise something went wrong.
bool happy() const {
return happy_;
}
/**
* Returns the index of the last image given to AddImage
* (i.e. images are incremented whether the image succeeded or not)
*
* This is always defined. It means either the number of the
* current image, the last image ended, or in the completed document
* depending on when in the document lifecycle you are looking at it.
* Will return -1 if a document was never started.
*/
int imagenum() const {
return imagenum_;
}
protected:
/**
* Called by concrete classes.
*
* outputbase is the name of the output file excluding
* extension. For example, "/path/to/chocolate-chip-cookie-recipe"
*
* extension indicates the file extension to be used for output
* files. For example "pdf" will produce a .pdf file, and "hocr"
* will produce .hocr files.
*/
TessResultRenderer(const char *outputbase, const char *extension);
// Hook for specialized handling in BeginDocument()
virtual bool BeginDocumentHandler();
// This must be overridden to render the OCR'd results
virtual bool AddImageHandler(TessBaseAPI *api) = 0;
// Hook for specialized handling in EndDocument()
virtual bool EndDocumentHandler();
// Renderers can call this to append '\0' terminated strings into
// the output string returned by GetOutput.
// This method will grow the output buffer if needed.
// NOTE(review): no GetOutput is declared in this class and the only visible
// sink is fout_; the reference to GetOutput looks stale - confirm.
void AppendString(const char *s);
// Renderers can call this to append binary byte sequences into
// the output string returned by GetOutput. Note that s is not necessarily
// '\0' terminated (and can contain '\0' within it).
// This method will grow the output buffer if needed.
void AppendData(const char *s, int len);
private:
TessResultRenderer *next_; // Can link multiple renderers together
// NOTE(review): FILE is used here but this header does not include <cstdio>
// directly; presumably it arrives transitively - confirm.
FILE *fout_; // output file pointer
const char *file_extension_; // standard extension for generated output
std::string title_; // title of document being rendered
int imagenum_; // index of last image added
bool happy_; // I get grumpy when the disk fills up, etc.
};
/**
* Renders tesseract output into a plain UTF-8 text string.
* Only AddImageHandler is overridden; the begin/end document hooks are
* inherited from TessResultRenderer.
*/
class TESS_API TessTextRenderer : public TessResultRenderer {
public:
// outputbase: presumably the output file name without extension, as
// documented on the TessResultRenderer constructor - confirm.
explicit TessTextRenderer(const char *outputbase);
protected:
// Renders the results held by api for one image.
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
* Renders tesseract output into an hocr text string.
* Overrides all three document hooks of TessResultRenderer.
*/
class TESS_API TessHOcrRenderer : public TessResultRenderer {
public:
// font_info selects whether font information is printed (see font_info_).
explicit TessHOcrRenderer(const char *outputbase, bool font_info);
// Overload without the font_info flag.
// NOTE(review): the value used for font_info_ is set in the implementation,
// not visible here; presumably false - confirm.
explicit TessHOcrRenderer(const char *outputbase);
protected:
bool BeginDocumentHandler() override;
bool AddImageHandler(TessBaseAPI *api) override;
bool EndDocumentHandler() override;
private:
bool font_info_; // whether to print font information
};
/**
* Renders tesseract output into an alto text string.
* Overrides all three document hooks of TessResultRenderer.
*/
class TESS_API TessAltoRenderer : public TessResultRenderer {
public:
explicit TessAltoRenderer(const char *outputbase);
protected:
bool BeginDocumentHandler() override;
bool AddImageHandler(TessBaseAPI *api) override;
bool EndDocumentHandler() override;
private:
// NOTE(review): undocumented in the original and named without the trailing
// underscore used by the other members; presumably tracks whether the
// document header has been started/emitted - confirm in the implementation.
bool begin_document;
};
/**
* Renders Tesseract output into a TSV string.
* Overrides all three document hooks of TessResultRenderer.
*/
class TESS_API TessTsvRenderer : public TessResultRenderer {
public:
// font_info selects whether font information is printed (see font_info_).
explicit TessTsvRenderer(const char *outputbase, bool font_info);
// Overload without the font_info flag.
// NOTE(review): the value used for font_info_ is set in the implementation,
// not visible here; presumably false - confirm.
explicit TessTsvRenderer(const char *outputbase);
protected:
bool BeginDocumentHandler() override;
bool AddImageHandler(TessBaseAPI *api) override;
bool EndDocumentHandler() override;
private:
bool font_info_; // whether to print font information
};
/**
* Renders tesseract output into searchable PDF.
* The document is produced one page at a time; per-object bookkeeping
* (offsets_, pages_) is accumulated along the way and used at the end.
*/
class TESS_API TessPDFRenderer : public TessResultRenderer {
public:
// datadir is the location of the TESSDATA. We need it because
// we load a custom PDF font from this location.
// textonly: when set, images are skipped (see textonly_). Defaults to false.
TessPDFRenderer(const char *outputbase, const char *datadir,
bool textonly = false);
protected:
bool BeginDocumentHandler() override;
bool AddImageHandler(TessBaseAPI *api) override;
bool EndDocumentHandler() override;
private:
// We don't want to have every image in memory at once,
// so we store some metadata as we go along producing
// PDFs one page at a time. At the end, that metadata is
// used to make everything that isn't easily handled in a
// streaming fashion.
long int obj_; // counter for PDF objects
std::vector<uint64_t> offsets_; // offset of every PDF object in bytes
std::vector<long int> pages_; // object number for every /Page object
std::string datadir_; // where to find the custom font
bool textonly_; // skip images if set
// Bookkeeping only. DIY = Do It Yourself.
// Presumably the caller emits the objectsize bytes itself - confirm.
void AppendPDFObjectDIY(size_t objectsize);
// Bookkeeping + emit data.
void AppendPDFObject(const char *data);
// Create the /Contents object for an entire page.
// NOTE(review): returns a raw char*; ownership/deallocation of the result is
// not stated here - confirm against the implementation.
char *GetPDFTextObjects(TessBaseAPI *api, double width, double height);
// Turn an image into a PDF object. Only transcode if we have to.
// The object and its size are handed back through pdf_object and
// pdf_object_size.
static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum,
char **pdf_object, long int *pdf_object_size,
int jpg_quality);
};
/**
* Renders tesseract output into a plain UTF-8 text string.
* NOTE(review): this description is word-for-word the one on
* TessTextRenderer; judging by the class name this presumably emits the UNLV
* format instead - confirm and reword.
*/
class TESS_API TessUnlvRenderer : public TessResultRenderer {
public:
explicit TessUnlvRenderer(const char *outputbase);
protected:
// Renders the results held by api for one image.
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
* Renders tesseract output into a plain UTF-8 text string for LSTMBox.
* Only AddImageHandler is overridden; the begin/end document hooks are
* inherited from TessResultRenderer.
*/
class TESS_API TessLSTMBoxRenderer : public TessResultRenderer {
public:
explicit TessLSTMBoxRenderer(const char *outputbase);
protected:
// Renders the results held by api for one image.
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
* Renders tesseract output into a plain UTF-8 text string.
* NOTE(review): this description is word-for-word the one on
* TessTextRenderer; judging by the class name this presumably emits box-file
* text - confirm and reword.
*/
class TESS_API TessBoxTextRenderer : public TessResultRenderer {
public:
explicit TessBoxTextRenderer(const char *outputbase);
protected:
// Renders the results held by api for one image.
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
* Renders tesseract output into a plain UTF-8 text string in WordStr format.
* Only AddImageHandler is overridden; the begin/end document hooks are
* inherited from TessResultRenderer.
*/
class TESS_API TessWordStrBoxRenderer : public TessResultRenderer {
public:
explicit TessWordStrBoxRenderer(const char *outputbase);
protected:
// Renders the results held by api for one image.
bool AddImageHandler(TessBaseAPI *api) override;
};
// TessOsdRenderer is only declared when DISABLED_LEGACY_ENGINE is not defined.
#ifndef DISABLED_LEGACY_ENGINE
/**
* Renders tesseract output into an osd text string.
* Only AddImageHandler is overridden; the begin/end document hooks are
* inherited from TessResultRenderer.
*/
class TESS_API TessOsdRenderer : public TessResultRenderer {
public:
explicit TessOsdRenderer(const char *outputbase);
protected:
// Renders the results held by api for one image.
bool AddImageHandler(TessBaseAPI *api) override;
};
#endif // ndef DISABLED_LEGACY_ENGINE
} // namespace tesseract.
#endif // TESSERACT_API_RENDERER_H_

View File

@ -1,250 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: resultiterator.h
// Description: Iterator for tesseract results that is capable of
// iterating in proper reading order over Bi Directional
// (e.g. mixed Hebrew and English) text.
// Author: David Eger
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
#include "export.h" // for TESS_API, TESS_LOCAL
#include "ltrresultiterator.h" // for LTRResultIterator
#include "publictypes.h" // for PageIteratorLevel
#include "unichar.h" // for StrongScriptDirection
#include <set> // for std::pair
#include <vector> // for std::vector
namespace tesseract {
/**
* Iterator for tesseract results that is capable of iterating in proper
* reading order over Bi Directional (e.g. mixed Hebrew and English) text.
* Extends LTRResultIterator; instances are obtained through
* StartOfParagraph() because the constructor is protected.
*/
class TESS_API ResultIterator : public LTRResultIterator {
public:
// Factory: builds a ResultIterator from resit. Per the constructor note
// below, the new iterator is reset to the beginning of the paragraph rather
// than staying where resit points.
// NOTE(review): ownership of the returned pointer is not stated here;
// presumably the caller must delete it - confirm.
static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);
/**
* ResultIterator is copy constructible!
* The default copy constructor works just fine for us.
*/
~ResultIterator() override = default;
// ============= Moving around within the page ============.
/**
* Moves the iterator to point to the start of the page to begin
* an iteration.
*/
void Begin() override;
/**
* Moves to the start of the next object at the given level in the
* page hierarchy in the appropriate reading order and returns false if
* the end of the page was reached.
* NOTE that RIL_SYMBOL will skip non-text blocks, but all other
* PageIteratorLevel level values will visit each non-text block once.
* Think of non text blocks as containing a single para, with a single line,
* with a single imaginary word.
* Calls to Next with different levels may be freely intermixed.
* This function iterates words in right-to-left scripts correctly, if
* the appropriate language has been loaded into Tesseract.
*/
bool Next(PageIteratorLevel level) override;
/**
* IsAtBeginningOf() returns whether we're at the logical beginning of the
* given level. (as opposed to ResultIterator's left-to-right top-to-bottom
* order). Otherwise, this acts the same as PageIterator::IsAtBeginningOf().
* For a full description, see pageiterator.h
*/
bool IsAtBeginningOf(PageIteratorLevel level) const override;
/**
* Implement PageIterator's IsAtFinalElement correctly in a BiDi context.
* For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we
* point at the last word in a paragraph. See PageIterator for full comment.
*/
bool IsAtFinalElement(PageIteratorLevel level,
PageIteratorLevel element) const override;
// ============= Functions that refer to words only ============.
// Returns the number of blanks before the current word.
int BlanksBeforeWord() const;
// ============= Accessing data ==============.
/**
* Returns the null terminated UTF-8 encoded text string for the current
* object at the given level. Use delete [] to free after use.
*/
virtual char *GetUTF8Text(PageIteratorLevel level) const;
/**
* Returns the LSTM choices for every LSTM timestep for the current word.
* NOTE(review): a raw pointer is returned; who owns the vector is not
* stated here - confirm before deleting or caching it.
*/
virtual std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
*GetRawLSTMTimesteps() const;
// NOTE(review): undocumented in the original. Judging by the name and the
// return type, presumably the best (text, score) choices per symbol of the
// current word; ownership of the returned pointer is not stated - confirm.
virtual std::vector<std::vector<std::pair<const char *, float>>>
*GetBestLSTMSymbolChoices() const;
/**
* Return whether the current paragraph's dominant reading direction
* is left-to-right (as opposed to right-to-left).
*/
bool ParagraphIsLtr() const;
// ============= Exposed only for testing =============.
/**
* Yields the reading order as a sequence of indices and (optional)
* meta-marks for a set of words (given left-to-right).
* The meta marks are passed as negative values:
* kMinorRunStart Start of minor direction text.
* kMinorRunEnd End of minor direction text.
* kComplexWord The next indexed word contains both left-to-right and
* right-to-left characters and was treated as neutral.
* NOTE(review): the private overloads below describe kComplexWord as
* marking the *previous* word; one of the two comments is wrong - confirm.
*
* For example, suppose we have five words in a text line,
* indexed [0,1,2,3,4] from the leftmost side of the text line.
* The following are all believable reading_orders:
*
* Left-to-Right (in ltr paragraph):
* { 0, 1, 2, 3, 4 }
* Left-to-Right (in rtl paragraph):
* { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }
* Right-to-Left (in rtl paragraph):
* { 4, 3, 2, 1, 0 }
* Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph:
* { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }
*/
static void CalculateTextlineOrder(
bool paragraph_is_ltr,
const std::vector<StrongScriptDirection> &word_dirs,
std::vector<int> *reading_order);
// Meta-mark values used in reading-order vectors (documented above as
// negative). Their definitions live outside this header.
static const int kMinorRunStart;
static const int kMinorRunEnd;
static const int kComplexWord;
protected:
/**
* We presume the data associated with the given iterator will outlive us.
* NB: This is protected (not public) because it does something that is
* non-obvious: it resets to the beginning of the paragraph instead of
* staying wherever resit might have pointed.
*/
explicit ResultIterator(const LTRResultIterator &resit);
private:
/**
* Calculates the current paragraph's dominant writing direction.
* Typically, members should use current_paragraph_is_ltr_ instead.
*/
bool CurrentParagraphIsLtr() const;
/**
* Returns word indices as measured from resit->RestartRow() = index 0
* for the reading order of words within a textline given an iterator
* into the middle of the text line.
* In addition to non-negative word indices, the following negative values
* may be inserted:
* kMinorRunStart Start of minor direction text.
* kMinorRunEnd End of minor direction text.
* kComplexWord The previous word contains both left-to-right and
* right-to-left characters and was treated as neutral.
*/
void CalculateTextlineOrder(bool paragraph_is_ltr,
const LTRResultIterator &resit,
std::vector<int> *indices) const;
/** Same as above, but the caller's ssd gets filled in if ssd != nullptr. */
void CalculateTextlineOrder(bool paragraph_is_ltr,
const LTRResultIterator &resit,
std::vector<StrongScriptDirection> *ssd,
std::vector<int> *indices) const;
/**
* What is the index of the current word in a strict left-to-right reading
* of the row?
*/
int LTRWordIndex() const;
/**
* Given an iterator pointing at a word, returns the logical reading order
* of blob indices for the word.
*/
void CalculateBlobOrder(std::vector<int> *blob_indices) const;
/** Precondition: current_paragraph_is_ltr_ is set. */
void MoveToLogicalStartOfTextline();
/**
* Precondition: current_paragraph_is_ltr_ and in_minor_direction_
* are set.
*/
void MoveToLogicalStartOfWord();
/** Are we pointing at the final (reading order) symbol of the word? */
bool IsAtFinalSymbolOfWord() const;
/** Are we pointing at the first (reading order) symbol of the word? */
bool IsAtFirstSymbolOfWord() const;
/**
* Append any extra marks that should be appended to this word when printed.
* Mostly, these are Unicode BiDi control characters.
*/
void AppendSuffixMarks(std::string *text) const;
/** Appends the current word in reading order to the given buffer.*/
void AppendUTF8WordText(std::string *text) const;
/**
* Appends the text of the current text line, *assuming this iterator is
* positioned at the beginning of the text line*. This function
* updates the iterator to point to the first position past the text line.
* Each textline is terminated in a single newline character.
* If the textline ends a paragraph, it gets a second terminal newline.
*/
void IterateAndAppendUTF8TextlineText(std::string *text);
/**
* Appends the text of the current paragraph in reading order
* to the given buffer.
* Each textline is terminated in a single newline character, and the
* paragraph gets an extra newline at the end.
*/
void AppendUTF8ParagraphText(std::string *text) const;
/** Returns whether the bidi_debug flag is set to at least min_level. */
bool BidiDebug(int min_level) const;
/**
* Dominant direction of the current paragraph; named as a precondition by
* MoveToLogicalStartOfTextline() and MoveToLogicalStartOfWord().
*/
bool current_paragraph_is_ltr_;
/**
* Is the currently pointed-at character at the beginning of
* a minor-direction run?
*/
bool at_beginning_of_minor_run_;
/** Is the currently pointed-at character in a minor-direction sequence? */
bool in_minor_direction_;
/**
* Should detected inter-word spaces be preserved, or "compressed" to a single
* space character (default behavior).
*/
bool preserve_interword_spaces_;
};
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_

View File

@ -1,174 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: unichar.h
// Description: Unicode character/ligature class.
// Author: Ray Smith
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCUTIL_UNICHAR_H_
#define TESSERACT_CCUTIL_UNICHAR_H_
#include "export.h"
#include <memory.h>
#include <cstring>
#include <string>
#include <vector>
namespace tesseract {
// Capacity of a UNICHAR: the maximum number of characters it can store.
// Has to be at least 4, and cannot go above 31 unless the coding of the
// length is changed as well.
#define UNICHAR_LEN 30

// Unique id of a unichar.
using UNICHAR_ID = int;

// Marker value for an invalid or uninitialized unichar id.
static const int INVALID_UNICHAR_ID{-1};

// Special unichar string that corresponds to INVALID_UNICHAR_ID.
static const char INVALID_UNICHAR[]{"__INVALID_UNICHAR__"};
// Classification of a piece of text by the direction of the characters it
// contains.
enum StrongScriptDirection {
  // Text contains only neutral characters.
  DIR_NEUTRAL = 0,
  // Text contains no Right-to-Left characters.
  DIR_LEFT_TO_RIGHT = 1,
  // Text contains no Left-to-Right characters.
  DIR_RIGHT_TO_LEFT = 2,
  // Text contains a mixture of left-to-right and right-to-left characters.
  DIR_MIX = 3
};
using char32 = signed int;
// The UNICHAR class holds a single classification result. This may be
// a single Unicode character (stored as between 1 and 4 utf8 bytes) or
// multiple Unicode characters representing the NFKC expansion of a ligature
// such as fi, ffl etc. These are also stored as utf8.
class TESS_API UNICHAR {
public:
  // Creates an empty UNICHAR: every byte of the buffer, including the
  // trailing length byte, is zero.
  UNICHAR() {
    std::memset(chars, 0, sizeof(chars));
  }

  // Builds a UNICHAR from a UTF-8 string. A negative len means utf8_str is
  // null-terminated. Input that does not fit in the UNICHAR is truncated to
  // what will fit.
  UNICHAR(const char *utf8_str, int len);

  // Builds a UNICHAR from a single UCS-4 character.
  explicit UNICHAR(int unicode);

  // The compiler-generated copy constructor and operator= are sufficient.

  // Returns the first character as UCS-4.
  int first_uni() const;

  // Returns the length in bytes of the stored UTF-8 string.
  int utf8_len() const {
    // The final byte holds the length whenever it lies in [0, UNICHAR_LEN).
    // Any other value means the buffer is completely full and that byte is
    // part of the text itself.
    const int stored = chars[UNICHAR_LEN - 1];
    if (stored < 0 || stored >= UNICHAR_LEN) {
      return UNICHAR_LEN;
    }
    return stored;
  }

  // Returns the stored UTF-8 bytes. The result is NOT null-terminated; use
  // utf8_len() to find out how many bytes are valid.
  const char *utf8() const {
    return chars;
  }

  // Returns a null-terminated copy of the UTF-8 string. The caller must
  // delete[] it after use.
  char *utf8_str() const;

  // Returns the number of bytes in the first character of the given UTF-8
  // string.
  static int utf8_step(const char *utf8_str);

  // Helper for walking over a UTF-8 string one character at a time. Unlike
  // UNICHAR itself, const_iterator neither copies nor takes ownership of the
  // underlying byte array, and (as the name suggests) it cannot be used to
  // modify that array.
  //
  // Example:
  //   for (UNICHAR::const_iterator it = UNICHAR::begin(str, str_len);
  //        it != UNICHAR::end(str, str_len);
  //        ++it) {
  //     printf("UCS-4 symbol code = %d\n", *it);
  //     char buf[5];
  //     int char_len = it.get_utf8(buf); buf[char_len] = '\0';
  //     printf("Char = %s\n", buf);
  //   }
  class TESS_API const_iterator {
    using CI = const_iterator;

  public:
    // Advances to the next UTF-8 character. At an illegal UTF-8 sequence an
    // error message is printed and the iterator advances by a single byte.
    // At a null byte the iterator does not step past it.
    const_iterator &operator++();

    // Returns the UCS-4 value at the current position, or a single space
    // character if the bytes there are not legal UTF-8.
    int operator*() const;

    // Writes the UTF-8 encoding of the current code point into buf, which
    // must hold at least 4 bytes, and returns the number of bytes written.
    // At an illegal UTF-8 sequence a single space is written and 1 is
    // returned. The buffer is not null-terminated by this method.
    int get_utf8(char *buf) const;

    // Returns the byte length of the current code point, or 1 if the current
    // position holds an illegal UTF-8 sequence.
    int utf8_len() const;

    // Returns true if the UTF-8 encoding at the current position is legal.
    bool is_legal() const;

    // Returns the raw pointer into the string at the current position.
    const char *utf8_data() const {
      return it_;
    }

    // Two iterators compare equal exactly when they point at the same byte.
    friend bool operator==(const CI &lhs, const CI &rhs) {
      return lhs.it_ == rhs.it_;
    }
    friend bool operator!=(const CI &lhs, const CI &rhs) {
      return lhs.it_ != rhs.it_;
    }

  private:
    // Only UNICHAR (through begin()/end()) may create iterators.
    friend class UNICHAR;
    explicit const_iterator(const char *it) : it_(it) {}

    const char *it_; // Current position inside the borrowed string.
  };

  // Create the start/end iterators for a string. Both are static and neither
  // copies nor takes ownership of the underlying array.
  static const_iterator begin(const char *utf8_str, int byte_length);
  static const_iterator end(const char *utf8_str, int byte_length);

  // Converts a UTF-8 string to a vector of Unicode code points.
  // Returns an empty vector if the input contains invalid UTF-8.
  static std::vector<char32> UTF8ToUTF32(const char *utf8_str);

  // Converts a vector of Unicode code points to a UTF-8 string.
  // Returns an empty string if the input contains an invalid code point.
  static std::string UTF32ToUTF8(const std::vector<char32> &str32);

private:
  // UTF-8 representation of one or more Unicode characters.
  // The last element (chars[UNICHAR_LEN - 1]) is a length if its value is
  // < UNICHAR_LEN; otherwise it is a genuine character of the text.
  char chars[UNICHAR_LEN]{};
};
} // namespace tesseract
#endif // TESSERACT_CCUTIL_UNICHAR_H_

View File

@ -1,34 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: version.h
// Description: Version information
//
// (C) Copyright 2018, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_API_VERSION_H_
#define TESSERACT_API_VERSION_H_
// NOTE(review): the @GENERIC_*_VERSION@ and @PACKAGE_VERSION@ tokens below are
// unsubstituted placeholders; presumably a build-time configure step is meant
// to replace them with real values -- confirm. As written, any expansion of
// TESSERACT_MAJOR_VERSION, TESSERACT_MINOR_VERSION, TESSERACT_MICRO_VERSION or
// TESSERACT_VERSION yields text containing '@' and will not compile, and
// TESSERACT_VERSION_STR is the literal placeholder string.
// clang-format off
#define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
#define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
#define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@
// Packs the three components into a single integer:
// (major << 16) | (minor << 8) | micro.
#define TESSERACT_VERSION \
(TESSERACT_MAJOR_VERSION << 16 | \
TESSERACT_MINOR_VERSION << 8 | \
TESSERACT_MICRO_VERSION)
// The version as a string literal.
#define TESSERACT_VERSION_STR "@PACKAGE_VERSION@"
// clang-format on
#endif // TESSERACT_API_VERSION_H_

View File

@ -1,812 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: baseapi.h
// Description: Simple API for calling tesseract.
// Author: Ray Smith
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_API_BASEAPI_H_
#define TESSERACT_API_BASEAPI_H_
#ifdef HAVE_CONFIG_H
# include "config_auto.h" // DISABLED_LEGACY_ENGINE
#endif
#include "export.h"
#include "pageiterator.h"
#include "publictypes.h"
#include "resultiterator.h"
#include "unichar.h"
#include "version.h"
#include <cstdio>
#include <vector> // for std::vector
struct Pix;
struct Pixa;
struct Boxa;
namespace tesseract {
class PAGE_RES;
class ParagraphModel;
class BLOCK_LIST;
class ETEXT_DESC;
struct OSResults;
class UNICHARSET;
class Dawg;
class Dict;
class EquationDetect;
class PageIterator;
class ImageThresholder;
class LTRResultIterator;
class ResultIterator;
class MutableIterator;
class TessResultRenderer;
class Tesseract;
// Callback that reads the whole of the file `filename` into `*data`.
// Returns false on failure.
using FileReader = bool (*)(const char *filename, std::vector<char> *data);
// Pointer to a const member function of Dict taking
// (void *, const UNICHARSET &, UNICHAR_ID, bool) and returning int.
// This is the type accepted by TessBaseAPI::SetDictFunc.
using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID,
bool) const;
// Pointer to a non-const member function of Dict taking
// (const char *, const char *, int, const char *, int) and returning double.
// This is the type accepted by TessBaseAPI::SetProbabilityInContextFunc.
using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *,
int, const char *, int);
/**
* Base class for all tesseract APIs.
* Specific classes can add ability to work on different inputs or produce
* different outputs.
* This class is mostly an interface layer on top of the Tesseract instance
* class to hide the data types so that users of this class don't have to
* include any other Tesseract headers.
*/
class TESS_API TessBaseAPI {
public:
TessBaseAPI();
virtual ~TessBaseAPI();
// Copy constructor and assignment operator are currently unsupported.
TessBaseAPI(TessBaseAPI const &) = delete;
TessBaseAPI &operator=(TessBaseAPI const &) = delete;
/**
* Returns the version identifier as a static string. Do not delete.
*/
static const char *Version();
/**
* If compiled with OpenCL AND an available OpenCL
* device is deemed faster than serial code, then
* "device" is populated with the cl_device_id
* and returns sizeof(cl_device_id)
* otherwise *device=nullptr and returns 0.
*/
static size_t getOpenCLDevice(void **device);
/**
* Set the name of the input file. Needed for training and
* reading a UNLV zone file, and for searchable PDF output.
*/
void SetInputName(const char *name);
/**
* These functions are required for searchable PDF output.
* We need our hands on the input file so that we can include
* it in the PDF without transcoding. If that is not possible,
* we need the original image. Finally, resolution metadata
* is stored in the PDF so we need that as well.
*/
const char *GetInputName();
// Takes ownership of the input pix.
void SetInputImage(Pix *pix);
Pix *GetInputImage();
int GetSourceYResolution();
const char *GetDatapath();
/** Set the name of the bonus output files. Needed only for debugging. */
void SetOutputName(const char *name);
/**
* Set the value of an internal "parameter."
* Supply the name of the parameter and the value as a string, just as
* you would in a config file.
* Returns false if the name lookup failed.
* Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
* Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.
* SetVariable may be used before Init, but settings will revert to
* defaults on End().
*
* Note: Must be called after Init(). Only works for non-init variables
* (init variables should be passed to Init()).
*/
bool SetVariable(const char *name, const char *value);
bool SetDebugVariable(const char *name, const char *value);
/**
* Returns true if the parameter was found among Tesseract parameters.
* Fills in value with the value of the parameter.
*/
bool GetIntVariable(const char *name, int *value) const;
bool GetBoolVariable(const char *name, bool *value) const;
bool GetDoubleVariable(const char *name, double *value) const;
/**
* Returns the pointer to the string that represents the value of the
* parameter if it was found among Tesseract parameters.
*/
const char *GetStringVariable(const char *name) const;
#ifndef DISABLED_LEGACY_ENGINE
/**
* Print Tesseract fonts table to the given file.
*/
void PrintFontsTable(FILE *fp) const;
#endif
/**
* Print Tesseract parameters to the given file.
*/
void PrintVariables(FILE *fp) const;
/**
* Get value of named variable as a string, if it exists.
*/
bool GetVariableAsString(const char *name, std::string *val) const;
/**
* Instances are now mostly thread-safe and totally independent,
* but some global parameters remain. Basically it is safe to use multiple
* TessBaseAPIs in different threads in parallel, UNLESS:
* you use SetVariable on some of the Params in classify and textord.
* If you do, then the effect will be to change it for all your instances.
*
* Start tesseract. Returns zero on success and -1 on failure.
* NOTE that the only members that may be called before Init are those
* listed above here in the class definition.
*
* The datapath must be the name of the tessdata directory.
* The language is (usually) an ISO 639-3 string or nullptr will default to
* eng. It is entirely safe (and eventually will be efficient too) to call
* Init multiple times on the same instance to change language, or just
* to reset the classifier.
* The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
* that multiple languages are to be loaded. Eg hin+eng will load Hindi and
* English. Languages may specify internally that they want to be loaded
* with one or more other languages, so the ~ sign is available to override
* that. Eg if hin were set to load eng by default, then hin+~eng would force
* loading only hin. The number of loaded languages is limited only by
* memory, with the caveat that loading additional languages will impact
* both speed and accuracy, as there is more work to do to decide on the
* applicable language, and there is more chance of hallucinating incorrect
* words.
* WARNING: On changing languages, all Tesseract parameters are reset
* back to their default values. (Which may vary between languages.)
* If you have a rare need to set a Variable that controls
* initialization for a second call to Init you should explicitly
* call End() and then use SetVariable before Init. This is only a very
* rare use case, since there are very few uses that require any parameters
* to be set before Init.
*
* If set_only_non_debug_params is true, only params that do not contain
* "debug" in the name will be set.
*/
int Init(const char *datapath, const char *language, OcrEngineMode mode,
char **configs, int configs_size,
const std::vector<std::string> *vars_vec,
const std::vector<std::string> *vars_values,
bool set_only_non_debug_params);
/**
 * Convenience overload of Init taking only a data path, a language and an
 * engine mode. Forwards to the full overload with no config files, no
 * variable overrides and set_only_non_debug_params disabled, and returns
 * whatever that overload returns.
 */
int Init(const char *datapath, const char *language, OcrEngineMode oem) {
  const int status =
      Init(datapath, language, oem, /*configs=*/nullptr, /*configs_size=*/0,
           /*vars_vec=*/nullptr, /*vars_values=*/nullptr,
           /*set_only_non_debug_params=*/false);
  return status;
}
/**
 * Convenience overload of Init taking only a data path and a language.
 * Forwards to the full overload with engine mode OEM_DEFAULT, no config
 * files, no variable overrides and set_only_non_debug_params disabled, and
 * returns whatever that overload returns.
 */
int Init(const char *datapath, const char *language) {
  const int status =
      Init(datapath, language, OEM_DEFAULT, /*configs=*/nullptr,
           /*configs_size=*/0, /*vars_vec=*/nullptr, /*vars_values=*/nullptr,
           /*set_only_non_debug_params=*/false);
  return status;
}
// In-memory version reads the traineddata file directly from the given
// data[data_size] array, and/or reads data via a FileReader.
int Init(const char *data, int data_size, const char *language,
OcrEngineMode mode, char **configs, int configs_size,
const std::vector<std::string> *vars_vec,
const std::vector<std::string> *vars_values,
bool set_only_non_debug_params, FileReader reader);
/**
* Returns the languages string used in the last valid initialization.
* If the last initialization specified "deu+hin" then that will be
* returned. If hin loaded eng automatically as well, then that will
* not be included in this list. To find the languages actually
* loaded use GetLoadedLanguagesAsVector.
* The returned string should NOT be deleted.
*/
const char *GetInitLanguagesAsString() const;
/**
* Returns the loaded languages in the vector of std::string.
* Includes all languages loaded by the last Init, including those loaded
* as dependencies of other loaded languages.
*/
void GetLoadedLanguagesAsVector(std::vector<std::string> *langs) const;
/**
* Returns the available languages in the sorted vector of std::string.
*/
void GetAvailableLanguagesAsVector(std::vector<std::string> *langs) const;
/**
* Init only for page layout analysis. Use only for calls to SetImage and
* AnalysePage. Calls that attempt recognition will generate an error.
*/
void InitForAnalysePage();
/**
* Read a "config" file containing a set of param, value pairs.
* Searches the standard places: tessdata/configs, tessdata/tessconfigs
* and also accepts a relative or absolute path name.
* Note: only non-init params will be set (init params are set by Init()).
*/
void ReadConfigFile(const char *filename);
/** Same as above, but only set debug params from the given config file. */
void ReadDebugConfigFile(const char *filename);
/**
* Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
* The mode is stored as an IntParam so it can also be modified by
* ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
*/
void SetPageSegMode(PageSegMode mode);
/** Return the current page segmentation mode. */
PageSegMode GetPageSegMode() const;
/**
* Recognize a rectangle from an image and return the result as a string.
* May be called many times for a single Init.
* Currently has no error checking.
* Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
* Palette color images will not work properly and must be converted to
* 24 bit.
* Binary images of 1 bit per pixel may also be given but they must be
* byte packed with the MSB of the first byte being the first pixel, and a
* 1 represents WHITE. For binary images set bytes_per_pixel=0.
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
*
* Note that TesseractRect is the simplified convenience interface.
* For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
* and one or more of the Get*Text functions below.
*/
char *TesseractRect(const unsigned char *imagedata, int bytes_per_pixel,
int bytes_per_line, int left, int top, int width,
int height);
/**
* Call between pages or documents etc to free up memory and forget
* adaptive data.
*/
void ClearAdaptiveClassifier();
/**
* @defgroup AdvancedAPI Advanced API
* The following methods break TesseractRect into pieces, so you can
* get hold of the thresholded image, get the text in different formats,
* get bounding boxes, confidences etc.
*/
/* @{ */
/**
* Provide an image for Tesseract to recognize. Format is as
* TesseractRect above. Copies the image buffer and converts to Pix.
* SetImage clears all recognition results, and sets the rectangle to the
* full image, so it may be followed immediately by a GetUTF8Text, and it
* will automatically perform recognition.
*/
void SetImage(const unsigned char *imagedata, int width, int height,
int bytes_per_pixel, int bytes_per_line);
/**
* Provide an image for Tesseract to recognize. As with SetImage above,
* Tesseract takes its own copy of the image, so it need not persist until
* after Recognize.
* Pix vs raw, which to use?
* Use Pix where possible. Tesseract uses Pix as its internal representation
* and it is therefore more efficient to provide a Pix directly.
*/
void SetImage(Pix *pix);
/**
* Set the resolution of the source image in pixels per inch so font size
* information can be calculated in results. Call this after SetImage().
*/
void SetSourceResolution(int ppi);
/**
* Restrict recognition to a sub-rectangle of the image. Call after SetImage.
* Each SetRectangle clears the recognition results so multiple rectangles
* can be recognized with the same image.
*/
void SetRectangle(int left, int top, int width, int height);
/**
* Get a copy of the internal thresholded image from Tesseract.
* Caller takes ownership of the Pix and must pixDestroy it.
* May be called any time after SetImage, or after TesseractRect.
*/
Pix *GetThresholdedImage();
/**
* Get the result of page layout analysis as a leptonica-style
* Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
*/
Boxa *GetRegions(Pixa **pixa);
/**
* Get the textlines as a leptonica-style
* Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
* If raw_image is true, then extract from the original image instead of the
* thresholded image and pad by raw_padding pixels.
* If blockids is not nullptr, the block-id of each line is also returned as
* an array of one element per line. delete [] after use. If paraids is not
* nullptr, the paragraph-id of each line within its block is also returned as
* an array of one element per line. delete [] after use.
*/
Boxa *GetTextlines(bool raw_image, int raw_padding, Pixa **pixa,
int **blockids, int **paraids);
/**
 * Helper overload for the most common usage: extract the textlines from the
 * thresholded image. Forwards to the five-argument GetTextlines with
 * raw_image = false, raw_padding = 0 and paraids = nullptr.
 */
Boxa *GetTextlines(Pixa **pixa, int **blockids) {
  const bool raw_image = false;
  const int raw_padding = 0;
  int **const paraids = nullptr;
  return GetTextlines(raw_image, raw_padding, pixa, blockids, paraids);
}
/**
* Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
* pair, in reading order. Enables downstream handling of non-rectangular
* regions.
* Can be called before or after Recognize.
* If blockids is not nullptr, the block-id of each line is also returned as
* an array of one element per line. delete [] after use.
*/
Boxa *GetStrips(Pixa **pixa, int **blockids);
/**
* Get the words as a leptonica-style
* Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
*/
Boxa *GetWords(Pixa **pixa);
/**
* Gets the individual connected (text) components (created
* after pages segmentation step, but before recognition)
* as a leptonica-style Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
* Note: the caller is responsible for calling boxaDestroy()
* on the returned Boxa array and pixaDestroy() on cc array.
*/
Boxa *GetConnectedComponents(Pixa **cc);
/**
* Get the given level kind of components (block, textline, word etc.) as a
* leptonica-style Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
* If blockids is not nullptr, the block-id of each component is also returned
* as an array of one element per component. delete [] after use.
* If paraids is not nullptr, the paragraph-id of each component with its
* block is also returned as an array of one element per component. delete []
* after use. If raw_image is true, then portions of the original image are
* extracted instead of the thresholded image and padded with raw_padding. If
* text_only is true, then only text components are returned.
*/
Boxa *GetComponentImages(PageIteratorLevel level, bool text_only,
bool raw_image, int raw_padding, Pixa **pixa,
int **blockids, int **paraids);
/**
 * Helper overload for the most common usage: get binary images with no
 * padding. Forwards to the seven-argument GetComponentImages with
 * raw_image = false, raw_padding = 0 and paraids = nullptr.
 */
Boxa *GetComponentImages(const PageIteratorLevel level, const bool text_only,
                         Pixa **pixa, int **blockids) {
  const bool raw_image = false;
  const int raw_padding = 0;
  int **const paraids = nullptr;
  return GetComponentImages(level, text_only, raw_image, raw_padding, pixa,
                            blockids, paraids);
}
/**
* Returns the scale factor of the thresholded image that would be returned by
* GetThresholdedImage() and the various GetX() methods that call
* GetComponentImages().
* Returns 0 if no thresholder has been set.
*/
int GetThresholdedImageScaleFactor() const;
/**
* Runs page layout analysis in the mode set by SetPageSegMode.
* May optionally be called prior to Recognize to get access to just
* the page layout results. Returns an iterator to the results.
* If merge_similar_words is true, words are combined where suitable for use
* with a line recognizer. Use if you want to use AnalyseLayout to find the
* textlines, and then want to process textline fragments with an external
* line recognizer.
* Returns nullptr on error or an empty page.
* The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
PageIterator *AnalyseLayout();
PageIterator *AnalyseLayout(bool merge_similar_words);
/**
* Recognize the image from SetAndThresholdImage, generating Tesseract
* internal structures. Returns 0 on success.
* Optional. The Get*Text functions below will call Recognize if needed.
* After Recognize, the output is kept internally until the next SetImage.
*/
int Recognize(ETEXT_DESC *monitor);
/**
* Methods to retrieve information after SetAndThresholdImage(),
* Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
*/
/**
* Turns images into symbolic text.
*
* filename can point to a single image, a multi-page TIFF,
* or a plain text list of image filenames.
*
* retry_config is useful for debugging. If not nullptr, you can fall
* back to an alternate configuration if a page fails for some
* reason.
*
* timeout_millisec terminates processing if any single page
* takes too long. Set to 0 for unlimited time.
*
* renderer is responsible for creating the output. For example,
* use the TessTextRenderer if you want plaintext output, or
* the TessPDFRenderer to produce searchable PDF.
*
* If tessedit_page_number is non-negative, will only process that
* single page. Works for multi-page tiff file, or filelist.
*
* Returns true if successful, false on error.
*/
bool ProcessPages(const char *filename, const char *retry_config,
int timeout_millisec, TessResultRenderer *renderer);
// Does the real work of ProcessPages.
bool ProcessPagesInternal(const char *filename, const char *retry_config,
int timeout_millisec, TessResultRenderer *renderer);
/**
* Turn a single image into symbolic text.
*
* The pix is the image processed. filename and page_index are
* metadata used by side-effect processes, such as reading a box
* file or formatting as hOCR.
*
* See ProcessPages for descriptions of other parameters.
*/
bool ProcessPage(Pix *pix, int page_index, const char *filename,
const char *retry_config, int timeout_millisec,
TessResultRenderer *renderer);
/**
* Get a reading-order iterator to the results of LayoutAnalysis and/or
* Recognize. The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
ResultIterator *GetIterator();
/**
* Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
* The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
MutableIterator *GetMutableIterator();
/**
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
*/
char *GetUTF8Text();
/**
* Make a HTML-formatted string with hOCR markup from the internal
* data structures.
* page_number is 0-based but will appear in the output as 1-based.
* monitor can be used to
*   - cancel the recognition
*   - receive progress callbacks
* Returned string must be freed with the delete [] operator.
*/
char *GetHOCRText(ETEXT_DESC *monitor, int page_number);
/**
* Make a HTML-formatted string with hOCR markup from the internal
* data structures.
* page_number is 0-based but will appear in the output as 1-based.
* Returned string must be freed with the delete [] operator.
*/
char *GetHOCRText(int page_number);
/**
* Make an XML-formatted string with Alto markup from the internal
* data structures.
*/
char *GetAltoText(ETEXT_DESC *monitor, int page_number);
/**
* Make an XML-formatted string with Alto markup from the internal
* data structures.
*/
char *GetAltoText(int page_number);
/**
* Make a TSV-formatted string from the internal data structures.
* page_number is 0-based but will appear in the output as 1-based.
* Returned string must be freed with the delete [] operator.
*/
char *GetTSVText(int page_number);
/**
* Make a box file for LSTM training from the internal data structures.
* Constructs coordinates in the original image - not just the rectangle.
* page_number is a 0-based page index that will appear in the box file.
* Returned string must be freed with the delete [] operator.
*/
char *GetLSTMBoxText(int page_number);
/**
* The recognized text is returned as a char* which is coded in the same
* format as a box file used in training.
* Constructs coordinates in the original image - not just the rectangle.
* page_number is a 0-based page index that will appear in the box file.
* Returned string must be freed with the delete [] operator.
*/
char *GetBoxText(int page_number);
/**
* The recognized text is returned as a char* which is coded in the same
* format as a WordStr box file used in training.
* page_number is a 0-based page index that will appear in the box file.
* Returned string must be freed with the delete [] operator.
*/
char *GetWordStrBoxText(int page_number);
/**
* The recognized text is returned as a char* which is coded
* as UNLV format Latin-1 with specific reject and suspect codes.
* Returned string must be freed with the delete [] operator.
*/
char *GetUNLVText();
/**
* Detect the orientation of the input image and apparent script (alphabet).
* orient_deg is the detected clockwise rotation of the input image in degrees
* (0, 90, 180, 270)
* orient_conf is the confidence (15.0 is reasonably confident)
* script_name is an ASCII string, the name of the script, e.g. "Latin"
* script_conf is confidence level in the script
* Returns true on success and writes values to each parameter as an output
*/
bool DetectOrientationScript(int *orient_deg, float *orient_conf,
const char **script_name, float *script_conf);
/**
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
* page_number is a 0-based page index that will appear in the osd file.
*/
char *GetOsdText(int page_number);
/** Returns the (average) confidence value between 0 and 100. */
int MeanTextConf();
/**
* Returns all word confidences (between 0 and 100) in an array, terminated
* by -1. The calling function must delete [] after use.
* The number of confidences should correspond to the number of space-
* delimited words in GetUTF8Text.
*/
int *AllWordConfidences();
#ifndef DISABLED_LEGACY_ENGINE
/**
* Applies the given word to the adaptive classifier if possible.
* The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can
* tell the boundaries of the graphemes.
* Assumes that SetImage/SetRectangle have been used to set the image
* to the given word. The mode arg should be PSM_SINGLE_WORD or
* PSM_CIRCLE_WORD, as that will be used to control layout analysis.
* The currently set PageSegMode is preserved.
* Returns false if adaption was not possible for some reason.
*/
bool AdaptToWordStr(PageSegMode mode, const char *wordstr);
#endif // ndef DISABLED_LEGACY_ENGINE
/**
* Free up recognition results and any stored image data, without actually
* freeing any recognition data that would be time-consuming to reload.
* Afterwards, you must call SetImage or TesseractRect before doing
* any Recognize or Get* operation.
*/
void Clear();
/**
* Close down tesseract and free up all memory. End() is equivalent to
* destructing and reconstructing your TessBaseAPI.
* Once End() has been used, none of the other API functions may be used
* other than Init and anything declared above it in the class definition.
*/
void End();
/**
* Clear any library-level memory caches.
* There are a variety of expensive-to-load constant data structures (mostly
* language dictionaries) that are cached globally -- surviving the Init()
* and End() of individual TessBaseAPI's. This function allows the clearing
* of these caches.
**/
static void ClearPersistentCache();
/**
* Check whether a word is valid according to Tesseract's language model
* @return 0 if the word is invalid, non-zero if valid.
* @warning temporary! This function will be removed from here and placed
* in a separate API at some future time.
*/
int IsValidWord(const char *word) const;
// Returns true if utf8_character is defined in the UniCharset.
bool IsValidCharacter(const char *utf8_character) const;
bool GetTextDirection(int *out_offset, float *out_slope);
/** Sets Dict::letter_is_okay_ function to point to the given function. */
void SetDictFunc(DictFunc f);
/** Sets Dict::probability_in_context_ function to point to the given
* function.
*/
void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
/**
* Estimates the Orientation And Script of the image.
* @return true if the image was processed successfully.
*/
bool DetectOS(OSResults *);
/**
* Return text orientation of each block as determined by an earlier run
* of layout analysis.
*/
void GetBlockTextOrientations(int **block_orientation,
bool **vertical_writing);
/** This method returns the string form of the specified unichar. */
const char *GetUnichar(int unichar_id) const;
/** Return the pointer to the i-th dawg loaded into tesseract_ object. */
const Dawg *GetDawg(int i) const;
/** Return the number of dawgs loaded into tesseract_ object. */
int NumDawgs() const;
/**
 * Returns the tesseract_ member, i.e. the pointer to the underlying
 * Tesseract data object. No ownership is transferred.
 */
Tesseract *tesseract() const {
  return this->tesseract_;
}
/** Returns last_oem_requested_, the OCR engine mode most recently requested. */
OcrEngineMode oem() const {
  return this->last_oem_requested_;
}
void set_min_orientation_margin(double margin);
/* @} */
protected:
/** Common code for setting the image. Returns true if Init has been called.
*/
bool InternalSetImage();
/**
* Run the thresholder to make the thresholded image. If pix is not nullptr,
* the source is thresholded to pix instead of the internal IMAGE.
*/
virtual bool Threshold(Pix **pix);
/**
* Find lines from the image making the BLOCK_LIST.
* @return 0 on success.
*/
int FindLines();
/** Delete the pageres and block list ready for a new page. */
void ClearResults();
/**
* Return an LTR Result Iterator -- used only for training, as we really want
* to ignore all BiDi smarts at that point.
* delete once you're done with it.
*/
LTRResultIterator *GetLTRIterator();
/**
* Return the length of the output text string, as UTF8, assuming
* one newline per line and one per block, with a terminator,
* and assuming a single character reject marker for each rejected character.
* Also return the number of recognized blobs in blob_count.
*/
int TextLength(int *blob_count) const;
//// paragraphs.cpp ////////////////////////////////////////////////////
void DetectParagraphs(bool after_text_recognition);
/** Read-only access to page_res_, the page-level data held by this object. */
const PAGE_RES *GetPageRes() const {
return page_res_;
}
protected:
Tesseract *tesseract_; ///< The underlying data object.
Tesseract *osd_tesseract_; ///< For orientation & script detection.
EquationDetect *equ_detect_; ///< The equation detector.
FileReader reader_; ///< Reads files from any filesystem.
ImageThresholder *thresholder_; ///< Image thresholding module.
std::vector<ParagraphModel *> *paragraph_models_;
BLOCK_LIST *block_list_; ///< The page layout.
PAGE_RES *page_res_; ///< The page-level data.
std::string input_file_; ///< Name used by training code.
std::string output_file_; ///< Name used by debug code.
std::string datapath_; ///< Current location of tessdata.
std::string language_; ///< Last initialized language.
OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested.
bool recognition_done_; ///< page_res_ contains recognition data.
/**
* @defgroup ThresholderParams Thresholder Parameters
* Parameters saved from the Thresholder. Needed to rebuild coordinates.
*/
/* @{ */
int rect_left_;
int rect_top_;
int rect_width_;
int rect_height_;
int image_width_;
int image_height_;
/* @} */
private:
// A list of image filenames gets special consideration
bool ProcessPagesFileList(FILE *fp, std::string *buf,
const char *retry_config, int timeout_millisec,
TessResultRenderer *renderer,
int tessedit_page_number);
// TIFF supports multipage so gets special consideration.
bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size,
const char *filename, const char *retry_config,
int timeout_millisec,
TessResultRenderer *renderer,
int tessedit_page_number);
}; // class TessBaseAPI.
/** Escape a char string - replace &<>"' with HTML codes. */
std::string HOcrEscape(const char *text);
} // namespace tesseract
#endif // TESSERACT_API_BASEAPI_H_

View File

@ -1,484 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: capi.h
// Description: C-API TessBaseAPI
//
// (C) Copyright 2012, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef API_CAPI_H_
#define API_CAPI_H_
#include "export.h"
#ifdef __cplusplus
# include <tesseract/baseapi.h>
# include <tesseract/ocrclass.h>
# include <tesseract/pageiterator.h>
# include <tesseract/renderer.h>
# include <tesseract/resultiterator.h>
#endif
#include <stdbool.h>
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Fallback boolean type for the C API. TRUE and FALSE are only defined here
 * when BOOL itself was not already defined by the including code. */
#ifndef BOOL
# define BOOL int
# define TRUE 1
# define FALSE 0
#endif
#ifdef __cplusplus
/* C++ callers: every C API type name is a plain alias of the corresponding
 * tesseract:: type, so handles can be passed between the two APIs as-is. */
using TessResultRenderer = tesseract::TessResultRenderer;
using TessBaseAPI = tesseract::TessBaseAPI;
using TessPageIterator = tesseract::PageIterator;
using TessResultIterator = tesseract::ResultIterator;
using TessMutableIterator = tesseract::MutableIterator;
using TessChoiceIterator = tesseract::ChoiceIterator;
/* Enumerations (the plain C branch of this header declares its own copies). */
using TessOcrEngineMode = tesseract::OcrEngineMode;
using TessPageSegMode = tesseract::PageSegMode;
using TessPageIteratorLevel = tesseract::PageIteratorLevel;
using TessOrientation = tesseract::Orientation;
using TessParagraphJustification = tesseract::ParagraphJustification;
using TessWritingDirection = tesseract::WritingDirection;
using TessTextlineOrder = tesseract::TextlineOrder;
using TessPolyBlockType = tesseract::PolyBlockType;
/* Progress monitor structure. */
using ETEXT_DESC = tesseract::ETEXT_DESC;
#else
/* Plain C callers: the API objects are declared as incomplete structs, so
 * they can only be handled through the pointers that the functions below
 * return and accept. */
typedef struct TessResultRenderer TessResultRenderer;
typedef struct TessBaseAPI TessBaseAPI;
typedef struct TessPageIterator TessPageIterator;
typedef struct TessResultIterator TessResultIterator;
typedef struct TessMutableIterator TessMutableIterator;
typedef struct TessChoiceIterator TessChoiceIterator;
/* C-side counterpart of tesseract::OcrEngineMode (the C++ branch of this
 * header aliases the same name to that type). The numeric values are written
 * out explicitly; they are exactly what implicit numbering yields. */
typedef enum TessOcrEngineMode {
  OEM_TESSERACT_ONLY = 0,
  OEM_LSTM_ONLY = 1,
  OEM_TESSERACT_LSTM_COMBINED = 2,
  OEM_DEFAULT = 3
} TessOcrEngineMode;
/* C-side counterpart of tesseract::PageSegMode (aliased under the same name
 * in the C++ branch of this header). Values are spelled out explicitly and
 * match the implicit numbering; PSM_COUNT stays last and equals the number of
 * modes listed before it. */
typedef enum TessPageSegMode {
  PSM_OSD_ONLY = 0,
  PSM_AUTO_OSD = 1,
  PSM_AUTO_ONLY = 2,
  PSM_AUTO = 3,
  PSM_SINGLE_COLUMN = 4,
  PSM_SINGLE_BLOCK_VERT_TEXT = 5,
  PSM_SINGLE_BLOCK = 6,
  PSM_SINGLE_LINE = 7,
  PSM_SINGLE_WORD = 8,
  PSM_CIRCLE_WORD = 9,
  PSM_SINGLE_CHAR = 10,
  PSM_SPARSE_TEXT = 11,
  PSM_SPARSE_TEXT_OSD = 12,
  PSM_RAW_LINE = 13,
  PSM_COUNT = 14
} TessPageSegMode;
/* C-side counterpart of tesseract::PageIteratorLevel (aliased under the same
 * name in the C++ branch of this header). Explicit values equal the implicit
 * numbering. */
typedef enum TessPageIteratorLevel {
  RIL_BLOCK = 0,
  RIL_PARA = 1,
  RIL_TEXTLINE = 2,
  RIL_WORD = 3,
  RIL_SYMBOL = 4
} TessPageIteratorLevel;
/* C-side counterpart of tesseract::PolyBlockType (aliased under the same name
 * in the C++ branch of this header). Explicit values equal the implicit
 * numbering; PT_COUNT stays last and equals the number of types before it. */
typedef enum TessPolyBlockType {
  PT_UNKNOWN = 0,
  PT_FLOWING_TEXT = 1,
  PT_HEADING_TEXT = 2,
  PT_PULLOUT_TEXT = 3,
  PT_EQUATION = 4,
  PT_INLINE_EQUATION = 5,
  PT_TABLE = 6,
  PT_VERTICAL_TEXT = 7,
  PT_CAPTION_TEXT = 8,
  PT_FLOWING_IMAGE = 9,
  PT_HEADING_IMAGE = 10,
  PT_PULLOUT_IMAGE = 11,
  PT_HORZ_LINE = 12,
  PT_VERT_LINE = 13,
  PT_NOISE = 14,
  PT_COUNT = 15
} TessPolyBlockType;
/* C-side counterpart of tesseract::Orientation (aliased under the same name
 * in the C++ branch of this header). Explicit values equal the implicit
 * numbering. */
typedef enum TessOrientation {
  ORIENTATION_PAGE_UP = 0,
  ORIENTATION_PAGE_RIGHT = 1,
  ORIENTATION_PAGE_DOWN = 2,
  ORIENTATION_PAGE_LEFT = 3
} TessOrientation;
/* C-side counterpart of tesseract::ParagraphJustification (aliased under the
 * same name in the C++ branch of this header). Explicit values equal the
 * implicit numbering. */
typedef enum TessParagraphJustification {
  JUSTIFICATION_UNKNOWN = 0,
  JUSTIFICATION_LEFT = 1,
  JUSTIFICATION_CENTER = 2,
  JUSTIFICATION_RIGHT = 3
} TessParagraphJustification;
/* C-side counterpart of tesseract::WritingDirection (aliased under the same
 * name in the C++ branch of this header). Explicit values equal the implicit
 * numbering. */
typedef enum TessWritingDirection {
  WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
  WRITING_DIRECTION_TOP_TO_BOTTOM = 2
} TessWritingDirection;
/* C-side counterpart of tesseract::TextlineOrder (aliased under the same name
 * in the C++ branch of this header). Explicit values equal the implicit
 * numbering. */
typedef enum TessTextlineOrder {
  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2
} TessTextlineOrder;
typedef struct ETEXT_DESC ETEXT_DESC;
#endif
/* Callback types installed on an ETEXT_DESC monitor through
 * TessMonitorSetCancelFunc and TessMonitorSetProgressFunc respectively.
 * NOTE(review): cancel_this is presumably the pointer registered with
 * TessMonitorSetCancelThis -- confirm against the implementation. */
typedef bool (*TessCancelFunc)(void *cancel_this, int words);
typedef bool (*TessProgressFunc)(ETEXT_DESC *ths, int left, int right, int top,
int bottom);
struct Pix;
struct Boxa;
struct Pixa;
/* General free functions */
/* Returns a version string. Declared with (void) so that C callers get a real
 * prototype; an empty parameter list in C leaves the arguments unspecified
 * and unchecked. In C++ the two spellings are equivalent. */
TESS_API const char *TessVersion(void);
/* Deallocation helpers for text, string arrays and int arrays.
 * NOTE(review): presumably paired with the char*, char** and int* returning
 * functions declared further down in this header -- confirm per function. */
TESS_API void TessDeleteText(const char *text);
TESS_API void TessDeleteTextArray(char **arr);
TESS_API void TessDeleteIntArray(const int *arr);
/* Renderer API */
/* One constructor per output format. Each takes the output base name; the
 * HOcr "2" variant adds a font_info switch and the PDF variant additionally
 * takes a data directory and a textonly switch. */
TESS_API TessResultRenderer *TessTextRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessHOcrRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessHOcrRendererCreate2(const char *outputbase,
BOOL font_info);
TESS_API TessResultRenderer *TessAltoRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessTsvRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessPDFRendererCreate(const char *outputbase,
const char *datadir,
BOOL textonly);
TESS_API TessResultRenderer *TessUnlvRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessBoxTextRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessLSTMBoxRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessWordStrBoxRendererCreate(
const char *outputbase);
/* Releases a renderer. */
TESS_API void TessDeleteResultRenderer(TessResultRenderer *renderer);
/* Renderers can be chained: Insert attaches `next` to `renderer`, Next walks
 * the chain. NOTE(review): ownership of `next` after Insert is not visible
 * from this header -- confirm before deleting chained renderers. */
TESS_API void TessResultRendererInsert(TessResultRenderer *renderer,
TessResultRenderer *next);
TESS_API TessResultRenderer *TessResultRendererNext(
TessResultRenderer *renderer);
/* Document life cycle: begin, add one image per recognized page, end. */
TESS_API BOOL TessResultRendererBeginDocument(TessResultRenderer *renderer,
const char *title);
TESS_API BOOL TessResultRendererAddImage(TessResultRenderer *renderer,
TessBaseAPI *api);
TESS_API BOOL TessResultRendererEndDocument(TessResultRenderer *renderer);
/* Renderer properties. NOTE: "Extention" (sic) is the spelling of the
 * declared function name; callers must use it as written. */
TESS_API const char *TessResultRendererExtention(TessResultRenderer *renderer);
TESS_API const char *TessResultRendererTitle(TessResultRenderer *renderer);
TESS_API int TessResultRendererImageNum(TessResultRenderer *renderer);
/* Base API */
/* Creates an API handle; pass it to TessBaseAPIDelete when done. Declared
 * with (void) so that C callers get a real prototype instead of an
 * unspecified parameter list. */
TESS_API TessBaseAPI *TessBaseAPICreate(void);
/* Destroys a handle obtained from TessBaseAPICreate. */
TESS_API void TessBaseAPIDelete(TessBaseAPI *handle);
/* Accessors on a handle: OpenCL device, input/output names, input image,
 * source resolution and data path. */
TESS_API size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI *handle, void **device);
TESS_API void TessBaseAPISetInputName(TessBaseAPI *handle, const char *name);
TESS_API const char *TessBaseAPIGetInputName(TessBaseAPI *handle);
TESS_API void TessBaseAPISetInputImage(TessBaseAPI *handle, struct Pix *pix);
TESS_API struct Pix *TessBaseAPIGetInputImage(TessBaseAPI *handle);
TESS_API int TessBaseAPIGetSourceYResolution(TessBaseAPI *handle);
TESS_API const char *TessBaseAPIGetDatapath(TessBaseAPI *handle);
TESS_API void TessBaseAPISetOutputName(TessBaseAPI *handle, const char *name);
/* Named variables. Setters take the value as a string; the typed getters
 * write through the out-parameter. NOTE(review): the BOOL results presumably
 * report whether the variable was found/set -- confirm. */
TESS_API BOOL TessBaseAPISetVariable(TessBaseAPI *handle, const char *name,
const char *value);
TESS_API BOOL TessBaseAPISetDebugVariable(TessBaseAPI *handle, const char *name,
const char *value);
TESS_API BOOL TessBaseAPIGetIntVariable(const TessBaseAPI *handle,
const char *name, int *value);
TESS_API BOOL TessBaseAPIGetBoolVariable(const TessBaseAPI *handle,
const char *name, BOOL *value);
TESS_API BOOL TessBaseAPIGetDoubleVariable(const TessBaseAPI *handle,
const char *name, double *value);
TESS_API const char *TessBaseAPIGetStringVariable(const TessBaseAPI *handle,
const char *name);
/* Dump the variables to an open stream or to a named file. */
TESS_API void TessBaseAPIPrintVariables(const TessBaseAPI *handle, FILE *fp);
TESS_API BOOL TessBaseAPIPrintVariablesToFile(const TessBaseAPI *handle,
const char *filename);
/* Initialization. C has no overloading, so the variants are numbered and
 * differ in how many optional arguments they expose: Init3 takes only
 * datapath and language, Init2 adds the engine mode, Init1 adds config files,
 * Init4 adds variable name/value vectors, and Init5 takes an in-memory buffer
 * (data, data_size) where the others take a datapath.
 * NOTE(review): the meaning of the int result (presumably 0 on success) is
 * not visible from this header -- confirm against the implementation. */
TESS_API int TessBaseAPIInit1(TessBaseAPI *handle, const char *datapath,
const char *language, TessOcrEngineMode oem,
char **configs, int configs_size);
TESS_API int TessBaseAPIInit2(TessBaseAPI *handle, const char *datapath,
const char *language, TessOcrEngineMode oem);
TESS_API int TessBaseAPIInit3(TessBaseAPI *handle, const char *datapath,
const char *language);
TESS_API int TessBaseAPIInit4(TessBaseAPI *handle, const char *datapath,
const char *language, TessOcrEngineMode mode,
char **configs, int configs_size, char **vars_vec,
char **vars_values, size_t vars_vec_size,
BOOL set_only_non_debug_params);
TESS_API int TessBaseAPIInit5(TessBaseAPI *handle, const char *data, int data_size,
const char *language, TessOcrEngineMode mode,
char **configs, int configs_size, char **vars_vec,
char **vars_values, size_t vars_vec_size,
BOOL set_only_non_debug_params);
/* Language queries. NOTE(review): the char** results are presumably released
 * with TessDeleteTextArray -- confirm. */
TESS_API const char *TessBaseAPIGetInitLanguagesAsString(
const TessBaseAPI *handle);
TESS_API char **TessBaseAPIGetLoadedLanguagesAsVector(
const TessBaseAPI *handle);
TESS_API char **TessBaseAPIGetAvailableLanguagesAsVector(
const TessBaseAPI *handle);
/* Layout-analysis-only initialization and configuration files. */
TESS_API void TessBaseAPIInitForAnalysePage(TessBaseAPI *handle);
TESS_API void TessBaseAPIReadConfigFile(TessBaseAPI *handle,
const char *filename);
TESS_API void TessBaseAPIReadDebugConfigFile(TessBaseAPI *handle,
const char *filename);
/* Page segmentation mode accessors. */
TESS_API void TessBaseAPISetPageSegMode(TessBaseAPI *handle,
TessPageSegMode mode);
TESS_API TessPageSegMode TessBaseAPIGetPageSegMode(const TessBaseAPI *handle);
/* Takes a raw image buffer plus a rectangle within it and returns text.
 * NOTE(review): the returned char* is presumably released with
 * TessDeleteText -- confirm. */
TESS_API char *TessBaseAPIRect(TessBaseAPI *handle,
const unsigned char *imagedata,
int bytes_per_pixel, int bytes_per_line,
int left, int top, int width, int height);
TESS_API void TessBaseAPIClearAdaptiveClassifier(TessBaseAPI *handle);
/* Image input: SetImage takes a raw buffer with its geometry, SetImage2
 * takes a Pix. Resolution and the rectangle of interest are set separately. */
TESS_API void TessBaseAPISetImage(TessBaseAPI *handle,
const unsigned char *imagedata, int width,
int height, int bytes_per_pixel,
int bytes_per_line);
TESS_API void TessBaseAPISetImage2(TessBaseAPI *handle, struct Pix *pix);
TESS_API void TessBaseAPISetSourceResolution(TessBaseAPI *handle, int ppi);
TESS_API void TessBaseAPISetRectangle(TessBaseAPI *handle, int left, int top,
int width, int height);
TESS_API struct Pix *TessBaseAPIGetThresholdedImage(TessBaseAPI *handle);
/* Component extraction. Each function returns a Boxa and can additionally
 * fill a Pixa and, where offered, int arrays of block / paragraph ids through
 * the out-parameters. The "1" variants add the raw_image and raw_padding
 * arguments.
 * NOTE(review): ownership of the returned Boxa/Pixa and id arrays is not
 * visible from this header -- confirm before freeing. */
TESS_API struct Boxa *TessBaseAPIGetRegions(TessBaseAPI *handle,
struct Pixa **pixa);
TESS_API struct Boxa *TessBaseAPIGetTextlines(TessBaseAPI *handle,
struct Pixa **pixa,
int **blockids);
TESS_API struct Boxa *TessBaseAPIGetTextlines1(TessBaseAPI *handle,
BOOL raw_image, int raw_padding,
struct Pixa **pixa,
int **blockids, int **paraids);
TESS_API struct Boxa *TessBaseAPIGetStrips(TessBaseAPI *handle,
struct Pixa **pixa, int **blockids);
TESS_API struct Boxa *TessBaseAPIGetWords(TessBaseAPI *handle,
struct Pixa **pixa);
TESS_API struct Boxa *TessBaseAPIGetConnectedComponents(TessBaseAPI *handle,
struct Pixa **cc);
TESS_API struct Boxa *TessBaseAPIGetComponentImages(TessBaseAPI *handle,
TessPageIteratorLevel level,
BOOL text_only,
struct Pixa **pixa,
int **blockids);
TESS_API struct Boxa *TessBaseAPIGetComponentImages1(
TessBaseAPI *handle, TessPageIteratorLevel level, BOOL text_only,
BOOL raw_image, int raw_padding, struct Pixa **pixa, int **blockids,
int **paraids);
TESS_API int TessBaseAPIGetThresholdedImageScaleFactor(
const TessBaseAPI *handle);
/* Layout analysis and recognition. Recognize takes an optional-looking
 * ETEXT_DESC monitor (see the progress monitor functions at the end of this
 * header). NOTE(review): whether monitor may be NULL is not visible here. */
TESS_API TessPageIterator *TessBaseAPIAnalyseLayout(TessBaseAPI *handle);
TESS_API int TessBaseAPIRecognize(TessBaseAPI *handle, ETEXT_DESC *monitor);
/* Whole-document processing through a renderer. */
TESS_API BOOL TessBaseAPIProcessPages(TessBaseAPI *handle, const char *filename,
const char *retry_config,
int timeout_millisec,
TessResultRenderer *renderer);
TESS_API BOOL TessBaseAPIProcessPage(TessBaseAPI *handle, struct Pix *pix,
int page_index, const char *filename,
const char *retry_config,
int timeout_millisec,
TessResultRenderer *renderer);
/* Iterators over the results; release them with the matching
 * Tess*IteratorDelete function declared below. */
TESS_API TessResultIterator *TessBaseAPIGetIterator(TessBaseAPI *handle);
TESS_API TessMutableIterator *TessBaseAPIGetMutableIterator(
TessBaseAPI *handle);
/* Text output in the various formats.
 * NOTE(review): the char* / int* results are presumably released with
 * TessDeleteText / TessDeleteIntArray -- confirm. */
TESS_API char *TessBaseAPIGetUTF8Text(TessBaseAPI *handle);
TESS_API char *TessBaseAPIGetHOCRText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetAltoText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetTsvText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetBoxText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetLSTMBoxText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetWordStrBoxText(TessBaseAPI *handle,
int page_number);
TESS_API char *TessBaseAPIGetUNLVText(TessBaseAPI *handle);
TESS_API int TessBaseAPIMeanTextConf(TessBaseAPI *handle);
TESS_API int *TessBaseAPIAllWordConfidences(TessBaseAPI *handle);
/* Only declared when the legacy engine is compiled in. */
#ifndef DISABLED_LEGACY_ENGINE
TESS_API BOOL TessBaseAPIAdaptToWordStr(TessBaseAPI *handle,
TessPageSegMode mode,
const char *wordstr);
#endif // #ifndef DISABLED_LEGACY_ENGINE
/* Clear / End reset a handle without deleting it. */
TESS_API void TessBaseAPIClear(TessBaseAPI *handle);
TESS_API void TessBaseAPIEnd(TessBaseAPI *handle);
TESS_API int TessBaseAPIIsValidWord(TessBaseAPI *handle, const char *word);
TESS_API BOOL TessBaseAPIGetTextDirection(TessBaseAPI *handle, int *out_offset,
float *out_slope);
TESS_API const char *TessBaseAPIGetUnichar(TessBaseAPI *handle, int unichar_id);
TESS_API void TessBaseAPIClearPersistentCache(TessBaseAPI *handle);
#ifndef DISABLED_LEGACY_ENGINE
// Call TessDeleteText(*script_name) to free memory allocated by this
// function.
// NOTE(review): this comment previously named *best_script_name, which is not
// a parameter of this declaration. Confirm against the implementation that
// *script_name is really heap-allocated before freeing it.
TESS_API BOOL TessBaseAPIDetectOrientationScript(TessBaseAPI *handle,
int *orient_deg,
float *orient_conf,
const char **script_name,
float *script_conf);
#endif // #ifndef DISABLED_LEGACY_ENGINE
TESS_API void TessBaseAPISetMinOrientationMargin(TessBaseAPI *handle,
double margin);
TESS_API int TessBaseAPINumDawgs(const TessBaseAPI *handle);
TESS_API TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI *handle);
// NOTE: unlike its neighbours this function is named TessBase... (without
// "API") and takes a C99/C++ bool** rather than BOOL**.
TESS_API void TessBaseGetBlockTextOrientations(TessBaseAPI *handle,
int **block_orientation,
bool **vertical_writing);
/* Page iterator */
/* Lifetime: Delete releases an iterator, Copy returns a new one. */
TESS_API void TessPageIteratorDelete(TessPageIterator *handle);
TESS_API TessPageIterator *TessPageIteratorCopy(const TessPageIterator *handle);
/* Navigation at a given TessPageIteratorLevel. */
TESS_API void TessPageIteratorBegin(TessPageIterator *handle);
TESS_API BOOL TessPageIteratorNext(TessPageIterator *handle,
TessPageIteratorLevel level);
TESS_API BOOL TessPageIteratorIsAtBeginningOf(const TessPageIterator *handle,
TessPageIteratorLevel level);
TESS_API BOOL TessPageIteratorIsAtFinalElement(const TessPageIterator *handle,
TessPageIteratorLevel level,
TessPageIteratorLevel element);
/* Geometry of the current element; results are written through the int
 * out-parameters. */
TESS_API BOOL TessPageIteratorBoundingBox(const TessPageIterator *handle,
TessPageIteratorLevel level,
int *left, int *top, int *right,
int *bottom);
TESS_API TessPolyBlockType
TessPageIteratorBlockType(const TessPageIterator *handle);
/* Image access. NOTE(review): ownership of the returned Pix is not visible
 * from this header -- confirm before destroying it. */
TESS_API struct Pix *TessPageIteratorGetBinaryImage(
const TessPageIterator *handle, TessPageIteratorLevel level);
TESS_API struct Pix *TessPageIteratorGetImage(const TessPageIterator *handle,
TessPageIteratorLevel level,
int padding,
struct Pix *original_image,
int *left, int *top);
TESS_API BOOL TessPageIteratorBaseline(const TessPageIterator *handle,
TessPageIteratorLevel level, int *x1,
int *y1, int *x2, int *y2);
/* Orientation and paragraph properties, all returned through
 * out-parameters. */
TESS_API void TessPageIteratorOrientation(
TessPageIterator *handle, TessOrientation *orientation,
TessWritingDirection *writing_direction, TessTextlineOrder *textline_order,
float *deskew_angle);
TESS_API void TessPageIteratorParagraphInfo(
TessPageIterator *handle, TessParagraphJustification *justification,
BOOL *is_list_item, BOOL *is_crown, int *first_line_indent);
/* Result iterator */
/* Lifetime: Delete releases an iterator, Copy returns a new one. */
TESS_API void TessResultIteratorDelete(TessResultIterator *handle);
TESS_API TessResultIterator *TessResultIteratorCopy(
const TessResultIterator *handle);
/* Views of the same iterator as a page iterator, and a choice iterator for
 * the current position. */
TESS_API TessPageIterator *TessResultIteratorGetPageIterator(
TessResultIterator *handle);
TESS_API const TessPageIterator *TessResultIteratorGetPageIteratorConst(
const TessResultIterator *handle);
TESS_API TessChoiceIterator *TessResultIteratorGetChoiceIterator(
const TessResultIterator *handle);
TESS_API BOOL TessResultIteratorNext(TessResultIterator *handle,
TessPageIteratorLevel level);
/* Text and confidence of the current element at the given level.
 * NOTE(review): the char* result is presumably released with TessDeleteText
 * -- confirm. */
TESS_API char *TessResultIteratorGetUTF8Text(const TessResultIterator *handle,
TessPageIteratorLevel level);
TESS_API float TessResultIteratorConfidence(const TessResultIterator *handle,
TessPageIteratorLevel level);
/* Word-level properties; the font attributes are written through the BOOL
 * and int out-parameters. */
TESS_API const char *TessResultIteratorWordRecognitionLanguage(
const TessResultIterator *handle);
TESS_API const char *TessResultIteratorWordFontAttributes(
const TessResultIterator *handle, BOOL *is_bold, BOOL *is_italic,
BOOL *is_underlined, BOOL *is_monospace, BOOL *is_serif, BOOL *is_smallcaps,
int *pointsize, int *font_id);
TESS_API BOOL
TessResultIteratorWordIsFromDictionary(const TessResultIterator *handle);
TESS_API BOOL TessResultIteratorWordIsNumeric(const TessResultIterator *handle);
/* Symbol-level properties. */
TESS_API BOOL
TessResultIteratorSymbolIsSuperscript(const TessResultIterator *handle);
TESS_API BOOL
TessResultIteratorSymbolIsSubscript(const TessResultIterator *handle);
TESS_API BOOL
TessResultIteratorSymbolIsDropcap(const TessResultIterator *handle);
/* Choice iterator (obtained from TessResultIteratorGetChoiceIterator):
 * release, advance, and read the text and confidence of the current choice. */
TESS_API void TessChoiceIteratorDelete(TessChoiceIterator *handle);
TESS_API BOOL TessChoiceIteratorNext(TessChoiceIterator *handle);
TESS_API const char *TessChoiceIteratorGetUTF8Text(
const TessChoiceIterator *handle);
TESS_API float TessChoiceIteratorConfidence(const TessChoiceIterator *handle);
/* Progress monitor */
/* Creates a monitor object; pass it to TessMonitorDelete when done. Declared
 * with (void) so that C callers get a real prototype instead of an
 * unspecified parameter list. */
TESS_API ETEXT_DESC *TessMonitorCreate(void);
TESS_API void TessMonitorDelete(ETEXT_DESC *monitor);
/* Cancellation: a TessCancelFunc callback plus an opaque pointer that can be
 * stored on and read back from the monitor. */
TESS_API void TessMonitorSetCancelFunc(ETEXT_DESC *monitor,
TessCancelFunc cancelFunc);
TESS_API void TessMonitorSetCancelThis(ETEXT_DESC *monitor, void *cancelThis);
TESS_API void *TessMonitorGetCancelThis(ETEXT_DESC *monitor);
/* Progress reporting: a TessProgressFunc callback and a polled value. */
TESS_API void TessMonitorSetProgressFunc(ETEXT_DESC *monitor,
TessProgressFunc progressFunc);
TESS_API int TessMonitorGetProgress(ETEXT_DESC *monitor);
/* Sets a deadline, in milliseconds, on the monitor. */
TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC *monitor, int deadline);
#ifdef __cplusplus
}
#endif
#endif // API_CAPI_H_

View File

@ -1,37 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: export.h
// Description: Place holder
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_PLATFORM_H_
#define TESSERACT_PLATFORM_H_
// TESS_API decorates the public declarations of the library. A definition
// supplied from outside wins; otherwise it is derived below from the platform
// and from the TESS_EXPORTS / TESS_IMPORTS macros.
#if !defined(TESS_API)
#  if (defined(_WIN32) || defined(__CYGWIN__)) && defined(TESS_EXPORTS)
// Windows or Cygwin with TESS_EXPORTS: export the symbols.
#    define TESS_API __declspec(dllexport)
#  elif (defined(_WIN32) || defined(__CYGWIN__)) && defined(TESS_IMPORTS)
// Windows or Cygwin with TESS_IMPORTS only: import the symbols.
#    define TESS_API __declspec(dllimport)
#  elif !(defined(_WIN32) || defined(__CYGWIN__)) && \
      (defined(TESS_EXPORTS) || defined(TESS_IMPORTS))
// Any other platform with either macro: default symbol visibility.
#    define TESS_API __attribute__((visibility("default")))
#  else
// Neither macro given: no decoration at all.
#    define TESS_API
#  endif
#endif
#endif // TESSERACT_PLATFORM_H_

View File

@ -1,235 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: ltrresultiterator.h
// Description: Iterator for tesseract results in strict left-to-right
// order that avoids using tesseract internal data structures.
// Author: Ray Smith
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
#define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
#include "export.h" // for TESS_API
#include "pageiterator.h" // for PageIterator
#include "publictypes.h" // for PageIteratorLevel
#include "unichar.h" // for StrongScriptDirection
namespace tesseract {
class BLOB_CHOICE_IT;
class PAGE_RES;
class WERD_RES;
class Tesseract;
// Class to iterate over tesseract results, providing access to all levels
// of the page hierarchy, without including any tesseract headers or having
// to handle any tesseract structures.
// WARNING! This class points to data held within the TessBaseAPI class, and
// therefore can only be used while the TessBaseAPI class still exists and
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
// DetectOS, or anything else that changes the internal PAGE_RES.
// See tesseract/publictypes.h for the definition of PageIteratorLevel.
// See also base class PageIterator, which contains the bulk of the interface.
// LTRResultIterator adds text-specific methods for access to OCR output.
class TESS_API LTRResultIterator : public PageIterator {
// ChoiceIterator is constructed from an LTRResultIterator (see its
// constructor) and is granted access to this class's internals.
friend class ChoiceIterator;
public:
// page_res and tesseract come directly from the BaseAPI.
// The rectangle parameters are copied indirectly from the Thresholder,
// via the BaseAPI. They represent the coordinates of some rectangle in an
// original image (in top-left-origin coordinates) and therefore the top-left
// needs to be added to any output boxes in order to specify coordinates
// in the original image. See TessBaseAPI::SetRectangle.
// The scale and scaled_yres are in case the Thresholder scaled the image
// rectangle prior to thresholding. Any coordinates in tesseract's image
// must be divided by scale before adding (rect_left, rect_top).
// The scaled_yres indicates the effective resolution of the binary image
// that tesseract has been given by the Thresholder.
// After the constructor, Begin has already been called.
LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
int scaled_yres, int rect_left, int rect_top,
int rect_width, int rect_height);
~LTRResultIterator() override;
// LTRResultIterators may be copied! This makes it possible to iterate over
// all the objects at a lower level, while maintaining an iterator to
// objects at a higher level. These constructors DO NOT CALL Begin, so
// iterations will continue from the location of src.
// TODO: For now the copy constructor and operator= only need the base class
// versions, but if new data members are added, don't forget to add them!
// ============= Moving around within the page ============.
// See PageIterator.
// ============= Accessing data ==============.
// Returns the null terminated UTF-8 encoded text string for the current
// object at the given level. Use delete [] to free after use.
char *GetUTF8Text(PageIteratorLevel level) const;
// Set the string inserted at the end of each text line. "\n" by default.
void SetLineSeparator(const char *new_line);
// Set the string inserted at the end of each paragraph. "\n" by default.
void SetParagraphSeparator(const char *new_para);
// Returns the mean confidence of the current object at the given level.
// The number should be interpreted as a percent probability. (0.0f-100.0f)
float Confidence(PageIteratorLevel level) const;
// ============= Functions that refer to words only ============.
// Returns the font attributes of the current word. If iterating at a higher
// level object than words, eg textlines, then this will return the
// attributes of the first word in that textline.
// The actual return value is a string representing a font name. It points
// to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as
// the iterator itself, ie rendered invalid by various members of
// TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
// Pointsize is returned in printers points (1/72 inch.)
const char *WordFontAttributes(bool *is_bold, bool *is_italic,
bool *is_underlined, bool *is_monospace,
bool *is_serif, bool *is_smallcaps,
int *pointsize, int *font_id) const;
// Return the name of the language used to recognize this word.
// On error, nullptr. Do not delete this pointer.
const char *WordRecognitionLanguage() const;
// Return the overall directionality of this word.
StrongScriptDirection WordDirection() const;
// Returns true if the current word was found in a dictionary.
bool WordIsFromDictionary() const;
// Returns the number of blanks before the current word.
int BlanksBeforeWord() const;
// Returns true if the current word is numeric.
bool WordIsNumeric() const;
// Returns true if the word contains blamer information.
bool HasBlamerInfo() const;
// Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
// of the current word.
const void *GetParamsTrainingBundle() const;
// Returns a pointer to the string with blamer information for this word.
// Assumes that the word's blamer_bundle is not nullptr.
const char *GetBlamerDebug() const;
// Returns a pointer to the string with misadaption information for this word.
// Assumes that the word's blamer_bundle is not nullptr.
const char *GetBlamerMisadaptionDebug() const;
// Returns true if a truth string was recorded for the current word.
bool HasTruthString() const;
// Returns true if the given string is equivalent to the truth string for
// the current word.
bool EquivalentToTruth(const char *str) const;
// Returns a null terminated UTF-8 encoded truth string for the current word.
// Use delete [] to free after use.
char *WordTruthUTF8Text() const;
// Returns a null terminated UTF-8 encoded normalized OCR string for the
// current word. Use delete [] to free after use.
char *WordNormedUTF8Text() const;
// Returns a pointer to serialized choice lattice.
// Fills lattice_size with the number of bytes in lattice data.
const char *WordLattice(int *lattice_size) const;
// ============= Functions that refer to symbols only ============.
// Returns true if the current symbol is a superscript.
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
bool SymbolIsSuperscript() const;
// Returns true if the current symbol is a subscript.
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
bool SymbolIsSubscript() const;
// Returns true if the current symbol is a dropcap.
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
bool SymbolIsDropcap() const;
protected:
// String inserted at the end of each text line (see SetLineSeparator).
// NOTE(review): held as a raw const char*, so the string passed to the
// setter presumably has to outlive the iterator -- confirm in the .cpp.
const char *line_separator_;
// String inserted at the end of each paragraph (see SetParagraphSeparator).
// The same lifetime caveat as for line_separator_ applies.
const char *paragraph_separator_;
};
// Class to iterate over the classifier choices for a single RIL_SYMBOL.
class TESS_API ChoiceIterator {
public:
// Construction is from a LTRResultIterator that points to the symbol of
// interest. The ChoiceIterator allows a one-shot iteration over the
// choices for this symbol and after that it is useless.
explicit ChoiceIterator(const LTRResultIterator &result_it);
// NOTE(review): a destructor is declared but no copy operations are, and the
// class holds raw pointers; whether copying is safe depends on what the
// destructor frees, which is not visible from this header.
~ChoiceIterator();
// Moves to the next choice for the symbol and returns false if there
// are none left.
bool Next();
// ============= Accessing data ==============.
// Returns the null terminated UTF-8 encoded text string for the current
// choice.
// NOTE: Unlike LTRResultIterator::GetUTF8Text, the return points to an
// internal structure and should NOT be delete[]ed to free after use.
const char *GetUTF8Text() const;
// Returns the confidence of the current choice depending on the used language
// data. If only LSTM traineddata is used the value range is 0.0f - 1.0f. All
// choices for one symbol should roughly add up to 1.0f.
// If only traineddata of the legacy engine is used, the number should be
// interpreted as a percent probability. (0.0f-100.0f) In this case
// probabilities won't add up to 100. Each one stands on its own.
float Confidence() const;
// Returns a vector containing all timesteps, which belong to the currently
// selected symbol. A timestep is a vector containing pairs of symbols and
// floating point numbers. The number states the probability for the
// corresponding symbol.
// NOTE(review): ownership of the returned vector is not visible from this
// header -- confirm before deleting it.
std::vector<std::vector<std::pair<const char *, float>>> *Timesteps() const;
private:
// Clears the remaining spaces out of the results and adapts the
// probabilities.
void filterSpaces();
// Pointer to the WERD_RES object owned by the API.
WERD_RES *word_res_;
// Iterator over the blob choices.
BLOB_CHOICE_IT *choice_it_;
// (symbol, value) pairs for the LSTM case together with the current position
// in them; the only member with an in-class initializer (nullptr).
std::vector<std::pair<const char *, float>> *LSTM_choices_ = nullptr;
std::vector<std::pair<const char *, float>>::iterator LSTM_choice_it_;
// NOTE(review): presumably indexes the timesteps returned by Timesteps();
// confirm in the .cpp.
const int *tstep_index_;
// regulates the rating granularity
double rating_coefficient_;
// leading blanks
int blanks_before_word_;
// true when there is lstm engine related trained data
bool oemLSTM_;
};
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_

View File

@ -1,158 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
/**********************************************************************
* File: ocrclass.h
* Description: Class definitions and constants for the OCR API.
* Author: Hewlett-Packard Co
*
* (C) Copyright 1996, Hewlett-Packard Co.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
/**********************************************************************
* This file contains typedefs for all the structures used by
* the HP OCR interface.
* The structures are designed to allow them to be used with any
* structure alignment up to 8.
**********************************************************************/
#ifndef CCUTIL_OCRCLASS_H_
#define CCUTIL_OCRCLASS_H_
#include <chrono>
#include <ctime>
namespace tesseract {
/**********************************************************************
* EANYCODE_CHAR
* Description of a single character. The character code is defined by
* the character set of the current font.
* Output text is sent as an array of these structures.
* Spaces and line endings in the output are represented in the
* structures of the surrounding characters. They are not directly
* represented as characters.
* The first character in a word has a positive value of blanks.
* Missing information should be set to the defaults in the comments.
* If word bounds are known, but not character bounds, then the top and
* bottom of each character should be those of the word. The left of the
* first and right of the last char in each word should be set. All other
* lefts and rights should be set to -1.
* If set, the values of right and bottom are left+width and top+height.
* Most of the members come directly from the parameters to ocr_append_char.
* The formatting member uses the enhancement parameter and combines the
* line direction stuff into the top 3 bits.
* The coding is 0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para,
* 5=UL Para, 6=TB char, 7=BT char. API users do not need to know what
* the coding is, only that it is backwards compatible with the previous
* version.
**********************************************************************/
struct EANYCODE_CHAR { // one recognised character
  // Encoding note: since format version 2.0 char_code holds UTF-8. An ASCII
  // character fits in a single structure; any other character arrives as two
  // or more consecutive structures, each carrying one byte of its UTF-8
  // sequence and all sharing the same bounding box. Programs that handle
  // languages with different character sets must deal with these extended
  // characters themselves, and *all* code has to be ready for UTF-8 coded
  // characters such as bullets and fancy quotes.
  //
  // The value in parentheses after each field is its default.
  uint16_t char_code;  // the character itself
  int16_t left;        // left edge of the char (-1)
  int16_t right;       // right edge of the char (-1)
  int16_t top;         // top edge of the char (-1)
  int16_t bottom;      // bottom edge of the char (-1)
  int16_t font_index;  // which font (0)
  uint8_t confidence;  // 0 = perfect, 100 = reject (0/100)
  uint8_t point_size;  // size of the char, 72 = 1 inch (10)
  int8_t blanks;       // number of spaces before this char (1)
  uint8_t formatting;  // char formatting (0)
};

/**********************************************************************
 * ETEXT_DESC
 * Header describing the output of the OCR engine.
 * The same structure doubles as progress monitor and as final output
 * header, because it has to remain a valid progress monitor while the
 * OCR engine is still writing its output to shared memory.
 * While recognition is in progress all the buffer info is -1.
 * Progress starts at 0 and rises to 100 during OCR; there is no other
 * constraint. The progress callback additionally receives the bounding
 * box of the word that is currently being processed.
 * On every progress callback the OCR engine must set ocr_alive to 1.
 * The HP side sets ocr_alive back to 0; repeated failure to return it
 * to 1 indicates that the OCR engine is dead.
 * When the cancel function is not null it is called with the number of
 * user words found, and a true return value cancels the operation.
 **********************************************************************/
class ETEXT_DESC;

// Callback signatures stored in ETEXT_DESC.
using CANCEL_FUNC = bool (*)(void *, int);
// Invoked as (progress, left, right, top, bottom).
using PROGRESS_FUNC = bool (*)(int, int, int, int, int);
// Same as PROGRESS_FUNC, but receives the monitor instead of the progress.
using PROGRESS_FUNC2 = bool (*)(ETEXT_DESC *, int, int, int, int);

class ETEXT_DESC { // output header
public:
  int16_t count{0};    /// chars in this buffer (0)
  int16_t progress{0}; /// percent complete, increasing (0-100)
  /** The progress monitor covers word recognition only; it does not cover
   * layout analysis.
   * See Ray's comment in https://github.com/tesseract-ocr/tesseract/pull/27 */
  int8_t more_to_come{0};       /// true if this is not the last buffer
  volatile int8_t ocr_alive{0}; /// ocr sets to 1, HP sets to 0
  int8_t err_code{0};           /// for errcode use
  CANCEL_FUNC cancel{nullptr};  /// returns true to cancel
  PROGRESS_FUNC progress_callback{nullptr}; /// called whenever progress increases
  PROGRESS_FUNC2 progress_callback2;        /// monitor-aware progress callback
  void *cancel_this{nullptr};               /// this or other data for cancel
  /// Time to stop. Expected to be set only by a call to set_deadline_msecs().
  std::chrono::steady_clock::time_point end_time;
  EANYCODE_CHAR text[1]{}; /// character data

  // Installs the forwarding progress callback and leaves end_time at the
  // clock's epoch, which deadline_exceeded() treats as "no deadline".
  ETEXT_DESC() : progress_callback2(&default_progress_func), end_time() {}

  // Sets the end time to be deadline_msecs milliseconds from now.
  // Non-positive values are ignored and leave end_time untouched.
  void set_deadline_msecs(int32_t deadline_msecs) {
    if (deadline_msecs <= 0) {
      return;
    }
    const auto budget = std::chrono::milliseconds(deadline_msecs);
    end_time = std::chrono::steady_clock::now() + budget;
  }

  // Returns false if we've not passed the end_time, or have not set a deadline.
  bool deadline_exceeded() const {
    const bool deadline_set = end_time.time_since_epoch() !=
                              std::chrono::steady_clock::duration::zero();
    // The clock is only consulted when a deadline has actually been set.
    return deadline_set && std::chrono::steady_clock::now() > end_time;
  }

private:
  // Default value of progress_callback2: forwards to the plain
  // progress_callback when one is installed, otherwise reports true.
  static bool default_progress_func(ETEXT_DESC *ths, int left, int right,
                                    int top, int bottom) {
    const PROGRESS_FUNC legacy_callback = ths->progress_callback;
    if (legacy_callback == nullptr) {
      return true;
    }
    return legacy_callback(ths->progress, left, right, top, bottom);
  }
};
} // namespace tesseract
#endif // CCUTIL_OCRCLASS_H_

View File

@ -1,139 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: osdetect.h
// Description: Orientation and script detection.
// Author: Samuel Charron
// Ranjith Unnikrishnan
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCMAIN_OSDETECT_H_
#define TESSERACT_CCMAIN_OSDETECT_H_
#include "export.h" // for TESS_API
#include <vector> // for std::vector
namespace tesseract {
class BLOBNBOX;
class BLOBNBOX_CLIST;
class BLOB_CHOICE_LIST;
class TO_BLOCK_LIST;
class UNICHARSET;
class Tesseract;
// Upper bound on the number of scripts tracked per orientation:
// scripts in ICU (116) + "NULL" (1) + Japanese and Korean (2) + Fraktur (1).
constexpr int kMaxNumberOfScripts = 116 + 1 + 2 + 1;
// Best orientation/script pair together with two confidence values.
// A freshly constructed object has every field set to zero.
// NOTE(review): sconfidence / oconfidence presumably mean script and
// orientation confidence respectively -- confirm against the implementation.
struct OSBestResult {
  // Kept user-provided (rather than defaulted) so the type's construction
  // rules stay exactly as before; the zeroing is done by the initializers.
  OSBestResult() {}

  int orientation_id = 0;
  int script_id = 0;
  float sconfidence = 0.0f;
  float oconfidence = 0.0f;
};
struct OSResults {
OSResults() : unicharset(nullptr) {
for (int i = 0; i < 4; ++i) {
for (int j = 0; j < kMaxNumberOfScripts; ++j) {
scripts_na[i][j] = 0;
}
orientations[i] = 0;
}
}
void update_best_orientation();
// Set the estimate of the orientation to the given id.
void set_best_orientation(int orientation_id);
// Update/Compute the best estimate of the script assuming the given
// orientation id.
void update_best_script(int orientation_id);
// Return the index of the script with the highest score for this orientation.
TESS_API int get_best_script(int orientation_id) const;
// Accumulate scores with given OSResults instance and update the best script.
void accumulate(const OSResults &osr);
// Print statistics.
void print_scores(void) const;
void print_scores(int orientation_id) const;
// Array holding scores for each orientation id [0,3].
// Orientation ids [0..3] map to [0, 270, 180, 90] degree orientations of the
// page respectively, where the values refer to the amount of clockwise
// rotation to be applied to the page for the text to be upright and readable.
float orientations[4];
// Script confidence scores for each of 4 possible orientations.
float scripts_na[4][kMaxNumberOfScripts];
UNICHARSET *unicharset;
OSBestResult best_result;
};
// Orientation-detection helper working on an OSResults object.
// Only the interface is declared in this header; the behaviour of each member
// function is defined in the implementation file.
class OrientationDetector {
public:
// Takes an optional list of allowed script ids and the results object.
// NOTE(review): presumably both pointers are stored in allowed_scripts_ and
// osr_ (not copied), so they must outlive the detector -- confirm.
OrientationDetector(const std::vector<int> *allowed_scripts,
OSResults *results);
// NOTE(review): presumably scores one blob's choice list into the results;
// the meaning of the bool return is not visible here -- confirm.
bool detect_blob(BLOB_CHOICE_LIST *scores);
// NOTE(review): presumably returns an orientation id in [0,3] as used by
// OSResults::orientations -- confirm.
int get_orientation();
private:
// Results object supplied by the caller.
OSResults *osr_;
// Optional restriction on the scripts to consider.
const std::vector<int> *allowed_scripts_;
};
// Script-detection helper working on an OSResults object.
// Only the interface is declared in this header; the behaviour of each member
// function is defined in the implementation file.
class ScriptDetector {
public:
// Takes an optional list of allowed script ids, the results object and the
// Tesseract instance.
// NOTE(review): presumably all three pointers are stored (allowed_scripts_,
// osr_, tess_) rather than copied, so they must outlive the detector --
// confirm.
ScriptDetector(const std::vector<int> *allowed_scripts, OSResults *osr,
tesseract::Tesseract *tess);
// NOTE(review): presumably scores one blob's choice list into osr_ -- confirm.
void detect_blob(BLOB_CHOICE_LIST *scores);
// NOTE(review): presumably an early-exit test for the given orientation id;
// the exact criterion is not visible in this header.
bool must_stop(int orientation) const;
private:
// Results object supplied by the caller.
OSResults *osr_;
// Script name strings shared by all instances; defined out of line.
static const char *korean_script_;
static const char *japanese_script_;
static const char *fraktur_script_;
// Per-instance script ids.
// NOTE(review): presumably resolved once at construction -- confirm.
int korean_id_;
int japanese_id_;
int katakana_id_;
int hiragana_id_;
int han_id_;
int hangul_id_;
int latin_id_;
int fraktur_id_;
// Tesseract instance supplied by the caller.
tesseract::Tesseract *tess_;
// Optional restriction on the scripts to consider.
const std::vector<int> *allowed_scripts_;
};
// Free-function entry points for orientation and script detection. Only the
// declarations are visible here.
// NOTE(review): the meaning of the int/bool return values is not documented
// in this header -- confirm against the implementation before relying on it.

// Runs detection starting from an image file name, filling the OSResults.
int orientation_and_script_detection(const char *filename, OSResults *,
tesseract::Tesseract *);
// Runs detection on an already built list of blocks.
int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr,
tesseract::Tesseract *tess);
// Runs detection on a list of blobs, optionally restricted to
// allowed_scripts.
int os_detect_blobs(const std::vector<int> *allowed_scripts,
BLOBNBOX_CLIST *blob_list, OSResults *osr,
tesseract::Tesseract *tess);
// Processes a single blob with the given orientation and script detectors.
bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s,
OSResults *, tesseract::Tesseract *tess);
// Helper method to convert an orientation index to its value in degrees.
// The value represents the amount of clockwise rotation in degrees that must be
// applied for the text to be upright (readable).
// NOTE(review): presumably follows the [0, 270, 180, 90] mapping documented on
// OSResults::orientations -- confirm.
TESS_API int OrientationIdToValue(const int &id);
} // namespace tesseract
#endif // TESSERACT_CCMAIN_OSDETECT_H_

View File

@ -1,364 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: pageiterator.h
// Description: Iterator for tesseract page structure that avoids using
// tesseract internal data structures.
// Author: Ray Smith
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_
#define TESSERACT_CCMAIN_PAGEITERATOR_H_
#include "export.h"
#include "publictypes.h"
struct Pix;
struct Pta;
namespace tesseract {
struct BlamerBundle;
class C_BLOB_IT;
class PAGE_RES;
class PAGE_RES_IT;
class WERD;
class Tesseract;
/**
* Class to iterate over tesseract page structure, providing access to all
* levels of the page hierarchy, without including any tesseract headers or
* having to handle any tesseract structures.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
* DetectOS, or anything else that changes the internal PAGE_RES.
* See tesseract/publictypes.h for the definition of PageIteratorLevel.
* See also ResultIterator, derived from PageIterator, which adds in the
* ability to access OCR output with text-specific methods.
*/
class TESS_API PageIterator {
public:
/**
* page_res and tesseract come directly from the BaseAPI.
* The rectangle parameters are copied indirectly from the Thresholder,
* via the BaseAPI. They represent the coordinates of some rectangle in an
* original image (in top-left-origin coordinates) and therefore the top-left
* needs to be added to any output boxes in order to specify coordinates
* in the original image. See TessBaseAPI::SetRectangle.
* The scale and scaled_yres are in case the Thresholder scaled the image
* rectangle prior to thresholding. Any coordinates in tesseract's image
* must be divided by scale before adding (rect_left, rect_top).
* The scaled_yres indicates the effective resolution of the binary image
* that tesseract has been given by the Thresholder.
* After the constructor, Begin has already been called.
*/
PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
int scaled_yres, int rect_left, int rect_top, int rect_width,
int rect_height);
virtual ~PageIterator();
/**
* Page/ResultIterators may be copied! This makes it possible to iterate over
* all the objects at a lower level, while maintaining an iterator to
* objects at a higher level. These constructors DO NOT CALL Begin, so
* iterations will continue from the location of src.
*/
PageIterator(const PageIterator &src);
// Note: the assignment operator returns a reference to const, so the result
// of an assignment cannot itself be modified or assigned to.
const PageIterator &operator=(const PageIterator &src);
/** Are we positioned at the same location as other? */
bool PositionedAtSameWord(const PAGE_RES_IT *other) const;
// ============= Moving around within the page ============.
/**
* Moves the iterator to point to the start of the page to begin an
* iteration.
*/
virtual void Begin();
/**
* Moves the iterator to the beginning of the paragraph.
* This class implements this functionality by moving it to the zero indexed
* blob of the first (leftmost) word on the first row of the paragraph.
*/
virtual void RestartParagraph();
/**
* Return whether this iterator points anywhere in the first textline of a
* paragraph.
*/
bool IsWithinFirstTextlineOfParagraph() const;
/**
* Moves the iterator to the beginning of the text line.
* This class implements this functionality by moving it to the zero indexed
* blob of the first (leftmost) word of the row.
*/
virtual void RestartRow();
/**
* Moves to the start of the next object at the given level in the
* page hierarchy, and returns false if the end of the page was reached.
* NOTE that RIL_SYMBOL will skip non-text blocks, but all other
* PageIteratorLevel level values will visit each non-text block once.
* Think of non text blocks as containing a single para, with a single line,
* with a single imaginary word.
* Calls to Next with different levels may be freely intermixed.
* This function iterates words in right-to-left scripts correctly, if
* the appropriate language has been loaded into Tesseract.
*/
virtual bool Next(PageIteratorLevel level);
/**
* Returns true if the iterator is at the start of an object at the given
* level.
*
* For instance, suppose an iterator it is pointed to the first symbol of the
* first word of the third line of the second paragraph of the first block in
* a page, then:
* it.IsAtBeginningOf(RIL_BLOCK) = false
* it.IsAtBeginningOf(RIL_PARA) = false
* it.IsAtBeginningOf(RIL_TEXTLINE) = true
* it.IsAtBeginningOf(RIL_WORD) = true
* it.IsAtBeginningOf(RIL_SYMBOL) = true
*/
virtual bool IsAtBeginningOf(PageIteratorLevel level) const;
/**
* Returns whether the iterator is positioned at the last element in a
* given level. (e.g. the last word in a line, the last line in a block)
*
* Here's some two-paragraph example
* text. It starts off innocuously
* enough but quickly turns bizarre.
* The author inserts a cornucopia
* of words to guard against confused
* references.
*
* Now take an iterator it pointed to the start of "bizarre."
* it.IsAtFinalElement(RIL_PARA, RIL_SYMBOL) = false
* it.IsAtFinalElement(RIL_PARA, RIL_WORD) = true
* it.IsAtFinalElement(RIL_BLOCK, RIL_WORD) = false
*/
virtual bool IsAtFinalElement(PageIteratorLevel level,
PageIteratorLevel element) const;
/**
* Returns whether this iterator is positioned
* before other: -1
* equal to other: 0
* after other: 1
*/
int Cmp(const PageIterator &other) const;
// ============= Accessing data ==============.
// Coordinate system:
// Integer coordinates are at the cracks between the pixels.
// The top-left corner of the top-left pixel in the image is at (0,0).
// The bottom-right corner of the bottom-right pixel in the image is at
// (width, height).
// Every bounding box goes from the top-left of the top-left contained
// pixel to the bottom-right of the bottom-right contained pixel, so
// the bounding box of the single top-left pixel in the image is:
// (0,0)->(1,1).
// If an image rectangle has been set in the API, then returned coordinates
// relate to the original (full) image, rather than the rectangle.
/**
* Controls what to include in a bounding box. Bounding boxes of all levels
* between RIL_WORD and RIL_BLOCK can include or exclude potential diacritics.
* Between layout analysis and recognition, it isn't known where all
* diacritics belong, so this control is used to include or exclude some
* diacritics that are above or below the main body of the word. In most cases
* where the placement is obvious, and after recognition, it doesn't make as
* much difference, as the diacritics will already be included in the word.
* This setter only records the two flags in the members of the same name.
*/
void SetBoundingBoxComponents(bool include_upper_dots,
bool include_lower_dots) {
include_upper_dots_ = include_upper_dots;
include_lower_dots_ = include_lower_dots;
}
/**
* Returns the bounding rectangle of the current object at the given level.
* See comment on coordinate system above.
* Returns false if there is no such object at the current position.
* The returned bounding box is guaranteed to match the size and position
* of the image returned by GetBinaryImage, but may clip foreground pixels
* from a grey image. The padding argument to GetImage can be used to expand
* the image to include more foreground pixels. See GetImage below.
*/
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right,
int *bottom) const;
bool BoundingBox(PageIteratorLevel level, int padding, int *left, int *top,
int *right, int *bottom) const;
/**
* Returns the bounding rectangle of the object in a coordinate system of the
* working image rectangle having its origin at (rect_left_, rect_top_) with
* respect to the original image and is scaled by a factor scale_.
*/
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top,
int *right, int *bottom) const;
/** Returns whether there is no object of a given level. */
bool Empty(PageIteratorLevel level) const;
/**
* Returns the type of the current block.
* See tesseract/publictypes.h for PolyBlockType.
*/
PolyBlockType BlockType() const;
/**
* Returns the polygon outline of the current block. The returned Pta must
* be ptaDestroy-ed after use. Note that the returned Pta lists the vertices
* of the polygon, and the last edge is the line segment between the last
* point and the first point. nullptr will be returned if the iterator is
* at the end of the document or layout analysis was not used.
*/
Pta *BlockPolygon() const;
/**
* Returns a binary image of the current object at the given level.
* The position and size match the return from BoundingBoxInternal, and so
* this could be upscaled with respect to the original input image.
* Use pixDestroy to delete the image after use.
*/
Pix *GetBinaryImage(PageIteratorLevel level) const;
/**
* Returns an image of the current object at the given level in greyscale
* if available in the input. To guarantee a binary image use GetBinaryImage.
* NOTE that in order to give the best possible image, the bounds are
* expanded slightly over the binary connected component, by the supplied
* padding, so the top-left position of the returned image is returned
* in (left,top). These will most likely not match the coordinates
* returned by BoundingBox.
* If you do not supply an original image, you will get a binary one.
* Use pixDestroy to delete the image after use.
*/
Pix *GetImage(PageIteratorLevel level, int padding, Pix *original_img,
int *left, int *top) const;
/**
* Returns the baseline of the current object at the given level.
* The baseline is the line that passes through (x1, y1) and (x2, y2).
* WARNING: with vertical text, baselines may be vertical!
* Returns false if there is no baseline at the current position.
*/
bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2,
int *y2) const;
// Returns the attributes of the current row.
void RowAttributes(float *row_height, float *descenders,
float *ascenders) const;
/**
* Returns orientation for the block the iterator points to.
* orientation, writing_direction, textline_order: see publictypes.h
* deskew_angle: after rotating the block so the text orientation is
* upright, how many radians does one have to rotate the
* block anti-clockwise for it to be level?
* -Pi/4 <= deskew_angle <= Pi/4
*/
void Orientation(tesseract::Orientation *orientation,
tesseract::WritingDirection *writing_direction,
tesseract::TextlineOrder *textline_order,
float *deskew_angle) const;
/**
* Returns information about the current paragraph, if available.
*
* justification -
* LEFT if ragged right, or fully justified and script is left-to-right.
* RIGHT if ragged left, or fully justified and script is right-to-left.
* unknown if it looks like source code or we have very few lines.
* is_list_item -
* true if we believe this is a member of an ordered or unordered list.
* is_crown -
* true if the first line of the paragraph is aligned with the other
* lines of the paragraph even though subsequent paragraphs have first
* line indents. This typically indicates that this is the continuation
* of a previous paragraph or that it is the very first paragraph in
* the chapter.
* first_line_indent -
* For LEFT aligned paragraphs, the first text line of paragraphs of
* this kind are indented this many pixels from the left edge of the
* rest of the paragraph.
* for RIGHT aligned paragraphs, the first text line of paragraphs of
* this kind are indented this many pixels from the right edge of the
* rest of the paragraph.
* NOTE 1: This value may be negative.
* NOTE 2: if *is_crown == true, the first line of this paragraph is
* actually flush, and first_line_indent is set to the "common"
* first_line_indent for subsequent paragraphs in this block
* of text.
*/
void ParagraphInfo(tesseract::ParagraphJustification *justification,
bool *is_list_item, bool *is_crown,
int *first_line_indent) const;
// If the current WERD_RES (it_->word()) is not nullptr, sets the BlamerBundle
// of the current word to the given pointer (takes ownership of the pointer)
// and returns true.
// Can only be used when iterating on the word level.
bool SetWordBlamerBundle(BlamerBundle *blamer_bundle);
protected:
/**
* Sets up the internal data for iterating the blobs of a new word, then
* moves the iterator to the given offset.
*/
void BeginWord(int offset);
/** Pointer to the page_res owned by the API. */
PAGE_RES *page_res_;
/** Pointer to the Tesseract object owned by the API. */
Tesseract *tesseract_;
/**
* The iterator to the page_res_. Owned by this iterator object.
* A pointer just to avoid dragging in Tesseract includes.
*/
PAGE_RES_IT *it_;
/**
* The current input WERD being iterated. If there is an output from OCR,
* then word_ is nullptr. Owned by the API.
*/
WERD *word_;
/** The length of the current word_. */
int word_length_;
/** The current blob index within the word. */
int blob_index_;
/**
* Iterator to the blobs within the word. If nullptr, then we are iterating
* OCR results in the box_word.
* Owned by this iterator object.
*/
C_BLOB_IT *cblob_it_;
/** Control over what to include in bounding boxes. */
bool include_upper_dots_;
bool include_lower_dots_;
/** Parameters saved from the Thresholder. Needed to rebuild coordinates. */
int scale_;
int scaled_yres_;
int rect_left_;
int rect_top_;
int rect_width_;
int rect_height_;
};
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_PAGEITERATOR_H_

View File

@ -1,281 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: publictypes.h
// Description: Types used in both the API and internally
// Author: Ray Smith
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
#define TESSERACT_CCSTRUCT_PUBLICTYPES_H_
namespace tesseract {
// This file contains types that are used both by the API and internally
// to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
// dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT.
// Restated: It is OK for low-level Tesseract files to include publictypes.h,
// but not for the low-level tesseract code to include top-level API code.
// This file should not use other Tesseract types, as that would drag
// their includes into the API-level.
/** Printers' points per inch; the unit in which point sizes are returned. */
constexpr int kPointsPerInch{72};

/**
 * Lowest resolution considered believable. Serves as the default when no
 * other information is available, because under-estimating is safer than
 * over-estimating.
 */
constexpr int kMinCredibleResolution{70};

/** Highest resolution considered believable. */
constexpr int kMaxCredibleResolution{2400};

/**
 * Ratio between the median blob size and the likely resolution, used to
 * estimate a resolution when none is provided. Basically 1 / usual text size
 * in inches.
 */
constexpr int kResolutionEstimationFactor{10};
/**
 * Kinds of region a POLY_BLOCK or ColPartition can be.
 * Keep in sync with kPBColors in polyblk.cpp, with the PTIs*Type predicates
 * below and with kPolyBlockNames in layout_test.cc.
 * Used extensively by ColPartition and POLY_BLOCK.
 */
enum PolyBlockType {
  PT_UNKNOWN,         // Type is not yet known. Keep as the first element.
  PT_FLOWING_TEXT,    // Text that lives inside a column.
  PT_HEADING_TEXT,    // Text that spans more than one column.
  PT_PULLOUT_TEXT,    // Text that is in a cross-column pull-out region.
  PT_EQUATION,        // Partition belonging to an equation region.
  PT_INLINE_EQUATION, // Partition has inline equation.
  PT_TABLE,           // Partition belonging to a table region.
  PT_VERTICAL_TEXT,   // Text-line runs vertically.
  PT_CAPTION_TEXT,    // Text that belongs to an image.
  PT_FLOWING_IMAGE,   // Image that lives inside a column.
  PT_HEADING_IMAGE,   // Image that spans more than one column.
  PT_PULLOUT_IMAGE,   // Image that is in a cross-column pull-out region.
  PT_HORZ_LINE,       // Horizontal Line.
  PT_VERT_LINE,       // Vertical Line.
  PT_NOISE,           // Lies outside of any column.
  PT_COUNT
};

/** Returns true if the type is a line type (horizontal or vertical). */
inline bool PTIsLineType(PolyBlockType type) {
  switch (type) {
    case PT_HORZ_LINE:
    case PT_VERT_LINE:
      return true;
    default:
      return false;
  }
}

/** Returns true if the type is one of the image types. */
inline bool PTIsImageType(PolyBlockType type) {
  switch (type) {
    case PT_FLOWING_IMAGE:
    case PT_HEADING_IMAGE:
    case PT_PULLOUT_IMAGE:
      return true;
    default:
      return false;
  }
}

/**
 * Returns true if the type is one of the text types. Tables and inline
 * equations count as text; PT_EQUATION does not.
 */
inline bool PTIsTextType(PolyBlockType type) {
  switch (type) {
    case PT_FLOWING_TEXT:
    case PT_HEADING_TEXT:
    case PT_PULLOUT_TEXT:
    case PT_TABLE:
    case PT_VERTICAL_TEXT:
    case PT_CAPTION_TEXT:
    case PT_INLINE_EQUATION:
      return true;
    default:
      return false;
  }
}

// Returns true if the type is a pull-out (inter-column) type, image or text.
inline bool PTIsPulloutType(PolyBlockType type) {
  switch (type) {
    case PT_PULLOUT_IMAGE:
    case PT_PULLOUT_TEXT:
      return true;
    default:
      return false;
  }
}
/**
* +------------------+ Orientation Example:
* | 1 Aaaa Aaaa Aaaa | ====================
* | Aaa aa aaa aa | To left is a diagram of some (1) English and
* | aaaaaa A aa aaa. | (2) Chinese text and a (3) photo credit.
* | 2 |
* | ####### c c C | Upright Latin characters are represented as A and a.
* | ####### c c c | '<' represents a latin character rotated
* | < ####### c c c | anti-clockwise 90 degrees.
* | < ####### c c |
* | < ####### . c | Upright Chinese characters are represented C and c.
* | 3 ####### c |
* +------------------+ NOTA BENE: enum values here should match goodoc.proto
* If you orient your head so that "up" aligns with Orientation,
* then the characters will appear "right side up" and readable.
*
* In the example above, both the English and Chinese paragraphs are oriented
* so their "up" is the top of the page (page up). The photo credit is read
* with one's head turned leftward ("up" is to page left).
*
* The values of this enum match the convention of Tesseract's osdetect.h
*/
// Direction in which the "up" of the text points relative to the page.
// The numeric values are explicit and, as stated in the comment above, follow
// the osdetect.h convention, so they must not be renumbered.
enum Orientation {
ORIENTATION_PAGE_UP = 0, // text "up" is the top of the page
ORIENTATION_PAGE_RIGHT = 1, // text "up" is to page right
ORIENTATION_PAGE_DOWN = 2, // text "up" is the bottom of the page
ORIENTATION_PAGE_LEFT = 3, // text "up" is to page left
};
/**
* The grapheme clusters within a line of text are laid out logically
* in this direction, judged when looking at the text line rotated so that
* its Orientation is "page up".
*
* For English text, the writing direction is left-to-right. For the
* Chinese text in the above example, the writing direction is top-to-bottom.
*/
// Direction in which grapheme clusters run within a text line, judged with
// the line rotated so that its Orientation is "page up". Values are explicit.
enum WritingDirection {
WRITING_DIRECTION_LEFT_TO_RIGHT = 0, // e.g. English text
WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
WRITING_DIRECTION_TOP_TO_BOTTOM = 2, // e.g. the vertical Chinese text in the example above
};
/**
* The text lines are read in the given sequence.
*
* In English, the order is top-to-bottom.
* In Chinese, vertical text lines are read right-to-left. Mongolian is
* written in vertical columns top to bottom like Chinese, but the lines
* are ordered left-to-right.
*
* Note that only some combinations make sense. For example,
* WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM
*/
// Sequence in which successive text lines are read. Values are explicit.
enum TextlineOrder {
TEXTLINE_ORDER_LEFT_TO_RIGHT = 0, // e.g. Mongolian vertical columns
TEXTLINE_ORDER_RIGHT_TO_LEFT = 1, // e.g. vertical Chinese text lines
TEXTLINE_ORDER_TOP_TO_BOTTOM = 2, // e.g. English
};
/**
 * Page layout analysis modes. Apart from PSM_OSD_ONLY, the entries *must*
 * stay ordered by decreasing amount of layout analysis, because the
 * PSM_*_ENABLED range tests below rely on that ordering.
 */
enum PageSegMode {
  PSM_OSD_ONLY = 0,      ///< Orientation and script detection only.
  PSM_AUTO_OSD = 1,      ///< Automatic page segmentation with orientation and
                         ///< script detection. (OSD)
  PSM_AUTO_ONLY = 2,     ///< Automatic page segmentation, but no OSD, or OCR.
  PSM_AUTO = 3,          ///< Fully automatic page segmentation, but no OSD.
  PSM_SINGLE_COLUMN = 4, ///< Assume a single column of text of variable sizes.
  PSM_SINGLE_BLOCK_VERT_TEXT = 5, ///< Assume a single uniform block of
                                  ///< vertically aligned text.
  PSM_SINGLE_BLOCK = 6,  ///< Assume a single uniform block of text. (Default.)
  PSM_SINGLE_LINE = 7,   ///< Treat the image as a single text line.
  PSM_SINGLE_WORD = 8,   ///< Treat the image as a single word.
  PSM_CIRCLE_WORD = 9,   ///< Treat the image as a single word in a circle.
  PSM_SINGLE_CHAR = 10,  ///< Treat the image as a single character.
  PSM_SPARSE_TEXT = 11,  ///< Find as much text as possible in no particular
                         ///< order.
  PSM_SPARSE_TEXT_OSD = 12, ///< Sparse text with orientation and script det.
  PSM_RAW_LINE = 13,     ///< Treat the image as a single text line, bypassing
                         ///< hacks that are Tesseract-specific.
  PSM_COUNT              ///< Number of enum entries.
};

/**
 * Predicates telling which components of layout analysis a PageSegMode
 * enables.
 * *They depend critically on the order of the PageSegMode entries.*
 * NOTE that the argument is an int for compatibility with INT_PARAM.
 */

// Orientation and script detection: modes up to PSM_AUTO_OSD, plus the
// sparse-text OSD mode.
inline bool PSM_OSD_ENABLED(int pageseg_mode) {
  if (pageseg_mode == PSM_SPARSE_TEXT_OSD) {
    return true;
  }
  return pageseg_mode <= PSM_AUTO_OSD;
}

// Orientation handling: modes up to PSM_AUTO, plus the sparse-text OSD mode.
inline bool PSM_ORIENTATION_ENABLED(int pageseg_mode) {
  if (pageseg_mode == PSM_SPARSE_TEXT_OSD) {
    return true;
  }
  return pageseg_mode <= PSM_AUTO;
}

// Column finding: PSM_AUTO_OSD through PSM_AUTO.
inline bool PSM_COL_FIND_ENABLED(int pageseg_mode) {
  return !(pageseg_mode < PSM_AUTO_OSD || pageseg_mode > PSM_AUTO);
}

// Either of the two sparse-text modes.
inline bool PSM_SPARSE(int pageseg_mode) {
  switch (pageseg_mode) {
    case PSM_SPARSE_TEXT:
    case PSM_SPARSE_TEXT_OSD:
      return true;
    default:
      return false;
  }
}

// Block finding: PSM_AUTO_OSD through PSM_SINGLE_COLUMN.
inline bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode) {
  return !(pageseg_mode < PSM_AUTO_OSD || pageseg_mode > PSM_SINGLE_COLUMN);
}

// Line finding: PSM_AUTO_OSD through PSM_SINGLE_BLOCK.
inline bool PSM_LINE_FIND_ENABLED(int pageseg_mode) {
  return !(pageseg_mode < PSM_AUTO_OSD || pageseg_mode > PSM_SINGLE_BLOCK);
}

// Word finding: PSM_AUTO_OSD through PSM_SINGLE_LINE, plus both sparse modes.
inline bool PSM_WORD_FIND_ENABLED(int pageseg_mode) {
  if (PSM_SPARSE(pageseg_mode)) {
    return true;
  }
  return PSM_AUTO_OSD <= pageseg_mode && pageseg_mode <= PSM_SINGLE_LINE;
}
/**
 * Levels of the page hierarchy, listed from coarsest (block) to finest
 * (symbol). ResultIterator takes one of these as an argument so that a single
 * function can operate at any level instead of needing one function per level.
 */
enum PageIteratorLevel {
  RIL_BLOCK = 0,    ///< Block of text/image/separator line.
  RIL_PARA = 1,     ///< Paragraph within a block.
  RIL_TEXTLINE = 2, ///< Line within a paragraph.
  RIL_WORD = 3,     ///< Word within a textline.
  RIL_SYMBOL = 4    ///< Symbol/character within a word.
};
/**
* JUSTIFICATION_UNKNOWN
* The alignment is not clearly one of the other options. This could happen
* for example if there are only one or two lines of text or the text looks
* like source code or poetry.
*
* NOTA BENE: Fully justified paragraphs (text aligned to both left and right
* margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text
* is written with a left-to-right script and with JUSTIFICATION_RIGHT if
* their text is written in a right-to-left script.
*
* Interpretation for text read in vertical lines:
* "Left" is wherever the starting reading position is.
*
* JUSTIFICATION_LEFT
* Each line, except possibly the first, is flush to the same left tab stop.
*
* JUSTIFICATION_CENTER
* The text lines of the paragraph are centered about a line going
* down through their middle of the text lines.
*
* JUSTIFICATION_RIGHT
* Each line, except possibly the first, is flush to the same right tab stop.
*/
enum ParagraphJustification {
  JUSTIFICATION_UNKNOWN = 0, ///< Alignment is not clearly one of the others.
  JUSTIFICATION_LEFT = 1,    ///< Lines flush to the same left tab stop.
  JUSTIFICATION_CENTER = 2,  ///< Lines centered about a common middle line.
  JUSTIFICATION_RIGHT = 3,   ///< Lines flush to the same right tab stop.
};
/**
* When Tesseract/Cube is initialized we can choose to instantiate/load/run
* only the Tesseract part, only the Cube part or both along with the combiner.
* The preference of which engine to use is stored in tessedit_ocr_engine_mode.
*
* ATTENTION: When modifying this enum, please make sure to make the
* appropriate changes to all the enums mirroring it (e.g. OCREngine in
* cityblock/workflow/detection/detection_storage.proto). Such enums will
* mention the connection to OcrEngineMode in the comments.
*/
enum OcrEngineMode {
  /// Run Tesseract only - fastest; deprecated.
  OEM_TESSERACT_ONLY = 0,
  /// Run just the LSTM line recognizer.
  OEM_LSTM_ONLY = 1,
  /// Run the LSTM recognizer, but allow fallback to Tesseract when things get
  /// difficult; deprecated.
  OEM_TESSERACT_LSTM_COMBINED = 2,
  /// Pass this to init_*() to have one of the modes above inferred from the
  /// variables in the language-specific config or the command-line configs.
  /// The original comment states that, if none of those specify a mode, the
  /// default is OEM_TESSERACT_ONLY.
  /// NOTE(review): confirm that fallback against the engine initialization
  /// code; it cannot be verified from this header.
  OEM_DEFAULT = 3,
  /// Number of OEMs.
  OEM_COUNT = 4
};
} // namespace tesseract.
#endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_

View File

@ -1,311 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: renderer.h
// Description: Rendering interface to inject into TessBaseAPI
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_API_RENDERER_H_
#define TESSERACT_API_RENDERER_H_
#include "export.h"
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
// complexity of includes here. Use forward declarations wherever possible
// and hide includes of complex types in baseapi.cpp.
#include <cstdint>
#include <string> // for std::string
#include <vector> // for std::vector
struct Pix;
namespace tesseract {
class TessBaseAPI;
/**
* Interface for rendering tesseract results into a document, such as text,
* HOCR or pdf. This class is abstract. Specific classes handle individual
* formats. This interface is then used to inject the renderer class into
* tesseract when processing images.
*
* For simplicity implementing this with tesseract version 3.01,
* the renderer contains document state that is cleared from document
* to document just as the TessBaseAPI is. This way the base API can just
* delegate its rendering functionality to injected renderers, and the
* renderers can manage the associated state needed for the specific formats
* in addition to the heuristics for producing it.
*/
class TESS_API TessResultRenderer {
public:
// Virtual so that concrete renderers can be destroyed through a base-class
// pointer (renderers are chained and handed around as TessResultRenderer *).
// NOTE(review): no copy constructor/assignment is declared here even though
// the class holds raw pointers (next_, fout_) and insert() documents taking
// ownership; confirm that renderers are never copied.
virtual ~TessResultRenderer();
// Takes ownership of pointer so must be new'd instance.
// Renderers aren't ordered, but appends the sequences of next parameter
// and existing next(). The renderers should be unique across both lists.
void insert(TessResultRenderer *next);
// Returns the next renderer or nullptr.
TessResultRenderer *next() {
return next_;
}
/**
* Starts a new document with the given title.
* This clears the contents of the output data.
* Title should use UTF-8 encoding.
*/
bool BeginDocument(const char *title);
/**
* Adds the recognized text from the source image to the current document.
* Invalid if BeginDocument not yet called.
*
* Note that this API is a bit weird but is designed to fit into the
* current TessBaseAPI implementation where the api has lots of state
* information that we might want to add in.
*/
bool AddImage(TessBaseAPI *api);
/**
* Finishes the document and finalizes the output data
* Invalid if BeginDocument not yet called.
*/
bool EndDocument();
// Returns the stored extension pointer as-is (no copy is made here).
const char *file_extension() const {
return file_extension_;
}
// Returns the current title. The pointer refers to title_'s internal buffer,
// so it stays valid only while title_ is unchanged and this object is alive.
const char *title() const {
return title_.c_str();
}
// Is everything fine? Otherwise something went wrong.
bool happy() const {
return happy_;
}
/**
* Returns the index of the last image given to AddImage
* (i.e. images are incremented whether the image succeeded or not)
*
* This is always defined. It means either the number of the
* current image, the last image ended, or in the completed document
* depending on when in the document lifecycle you are looking at it.
* Will return -1 if a document was never started.
*/
int imagenum() const {
return imagenum_;
}
protected:
/**
* Called by concrete classes.
*
* outputbase is the name of the output file excluding
* extension. For example, "/path/to/chocolate-chip-cookie-recipe"
*
* extension indicates the file extension to be used for output
* files. For example "pdf" will produce a .pdf file, and "hocr"
* will produce .hocr files.
*/
TessResultRenderer(const char *outputbase, const char *extension);
// Hook for specialized handling in BeginDocument()
virtual bool BeginDocumentHandler();
// This must be overridden to render the OCR'd results
// (pure virtual, which is what makes this class abstract).
virtual bool AddImageHandler(TessBaseAPI *api) = 0;
// Hook for specialized handling in EndDocument()
virtual bool EndDocumentHandler();
// Renderers can call this to append '\0' terminated strings into
// the output string returned by GetOutput.
// This method will grow the output buffer if needed.
// NOTE(review): no GetOutput member is declared in this class; the reference
// above (and in AppendData below) may be stale - confirm where the data goes.
void AppendString(const char *s);
// Renderers can call this to append binary byte sequences into
// the output string returned by GetOutput. Note that s is not necessarily
// '\0' terminated (and can contain '\0' within it).
// This method will grow the output buffer if needed.
void AppendData(const char *s, int len);
private:
TessResultRenderer *next_; // Can link multiple renderers together
// NOTE(review): FILE is used here but <cstdio> is not among the standard
// headers this file includes directly; presumably it arrives transitively -
// confirm.
FILE *fout_; // output file pointer
const char *file_extension_; // standard extension for generated output
std::string title_; // title of document being rendered
int imagenum_; // index of last image added
bool happy_; // I get grumpy when the disk fills up, etc.
};
/**
 * Renders tesseract output into a plain UTF-8 text string.
 * Concrete TessResultRenderer that overrides only the per-image hook.
 */
class TESS_API TessTextRenderer : public TessResultRenderer {
public:
// outputbase: presumably the output file name without extension, as
// documented on the TessResultRenderer constructor - TODO confirm.
explicit TessTextRenderer(const char *outputbase);
protected:
// Implements the pure-virtual per-image hook of TessResultRenderer.
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
 * Renders tesseract output into an hocr text string.
 * Overrides all three TessResultRenderer hooks (begin, per-image, end).
 */
class TESS_API TessHOcrRenderer : public TessResultRenderer {
public:
// outputbase: presumably the output file name without extension, as
// documented on the TessResultRenderer constructor - TODO confirm.
// font_info: presumably stored in font_info_ (whether to print font
// information) - TODO confirm in the implementation.
explicit TessHOcrRenderer(const char *outputbase, bool font_info);
// Overload without the font_info flag; the value it selects for font_info_
// is decided in the implementation file, which is not visible here.
explicit TessHOcrRenderer(const char *outputbase);
protected:
// Overrides of the TessResultRenderer document/image hooks.
bool BeginDocumentHandler() override;
bool AddImageHandler(TessBaseAPI *api) override;
bool EndDocumentHandler() override;
private:
bool font_info_; // whether to print font information
};
/**
 * Renders tesseract output into an alto text string.
 * Overrides all three TessResultRenderer hooks (begin, per-image, end).
 */
class TESS_API TessAltoRenderer : public TessResultRenderer {
public:
// outputbase: presumably the output file name without extension, as
// documented on the TessResultRenderer constructor - TODO confirm.
explicit TessAltoRenderer(const char *outputbase);
protected:
// Overrides of the TessResultRenderer document/image hooks.
bool BeginDocumentHandler() override;
bool AddImageHandler(TessBaseAPI *api) override;
bool EndDocumentHandler() override;
private:
// State flag whose exact meaning is defined by the implementation file (not
// visible here). Note that, unlike the other renderer members in this
// header, its name carries no trailing underscore.
bool begin_document;
};
/**
 * Renders Tesseract output into a TSV string.
 * Overrides all three TessResultRenderer hooks (begin, per-image, end).
 */
class TESS_API TessTsvRenderer : public TessResultRenderer {
public:
// outputbase: presumably the output file name without extension, as
// documented on the TessResultRenderer constructor - TODO confirm.
// font_info: presumably stored in font_info_ (whether to print font
// information) - TODO confirm in the implementation.
explicit TessTsvRenderer(const char *outputbase, bool font_info);
// Overload without the font_info flag; the value it selects for font_info_
// is decided in the implementation file, which is not visible here.
explicit TessTsvRenderer(const char *outputbase);
protected:
// Overrides of the TessResultRenderer document/image hooks.
bool BeginDocumentHandler() override;
bool AddImageHandler(TessBaseAPI *api) override;
bool EndDocumentHandler() override;
private:
bool font_info_; // whether to print font information
};
/**
 * Renders tesseract output into searchable PDF.
 * Overrides all three TessResultRenderer hooks and keeps per-document
 * bookkeeping (object counter, object offsets, page object numbers).
 */
class TESS_API TessPDFRenderer : public TessResultRenderer {
public:
// datadir is the location of the TESSDATA. We need it because
// we load a custom PDF font from this location.
// textonly defaults to false; per the textonly_ member comment, images are
// skipped when it is set.
TessPDFRenderer(const char *outputbase, const char *datadir,
bool textonly = false);
protected:
// Overrides of the TessResultRenderer document/image hooks.
bool BeginDocumentHandler() override;
bool AddImageHandler(TessBaseAPI *api) override;
bool EndDocumentHandler() override;
private:
// We don't want to have every image in memory at once,
// so we store some metadata as we go along producing
// PDFs one page at a time. At the end, that metadata is
// used to make everything that isn't easily handled in a
// streaming fashion.
long int obj_; // counter for PDF objects
std::vector<uint64_t> offsets_; // offset of every PDF object in bytes
std::vector<long int> pages_; // object number for every /Page object
std::string datadir_; // where to find the custom font
bool textonly_; // skip images if set
// Bookkeeping only. DIY = Do It Yourself.
void AppendPDFObjectDIY(size_t objectsize);
// Bookkeeping + emit data.
void AppendPDFObject(const char *data);
// Create the /Contents object for an entire page.
// NOTE(review): returns a raw char *; who frees it (and with which
// deallocator) is not stated in this header - confirm in the implementation.
char *GetPDFTextObjects(TessBaseAPI *api, double width, double height);
// Turn an image into a PDF object. Only transcode if we have to.
// pdf_object and pdf_object_size are pointer parameters, presumably
// receiving the generated object and its size - TODO confirm, including
// ownership of *pdf_object.
static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum,
char **pdf_object, long int *pdf_object_size,
int jpg_quality);
};
/**
 * Renders tesseract output into a plain UTF-8 text string.
 * NOTE(review): this description is word-for-word the same as the one on
 * TessTextRenderer; the class name suggests UNLV-format output instead -
 * confirm against the implementation.
 */
class TESS_API TessUnlvRenderer : public TessResultRenderer {
public:
// outputbase: presumably the output file name without extension, as
// documented on the TessResultRenderer constructor - TODO confirm.
explicit TessUnlvRenderer(const char *outputbase);
protected:
// Implements the pure-virtual per-image hook of TessResultRenderer.
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
 * Renders tesseract output into a plain UTF-8 text string for LSTMBox.
 * Concrete TessResultRenderer that overrides only the per-image hook.
 */
class TESS_API TessLSTMBoxRenderer : public TessResultRenderer {
public:
// outputbase: presumably the output file name without extension, as
// documented on the TessResultRenderer constructor - TODO confirm.
explicit TessLSTMBoxRenderer(const char *outputbase);
protected:
// Implements the pure-virtual per-image hook of TessResultRenderer.
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
 * Renders tesseract output into a plain UTF-8 text string.
 * NOTE(review): this description is word-for-word the same as the one on
 * TessTextRenderer; the class name suggests box-file text output - confirm
 * against the implementation.
 */
class TESS_API TessBoxTextRenderer : public TessResultRenderer {
public:
// outputbase: presumably the output file name without extension, as
// documented on the TessResultRenderer constructor - TODO confirm.
explicit TessBoxTextRenderer(const char *outputbase);
protected:
// Implements the pure-virtual per-image hook of TessResultRenderer.
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
 * Renders tesseract output into a plain UTF-8 text string in WordStr format.
 * Concrete TessResultRenderer that overrides only the per-image hook.
 */
class TESS_API TessWordStrBoxRenderer : public TessResultRenderer {
public:
// outputbase: presumably the output file name without extension, as
// documented on the TessResultRenderer constructor - TODO confirm.
explicit TessWordStrBoxRenderer(const char *outputbase);
protected:
// Implements the pure-virtual per-image hook of TessResultRenderer.
bool AddImageHandler(TessBaseAPI *api) override;
};
#ifndef DISABLED_LEGACY_ENGINE
/**
 * Renders tesseract output into an osd text string.
 * Only declared when DISABLED_LEGACY_ENGINE is not defined (see the
 * surrounding preprocessor guard).
 */
class TESS_API TessOsdRenderer : public TessResultRenderer {
public:
// outputbase: presumably the output file name without extension, as
// documented on the TessResultRenderer constructor - TODO confirm.
explicit TessOsdRenderer(const char *outputbase);
protected:
// Implements the pure-virtual per-image hook of TessResultRenderer.
bool AddImageHandler(TessBaseAPI *api) override;
};
#endif // ndef DISABLED_LEGACY_ENGINE
} // namespace tesseract.
#endif // TESSERACT_API_RENDERER_H_

View File

@ -1,250 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: resultiterator.h
// Description: Iterator for tesseract results that is capable of
// iterating in proper reading order over Bi Directional
// (e.g. mixed Hebrew and English) text.
// Author: David Eger
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
#include "export.h" // for TESS_API, TESS_LOCAL
#include "ltrresultiterator.h" // for LTRResultIterator
#include "publictypes.h" // for PageIteratorLevel
#include "unichar.h" // for StrongScriptDirection
#include <set> // for std::pair
#include <vector> // for std::vector
namespace tesseract {
// Iterator over tesseract results that visits elements in proper reading
// order, including bidirectional (e.g. mixed Hebrew and English) text.
// Extends LTRResultIterator by overriding its movement and position queries.
class TESS_API ResultIterator : public LTRResultIterator {
public:
// Factory: builds a ResultIterator from an existing LTRResultIterator.
// Presumably the result starts at the beginning of resit's paragraph (see
// the note on the protected constructor) and is owned by the caller -
// TODO confirm ownership in the implementation.
static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);
/**
* ResultIterator is copy constructible!
* The default copy constructor works just fine for us.
*/
~ResultIterator() override = default;
// ============= Moving around within the page ============.
/**
* Moves the iterator to point to the start of the page to begin
* an iteration.
*/
void Begin() override;
/**
* Moves to the start of the next object at the given level in the
* page hierarchy in the appropriate reading order and returns false if
* the end of the page was reached.
* NOTE that RIL_SYMBOL will skip non-text blocks, but all other
* PageIteratorLevel level values will visit each non-text block once.
* Think of non text blocks as containing a single para, with a single line,
* with a single imaginary word.
* Calls to Next with different levels may be freely intermixed.
* This function iterates words in right-to-left scripts correctly, if
* the appropriate language has been loaded into Tesseract.
*/
bool Next(PageIteratorLevel level) override;
/**
* IsAtBeginningOf() returns whether we're at the logical beginning of the
* given level. (as opposed to ResultIterator's left-to-right top-to-bottom
* order). Otherwise, this acts the same as PageIterator::IsAtBeginningOf().
* For a full description, see pageiterator.h
*/
bool IsAtBeginningOf(PageIteratorLevel level) const override;
/**
* Implement PageIterator's IsAtFinalElement correctly in a BiDi context.
* For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we
* point at the last word in a paragraph. See PageIterator for full comment.
*/
bool IsAtFinalElement(PageIteratorLevel level,
PageIteratorLevel element) const override;
// ============= Functions that refer to words only ============.
// Returns the number of blanks before the current word.
int BlanksBeforeWord() const;
// ============= Accessing data ==============.
/**
* Returns the null terminated UTF-8 encoded text string for the current
* object at the given level. Use delete [] to free after use.
*/
virtual char *GetUTF8Text(PageIteratorLevel level) const;
/**
* Returns the LSTM choices for every LSTM timestep for the current word.
* NOTE(review): the result is a raw pointer to a vector; whether the caller
* owns it is not stated in this header - confirm before freeing or storing.
* NOTE(review): std::pair is declared in <utility>; this header's include
* list attributes it to <set> - confirm it is reliably available.
*/
virtual std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
*GetRawLSTMTimesteps() const;
// Same ownership caveat as GetRawLSTMTimesteps(): raw pointer to a vector of
// (text, score) pair lists; the meaning of the float is presumably a
// confidence/probability - TODO confirm.
virtual std::vector<std::vector<std::pair<const char *, float>>>
*GetBestLSTMSymbolChoices() const;
/**
* Return whether the current paragraph's dominant reading direction
* is left-to-right (as opposed to right-to-left).
*/
bool ParagraphIsLtr() const;
// ============= Exposed only for testing =============.
/**
* Yields the reading order as a sequence of indices and (optional)
* meta-marks for a set of words (given left-to-right).
* The meta marks are passed as negative values:
* kMinorRunStart Start of minor direction text.
* kMinorRunEnd End of minor direction text.
* kComplexWord The next indexed word contains both left-to-right and
* right-to-left characters and was treated as neutral.
* NOTE(review): the private overloads below describe kComplexWord as
* referring to the *previous* word; one of the two descriptions is
* presumably wrong - confirm against the implementation.
*
* For example, suppose we have five words in a text line,
* indexed [0,1,2,3,4] from the leftmost side of the text line.
* The following are all believable reading_orders:
*
* Left-to-Right (in ltr paragraph):
* { 0, 1, 2, 3, 4 }
* Left-to-Right (in rtl paragraph):
* { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }
* Right-to-Left (in rtl paragraph):
* { 4, 3, 2, 1, 0 }
* Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph:
* { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }
*/
static void CalculateTextlineOrder(
bool paragraph_is_ltr,
const std::vector<StrongScriptDirection> &word_dirs,
std::vector<int> *reading_order);
// Meta-mark values used in reading-order vectors (documented above as
// negative values); their definitions live in the implementation file.
static const int kMinorRunStart;
static const int kMinorRunEnd;
static const int kComplexWord;
protected:
/**
* We presume the data associated with the given iterator will outlive us.
* NB: This is protected (not public) because it does something that is
* non-obvious: it resets to the beginning of the paragraph instead of
* staying wherever resit might have pointed.
*/
explicit ResultIterator(const LTRResultIterator &resit);
private:
/**
* Calculates the current paragraph's dominant writing direction.
* Typically, members should use current_paragraph_is_ltr_ instead.
*/
bool CurrentParagraphIsLtr() const;
/**
* Returns word indices as measured from resit->RestartRow() = index 0
* for the reading order of words within a textline given an iterator
* into the middle of the text line.
* In addition to non-negative word indices, the following negative values
* may be inserted:
* kMinorRunStart Start of minor direction text.
* kMinorRunEnd End of minor direction text.
* kComplexWord The previous word contains both left-to-right and
* right-to-left characters and was treated as neutral.
*/
void CalculateTextlineOrder(bool paragraph_is_ltr,
const LTRResultIterator &resit,
std::vector<int> *indices) const;
/** Same as above, but the caller's ssd gets filled in if ssd != nullptr. */
void CalculateTextlineOrder(bool paragraph_is_ltr,
const LTRResultIterator &resit,
std::vector<StrongScriptDirection> *ssd,
std::vector<int> *indices) const;
/**
* What is the index of the current word in a strict left-to-right reading
* of the row?
*/
int LTRWordIndex() const;
/**
* Given an iterator pointing at a word, returns the logical reading order
* of blob indices for the word.
*/
void CalculateBlobOrder(std::vector<int> *blob_indices) const;
/** Precondition: current_paragraph_is_ltr_ is set. */
void MoveToLogicalStartOfTextline();
/**
* Precondition: current_paragraph_is_ltr_ and in_minor_direction_
* are set.
*/
void MoveToLogicalStartOfWord();
/** Are we pointing at the final (reading order) symbol of the word? */
bool IsAtFinalSymbolOfWord() const;
/** Are we pointing at the first (reading order) symbol of the word? */
bool IsAtFirstSymbolOfWord() const;
/**
* Append any extra marks that should be appended to this word when printed.
* Mostly, these are Unicode BiDi control characters.
*/
void AppendSuffixMarks(std::string *text) const;
/** Appends the current word in reading order to the given buffer.*/
void AppendUTF8WordText(std::string *text) const;
/**
* Appends the text of the current text line, *assuming this iterator is
* positioned at the beginning of the text line* This function
* updates the iterator to point to the first position past the text line.
* Each textline is terminated in a single newline character.
* If the textline ends a paragraph, it gets a second terminal newline.
*/
void IterateAndAppendUTF8TextlineText(std::string *text);
/**
* Appends the text of the current paragraph in reading order
* to the given buffer.
* Each textline is terminated in a single newline character, and the
* paragraph gets an extra newline at the end.
*/
void AppendUTF8ParagraphText(std::string *text) const;
/** Returns whether the bidi_debug flag is set to at least min_level. */
bool BidiDebug(int min_level) const;
/**
* Cached dominant direction of the current paragraph; the private helpers
* above list it as a precondition, and CurrentParagraphIsLtr() computes it.
*/
bool current_paragraph_is_ltr_;
/**
* Is the currently pointed-at character at the beginning of
* a minor-direction run?
*/
bool at_beginning_of_minor_run_;
/** Is the currently pointed-at character in a minor-direction sequence? */
bool in_minor_direction_;
/**
* Should detected inter-word spaces be preserved, or "compressed" to a single
* space character (default behavior).
*/
bool preserve_interword_spaces_;
};
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_

View File

@ -1,174 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: unichar.h
// Description: Unicode character/ligature class.
// Author: Ray Smith
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCUTIL_UNICHAR_H_
#define TESSERACT_CCUTIL_UNICHAR_H_
#include "export.h"
#include <memory.h>
#include <cstring>
#include <string>
#include <vector>
namespace tesseract {
// Maximum number of characters that can be stored in a UNICHAR. Must be
// at least 4. Must not exceed 31 without changing the coding of length.
// The final byte of a UNICHAR's buffer doubles as a length field whenever its
// value is below UNICHAR_LEN (see UNICHAR::utf8_len()).
#define UNICHAR_LEN 30
// A UNICHAR_ID is the unique id of a unichar.
using UNICHAR_ID = int;
// A variable to indicate an invalid or uninitialized unichar id.
// Declared static const in a header, so it has internal linkage: every
// translation unit that includes this file gets its own copy.
static const int INVALID_UNICHAR_ID = -1;
// A special unichar that corresponds to INVALID_UNICHAR_ID.
// Also internal linkage (one array per including translation unit).
static const char INVALID_UNICHAR[] = "__INVALID_UNICHAR__";
// Classification of a piece of text by the strongly-directional characters
// it contains.
enum StrongScriptDirection {
  /// Text contains only neutral characters.
  DIR_NEUTRAL = 0,
  /// Text contains no Right-to-Left characters.
  DIR_LEFT_TO_RIGHT = 1,
  /// Text contains no Left-to-Right characters.
  DIR_RIGHT_TO_LEFT = 2,
  /// Text contains a mixture of left-to-right and right-to-left characters.
  DIR_MIX = 3,
};
// Element type of the UTF-32 sequences produced and consumed by
// UNICHAR::UTF8ToUTF32() / UNICHAR::UTF32ToUTF8(). It is an alias for
// signed int, so its width is whatever int is on the target platform.
using char32 = signed int;
// The UNICHAR class holds a single classification result. This may be
// a single Unicode character (stored as between 1 and 4 utf8 bytes) or
// multiple Unicode characters representing the NFKC expansion of a ligature
// such as fi, ffl etc. These are also stored as utf8.
class TESS_API UNICHAR {
public:
// Creates an empty UNICHAR with every byte of the buffer zero.
// chars already has a {} default member initializer, so the memset is
// redundant but harmless.
UNICHAR() {
memset(chars, 0, UNICHAR_LEN);
}
// Construct from a utf8 string. If len<0 then the string is null terminated.
// If the string is too long to fit in the UNICHAR then it takes only what
// will fit.
UNICHAR(const char *utf8_str, int len);
// Construct from a single UCS4 character.
explicit UNICHAR(int unicode);
// Default copy constructor and operator= are OK.
// Get the first character as UCS-4.
int first_uni() const;
// Get the length of the UTF8 string.
// The last byte of the buffer holds the length when its value is in
// [0, UNICHAR_LEN); any other value means that byte is real character data
// and the string fills the whole buffer, so UNICHAR_LEN is returned.
int utf8_len() const {
int len = chars[UNICHAR_LEN - 1];
return len >= 0 && len < UNICHAR_LEN ? len : UNICHAR_LEN;
}
// Get a UTF8 string, but NOT null terminated.
// Returns a pointer to the internal buffer; use utf8_len() for the length.
const char *utf8() const {
return chars;
}
// Get a terminated UTF8 string: Must delete[] it after use.
char *utf8_str() const;
// Get the number of bytes in the first character of the given utf8 string.
static int utf8_step(const char *utf8_str);
// A class to simplify iterating over and accessing elements of a UTF8
// string. Note that unlike the UNICHAR class, const_iterator does NOT COPY or
// take ownership of the underlying byte array. It also does not permit
// modification of the array (as the name suggests).
//
// Example:
// for (UNICHAR::const_iterator it = UNICHAR::begin(str, str_len);
// it != UNICHAR::end(str, str_len);
// ++it) {
// printf("UCS-4 symbol code = %d\n", *it);
// char buf[5];
// int char_len = it.get_utf8(buf); buf[char_len] = '\0';
// printf("Char = %s\n", buf);
// }
class TESS_API const_iterator {
using CI = const_iterator;
public:
// Step to the next UTF8 character.
// If the current position is at an illegal UTF8 character, then print an
// error message and step by one byte. If the current position is at a
// NUL ('\0') byte, don't step past it.
const_iterator &operator++();
// Return the UCS-4 value at the current position.
// If the current position is at an illegal UTF8 value, return a single
// space character.
int operator*() const;
// Store the UTF-8 encoding of the current codepoint into buf, which must be
// at least 4 bytes long. Return the number of bytes written.
// If the current position is at an illegal UTF8 value, writes a single
// space character and returns 1.
// Note that this method does not null-terminate the buffer.
int get_utf8(char *buf) const;
// Returns the number of bytes of the current codepoint. Returns 1 if the
// current position is at an illegal UTF8 value.
int utf8_len() const;
// Returns true if the UTF-8 encoding at the current position is legal.
bool is_legal() const;
// Return the pointer into the string at the current position.
const char *utf8_data() const {
return it_;
}
// Iterator equality operators.
// Two iterators compare equal exactly when they point at the same address.
friend bool operator==(const CI &lhs, const CI &rhs) {
return lhs.it_ == rhs.it_;
}
friend bool operator!=(const CI &lhs, const CI &rhs) {
return !(lhs == rhs);
}
private:
// Only UNICHAR (via begin()/end()) can construct iterators.
friend class UNICHAR;
explicit const_iterator(const char *it) : it_(it) {}
const char *it_; // Pointer into the string.
};
// Create a start/end iterator pointing to a string. Note that these methods
// are static and do NOT create a copy or take ownership of the underlying
// array.
static const_iterator begin(const char *utf8_str, int byte_length);
static const_iterator end(const char *utf8_str, int byte_length);
// Converts a utf-8 string to a vector of unicodes.
// Returns an empty vector if the input contains invalid UTF-8.
static std::vector<char32> UTF8ToUTF32(const char *utf8_str);
// Converts a vector of unicodes to a utf8 string.
// Returns an empty string if the input contains an invalid unicode.
static std::string UTF32ToUTF8(const std::vector<char32> &str32);
private:
// A UTF-8 representation of 1 or more Unicode characters.
// The last element (chars[UNICHAR_LEN - 1]) is a length if
// its value < UNICHAR_LEN, otherwise it is a genuine character.
char chars[UNICHAR_LEN]{};
};
} // namespace tesseract
#endif // TESSERACT_CCUTIL_UNICHAR_H_

View File

@ -1,34 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: version.h
// Description: Version information
//
// (C) Copyright 2018, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_API_VERSION_H_
#define TESSERACT_API_VERSION_H_
// clang-format off
// Version components. The @...@ tokens on the next three lines (and in
// TESSERACT_VERSION_STR) are build-system substitution placeholders, not
// valid C++ values.
// NOTE(review): if this header is consumed as-is rather than as the output of
// the configure step, any use of these macros will not compile - confirm that
// the placeholders are substituted (or that nothing references the macros).
#define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
#define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
#define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@
// Packed numeric version: major in bits 16 and up, minor in bits 8-15,
// micro in bits 0-7 (assuming each component fits in 8 bits).
#define TESSERACT_VERSION \
(TESSERACT_MAJOR_VERSION << 16 | \
TESSERACT_MINOR_VERSION << 8 | \
TESSERACT_MICRO_VERSION)
// Version as a string literal (placeholder until substituted, see above).
#define TESSERACT_VERSION_STR "@PACKAGE_VERSION@"
// clang-format on
#endif // TESSERACT_API_VERSION_H_

View File

@ -1,812 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: baseapi.h
// Description: Simple API for calling tesseract.
// Author: Ray Smith
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_API_BASEAPI_H_
#define TESSERACT_API_BASEAPI_H_
#ifdef HAVE_CONFIG_H
# include "config_auto.h" // DISABLED_LEGACY_ENGINE
#endif
#include "export.h"
#include "pageiterator.h"
#include "publictypes.h"
#include "resultiterator.h"
#include "unichar.h"
#include "version.h"
#include <cstdio>
#include <vector> // for std::vector
struct Pix;
struct Pixa;
struct Boxa;
namespace tesseract {
class PAGE_RES;
class ParagraphModel;
class BLOCK_LIST;
class ETEXT_DESC;
struct OSResults;
class UNICHARSET;
class Dawg;
class Dict;
class EquationDetect;
class PageIterator;
class ImageThresholder;
class LTRResultIterator;
class ResultIterator;
class MutableIterator;
class TessResultRenderer;
class Tesseract;
// Function to read a std::vector<char> from a whole file.
// Returns false on failure.
// filename names the file to read; data is the vector that receives its
// contents.
using FileReader = bool (*)(const char *filename, std::vector<char> *data);
// Pointer to a const member function of Dict taking
// (void *, const UNICHARSET &, UNICHAR_ID, bool) and returning int.
// The meaning of each argument is defined by Dict, which is only
// forward-declared in this header.
using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID,
bool) const;
// Pointer to a non-const member function of Dict taking
// (const char *, const char *, int, const char *, int) and returning double.
// NOTE(review): the name suggests the double is a probability of a character
// in its context; the parameter meanings are not visible here - confirm
// against Dict.
using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *,
int, const char *, int);
/**
* Base class for all tesseract APIs.
* Specific classes can add ability to work on different inputs or produce
* different outputs.
* This class is mostly an interface layer on top of the Tesseract instance
* class to hide the data types so that users of this class don't have to
* include any other Tesseract headers.
*/
class TESS_API TessBaseAPI {
public:
TessBaseAPI();
virtual ~TessBaseAPI();
// Copy constructor and assignment operator are currently unsupported.
TessBaseAPI(TessBaseAPI const &) = delete;
TessBaseAPI &operator=(TessBaseAPI const &) = delete;
/**
* Returns the version identifier as a static string. Do not delete.
*/
static const char *Version();
/**
* If compiled with OpenCL AND an available OpenCL
* device is deemed faster than serial code, then
* "device" is populated with the cl_device_id
* and returns sizeof(cl_device_id)
* otherwise *device=nullptr and returns 0.
*/
static size_t getOpenCLDevice(void **device);
/**
* Set the name of the input file. Needed for training and
* reading a UNLV zone file, and for searchable PDF output.
*/
void SetInputName(const char *name);
/**
* These functions are required for searchable PDF output.
* We need our hands on the input file so that we can include
* it in the PDF without transcoding. If that is not possible,
* we need the original image. Finally, resolution metadata
* is stored in the PDF so we need that as well.
*/
const char *GetInputName();
// Takes ownership of the input pix.
void SetInputImage(Pix *pix);
Pix *GetInputImage();
int GetSourceYResolution();
const char *GetDatapath();
/** Set the name of the bonus output files. Needed only for debugging. */
void SetOutputName(const char *name);
/**
* Set the value of an internal "parameter."
* Supply the name of the parameter and the value as a string, just as
* you would in a config file.
* Returns false if the name lookup failed.
* Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
* Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.
* SetVariable may be used before Init, but settings will revert to
* defaults on End().
*
* Note: Must be called after Init(). Only works for non-init variables
* (init variables should be passed to Init()).
*/
bool SetVariable(const char *name, const char *value);
bool SetDebugVariable(const char *name, const char *value);
/**
* Returns true if the parameter was found among Tesseract parameters.
* Fills in value with the value of the parameter.
*/
bool GetIntVariable(const char *name, int *value) const;
bool GetBoolVariable(const char *name, bool *value) const;
bool GetDoubleVariable(const char *name, double *value) const;
/**
* Returns the pointer to the string that represents the value of the
* parameter if it was found among Tesseract parameters.
*/
const char *GetStringVariable(const char *name) const;
#ifndef DISABLED_LEGACY_ENGINE
/**
* Print Tesseract fonts table to the given file.
*/
void PrintFontsTable(FILE *fp) const;
#endif
/**
* Print Tesseract parameters to the given file.
*/
void PrintVariables(FILE *fp) const;
/**
* Get value of named variable as a string, if it exists.
*/
bool GetVariableAsString(const char *name, std::string *val) const;
/**
* Instances are now mostly thread-safe and totally independent,
* but some global parameters remain. Basically it is safe to use multiple
* TessBaseAPIs in different threads in parallel, UNLESS:
* you use SetVariable on some of the Params in classify and textord.
* If you do, then the effect will be to change it for all your instances.
*
* Start tesseract. Returns zero on success and -1 on failure.
* NOTE that the only members that may be called before Init are those
* listed above here in the class definition.
*
* The datapath must be the name of the tessdata directory.
* The language is (usually) an ISO 639-3 string or nullptr will default to
* eng. It is entirely safe (and eventually will be efficient too) to call
* Init multiple times on the same instance to change language, or just
* to reset the classifier.
* The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
* that multiple languages are to be loaded. Eg hin+eng will load Hindi and
* English. Languages may specify internally that they want to be loaded
* with one or more other languages, so the ~ sign is available to override
* that. Eg if hin were set to load eng by default, then hin+~eng would force
* loading only hin. The number of loaded languages is limited only by
* memory, with the caveat that loading additional languages will impact
* both speed and accuracy, as there is more work to do to decide on the
* applicable language, and there is more chance of hallucinating incorrect
* words.
* WARNING: On changing languages, all Tesseract parameters are reset
* back to their default values. (Which may vary between languages.)
* If you have a rare need to set a Variable that controls
* initialization for a second call to Init you should explicitly
* call End() and then use SetVariable before Init. This is only a very
* rare use case, since there are very few uses that require any parameters
* to be set before Init.
*
* If set_only_non_debug_params is true, only params that do not contain
* "debug" in the name will be set.
*/
int Init(const char *datapath, const char *language, OcrEngineMode mode,
char **configs, int configs_size,
const std::vector<std::string> *vars_vec,
const std::vector<std::string> *vars_values,
bool set_only_non_debug_params);
/**
 * Convenience overload of Init taking only datapath, language and engine mode.
 * Forwards to the full overload with no config files (nullptr, 0), no
 * variable name/value vectors (nullptr, nullptr) and
 * set_only_non_debug_params == false.
 */
int Init(const char *datapath, const char *language, OcrEngineMode oem) {
  char **const no_configs = nullptr;
  const std::vector<std::string> *const no_vars = nullptr;
  return Init(datapath, language, oem, no_configs, /*configs_size=*/0,
              /*vars_vec=*/no_vars, /*vars_values=*/no_vars,
              /*set_only_non_debug_params=*/false);
}
int Init(const char *datapath, const char *language) {
return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr,
false);
}
// In-memory version reads the traineddata file directly from the given
// data[data_size] array, and/or reads data via a FileReader.
int Init(const char *data, int data_size, const char *language,
OcrEngineMode mode, char **configs, int configs_size,
const std::vector<std::string> *vars_vec,
const std::vector<std::string> *vars_values,
bool set_only_non_debug_params, FileReader reader);
/**
* Returns the languages string used in the last valid initialization.
* If the last initialization specified "deu+hin" then that will be
* returned. If hin loaded eng automatically as well, then that will
* not be included in this list. To find the languages actually
* loaded use GetLoadedLanguagesAsVector.
* The returned string should NOT be deleted.
*/
const char *GetInitLanguagesAsString() const;
/**
* Returns the loaded languages in the vector of std::string.
* Includes all languages loaded by the last Init, including those loaded
* as dependencies of other loaded languages.
*/
void GetLoadedLanguagesAsVector(std::vector<std::string> *langs) const;
/**
* Returns the available languages in the sorted vector of std::string.
*/
void GetAvailableLanguagesAsVector(std::vector<std::string> *langs) const;
/**
* Init only for page layout analysis. Use only for calls to SetImage and
* AnalysePage. Calls that attempt recognition will generate an error.
*/
void InitForAnalysePage();
/**
* Read a "config" file containing a set of param, value pairs.
* Searches the standard places: tessdata/configs, tessdata/tessconfigs
* and also accepts a relative or absolute path name.
* Note: only non-init params will be set (init params are set by Init()).
*/
void ReadConfigFile(const char *filename);
/** Same as above, but only set debug params from the given config file. */
void ReadDebugConfigFile(const char *filename);
/**
* Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
* The mode is stored as an IntParam so it can also be modified by
* ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
*/
void SetPageSegMode(PageSegMode mode);
/** Return the current page segmentation mode. */
PageSegMode GetPageSegMode() const;
/**
* Recognize a rectangle from an image and return the result as a string.
* May be called many times for a single Init.
* Currently has no error checking.
* Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
* Palette color images will not work properly and must be converted to
* 24 bit.
* Binary images of 1 bit per pixel may also be given but they must be
* byte packed with the MSB of the first byte being the first pixel, and a
* 1 represents WHITE. For binary images set bytes_per_pixel=0.
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
*
* Note that TesseractRect is the simplified convenience interface.
* For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
* and one or more of the Get*Text functions below.
*/
char *TesseractRect(const unsigned char *imagedata, int bytes_per_pixel,
int bytes_per_line, int left, int top, int width,
int height);
/**
* Call between pages or documents etc to free up memory and forget
* adaptive data.
*/
void ClearAdaptiveClassifier();
/**
* @defgroup AdvancedAPI Advanced API
* The following methods break TesseractRect into pieces, so you can
* get hold of the thresholded image, get the text in different formats,
* get bounding boxes, confidences etc.
*/
/* @{ */
/**
* Provide an image for Tesseract to recognize. Format is as
* TesseractRect above. Copies the image buffer and converts to Pix.
* SetImage clears all recognition results, and sets the rectangle to the
* full image, so it may be followed immediately by a GetUTF8Text, and it
* will automatically perform recognition.
*/
void SetImage(const unsigned char *imagedata, int width, int height,
int bytes_per_pixel, int bytes_per_line);
/**
* Provide an image for Tesseract to recognize. As with SetImage above,
* Tesseract takes its own copy of the image, so it need not persist until
* after Recognize.
* Pix vs raw, which to use?
* Use Pix where possible. Tesseract uses Pix as its internal representation
* and it is therefore more efficient to provide a Pix directly.
*/
void SetImage(Pix *pix);
/**
* Set the resolution of the source image in pixels per inch so font size
* information can be calculated in results. Call this after SetImage().
*/
void SetSourceResolution(int ppi);
/**
* Restrict recognition to a sub-rectangle of the image. Call after SetImage.
* Each SetRectangle clears the recognition results so multiple rectangles
* can be recognized with the same image.
*/
void SetRectangle(int left, int top, int width, int height);
/**
* Get a copy of the internal thresholded image from Tesseract.
* Caller takes ownership of the Pix and must pixDestroy it.
* May be called any time after SetImage, or after TesseractRect.
*/
Pix *GetThresholdedImage();
/**
* Get the result of page layout analysis as a leptonica-style
* Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
*/
Boxa *GetRegions(Pixa **pixa);
/**
* Get the textlines as a leptonica-style
* Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
* If raw_image is true, then extract from the original image instead of the
* thresholded image and pad by raw_padding pixels.
* If blockids is not nullptr, the block-id of each line is also returned as
* an array of one element per line. delete [] after use. If paraids is not
* nullptr, the paragraph-id of each line within its block is also returned as
* an array of one element per line. delete [] after use.
*/
Boxa *GetTextlines(bool raw_image, int raw_padding, Pixa **pixa,
int **blockids, int **paraids);
/**
 * Helper overload of GetTextlines for the most common usage: extract from
 * the thresholded image (raw_image == false) with no padding, and without
 * requesting paragraph ids (paraids == nullptr).
 */
Boxa *GetTextlines(Pixa **pixa, int **blockids) {
  const bool use_raw_image = false;
  const int no_padding = 0;
  int **const no_paraids = nullptr;
  return GetTextlines(use_raw_image, no_padding, pixa, blockids, no_paraids);
}
/**
* Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
* pair, in reading order. Enables downstream handling of non-rectangular
* regions.
* Can be called before or after Recognize.
* If blockids is not nullptr, the block-id of each line is also returned as
* an array of one element per line. delete [] after use.
*/
Boxa *GetStrips(Pixa **pixa, int **blockids);
/**
* Get the words as a leptonica-style
* Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
*/
Boxa *GetWords(Pixa **pixa);
/**
* Gets the individual connected (text) components (created
* after pages segmentation step, but before recognition)
* as a leptonica-style Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
* Note: the caller is responsible for calling boxaDestroy()
* on the returned Boxa array and pixaDestroy() on cc array.
*/
Boxa *GetConnectedComponents(Pixa **cc);
/**
* Get the given level kind of components (block, textline, word etc.) as a
* leptonica-style Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
* If blockids is not nullptr, the block-id of each component is also returned
* as an array of one element per component. delete [] after use.
* If paraids is not nullptr, the paragraph-id of each component with its
* block is also returned as an array of one element per component. delete []
* after use. If raw_image is true, then portions of the original image are
* extracted instead of the thresholded image and padded with raw_padding. If
* text_only is true, then only text components are returned.
*/
Boxa *GetComponentImages(PageIteratorLevel level, bool text_only,
bool raw_image, int raw_padding, Pixa **pixa,
int **blockids, int **paraids);
/**
 * Helper overload of GetComponentImages to get binary images with no padding
 * (most common usage). Forwards with raw_image == false, raw_padding == 0
 * and paraids == nullptr.
 */
Boxa *GetComponentImages(const PageIteratorLevel level, const bool text_only,
                         Pixa **pixa, int **blockids) {
  const bool use_raw_image = false;
  const int no_padding = 0;
  int **const no_paraids = nullptr;
  return GetComponentImages(level, text_only, use_raw_image, no_padding, pixa,
                            blockids, no_paraids);
}
/**
* Returns the scale factor of the thresholded image that would be returned by
* GetThresholdedImage() and the various GetX() methods that call
* GetComponentImages().
* Returns 0 if no thresholder has been set.
*/
int GetThresholdedImageScaleFactor() const;
/**
* Runs page layout analysis in the mode set by SetPageSegMode.
* May optionally be called prior to Recognize to get access to just
* the page layout results. Returns an iterator to the results.
* If merge_similar_words is true, words are combined where suitable for use
* with a line recognizer. Use if you want to use AnalyseLayout to find the
* textlines, and then want to process textline fragments with an external
* line recognizer.
* Returns nullptr on error or an empty page.
* The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
PageIterator *AnalyseLayout();
PageIterator *AnalyseLayout(bool merge_similar_words);
/**
* Recognize the image from SetAndThresholdImage, generating Tesseract
* internal structures. Returns 0 on success.
* Optional. The Get*Text functions below will call Recognize if needed.
* After Recognize, the output is kept internally until the next SetImage.
*/
int Recognize(ETEXT_DESC *monitor);
/**
* Methods to retrieve information after SetAndThresholdImage(),
* Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
*/
/**
* Turns images into symbolic text.
*
* filename can point to a single image, a multi-page TIFF,
* or a plain text list of image filenames.
*
* retry_config is useful for debugging. If not nullptr, you can fall
* back to an alternate configuration if a page fails for some
* reason.
*
* timeout_millisec terminates processing if any single page
* takes too long. Set to 0 for unlimited time.
*
* renderer is responsible for creating the output. For example,
* use the TessTextRenderer if you want plaintext output, or
* the TessPDFRenderer to produce searchable PDF.
*
* If tessedit_page_number is non-negative, will only process that
* single page. Works for multi-page tiff file, or filelist.
*
* Returns true if successful, false on error.
*/
bool ProcessPages(const char *filename, const char *retry_config,
int timeout_millisec, TessResultRenderer *renderer);
// Does the real work of ProcessPages.
bool ProcessPagesInternal(const char *filename, const char *retry_config,
int timeout_millisec, TessResultRenderer *renderer);
/**
* Turn a single image into symbolic text.
*
* The pix is the image processed. filename and page_index are
* metadata used by side-effect processes, such as reading a box
* file or formatting as hOCR.
*
* See ProcessPages for descriptions of other parameters.
*/
bool ProcessPage(Pix *pix, int page_index, const char *filename,
const char *retry_config, int timeout_millisec,
TessResultRenderer *renderer);
/**
* Get a reading-order iterator to the results of LayoutAnalysis and/or
* Recognize. The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
ResultIterator *GetIterator();
/**
* Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
* The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
MutableIterator *GetMutableIterator();
/**
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
*/
char *GetUTF8Text();
/**
* Make a HTML-formatted string with hOCR markup from the internal
* data structures.
* page_number is 0-based but will appear in the output as 1-based.
* monitor can be used to
* cancel the recognition
* receive progress callbacks
* Returned string must be freed with the delete [] operator.
*/
char *GetHOCRText(ETEXT_DESC *monitor, int page_number);
/**
* Make a HTML-formatted string with hOCR markup from the internal
* data structures.
* page_number is 0-based but will appear in the output as 1-based.
* Returned string must be freed with the delete [] operator.
*/
char *GetHOCRText(int page_number);
/**
* Make an XML-formatted string with Alto markup from the internal
* data structures.
*/
char *GetAltoText(ETEXT_DESC *monitor, int page_number);
/**
* Make an XML-formatted string with Alto markup from the internal
* data structures.
*/
char *GetAltoText(int page_number);
/**
* Make a TSV-formatted string from the internal data structures.
* page_number is 0-based but will appear in the output as 1-based.
* Returned string must be freed with the delete [] operator.
*/
char *GetTSVText(int page_number);
/**
* Make a box file for LSTM training from the internal data structures.
* Constructs coordinates in the original image - not just the rectangle.
* page_number is a 0-based page index that will appear in the box file.
* Returned string must be freed with the delete [] operator.
*/
char *GetLSTMBoxText(int page_number);
/**
* The recognized text is returned as a char* which is coded in the same
* format as a box file used in training.
* Constructs coordinates in the original image - not just the rectangle.
* page_number is a 0-based page index that will appear in the box file.
* Returned string must be freed with the delete [] operator.
*/
char *GetBoxText(int page_number);
/**
* The recognized text is returned as a char* which is coded in the same
* format as a WordStr box file used in training.
* page_number is a 0-based page index that will appear in the box file.
* Returned string must be freed with the delete [] operator.
*/
char *GetWordStrBoxText(int page_number);
/**
* The recognized text is returned as a char* which is coded
* as UNLV format Latin-1 with specific reject and suspect codes.
* Returned string must be freed with the delete [] operator.
*/
char *GetUNLVText();
/**
* Detect the orientation of the input image and apparent script (alphabet).
* orient_deg is the detected clockwise rotation of the input image in degrees
* (0, 90, 180, 270)
* orient_conf is the confidence (15.0 is reasonably confident)
* script_name is an ASCII string, the name of the script, e.g. "Latin"
* script_conf is confidence level in the script
* Returns true on success and writes values to each parameter as an output
*/
bool DetectOrientationScript(int *orient_deg, float *orient_conf,
const char **script_name, float *script_conf);
/**
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
* page_number is a 0-based page index that will appear in the osd file.
*/
char *GetOsdText(int page_number);
/** Returns the (average) confidence value between 0 and 100. */
int MeanTextConf();
/**
* Returns all word confidences (between 0 and 100) in an array, terminated
* by -1. The calling function must delete [] after use.
* The number of confidences should correspond to the number of space-
* delimited words in GetUTF8Text.
*/
int *AllWordConfidences();
#ifndef DISABLED_LEGACY_ENGINE
/**
* Applies the given word to the adaptive classifier if possible.
* The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can
* tell the boundaries of the graphemes.
* Assumes that SetImage/SetRectangle have been used to set the image
* to the given word. The mode arg should be PSM_SINGLE_WORD or
* PSM_CIRCLE_WORD, as that will be used to control layout analysis.
* The currently set PageSegMode is preserved.
* Returns false if adaption was not possible for some reason.
*/
bool AdaptToWordStr(PageSegMode mode, const char *wordstr);
#endif // ndef DISABLED_LEGACY_ENGINE
/**
* Free up recognition results and any stored image data, without actually
* freeing any recognition data that would be time-consuming to reload.
* Afterwards, you must call SetImage or TesseractRect before doing
* any Recognize or Get* operation.
*/
void Clear();
/**
* Close down tesseract and free up all memory. End() is equivalent to
* destructing and reconstructing your TessBaseAPI.
* Once End() has been used, none of the other API functions may be used
* other than Init and anything declared above it in the class definition.
*/
void End();
/**
* Clear any library-level memory caches.
* There are a variety of expensive-to-load constant data structures (mostly
* language dictionaries) that are cached globally -- surviving the Init()
* and End() of individual TessBaseAPI's. This function allows the clearing
* of these caches.
**/
static void ClearPersistentCache();
/**
* Check whether a word is valid according to Tesseract's language model
* @return 0 if the word is invalid, non-zero if valid.
* @warning temporary! This function will be removed from here and placed
* in a separate API at some future time.
*/
int IsValidWord(const char *word) const;
// Returns true if utf8_character is defined in the UniCharset.
bool IsValidCharacter(const char *utf8_character) const;
bool GetTextDirection(int *out_offset, float *out_slope);
/** Sets Dict::letter_is_okay_ function to point to the given function. */
void SetDictFunc(DictFunc f);
/** Sets Dict::probability_in_context_ function to point to the given
* function.
*/
void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
/**
* Estimates the Orientation And Script of the image.
* @return true if the image was processed successfully.
*/
bool DetectOS(OSResults *);
/**
* Return text orientation of each block as determined by an earlier run
* of layout analysis.
*/
void GetBlockTextOrientations(int **block_orientation,
bool **vertical_writing);
/** This method returns the string form of the specified unichar. */
const char *GetUnichar(int unichar_id) const;
/** Return the pointer to the i-th dawg loaded into tesseract_ object. */
const Dawg *GetDawg(int i) const;
/** Return the number of dawgs loaded into tesseract_ object. */
int NumDawgs() const;
/** Returns the tesseract_ member (the underlying data object). */
Tesseract *tesseract() const {
  return this->tesseract_;
}
/** Returns last_oem_requested_, the engine mode most recently requested. */
OcrEngineMode oem() const {
  return this->last_oem_requested_;
}
void set_min_orientation_margin(double margin);
/* @} */
protected:
/** Common code for setting the image. Returns true if Init has been called.
*/
bool InternalSetImage();
/**
* Run the thresholder to make the thresholded image. If pix is not nullptr,
* the source is thresholded to pix instead of the internal IMAGE.
*/
virtual bool Threshold(Pix **pix);
/**
* Find lines from the image making the BLOCK_LIST.
* @return 0 on success.
*/
int FindLines();
/** Delete the pageres and block list ready for a new page. */
void ClearResults();
/**
* Return an LTR Result Iterator -- used only for training, as we really want
* to ignore all BiDi smarts at that point.
* delete once you're done with it.
*/
LTRResultIterator *GetLTRIterator();
/**
* Return the length of the output text string, as UTF8, assuming
* one newline per line and one per block, with a terminator,
* and assuming a single character reject marker for each rejected character.
* Also return the number of recognized blobs in blob_count.
*/
int TextLength(int *blob_count) const;
//// paragraphs.cpp ////////////////////////////////////////////////////
void DetectParagraphs(bool after_text_recognition);
/** Read-only access to page_res_ (the page-level data). */
const PAGE_RES *GetPageRes() const {
  return this->page_res_;
}
protected:
Tesseract *tesseract_; ///< The underlying data object.
Tesseract *osd_tesseract_; ///< For orientation & script detection.
EquationDetect *equ_detect_; ///< The equation detector.
FileReader reader_; ///< Reads files from any filesystem.
ImageThresholder *thresholder_; ///< Image thresholding module.
std::vector<ParagraphModel *> *paragraph_models_;
BLOCK_LIST *block_list_; ///< The page layout.
PAGE_RES *page_res_; ///< The page-level data.
std::string input_file_; ///< Name used by training code.
std::string output_file_; ///< Name used by debug code.
std::string datapath_; ///< Current location of tessdata.
std::string language_; ///< Last initialized language.
OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested.
bool recognition_done_; ///< page_res_ contains recognition data.
/**
* @defgroup ThresholderParams Thresholder Parameters
* Parameters saved from the Thresholder. Needed to rebuild coordinates.
*/
/* @{ */
int rect_left_;
int rect_top_;
int rect_width_;
int rect_height_;
int image_width_;
int image_height_;
/* @} */
private:
// A list of image filenames gets special consideration
bool ProcessPagesFileList(FILE *fp, std::string *buf,
const char *retry_config, int timeout_millisec,
TessResultRenderer *renderer,
int tessedit_page_number);
// TIFF supports multipage so gets special consideration.
bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size,
const char *filename, const char *retry_config,
int timeout_millisec,
TessResultRenderer *renderer,
int tessedit_page_number);
}; // class TessBaseAPI.
/** Escape a char string - replace &<>"' with HTML codes. */
std::string HOcrEscape(const char *text);
} // namespace tesseract
#endif // TESSERACT_API_BASEAPI_H_

View File

@ -1,484 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: capi.h
// Description: C-API TessBaseAPI
//
// (C) Copyright 2012, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef API_CAPI_H_
#define API_CAPI_H_
#include "export.h"
#ifdef __cplusplus
# include <tesseract/baseapi.h>
# include <tesseract/ocrclass.h>
# include <tesseract/pageiterator.h>
# include <tesseract/renderer.h>
# include <tesseract/resultiterator.h>
#endif
#include <stdbool.h>
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
#endif
#ifndef BOOL
# define BOOL int
# define TRUE 1
# define FALSE 0
#endif
#ifdef __cplusplus
typedef tesseract::TessResultRenderer TessResultRenderer;
typedef tesseract::TessBaseAPI TessBaseAPI;
typedef tesseract::PageIterator TessPageIterator;
typedef tesseract::ResultIterator TessResultIterator;
typedef tesseract::MutableIterator TessMutableIterator;
typedef tesseract::ChoiceIterator TessChoiceIterator;
typedef tesseract::OcrEngineMode TessOcrEngineMode;
typedef tesseract::PageSegMode TessPageSegMode;
typedef tesseract::PageIteratorLevel TessPageIteratorLevel;
typedef tesseract::Orientation TessOrientation;
typedef tesseract::ParagraphJustification TessParagraphJustification;
typedef tesseract::WritingDirection TessWritingDirection;
typedef tesseract::TextlineOrder TessTextlineOrder;
typedef tesseract::PolyBlockType TessPolyBlockType;
typedef tesseract::ETEXT_DESC ETEXT_DESC;
#else
typedef struct TessResultRenderer TessResultRenderer;
typedef struct TessBaseAPI TessBaseAPI;
typedef struct TessPageIterator TessPageIterator;
typedef struct TessResultIterator TessResultIterator;
typedef struct TessMutableIterator TessMutableIterator;
typedef struct TessChoiceIterator TessChoiceIterator;
/*
 * C-only declaration of the OCR engine mode. When this header is compiled as
 * C++, the same name is instead a typedef of tesseract::OcrEngineMode.
 * NOTE(review): presumably the numeric values must stay identical to the C++
 * enum, since both are exchanged through the C API - verify before reordering.
 */
typedef enum TessOcrEngineMode {
  OEM_TESSERACT_ONLY = 0,
  OEM_LSTM_ONLY = 1,
  OEM_TESSERACT_LSTM_COMBINED = 2,
  OEM_DEFAULT = 3
} TessOcrEngineMode;
/*
 * C-only declaration of the page segmentation mode. When this header is
 * compiled as C++, the same name is instead a typedef of
 * tesseract::PageSegMode.
 * NOTE(review): presumably the numeric values must stay identical to the C++
 * enum - verify before reordering. PSM_COUNT is the last enumerator and equals
 * the number of modes listed before it.
 */
typedef enum TessPageSegMode {
  PSM_OSD_ONLY = 0,
  PSM_AUTO_OSD = 1,
  PSM_AUTO_ONLY = 2,
  PSM_AUTO = 3,
  PSM_SINGLE_COLUMN = 4,
  PSM_SINGLE_BLOCK_VERT_TEXT = 5,
  PSM_SINGLE_BLOCK = 6,
  PSM_SINGLE_LINE = 7,
  PSM_SINGLE_WORD = 8,
  PSM_CIRCLE_WORD = 9,
  PSM_SINGLE_CHAR = 10,
  PSM_SPARSE_TEXT = 11,
  PSM_SPARSE_TEXT_OSD = 12,
  PSM_RAW_LINE = 13,
  PSM_COUNT = 14
} TessPageSegMode;
/*
 * C-only declaration of the page iterator level. When this header is compiled
 * as C++, the same name is instead a typedef of tesseract::PageIteratorLevel.
 * NOTE(review): presumably the numeric values must stay identical to the C++
 * enum - verify before reordering.
 */
typedef enum TessPageIteratorLevel {
  RIL_BLOCK = 0,
  RIL_PARA = 1,
  RIL_TEXTLINE = 2,
  RIL_WORD = 3,
  RIL_SYMBOL = 4
} TessPageIteratorLevel;
/*
 * C-only declaration of the poly block type. When this header is compiled as
 * C++, the same name is instead a typedef of tesseract::PolyBlockType.
 * NOTE(review): presumably the numeric values must stay identical to the C++
 * enum - verify before reordering. PT_COUNT is the last enumerator and equals
 * the number of types listed before it.
 */
typedef enum TessPolyBlockType {
  PT_UNKNOWN = 0,
  PT_FLOWING_TEXT = 1,
  PT_HEADING_TEXT = 2,
  PT_PULLOUT_TEXT = 3,
  PT_EQUATION = 4,
  PT_INLINE_EQUATION = 5,
  PT_TABLE = 6,
  PT_VERTICAL_TEXT = 7,
  PT_CAPTION_TEXT = 8,
  PT_FLOWING_IMAGE = 9,
  PT_HEADING_IMAGE = 10,
  PT_PULLOUT_IMAGE = 11,
  PT_HORZ_LINE = 12,
  PT_VERT_LINE = 13,
  PT_NOISE = 14,
  PT_COUNT = 15
} TessPolyBlockType;
/*
 * C-only declaration of the page orientation. When this header is compiled as
 * C++, the same name is instead a typedef of tesseract::Orientation.
 * NOTE(review): presumably the numeric values must stay identical to the C++
 * enum - verify before reordering.
 */
typedef enum TessOrientation {
  ORIENTATION_PAGE_UP = 0,
  ORIENTATION_PAGE_RIGHT = 1,
  ORIENTATION_PAGE_DOWN = 2,
  ORIENTATION_PAGE_LEFT = 3
} TessOrientation;
/*
 * C-only declaration of the paragraph justification. When this header is
 * compiled as C++, the same name is instead a typedef of
 * tesseract::ParagraphJustification.
 * NOTE(review): presumably the numeric values must stay identical to the C++
 * enum - verify before reordering.
 */
typedef enum TessParagraphJustification {
  JUSTIFICATION_UNKNOWN = 0,
  JUSTIFICATION_LEFT = 1,
  JUSTIFICATION_CENTER = 2,
  JUSTIFICATION_RIGHT = 3
} TessParagraphJustification;
/*
 * C-only declaration of the writing direction. When this header is compiled
 * as C++, the same name is instead a typedef of tesseract::WritingDirection.
 * NOTE(review): presumably the numeric values must stay identical to the C++
 * enum - verify before reordering.
 */
typedef enum TessWritingDirection {
  WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
  WRITING_DIRECTION_TOP_TO_BOTTOM = 2
} TessWritingDirection;
/*
 * C-only declaration of the textline order. When this header is compiled as
 * C++, the same name is instead a typedef of tesseract::TextlineOrder.
 * NOTE(review): presumably the numeric values must stay identical to the C++
 * enum - verify before reordering.
 */
typedef enum TessTextlineOrder {
  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2
} TessTextlineOrder;
typedef struct ETEXT_DESC ETEXT_DESC;
#endif
typedef bool (*TessCancelFunc)(void *cancel_this, int words);
typedef bool (*TessProgressFunc)(ETEXT_DESC *ths, int left, int right, int top,
int bottom);
struct Pix;
struct Boxa;
struct Pixa;
/* General free functions */
TESS_API const char *TessVersion();
TESS_API void TessDeleteText(const char *text);
TESS_API void TessDeleteTextArray(char **arr);
TESS_API void TessDeleteIntArray(const int *arr);
/* Renderer API */
TESS_API TessResultRenderer *TessTextRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessHOcrRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessHOcrRendererCreate2(const char *outputbase,
BOOL font_info);
TESS_API TessResultRenderer *TessAltoRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessTsvRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessPDFRendererCreate(const char *outputbase,
const char *datadir,
BOOL textonly);
TESS_API TessResultRenderer *TessUnlvRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessBoxTextRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessLSTMBoxRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessWordStrBoxRendererCreate(
const char *outputbase);
TESS_API void TessDeleteResultRenderer(TessResultRenderer *renderer);
TESS_API void TessResultRendererInsert(TessResultRenderer *renderer,
TessResultRenderer *next);
TESS_API TessResultRenderer *TessResultRendererNext(
TessResultRenderer *renderer);
TESS_API BOOL TessResultRendererBeginDocument(TessResultRenderer *renderer,
const char *title);
TESS_API BOOL TessResultRendererAddImage(TessResultRenderer *renderer,
TessBaseAPI *api);
TESS_API BOOL TessResultRendererEndDocument(TessResultRenderer *renderer);
TESS_API const char *TessResultRendererExtention(TessResultRenderer *renderer);
TESS_API const char *TessResultRendererTitle(TessResultRenderer *renderer);
TESS_API int TessResultRendererImageNum(TessResultRenderer *renderer);
/* Base API */
TESS_API TessBaseAPI *TessBaseAPICreate();
TESS_API void TessBaseAPIDelete(TessBaseAPI *handle);
TESS_API size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI *handle, void **device);
TESS_API void TessBaseAPISetInputName(TessBaseAPI *handle, const char *name);
TESS_API const char *TessBaseAPIGetInputName(TessBaseAPI *handle);
TESS_API void TessBaseAPISetInputImage(TessBaseAPI *handle, struct Pix *pix);
TESS_API struct Pix *TessBaseAPIGetInputImage(TessBaseAPI *handle);
TESS_API int TessBaseAPIGetSourceYResolution(TessBaseAPI *handle);
TESS_API const char *TessBaseAPIGetDatapath(TessBaseAPI *handle);
TESS_API void TessBaseAPISetOutputName(TessBaseAPI *handle, const char *name);
TESS_API BOOL TessBaseAPISetVariable(TessBaseAPI *handle, const char *name,
const char *value);
TESS_API BOOL TessBaseAPISetDebugVariable(TessBaseAPI *handle, const char *name,
const char *value);
TESS_API BOOL TessBaseAPIGetIntVariable(const TessBaseAPI *handle,
const char *name, int *value);
TESS_API BOOL TessBaseAPIGetBoolVariable(const TessBaseAPI *handle,
const char *name, BOOL *value);
TESS_API BOOL TessBaseAPIGetDoubleVariable(const TessBaseAPI *handle,
const char *name, double *value);
TESS_API const char *TessBaseAPIGetStringVariable(const TessBaseAPI *handle,
const char *name);
TESS_API void TessBaseAPIPrintVariables(const TessBaseAPI *handle, FILE *fp);
TESS_API BOOL TessBaseAPIPrintVariablesToFile(const TessBaseAPI *handle,
const char *filename);
/* Initialisation entry points. C has no overloading, so the variants are
 * numbered; they differ only in which optional parameters they expose:
 *   Init3: datapath, language
 *   Init2: + engine mode
 *   Init1: + config file list
 *   Init4: + variable name/value vectors and set_only_non_debug_params
 *   Init5: as Init4, but takes (data, data_size) in place of datapath
 * NOTE(review): the int result is presumably 0 on success -- confirm against
 * the implementation. */
TESS_API int TessBaseAPIInit1(TessBaseAPI *handle, const char *datapath,
const char *language, TessOcrEngineMode oem,
char **configs, int configs_size);
TESS_API int TessBaseAPIInit2(TessBaseAPI *handle, const char *datapath,
const char *language, TessOcrEngineMode oem);
TESS_API int TessBaseAPIInit3(TessBaseAPI *handle, const char *datapath,
const char *language);
TESS_API int TessBaseAPIInit4(TessBaseAPI *handle, const char *datapath,
const char *language, TessOcrEngineMode mode,
char **configs, int configs_size, char **vars_vec,
char **vars_values, size_t vars_vec_size,
BOOL set_only_non_debug_params);
TESS_API int TessBaseAPIInit5(TessBaseAPI *handle, const char *data, int data_size,
const char *language, TessOcrEngineMode mode,
char **configs, int configs_size, char **vars_vec,
char **vars_values, size_t vars_vec_size,
BOOL set_only_non_debug_params);
/* Language queries.
 * NOTE(review): how the returned char ** vectors are terminated and who frees
 * them is not visible in this part of the header -- confirm before use. */
TESS_API const char *TessBaseAPIGetInitLanguagesAsString(
const TessBaseAPI *handle);
TESS_API char **TessBaseAPIGetLoadedLanguagesAsVector(
const TessBaseAPI *handle);
TESS_API char **TessBaseAPIGetAvailableLanguagesAsVector(
const TessBaseAPI *handle);
TESS_API void TessBaseAPIInitForAnalysePage(TessBaseAPI *handle);
/* Config files are given by file name. */
TESS_API void TessBaseAPIReadConfigFile(TessBaseAPI *handle,
const char *filename);
TESS_API void TessBaseAPIReadDebugConfigFile(TessBaseAPI *handle,
const char *filename);
/* Page segmentation mode accessors. */
TESS_API void TessBaseAPISetPageSegMode(TessBaseAPI *handle,
TessPageSegMode mode);
TESS_API TessPageSegMode TessBaseAPIGetPageSegMode(const TessBaseAPI *handle);
/* Takes a raw pixel buffer described by bytes_per_pixel / bytes_per_line and
 * a rectangle (left, top, width, height), and returns a char *.
 * NOTE(review): ownership of the returned string is not visible here --
 * confirm how it must be released. */
TESS_API char *TessBaseAPIRect(TessBaseAPI *handle,
const unsigned char *imagedata,
int bytes_per_pixel, int bytes_per_line,
int left, int top, int width, int height);
TESS_API void TessBaseAPIClearAdaptiveClassifier(TessBaseAPI *handle);
/* Image input: SetImage takes a raw buffer with its geometry, SetImage2 takes
 * a struct Pix *.
 * NOTE(review): whether the buffer / Pix is copied or must outlive later
 * calls is not visible here -- confirm against the implementation. */
TESS_API void TessBaseAPISetImage(TessBaseAPI *handle,
const unsigned char *imagedata, int width,
int height, int bytes_per_pixel,
int bytes_per_line);
TESS_API void TessBaseAPISetImage2(TessBaseAPI *handle, struct Pix *pix);
TESS_API void TessBaseAPISetSourceResolution(TessBaseAPI *handle, int ppi);
TESS_API void TessBaseAPISetRectangle(TessBaseAPI *handle, int left, int top,
int width, int height);
/* Layout queries. Most return a struct Boxa * and can additionally fill
 * out-parameters: pixa / cc (struct Pixa **), blockids and paraids (int **).
 * The "...1" variants add raw_image / raw_padding and the paraids output.
 * NOTE(review): ownership of the returned Pix/Boxa and of the out-parameter
 * arrays, and whether the out-pointers may be NULL, is not visible in this
 * header -- confirm against the implementation. */
TESS_API struct Pix *TessBaseAPIGetThresholdedImage(TessBaseAPI *handle);
TESS_API struct Boxa *TessBaseAPIGetRegions(TessBaseAPI *handle,
struct Pixa **pixa);
TESS_API struct Boxa *TessBaseAPIGetTextlines(TessBaseAPI *handle,
struct Pixa **pixa,
int **blockids);
TESS_API struct Boxa *TessBaseAPIGetTextlines1(TessBaseAPI *handle,
BOOL raw_image, int raw_padding,
struct Pixa **pixa,
int **blockids, int **paraids);
TESS_API struct Boxa *TessBaseAPIGetStrips(TessBaseAPI *handle,
struct Pixa **pixa, int **blockids);
TESS_API struct Boxa *TessBaseAPIGetWords(TessBaseAPI *handle,
struct Pixa **pixa);
TESS_API struct Boxa *TessBaseAPIGetConnectedComponents(TessBaseAPI *handle,
struct Pixa **cc);
/* Generic form: the component level is chosen by the level argument. */
TESS_API struct Boxa *TessBaseAPIGetComponentImages(TessBaseAPI *handle,
TessPageIteratorLevel level,
BOOL text_only,
struct Pixa **pixa,
int **blockids);
TESS_API struct Boxa *TessBaseAPIGetComponentImages1(
TessBaseAPI *handle, TessPageIteratorLevel level, BOOL text_only,
BOOL raw_image, int raw_padding, struct Pixa **pixa, int **blockids,
int **paraids);
TESS_API int TessBaseAPIGetThresholdedImageScaleFactor(
const TessBaseAPI *handle);
/* Returns a page iterator; TessPageIteratorDelete is declared later in this
 * header.
 * NOTE(review): presumably the caller must delete the returned iterator --
 * confirm. */
TESS_API TessPageIterator *TessBaseAPIAnalyseLayout(TessBaseAPI *handle);
/* Recognition. The monitor argument is an ETEXT_DESC progress monitor (see
 * the TessMonitor* functions in this header).
 * NOTE(review): whether monitor may be NULL and what the int result encodes
 * is not visible here -- confirm against the implementation. */
TESS_API int TessBaseAPIRecognize(TessBaseAPI *handle, ETEXT_DESC *monitor);
/* Whole-file / single-page processing through a result renderer, with an
 * optional retry config and a timeout in milliseconds. */
TESS_API BOOL TessBaseAPIProcessPages(TessBaseAPI *handle, const char *filename,
const char *retry_config,
int timeout_millisec,
TessResultRenderer *renderer);
TESS_API BOOL TessBaseAPIProcessPage(TessBaseAPI *handle, struct Pix *pix,
int page_index, const char *filename,
const char *retry_config,
int timeout_millisec,
TessResultRenderer *renderer);
/* Iterator accessors; the matching *Delete functions are declared later in
 * this header. */
TESS_API TessResultIterator *TessBaseAPIGetIterator(TessBaseAPI *handle);
TESS_API TessMutableIterator *TessBaseAPIGetMutableIterator(
TessBaseAPI *handle);
/* Text output in several formats, each returned as a non-const char *.
 * NOTE(review): the non-const return type suggests the caller owns the
 * buffer; the function used to free it is not visible in this part of the
 * header -- confirm before use. */
TESS_API char *TessBaseAPIGetUTF8Text(TessBaseAPI *handle);
TESS_API char *TessBaseAPIGetHOCRText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetAltoText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetTsvText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetBoxText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetLSTMBoxText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetWordStrBoxText(TessBaseAPI *handle,
int page_number);
TESS_API char *TessBaseAPIGetUNLVText(TessBaseAPI *handle);
/* Confidence queries.
 * NOTE(review): length/termination and ownership of the int * returned by
 * AllWordConfidences are not visible here -- confirm. */
TESS_API int TessBaseAPIMeanTextConf(TessBaseAPI *handle);
TESS_API int *TessBaseAPIAllWordConfidences(TessBaseAPI *handle);
/* Only declared when the legacy engine is compiled in. */
#ifndef DISABLED_LEGACY_ENGINE
TESS_API BOOL TessBaseAPIAdaptToWordStr(TessBaseAPI *handle,
TessPageSegMode mode,
const char *wordstr);
#endif // #ifndef DISABLED_LEGACY_ENGINE
TESS_API void TessBaseAPIClear(TessBaseAPI *handle);
TESS_API void TessBaseAPIEnd(TessBaseAPI *handle);
TESS_API int TessBaseAPIIsValidWord(TessBaseAPI *handle, const char *word);
/* Writes the results through out_offset and out_slope. */
TESS_API BOOL TessBaseAPIGetTextDirection(TessBaseAPI *handle, int *out_offset,
float *out_slope);
TESS_API const char *TessBaseAPIGetUnichar(TessBaseAPI *handle, int unichar_id);
TESS_API void TessBaseAPIClearPersistentCache(TessBaseAPI *handle);
#ifndef DISABLED_LEGACY_ENGINE
// Results are written through the four out-pointers.
// The original note here asked callers to free the script name with
// TessDeleteText; it referred to "best_script_name", but the parameter is
// named script_name.
// NOTE(review): script_name is declared const char **, which is unusual for a
// buffer the caller is expected to free -- confirm against the implementation
// whether *script_name is really caller-owned before calling TessDeleteText
// on it.
TESS_API BOOL TessBaseAPIDetectOrientationScript(TessBaseAPI *handle,
int *orient_deg,
float *orient_conf,
const char **script_name,
float *script_conf);
#endif // #ifndef DISABLED_LEGACY_ENGINE
TESS_API void TessBaseAPISetMinOrientationMargin(TessBaseAPI *handle,
double margin);
TESS_API int TessBaseAPINumDawgs(const TessBaseAPI *handle);
TESS_API TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI *handle);
// Note the name has no "API" infix, unlike its neighbours, and that
// vertical_writing uses bool rather than the BOOL type used by the other
// declarations here; C callers need a definition of bool in scope
// (e.g. <stdbool.h>).
// NOTE(review): ownership of the arrays returned through block_orientation
// and vertical_writing is not visible here -- confirm.
TESS_API void TessBaseGetBlockTextOrientations(TessBaseAPI *handle,
int **block_orientation,
bool **vertical_writing);
/* Page iterator */
/* Lifetime: Delete takes an iterator back, Copy returns a new one.
 * NOTE(review): presumably a copy must be deleted separately -- confirm. */
TESS_API void TessPageIteratorDelete(TessPageIterator *handle);
TESS_API TessPageIterator *TessPageIteratorCopy(const TessPageIterator *handle);
/* Movement and position tests, parameterised by iterator level. */
TESS_API void TessPageIteratorBegin(TessPageIterator *handle);
TESS_API BOOL TessPageIteratorNext(TessPageIterator *handle,
TessPageIteratorLevel level);
TESS_API BOOL TessPageIteratorIsAtBeginningOf(const TessPageIterator *handle,
TessPageIteratorLevel level);
TESS_API BOOL TessPageIteratorIsAtFinalElement(const TessPageIterator *handle,
TessPageIteratorLevel level,
TessPageIteratorLevel element);
/* Geometry of the current element at the given level, written through the
 * four out-pointers. */
TESS_API BOOL TessPageIteratorBoundingBox(const TessPageIterator *handle,
TessPageIteratorLevel level,
int *left, int *top, int *right,
int *bottom);
TESS_API TessPolyBlockType
TessPageIteratorBlockType(const TessPageIterator *handle);
/* Image extraction.
 * NOTE(review): ownership of the returned struct Pix * is not visible here --
 * confirm how it must be released. */
TESS_API struct Pix *TessPageIteratorGetBinaryImage(
const TessPageIterator *handle, TessPageIteratorLevel level);
TESS_API struct Pix *TessPageIteratorGetImage(const TessPageIterator *handle,
TessPageIteratorLevel level,
int padding,
struct Pix *original_image,
int *left, int *top);
/* Baseline end points (x1, y1)-(x2, y2), written through the out-pointers. */
TESS_API BOOL TessPageIteratorBaseline(const TessPageIterator *handle,
TessPageIteratorLevel level, int *x1,
int *y1, int *x2, int *y2);
/* These two take a non-const handle, unlike the other getters in this
 * section, and return everything through out-pointers. */
TESS_API void TessPageIteratorOrientation(
TessPageIterator *handle, TessOrientation *orientation,
TessWritingDirection *writing_direction, TessTextlineOrder *textline_order,
float *deskew_angle);
TESS_API void TessPageIteratorParagraphInfo(
TessPageIterator *handle, TessParagraphJustification *justification,
BOOL *is_list_item, BOOL *is_crown, int *first_line_indent);
/* Result iterator */
/* Lifetime: Delete takes an iterator back, Copy returns a new one.
 * NOTE(review): presumably a copy must be deleted separately -- confirm. */
TESS_API void TessResultIteratorDelete(TessResultIterator *handle);
TESS_API TessResultIterator *TessResultIteratorCopy(
const TessResultIterator *handle);
/* Views of a result iterator as a page iterator (mutable and const forms),
 * and a choice iterator created from it.
 * NOTE(review): whether the returned page iterator aliases the result
 * iterator (and so must not be deleted on its own) is not visible here --
 * confirm against the implementation. */
TESS_API TessPageIterator *TessResultIteratorGetPageIterator(
TessResultIterator *handle);
TESS_API const TessPageIterator *TessResultIteratorGetPageIteratorConst(
const TessResultIterator *handle);
TESS_API TessChoiceIterator *TessResultIteratorGetChoiceIterator(
const TessResultIterator *handle);
TESS_API BOOL TessResultIteratorNext(TessResultIterator *handle,
TessPageIteratorLevel level);
/* Text and confidence of the current element at the given level.
 * NOTE(review): GetUTF8Text returns a non-const char *, which suggests the
 * caller owns it; confirm which function releases it. */
TESS_API char *TessResultIteratorGetUTF8Text(const TessResultIterator *handle,
TessPageIteratorLevel level);
TESS_API float TessResultIteratorConfidence(const TessResultIterator *handle,
TessPageIteratorLevel level);
TESS_API const char *TessResultIteratorWordRecognitionLanguage(
const TessResultIterator *handle);
/* Returns a const char * and reports the individual attributes through the
 * BOOL / int out-pointers. */
TESS_API const char *TessResultIteratorWordFontAttributes(
const TessResultIterator *handle, BOOL *is_bold, BOOL *is_italic,
BOOL *is_underlined, BOOL *is_monospace, BOOL *is_serif, BOOL *is_smallcaps,
int *pointsize, int *font_id);
/* Word-level and symbol-level predicates. */
TESS_API BOOL
TessResultIteratorWordIsFromDictionary(const TessResultIterator *handle);
TESS_API BOOL TessResultIteratorWordIsNumeric(const TessResultIterator *handle);
TESS_API BOOL
TessResultIteratorSymbolIsSuperscript(const TessResultIterator *handle);
TESS_API BOOL
TessResultIteratorSymbolIsSubscript(const TessResultIterator *handle);
TESS_API BOOL
TessResultIteratorSymbolIsDropcap(const TessResultIterator *handle);
/* Choice iterator */
/* Unlike TessResultIteratorGetUTF8Text, the choice text is returned as a
 * const char *. */
TESS_API void TessChoiceIteratorDelete(TessChoiceIterator *handle);
TESS_API BOOL TessChoiceIteratorNext(TessChoiceIterator *handle);
TESS_API const char *TessChoiceIteratorGetUTF8Text(
const TessChoiceIterator *handle);
TESS_API float TessChoiceIteratorConfidence(const TessChoiceIterator *handle);
/* Progress monitor */
/* Creation/destruction and accessors for an ETEXT_DESC progress monitor.
 *
 * TessMonitorCreate is declared with an explicit (void) parameter list: this
 * header is consumed from C, where an empty list "()" declares a function
 * with unspecified parameters (before C23) and therefore disables argument
 * checking. (void) means the same thing in C++, so C++ callers are unaffected.
 *
 * NOTE(review): presumably every monitor returned by TessMonitorCreate must
 * be released with TessMonitorDelete -- confirm against the implementation. */
TESS_API ETEXT_DESC *TessMonitorCreate(void);
TESS_API void TessMonitorDelete(ETEXT_DESC *monitor);
/* Cancel hook and the opaque pointer associated with it. */
TESS_API void TessMonitorSetCancelFunc(ETEXT_DESC *monitor,
    TessCancelFunc cancelFunc);
TESS_API void TessMonitorSetCancelThis(ETEXT_DESC *monitor, void *cancelThis);
TESS_API void *TessMonitorGetCancelThis(ETEXT_DESC *monitor);
/* Progress hook and progress query. */
TESS_API void TessMonitorSetProgressFunc(ETEXT_DESC *monitor,
    TessProgressFunc progressFunc);
TESS_API int TessMonitorGetProgress(ETEXT_DESC *monitor);
/* NOTE(review): the function name says milliseconds; the parameter itself
 * carries no unit -- confirm. */
TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC *monitor, int deadline);
#ifdef __cplusplus
}
#endif
#endif // API_CAPI_H_

View File

@ -1,37 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: export.h
// Description: Defines the TESS_API export/visibility macro.
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_PLATFORM_H_
#define TESSERACT_PLATFORM_H_
// TESS_API: decoration placed on every exported declaration.
// A definition supplied by the build (e.g. on the compiler command line)
// takes precedence; the ladder below only runs when TESS_API is not already
// defined.
#ifndef TESS_API
// Windows / Cygwin: dllexport when TESS_EXPORTS is defined, dllimport when
// TESS_IMPORTS is defined, otherwise nothing.
# if defined(_WIN32) || defined(__CYGWIN__)
# if defined(TESS_EXPORTS)
# define TESS_API __declspec(dllexport)
# elif defined(TESS_IMPORTS)
# define TESS_API __declspec(dllimport)
# else
# define TESS_API
# endif
// Everywhere else: default symbol visibility when either TESS_EXPORTS or
// TESS_IMPORTS is defined, otherwise nothing.
# else
# if defined(TESS_EXPORTS) || defined(TESS_IMPORTS)
# define TESS_API __attribute__((visibility("default")))
# else
# define TESS_API
# endif
# endif
#endif
#endif // TESSERACT_PLATFORM_H_

View File

@ -1,235 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: ltrresultiterator.h
// Description: Iterator for tesseract results in strict left-to-right
// order that avoids using tesseract internal data structures.
// Author: Ray Smith
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
#define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
#include "export.h" // for TESS_API
#include "pageiterator.h" // for PageIterator
#include "publictypes.h" // for PageIteratorLevel
#include "unichar.h" // for StrongScriptDirection
namespace tesseract {
class BLOB_CHOICE_IT;
class PAGE_RES;
class WERD_RES;
class Tesseract;
// Class to iterate over tesseract results, providing access to all levels
// of the page hierarchy, without including any tesseract headers or having
// to handle any tesseract structures.
// WARNING! This class points to data held within the TessBaseAPI class, and
// therefore can only be used while the TessBaseAPI class still exists and
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
// DetectOS, or anything else that changes the internal PAGE_RES.
// See tesseract/publictypes.h for the definition of PageIteratorLevel.
// See also base class PageIterator, which contains the bulk of the interface.
// LTRResultIterator adds text-specific methods for access to OCR output.
// All member functions are only declared here; their definitions are not
// part of this header.
class TESS_API LTRResultIterator : public PageIterator {
friend class ChoiceIterator;
public:
// page_res and tesseract come directly from the BaseAPI.
// The rectangle parameters are copied indirectly from the Thresholder,
// via the BaseAPI. They represent the coordinates of some rectangle in an
// original image (in top-left-origin coordinates) and therefore the top-left
// needs to be added to any output boxes in order to specify coordinates
// in the original image. See TessBaseAPI::SetRectangle.
// The scale and scaled_yres are in case the Thresholder scaled the image
// rectangle prior to thresholding. Any coordinates in tesseract's image
// must be divided by scale before adding (rect_left, rect_top).
// The scaled_yres indicates the effective resolution of the binary image
// that tesseract has been given by the Thresholder.
// After the constructor, Begin has already been called.
LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
int scaled_yres, int rect_left, int rect_top,
int rect_width, int rect_height);
// Overrides the base-class destructor.
~LTRResultIterator() override;
// LTRResultIterators may be copied! This makes it possible to iterate over
// all the objects at a lower level, while maintaining an iterator to
// objects at a higher level. The copy operations DO NOT CALL Begin, so
// iterations will continue from the location of src.
// No copy constructor or operator= is declared in this class.
// TODO: For now the copy constructor and operator= only need the base class
// versions, but if new data members are added, don't forget to add them!
// ============= Moving around within the page ============.
// See PageIterator.
// ============= Accessing data ==============.
// Returns the null terminated UTF-8 encoded text string for the current
// object at the given level. Use delete [] to free after use.
char *GetUTF8Text(PageIteratorLevel level) const;
// Set the string inserted at the end of each text line. "\n" by default.
void SetLineSeparator(const char *new_line);
// Set the string inserted at the end of each paragraph. "\n" by default.
void SetParagraphSeparator(const char *new_para);
// Returns the mean confidence of the current object at the given level.
// The number should be interpreted as a percent probability. (0.0f-100.0f)
float Confidence(PageIteratorLevel level) const;
// ============= Functions that refer to words only ============.
// Returns the font attributes of the current word. If iterating at a higher
// level object than words, eg textlines, then this will return the
// attributes of the first word in that textline.
// The actual return value is a string representing a font name. It points
// to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as
// the iterator itself, ie rendered invalid by various members of
// TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
// Pointsize is returned in printers points (1/72 inch.)
const char *WordFontAttributes(bool *is_bold, bool *is_italic,
bool *is_underlined, bool *is_monospace,
bool *is_serif, bool *is_smallcaps,
int *pointsize, int *font_id) const;
// Return the name of the language used to recognize this word.
// On error, nullptr. Do not delete this pointer.
const char *WordRecognitionLanguage() const;
// Return the overall directionality of this word.
StrongScriptDirection WordDirection() const;
// Returns true if the current word was found in a dictionary.
bool WordIsFromDictionary() const;
// Returns the number of blanks before the current word.
int BlanksBeforeWord() const;
// Returns true if the current word is numeric.
bool WordIsNumeric() const;
// Returns true if the word contains blamer information.
bool HasBlamerInfo() const;
// Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
// of the current word.
const void *GetParamsTrainingBundle() const;
// Returns a pointer to the string with blamer information for this word.
// Assumes that the word's blamer_bundle is not nullptr.
const char *GetBlamerDebug() const;
// Returns a pointer to the string with misadaption information for this word.
// Assumes that the word's blamer_bundle is not nullptr.
const char *GetBlamerMisadaptionDebug() const;
// Returns true if a truth string was recorded for the current word.
bool HasTruthString() const;
// Returns true if the given string is equivalent to the truth string for
// the current word.
bool EquivalentToTruth(const char *str) const;
// Returns a null terminated UTF-8 encoded truth string for the current word.
// Use delete [] to free after use.
char *WordTruthUTF8Text() const;
// Returns a null terminated UTF-8 encoded normalized OCR string for the
// current word. Use delete [] to free after use.
char *WordNormedUTF8Text() const;
// Returns a pointer to serialized choice lattice.
// Fills lattice_size with the number of bytes in lattice data.
// NOTE(review): ownership of the returned pointer is not stated -- confirm.
const char *WordLattice(int *lattice_size) const;
// ============= Functions that refer to symbols only ============.
// Returns true if the current symbol is a superscript.
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
bool SymbolIsSuperscript() const;
// Returns true if the current symbol is a subscript.
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
bool SymbolIsSubscript() const;
// Returns true if the current symbol is a dropcap.
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
bool SymbolIsDropcap() const;
protected:
// Separator strings; see SetLineSeparator / SetParagraphSeparator.
// NOTE(review): these are raw const char *. Whether the setters copy the
// argument or merely store the pointer is not visible in this header --
// presumably callers must keep the string alive; confirm against the
// implementation.
const char *line_separator_;
const char *paragraph_separator_;
};
// Class to iterate over the classifier choices for a single RIL_SYMBOL.
// All member functions are only declared here; their definitions are not
// part of this header.
// NOTE(review): the class holds raw pointers and declares a destructor but no
// copy operations, so the compiler-generated copies are memberwise. Whether
// copying a ChoiceIterator is safe depends on what the destructor releases,
// which is not visible here -- confirm before copying instances.
class TESS_API ChoiceIterator {
public:
// Construction is from a LTRResultIterator that points to the symbol of
// interest. The ChoiceIterator allows a one-shot iteration over the
// choices for this symbol and after that it is useless.
explicit ChoiceIterator(const LTRResultIterator &result_it);
~ChoiceIterator();
// Moves to the next choice for the symbol and returns false if there
// are none left.
bool Next();
// ============= Accessing data ==============.
// Returns the null terminated UTF-8 encoded text string for the current
// choice.
// NOTE: Unlike LTRResultIterator::GetUTF8Text, the return points to an
// internal structure and should NOT be delete[]ed to free after use.
const char *GetUTF8Text() const;
// Returns the confidence of the current choice depending on the used language
// data. If only LSTM traineddata is used the value range is 0.0f - 1.0f. All
// choices for one symbol should roughly add up to 1.0f.
// If only traineddata of the legacy engine is used, the number should be
// interpreted as a percent probability. (0.0f-100.0f) In this case
// probabilities won't add up to 100. Each one stands on its own.
float Confidence() const;
// Returns a vector containing all timesteps, which belong to the currently
// selected symbol. A timestep is a vector containing pairs of symbols and
// floating point numbers. The number states the probability for the
// corresponding symbol.
// NOTE(review): the result is a raw pointer; ownership and whether it can be
// nullptr are not stated here -- confirm against the implementation.
std::vector<std::vector<std::pair<const char *, float>>> *Timesteps() const;
private:
// Clears the remaining spaces out of the results and adapts the
// probabilities.
void filterSpaces();
// Pointer to the WERD_RES object owned by the API.
WERD_RES *word_res_;
// Iterator over the blob choices.
BLOB_CHOICE_IT *choice_it_;
// LSTM choice list and the current position within it. Only LSTM_choices_
// has a default member initializer; every other member is left to the
// constructor, whose definition is not visible here.
std::vector<std::pair<const char *, float>> *LSTM_choices_ = nullptr;
std::vector<std::pair<const char *, float>>::iterator LSTM_choice_it_;
// NOTE(review): presumably an index into the LSTM timesteps -- confirm.
const int *tstep_index_;
// regulates the rating granularity
double rating_coefficient_;
// leading blanks
int blanks_before_word_;
// true when there is lstm engine related trained data
bool oemLSTM_;
};
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_

View File

@ -1,158 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
/**********************************************************************
* File: ocrclass.h
* Description: Class definitions and constants for the OCR API.
* Author: Hewlett-Packard Co
*
* (C) Copyright 1996, Hewlett-Packard Co.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
/**********************************************************************
* This file contains typedefs for all the structures used by
* the HP OCR interface.
* The structures are designed to allow them to be used with any
* structure alignment up to 8.
**********************************************************************/
#ifndef CCUTIL_OCRCLASS_H_
#define CCUTIL_OCRCLASS_H_
#include <chrono>
#include <ctime>
namespace tesseract {
/**********************************************************************
* EANYCODE_CHAR
* Description of a single character. The character code is defined by
* the character set of the current font.
* Output text is sent as an array of these structures.
* Spaces and line endings in the output are represented in the
* structures of the surrounding characters. They are not directly
* represented as characters.
* The first character in a word has a positive value of blanks.
* Missing information should be set to the defaults in the comments.
* If word bounds are known, but not character bounds, then the top and
* bottom of each character should be those of the word. The left of the
* first and right of the last char in each word should be set. All other
* lefts and rights should be set to -1.
* If set, the values of right and bottom are left+width and top+height.
* Most of the members come directly from the parameters to ocr_append_char.
* The formatting member uses the enhancement parameter and combines the
* line direction stuff into the top 3 bits.
* The coding is 0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para,
* 5=UL Para, 6=TB char, 7=BT char. API users do not need to know what
* the coding is, only that it is backwards compatible with the previous
* version.
**********************************************************************/
// One output character record. The value in parentheses at the end of each
// field comment is the default to use when the information is missing.
struct EANYCODE_CHAR { /*single character */
// It should be noted that the format for char_code for version 2.0 and beyond
// is UTF8 which means that ASCII characters will come out as one structure
// but other characters will be returned in two or more instances of this
// structure with a single byte of the UTF8 code in each, but each will have
// the same bounding box. Programs which want to handle languages with
// different character sets will need to handle extended characters
// appropriately, but *all* code needs to be prepared to receive UTF8 coded
// characters for characters such as bullet and fancy quotes.
uint16_t char_code; /*character itself */
int16_t left; /*of char (-1) */
int16_t right; /*of char (-1) */
int16_t top; /*of char (-1) */
int16_t bottom; /*of char (-1) */
int16_t font_index; /*what font (0) */
uint8_t confidence; /*0=perfect, 100=reject (0/100) */
uint8_t point_size; /*of char, 72=1 inch, (10) */
int8_t blanks; /*no of spaces before this char (1) */
uint8_t formatting; /*char formatting (0) */
};
/**********************************************************************
* ETEXT_DESC
* Description of the output of the OCR engine.
* This structure is used as both a progress monitor and the final
* output header, since it needs to be a valid progress monitor while
* the OCR engine is storing its output to shared memory.
* During progress, all the buffer info is -1.
* Progress starts at 0 and increases to 100 during OCR. No other constraint.
* Additionally the progress callback contains the bounding box of the word that
* is currently being processed.
* Every progress callback, the OCR engine must set ocr_alive to 1.
* The HP side will set ocr_alive to 0. Repeated failure to reset
* to 1 indicates that the OCR engine is dead.
* If the cancel function is not null then it is called with the number of
* user words found. If it returns true then operation is cancelled.
**********************************************************************/
class ETEXT_DESC;

/// Cancel hook; receives an opaque pointer and an int, returns true to cancel.
using CANCEL_FUNC = bool (*)(void *, int);
/// Progress hook taking the progress value followed by four ints.
using PROGRESS_FUNC = bool (*)(int, int, int, int, int);
/// Progress hook taking the monitor itself followed by four ints.
using PROGRESS_FUNC2 = bool (*)(ETEXT_DESC *, int, int, int, int);

/// Output header that doubles as the progress monitor.
/// Data members keep their original declaration order.
class ETEXT_DESC {
public:
  int16_t count = 0;    /// chars in this buffer(0)
  int16_t progress = 0; /// percent complete increasing (0-100)
  /** Progress monitor covers word recognition and it does not cover layout
   * analysis.
   * See Ray comment in https://github.com/tesseract-ocr/tesseract/pull/27 */
  int8_t more_to_come = 0;       /// true if not last
  volatile int8_t ocr_alive = 0; /// ocr sets to 1, HP 0
  int8_t err_code = 0;           /// for errcode use
  CANCEL_FUNC cancel = nullptr;  /// returns true to cancel
  PROGRESS_FUNC progress_callback = nullptr; /// called whenever progress increases
  PROGRESS_FUNC2 progress_callback2;         /// monitor-aware progress callback
  void *cancel_this = nullptr;               /// this or other data for cancel
  /// Time to stop. Expected to be set only by call to set_deadline_msecs().
  /// A value equal to the clock's epoch means "no deadline".
  std::chrono::steady_clock::time_point end_time;
  EANYCODE_CHAR text[1] = {}; /// character data

  /// Installs the forwarding progress hook and starts with no deadline
  /// (end_time at the clock's epoch).
  ETEXT_DESC()
      : progress_callback2(&default_progress_func),
        end_time(std::chrono::steady_clock::time_point{}) {}

  /// Moves the deadline to deadline_msecs milliseconds from now.
  /// Non-positive values are ignored and leave end_time untouched.
  void set_deadline_msecs(int32_t deadline_msecs) {
    if (deadline_msecs <= 0) {
      return;
    }
    const auto budget = std::chrono::milliseconds(deadline_msecs);
    end_time = std::chrono::steady_clock::now() + budget;
  }

  /// True only when a deadline has been set and the steady clock is past it.
  bool deadline_exceeded() const {
    const bool has_deadline =
        end_time.time_since_epoch() !=
        std::chrono::steady_clock::duration::zero();
    if (!has_deadline) {
      return false;
    }
    return std::chrono::steady_clock::now() > end_time;
  }

private:
  /// Default value of progress_callback2: forwards to progress_callback with
  /// the monitor's current progress, or returns true when no progress_callback
  /// is installed.
  static bool default_progress_func(ETEXT_DESC *self, int left, int right,
                                    int top, int bottom) {
    const PROGRESS_FUNC legacy_hook = self->progress_callback;
    if (legacy_hook == nullptr) {
      return true;
    }
    return legacy_hook(self->progress, left, right, top, bottom);
  }
};
} // namespace tesseract
#endif // CCUTIL_OCRCLASS_H_

View File

@ -1,139 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: osdetect.h
// Description: Orientation and script detection.
// Author: Samuel Charron
// Ranjith Unnikrishnan
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCMAIN_OSDETECT_H_
#define TESSERACT_CCMAIN_OSDETECT_H_
#include "export.h" // for TESS_API
#include <vector> // for std::vector
namespace tesseract {
class BLOBNBOX;
class BLOBNBOX_CLIST;
class BLOB_CHOICE_LIST;
class TO_BLOCK_LIST;
class UNICHARSET;
class Tesseract;
// Upper bound on the number of scripts: the scripts in ICU (116), plus the
// "NULL" entry (1), plus Japanese and Korean (2), plus Fraktur (1).
constexpr int kMaxNumberOfScripts = 116 + 1 + 2 + 1;
// Best orientation id and script id together with two confidence values
// (presumably sconfidence belongs to the script and oconfidence to the
// orientation -- confirm against the implementation).
struct OSBestResult {
  // Every field starts at zero; see the member initializers below.
  OSBestResult() {}

  int orientation_id = 0;
  int script_id = 0;
  float sconfidence = 0.0f;
  float oconfidence = 0.0f;
};
// Accumulated orientation and script scores plus the current best estimate.
struct OSResults {
  // Starts with no unicharset and every score set to zero.
  OSResults() : unicharset(nullptr) {
    for (float &orientation_score : orientations) {
      orientation_score = 0;
    }
    for (auto &scores_for_orientation : scripts_na) {
      for (float &script_score : scores_for_orientation) {
        script_score = 0;
      }
    }
  }

  void update_best_orientation();

  // Set the estimate of the orientation to the given id.
  void set_best_orientation(int orientation_id);

  // Update/Compute the best estimate of the script assuming the given
  // orientation id.
  void update_best_script(int orientation_id);

  // Return the index of the script with the highest score for this orientation.
  TESS_API int get_best_script(int orientation_id) const;

  // Accumulate scores with given OSResults instance and update the best script.
  void accumulate(const OSResults &osr);

  // Print statistics.
  void print_scores() const;
  void print_scores(int orientation_id) const;

  // Array holding scores for each orientation id [0,3].
  // Orientation ids [0..3] map to [0, 270, 180, 90] degree orientations of the
  // page respectively, where the values refer to the amount of clockwise
  // rotation to be applied to the page for the text to be upright and readable.
  float orientations[4];

  // Script confidence scores for each of 4 possible orientations.
  float scripts_na[4][kMaxNumberOfScripts];

  UNICHARSET *unicharset;
  OSBestResult best_result;
};
// Orientation detector. All member functions are only declared here; their
// definitions are not part of this header.
// NOTE(review): both members are raw pointers taken from the constructor
// arguments; presumably neither is owned, so the allowed_scripts vector and
// the OSResults object must outlive the detector -- confirm against the
// implementation.
class OrientationDetector {
public:
OrientationDetector(const std::vector<int> *allowed_scripts,
OSResults *results);
// NOTE(review): the meaning of the bool result is not visible here -- confirm.
bool detect_blob(BLOB_CHOICE_LIST *scores);
int get_orientation();
private:
OSResults *osr_;
const std::vector<int> *allowed_scripts_;
};
// Script detector. All member functions are only declared here; their
// definitions are not part of this header.
// NOTE(review): osr_, tess_ and allowed_scripts_ are raw pointers;
// presumably none is owned, so the pointed-to objects must outlive the
// detector -- confirm against the implementation.
class ScriptDetector {
public:
ScriptDetector(const std::vector<int> *allowed_scripts, OSResults *osr,
tesseract::Tesseract *tess);
void detect_blob(BLOB_CHOICE_LIST *scores);
// NOTE(review): the stop condition is not visible here -- confirm.
bool must_stop(int orientation) const;
private:
OSResults *osr_;
// Script name strings shared by all instances; defined out of line.
static const char *korean_script_;
static const char *japanese_script_;
static const char *fraktur_script_;
// Per-instance script ids.
// NOTE(review): presumably resolved once in the constructor -- confirm.
int korean_id_;
int japanese_id_;
int katakana_id_;
int hiragana_id_;
int han_id_;
int hangul_id_;
int latin_id_;
int fraktur_id_;
tesseract::Tesseract *tess_;
const std::vector<int> *allowed_scripts_;
};
// Free-function entry points for orientation and script detection. Only
// OrientationIdToValue carries TESS_API; the others are declared without it.
// NOTE(review): the meaning of the int / bool results is not visible in this
// header -- confirm against the implementation.
int orientation_and_script_detection(const char *filename, OSResults *,
tesseract::Tesseract *);
int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr,
tesseract::Tesseract *tess);
int os_detect_blobs(const std::vector<int> *allowed_scripts,
BLOBNBOX_CLIST *blob_list, OSResults *osr,
tesseract::Tesseract *tess);
bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s,
OSResults *, tesseract::Tesseract *tess);
// Helper method to convert an orientation index to its value in degrees.
// The value represents the amount of clockwise rotation in degrees that must be
// applied for the text to be upright (readable).
// The id is taken by const reference. That is part of the exported
// signature, so it is left as is even though passing an int by value would
// be the usual choice.
TESS_API int OrientationIdToValue(const int &id);
} // namespace tesseract
#endif // TESSERACT_CCMAIN_OSDETECT_H_

View File

@ -1,364 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: pageiterator.h
// Description: Iterator for tesseract page structure that avoids using
// tesseract internal data structures.
// Author: Ray Smith
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_
#define TESSERACT_CCMAIN_PAGEITERATOR_H_
#include "export.h"
#include "publictypes.h"
struct Pix;
struct Pta;
namespace tesseract {
struct BlamerBundle;
class C_BLOB_IT;
class PAGE_RES;
class PAGE_RES_IT;
class WERD;
class Tesseract;
/**
* Class to iterate over tesseract page structure, providing access to all
* levels of the page hierarchy, without including any tesseract headers or
* having to handle any tesseract structures.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
* DetectOS, or anything else that changes the internal PAGE_RES.
* See tesseract/publictypes.h for the definition of PageIteratorLevel.
* See also ResultIterator, derived from PageIterator, which adds in the
* ability to access OCR output with text-specific methods.
*/
class TESS_API PageIterator {
public:
/**
* page_res and tesseract come directly from the BaseAPI.
* The rectangle parameters are copied indirectly from the Thresholder,
* via the BaseAPI. They represent the coordinates of some rectangle in an
* original image (in top-left-origin coordinates) and therefore the top-left
* needs to be added to any output boxes in order to specify coordinates
* in the original image. See TessBaseAPI::SetRectangle.
* The scale and scaled_yres are in case the Thresholder scaled the image
* rectangle prior to thresholding. Any coordinates in tesseract's image
* must be divided by scale before adding (rect_left, rect_top).
* The scaled_yres indicates the effective resolution of the binary image
* that tesseract has been given by the Thresholder.
* After the constructor, Begin has already been called.
*/
PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
int scaled_yres, int rect_left, int rect_top, int rect_width,
int rect_height);
virtual ~PageIterator();
/**
* Page/ResultIterators may be copied! This makes it possible to iterate over
* all the objects at a lower level, while maintaining an iterator to
* objects at a higher level. These constructors DO NOT CALL Begin, so
* iterations will continue from the location of src.
*/
PageIterator(const PageIterator &src);
// NOTE(review): assignment returns a const reference rather than the usual
// non-const reference; callers cannot chain mutating calls on the result.
const PageIterator &operator=(const PageIterator &src);
/** Are we positioned at the same location as other? */
bool PositionedAtSameWord(const PAGE_RES_IT *other) const;
// ============= Moving around within the page ============.
/**
* Moves the iterator to point to the start of the page to begin an
* iteration.
*/
virtual void Begin();
/**
* Moves the iterator to the beginning of the paragraph.
* This class implements this functionality by moving it to the zero indexed
* blob of the first (leftmost) word on the first row of the paragraph.
*/
virtual void RestartParagraph();
/**
* Return whether this iterator points anywhere in the first textline of a
* paragraph.
*/
bool IsWithinFirstTextlineOfParagraph() const;
/**
* Moves the iterator to the beginning of the text line.
* This class implements this functionality by moving it to the zero indexed
* blob of the first (leftmost) word of the row.
*/
virtual void RestartRow();
/**
* Moves to the start of the next object at the given level in the
* page hierarchy, and returns false if the end of the page was reached.
* NOTE that RIL_SYMBOL will skip non-text blocks, but all other
* PageIteratorLevel level values will visit each non-text block once.
* Think of non text blocks as containing a single para, with a single line,
* with a single imaginary word.
* Calls to Next with different levels may be freely intermixed.
* This function iterates words in right-to-left scripts correctly, if
* the appropriate language has been loaded into Tesseract.
*/
virtual bool Next(PageIteratorLevel level);
/**
* Returns true if the iterator is at the start of an object at the given
* level.
*
* For instance, suppose an iterator it is pointed to the first symbol of the
* first word of the third line of the second paragraph of the first block in
* a page, then:
* it.IsAtBeginningOf(RIL_BLOCK) = false
* it.IsAtBeginningOf(RIL_PARA) = false
* it.IsAtBeginningOf(RIL_TEXTLINE) = true
* it.IsAtBeginningOf(RIL_WORD) = true
* it.IsAtBeginningOf(RIL_SYMBOL) = true
*/
virtual bool IsAtBeginningOf(PageIteratorLevel level) const;
/**
* Returns whether the iterator is positioned at the last element in a
* given level. (e.g. the last word in a line, the last line in a block)
*
* Here's some two-paragraph example
* text. It starts off innocuously
* enough but quickly turns bizarre.
* The author inserts a cornucopia
* of words to guard against confused
* references.
*
* Now take an iterator it pointed to the start of "bizarre."
* it.IsAtFinalElement(RIL_PARA, RIL_SYMBOL) = false
* it.IsAtFinalElement(RIL_PARA, RIL_WORD) = true
* it.IsAtFinalElement(RIL_BLOCK, RIL_WORD) = false
*/
virtual bool IsAtFinalElement(PageIteratorLevel level,
PageIteratorLevel element) const;
/**
* Returns whether this iterator is positioned
* before other: -1
* equal to other: 0
* after other: 1
*/
int Cmp(const PageIterator &other) const;
// ============= Accessing data ==============.
// Coordinate system:
// Integer coordinates are at the cracks between the pixels.
// The top-left corner of the top-left pixel in the image is at (0,0).
// The bottom-right corner of the bottom-right pixel in the image is at
// (width, height).
// Every bounding box goes from the top-left of the top-left contained
// pixel to the bottom-right of the bottom-right contained pixel, so
// the bounding box of the single top-left pixel in the image is:
// (0,0)->(1,1).
// If an image rectangle has been set in the API, then returned coordinates
// relate to the original (full) image, rather than the rectangle.
/**
* Controls what to include in a bounding box. Bounding boxes of all levels
* between RIL_WORD and RIL_BLOCK can include or exclude potential diacritics.
* Between layout analysis and recognition, it isn't known where all
* diacritics belong, so this control is used to include or exclude some
* diacritics that are above or below the main body of the word. In most cases
* where the placement is obvious, and after recognition, it doesn't make as
* much difference, as the diacritics will already be included in the word.
*/
void SetBoundingBoxComponents(bool include_upper_dots,
bool include_lower_dots) {
// Only records the two flags; they take effect on later bounding-box queries.
include_upper_dots_ = include_upper_dots;
include_lower_dots_ = include_lower_dots;
}
/**
* Returns the bounding rectangle of the current object at the given level.
* See comment on coordinate system above.
* Returns false if there is no such object at the current position.
* The returned bounding box is guaranteed to match the size and position
* of the image returned by GetBinaryImage, but may clip foreground pixels
* from a grey image. The padding argument to GetImage can be used to expand
* the image to include more foreground pixels. See GetImage below.
*/
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right,
int *bottom) const;
// Overload that additionally takes a padding value.
// NOTE(review): presumably the box is grown by padding on each side; the
// exact clipping behavior is not visible from this header - confirm.
bool BoundingBox(PageIteratorLevel level, int padding, int *left, int *top,
int *right, int *bottom) const;
/**
* Returns the bounding rectangle of the object in a coordinate system of the
* working image rectangle having its origin at (rect_left_, rect_top_) with
* respect to the original image and is scaled by a factor scale_.
*/
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top,
int *right, int *bottom) const;
/** Returns whether there is no object of a given level. */
bool Empty(PageIteratorLevel level) const;
/**
* Returns the type of the current block.
* See tesseract/publictypes.h for PolyBlockType.
*/
PolyBlockType BlockType() const;
/**
* Returns the polygon outline of the current block. The returned Pta must
* be ptaDestroy-ed after use. Note that the returned Pta lists the vertices
* of the polygon, and the last edge is the line segment between the last
* point and the first point. nullptr will be returned if the iterator is
* at the end of the document or layout analysis was not used.
*/
Pta *BlockPolygon() const;
/**
* Returns a binary image of the current object at the given level.
* The position and size match the return from BoundingBoxInternal, and so
* this could be upscaled with respect to the original input image.
* Use pixDestroy to delete the image after use.
*/
Pix *GetBinaryImage(PageIteratorLevel level) const;
/**
* Returns an image of the current object at the given level in greyscale
* if available in the input. To guarantee a binary image use GetBinaryImage.
* NOTE that in order to give the best possible image, the bounds are
* expanded slightly over the binary connected component, by the supplied
* padding, so the top-left position of the returned image is returned
* in (left,top). These will most likely not match the coordinates
* returned by BoundingBox.
* If you do not supply an original image, you will get a binary one.
* Use pixDestroy to delete the image after use.
*/
Pix *GetImage(PageIteratorLevel level, int padding, Pix *original_img,
int *left, int *top) const;
/**
* Returns the baseline of the current object at the given level.
* The baseline is the line that passes through (x1, y1) and (x2, y2).
* WARNING: with vertical text, baselines may be vertical!
* Returns false if there is no baseline at the current position.
*/
bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2,
int *y2) const;
// Returns the attributes of the current row.
void RowAttributes(float *row_height, float *descenders,
float *ascenders) const;
/**
* Returns orientation for the block the iterator points to.
* orientation, writing_direction, textline_order: see publictypes.h
* deskew_angle: after rotating the block so the text orientation is
* upright, how many radians does one have to rotate the
* block anti-clockwise for it to be level?
* -Pi/4 <= deskew_angle <= Pi/4
*/
void Orientation(tesseract::Orientation *orientation,
tesseract::WritingDirection *writing_direction,
tesseract::TextlineOrder *textline_order,
float *deskew_angle) const;
/**
* Returns information about the current paragraph, if available.
*
* justification -
* LEFT if ragged right, or fully justified and script is left-to-right.
* RIGHT if ragged left, or fully justified and script is right-to-left.
* unknown if it looks like source code or we have very few lines.
* is_list_item -
* true if we believe this is a member of an ordered or unordered list.
* is_crown -
* true if the first line of the paragraph is aligned with the other
* lines of the paragraph even though subsequent paragraphs have first
* line indents. This typically indicates that this is the continuation
* of a previous paragraph or that it is the very first paragraph in
* the chapter.
* first_line_indent -
* For LEFT aligned paragraphs, the first text line of paragraphs of
* this kind are indented this many pixels from the left edge of the
* rest of the paragraph.
* for RIGHT aligned paragraphs, the first text line of paragraphs of
* this kind are indented this many pixels from the right edge of the
* rest of the paragraph.
* NOTE 1: This value may be negative.
* NOTE 2: if *is_crown == true, the first line of this paragraph is
* actually flush, and first_line_indent is set to the "common"
* first_line_indent for subsequent paragraphs in this block
* of text.
*/
void ParagraphInfo(tesseract::ParagraphJustification *justification,
bool *is_list_item, bool *is_crown,
int *first_line_indent) const;
// If the current WERD_RES (it_->word()) is not nullptr, sets the BlamerBundle
// of the current word to the given pointer (takes ownership of the pointer)
// and returns true.
// Can only be used when iterating on the word level.
bool SetWordBlamerBundle(BlamerBundle *blamer_bundle);
protected:
/**
* Sets up the internal data for iterating the blobs of a new word, then
* moves the iterator to the given offset.
*/
void BeginWord(int offset);
/** Pointer to the page_res owned by the API. */
PAGE_RES *page_res_;
/** Pointer to the Tesseract object owned by the API. */
Tesseract *tesseract_;
/**
* The iterator to the page_res_. Owned by this iterator.
* A pointer just to avoid dragging in Tesseract includes.
*/
PAGE_RES_IT *it_;
/**
* The current input WERD being iterated. If there is an output from OCR,
* then word_ is nullptr. Owned by the API
*/
WERD *word_;
/** The length of the current word_. */
int word_length_;
/** The current blob index within the word. */
int blob_index_;
/**
* Iterator to the blobs within the word. If nullptr, then we are iterating
* OCR results in the box_word.
* Owned by this iterator.
*/
C_BLOB_IT *cblob_it_;
/** Control over what to include in bounding boxes. */
bool include_upper_dots_;
bool include_lower_dots_;
/** Parameters saved from the Thresholder. Needed to rebuild coordinates.*/
int scale_;
int scaled_yres_;
int rect_left_;
int rect_top_;
int rect_width_;
int rect_height_;
};
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_PAGEITERATOR_H_

View File

@ -1,281 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: publictypes.h
// Description: Types used in both the API and internally
// Author: Ray Smith
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
#define TESSERACT_CCSTRUCT_PUBLICTYPES_H_
namespace tesseract {
// This file contains types that are used both by the API and internally
// to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
// dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT.
// Restated: It is OK for low-level Tesseract files to include publictypes.h,
// but not for the low-level tesseract code to include top-level API code.
// This file should not use other Tesseract types, as that would drag
// their includes into the API-level.
/** Printers' points per inch; point sizes are reported in this unit. */
constexpr int kPointsPerInch{72};

/**
 * Lowest resolution considered believable. Serves as the fallback when no
 * other information is available, because under-estimating is safer than
 * over-estimating.
 */
constexpr int kMinCredibleResolution{70};

/** Highest resolution considered believable. */
constexpr int kMaxCredibleResolution{2400};

/**
 * Ratio of the median blob size to the likely resolution, used to estimate the
 * resolution when none is supplied. Essentially 1 / (usual text size in
 * inches).
 */
constexpr int kResolutionEstimationFactor{10};
/**
* Possible types for a POLY_BLOCK or ColPartition.
* Must be kept in sync with kPBColors in polyblk.cpp and PTIs*Type functions
* below, as well as kPolyBlockNames in layout_test.cc.
* Used extensively by ColPartition, and POLY_BLOCK.
*/
enum PolyBlockType {
  PT_UNKNOWN,         // Type is not yet known. Keep as the first element.
  PT_FLOWING_TEXT,    // Text that lives inside a column.
  PT_HEADING_TEXT,    // Text that spans more than one column.
  PT_PULLOUT_TEXT,    // Text that is in a cross-column pull-out region.
  PT_EQUATION,        // Partition belonging to an equation region.
  PT_INLINE_EQUATION, // Partition has inline equation.
  PT_TABLE,           // Partition belonging to a table region.
  PT_VERTICAL_TEXT,   // Text-line runs vertically.
  PT_CAPTION_TEXT,    // Text that belongs to an image.
  PT_FLOWING_IMAGE,   // Image that lives inside a column.
  PT_HEADING_IMAGE,   // Image that spans more than one column.
  PT_PULLOUT_IMAGE,   // Image that is in a cross-column pull-out region.
  PT_HORZ_LINE,       // Horizontal Line.
  PT_VERT_LINE,       // Vertical Line.
  PT_NOISE,           // Lies outside of any column.
  PT_COUNT            // Number of block types. Keep as the last element.
};

// The predicates below are constexpr (and therefore implicitly inline) so that
// they can also be evaluated at compile time, e.g. in static_assert or when
// building constant lookup tables. Run-time callers are unaffected.

/** Returns true if type is a line type (horizontal or vertical). */
constexpr bool PTIsLineType(PolyBlockType type) {
  return type == PT_HORZ_LINE || type == PT_VERT_LINE;
}

/** Returns true if type is one of the image types. */
constexpr bool PTIsImageType(PolyBlockType type) {
  return type == PT_FLOWING_IMAGE || type == PT_HEADING_IMAGE ||
         type == PT_PULLOUT_IMAGE;
}

/**
 * Returns true if type is one of the text types. Note that PT_TABLE and
 * PT_INLINE_EQUATION count as text, while PT_EQUATION does not.
 */
constexpr bool PTIsTextType(PolyBlockType type) {
  return type == PT_FLOWING_TEXT || type == PT_HEADING_TEXT ||
         type == PT_PULLOUT_TEXT || type == PT_TABLE ||
         type == PT_VERTICAL_TEXT || type == PT_CAPTION_TEXT ||
         type == PT_INLINE_EQUATION;
}

/** Returns true if type is a pull-out (inter-column) type, image or text. */
constexpr bool PTIsPulloutType(PolyBlockType type) {
  return type == PT_PULLOUT_IMAGE || type == PT_PULLOUT_TEXT;
}
/**
* +------------------+ Orientation Example:
* | 1 Aaaa Aaaa Aaaa | ====================
* | Aaa aa aaa aa | To left is a diagram of some (1) English and
* | aaaaaa A aa aaa. | (2) Chinese text and a (3) photo credit.
* | 2 |
* | ####### c c C | Upright Latin characters are represented as A and a.
* | ####### c c c | '<' represents a latin character rotated
* | < ####### c c c | anti-clockwise 90 degrees.
* | < ####### c c |
* | < ####### . c | Upright Chinese characters are represented C and c.
* | 3 ####### c |
* +------------------+ NOTA BENE: enum values here should match goodoc.proto
* If you orient your head so that "up" aligns with Orientation,
* then the characters will appear "right side up" and readable.
*
* In the example above, both the English and Chinese paragraphs are oriented
* so their "up" is the top of the page (page up). The photo credit is read
* with one's head turned leftward ("up" is to page left).
*
* The values of this enum match the convention of Tesseract's osdetect.h
*/
enum Orientation {
  ORIENTATION_PAGE_UP = 0,    // Text "up" is the top of the page.
  ORIENTATION_PAGE_RIGHT = 1, // Text "up" is the right side of the page.
  ORIENTATION_PAGE_DOWN = 2,  // Text "up" is the bottom of the page.
  ORIENTATION_PAGE_LEFT = 3   // Text "up" is the left side of the page.
};
/**
* The grapheme clusters within a line of text are laid out logically
* in this direction, judged when looking at the text line rotated so that
* its Orientation is "page up".
*
* For English text, the writing direction is left-to-right. For the
* Chinese text in the above example, the writing direction is top-to-bottom.
*/
enum WritingDirection {
  WRITING_DIRECTION_LEFT_TO_RIGHT = 0, // E.g. English text.
  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
  WRITING_DIRECTION_TOP_TO_BOTTOM = 2  // E.g. vertically set Chinese text.
};
/**
* The text lines are read in the given sequence.
*
* In English, the order is top-to-bottom.
* In Chinese, vertical text lines are read right-to-left. Mongolian is
* written in vertical columns top to bottom like Chinese, but its lines
* are ordered left-to-right.
*
* Note that only some combinations make sense. For example,
* WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM
*/
enum TextlineOrder {
  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0, // Successive lines run left to right.
  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1, // Successive lines run right to left.
  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2  // Successive lines run top to bottom.
};
/**
* Possible modes for page layout analysis. These *must* be kept in order
* of decreasing amount of layout analysis to be done, except for OSD_ONLY,
* so that the inequality test macros below work.
*/
enum PageSegMode {
  PSM_OSD_ONLY = 0,      ///< Orientation and script detection only.
  PSM_AUTO_OSD = 1,      ///< Automatic page segmentation with orientation and
                         ///< script detection. (OSD)
  PSM_AUTO_ONLY = 2,     ///< Automatic page segmentation, but no OSD, or OCR.
  PSM_AUTO = 3,          ///< Fully automatic page segmentation, but no OSD.
  PSM_SINGLE_COLUMN = 4, ///< Assume a single column of text of variable sizes.
  PSM_SINGLE_BLOCK_VERT_TEXT = 5, ///< Assume a single uniform block of
                                  ///< vertically aligned text.
  PSM_SINGLE_BLOCK = 6, ///< Assume a single uniform block of text. (Default.)
  PSM_SINGLE_LINE = 7,  ///< Treat the image as a single text line.
  PSM_SINGLE_WORD = 8,  ///< Treat the image as a single word.
  PSM_CIRCLE_WORD = 9,  ///< Treat the image as a single word in a circle.
  PSM_SINGLE_CHAR = 10, ///< Treat the image as a single character.
  PSM_SPARSE_TEXT =
      11, ///< Find as much text as possible in no particular order.
  PSM_SPARSE_TEXT_OSD = 12, ///< Sparse text with orientation and script det.
  PSM_RAW_LINE = 13, ///< Treat the image as a single text line, bypassing
                     ///< hacks that are Tesseract-specific.
  PSM_COUNT          ///< Number of enum entries.
};

/**
 * Functions that act on a PageSegMode to determine whether components of
 * layout analysis are enabled.
 * *Depend critically on the order of elements of PageSegMode.*
 * NOTE that arg is an int for compatibility with INT_PARAM.
 *
 * They are constexpr (and therefore implicitly inline) so that they can also
 * be evaluated at compile time; run-time callers are unaffected.
 */

/** True for PSM_OSD_ONLY, PSM_AUTO_OSD and PSM_SPARSE_TEXT_OSD. */
constexpr bool PSM_OSD_ENABLED(int pageseg_mode) {
  return pageseg_mode <= PSM_AUTO_OSD || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}

/** True for PSM_OSD_ONLY through PSM_AUTO, and for PSM_SPARSE_TEXT_OSD. */
constexpr bool PSM_ORIENTATION_ENABLED(int pageseg_mode) {
  return pageseg_mode <= PSM_AUTO || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}

/** True for PSM_AUTO_OSD through PSM_AUTO. */
constexpr bool PSM_COL_FIND_ENABLED(int pageseg_mode) {
  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_AUTO;
}

/** True for the two sparse-text modes. */
constexpr bool PSM_SPARSE(int pageseg_mode) {
  return pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}

/** True for PSM_AUTO_OSD through PSM_SINGLE_COLUMN. */
constexpr bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode) {
  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_COLUMN;
}

/** True for PSM_AUTO_OSD through PSM_SINGLE_BLOCK. */
constexpr bool PSM_LINE_FIND_ENABLED(int pageseg_mode) {
  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_BLOCK;
}

/**
 * True for PSM_AUTO_OSD through PSM_SINGLE_LINE, and for the two sparse-text
 * modes.
 */
constexpr bool PSM_WORD_FIND_ENABLED(int pageseg_mode) {
  return (pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_LINE) ||
         pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD;
}
/**
* enum of the elements of the page hierarchy, used in ResultIterator
* to provide functions that operate on each level without having to
* have 5x as many functions.
*/
enum PageIteratorLevel {
  RIL_BLOCK = 0,    // Block of text/image/separator line.
  RIL_PARA = 1,     // Paragraph within a block.
  RIL_TEXTLINE = 2, // Line within a paragraph.
  RIL_WORD = 3,     // Word within a textline.
  RIL_SYMBOL = 4    // Symbol/character within a word.
};
/**
* JUSTIFICATION_UNKNOWN
* The alignment is not clearly one of the other options. This could happen
* for example if there are only one or two lines of text or the text looks
* like source code or poetry.
*
* NOTA BENE: Fully justified paragraphs (text aligned to both left and right
* margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text
* is written with a left-to-right script and with JUSTIFICATION_RIGHT if
* their text is written in a right-to-left script.
*
* Interpretation for text read in vertical lines:
* "Left" is wherever the starting reading position is.
*
* JUSTIFICATION_LEFT
* Each line, except possibly the first, is flush to the same left tab stop.
*
* JUSTIFICATION_CENTER
* The text lines of the paragraph are centered about a line going
* down through their middle of the text lines.
*
* JUSTIFICATION_RIGHT
* Each line, except possibly the first, is flush to the same right tab stop.
*/
enum ParagraphJustification {
  JUSTIFICATION_UNKNOWN = 0, // Alignment is not clearly one of the others.
  JUSTIFICATION_LEFT = 1,    // Lines are flush to the same left tab stop.
  JUSTIFICATION_CENTER = 2,  // Lines are centered about a common middle line.
  JUSTIFICATION_RIGHT = 3    // Lines are flush to the same right tab stop.
};
/**
* When Tesseract/Cube is initialized we can choose to instantiate/load/run
* only the Tesseract part, only the Cube part or both along with the combiner.
* The preference of which engine to use is stored in tessedit_ocr_engine_mode.
*
* ATTENTION: When modifying this enum, please make sure to make the
* appropriate changes to all the enums mirroring it (e.g. OCREngine in
* cityblock/workflow/detection/detection_storage.proto). Such enums will
* mention the connection to OcrEngineMode in the comments.
*/
enum OcrEngineMode {
  // Run Tesseract only - fastest; deprecated.
  OEM_TESSERACT_ONLY = 0,
  // Run just the LSTM line recognizer.
  OEM_LSTM_ONLY = 1,
  // Run the LSTM recognizer, but allow fallback to Tesseract when things get
  // difficult; deprecated.
  OEM_TESSERACT_LSTM_COMBINED = 2,
  // Specify this mode when calling init_*(), to indicate that any of the above
  // modes should be automatically inferred from the variables in the
  // language-specific config, command-line configs, or if not specified in any
  // of the above should be set to the default OEM_TESSERACT_ONLY.
  OEM_DEFAULT = 3,
  // Number of OEMs.
  OEM_COUNT = 4
};
} // namespace tesseract.
#endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_

View File

@ -1,311 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: renderer.h
// Description: Rendering interface to inject into TessBaseAPI
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_API_RENDERER_H_
#define TESSERACT_API_RENDERER_H_
#include "export.h"
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
// complexity of includes here. Use forward declarations wherever possible
// and hide includes of complex types in baseapi.cpp.
#include <cstdint>
#include <string> // for std::string
#include <vector> // for std::vector
struct Pix;
namespace tesseract {
class TessBaseAPI;
/**
* Interface for rendering tesseract results into a document, such as text,
* HOCR or pdf. This class is abstract. Specific classes handle individual
* formats. This interface is then used to inject the renderer class into
* tesseract when processing images.
*
* For simplicity implementing this with tesseract version 3.01,
* the renderer contains document state that is cleared from document
* to document just as the TessBaseAPI is. This way the base API can just
* delegate its rendering functionality to injected renderers, and the
* renderers can manage the associated state needed for the specific formats
* in addition to the heuristics for producing it.
*/
class TESS_API TessResultRenderer {
public:
virtual ~TessResultRenderer();
// Takes ownership of pointer so must be new'd instance.
// Renderers aren't ordered, but appends the sequences of next parameter
// and existing next(). The renderers should be unique across both lists.
void insert(TessResultRenderer *next);
// Returns the next renderer or nullptr.
TessResultRenderer *next() {
return next_;
}
/**
* Starts a new document with the given title.
* This clears the contents of the output data.
* Title should use UTF-8 encoding.
*/
bool BeginDocument(const char *title);
/**
* Adds the recognized text from the source image to the current document.
* Invalid if BeginDocument not yet called.
*
* Note that this API is a bit weird but is designed to fit into the
* current TessBaseAPI implementation where the api has lots of state
* information that we might want to add in.
*/
bool AddImage(TessBaseAPI *api);
/**
* Finishes the document and finalizes the output data
* Invalid if BeginDocument not yet called.
*/
bool EndDocument();
// Returns the stored extension pointer as-is; the string is not copied by
// this accessor.
const char *file_extension() const {
return file_extension_;
}
// Returns the document title. The pointer refers to the internal std::string
// and is only valid while title_ is not modified.
const char *title() const {
return title_.c_str();
}
// Is everything fine? Otherwise something went wrong.
bool happy() const {
return happy_;
}
/**
* Returns the index of the last image given to AddImage
* (i.e. images are incremented whether the image succeeded or not)
*
* This is always defined. It means either the number of the
* current image, the last image ended, or in the completed document
* depending on when in the document lifecycle you are looking at it.
* Will return -1 if a document was never started.
*/
int imagenum() const {
return imagenum_;
}
protected:
/**
* Called by concrete classes.
*
* outputbase is the name of the output file excluding
* extension. For example, "/path/to/chocolate-chip-cookie-recipe"
*
* extension indicates the file extension to be used for output
* files. For example "pdf" will produce a .pdf file, and "hocr"
* will produce .hocr files.
*/
TessResultRenderer(const char *outputbase, const char *extension);
// Hook for specialized handling in BeginDocument()
virtual bool BeginDocumentHandler();
// This must be overridden to render the OCR'd results
virtual bool AddImageHandler(TessBaseAPI *api) = 0;
// Hook for specialized handling in EndDocument()
virtual bool EndDocumentHandler();
// Renderers can call this to append '\0' terminated strings into
// the output string returned by GetOutput.
// This method will grow the output buffer if needed.
void AppendString(const char *s);
// Renderers can call this to append binary byte sequences into
// the output string returned by GetOutput. Note that s is not necessarily
// '\0' terminated (and can contain '\0' within it).
// This method will grow the output buffer if needed.
void AppendData(const char *s, int len);
private:
TessResultRenderer *next_; // Can link multiple renderers together
// NOTE(review): FILE is used here but this header does not include <cstdio>
// directly; it presumably arrives transitively via another include - confirm.
FILE *fout_; // output file pointer
// Stored as a raw pointer; presumably expected to outlive the renderer
// (e.g. a string literal) - TODO confirm.
const char *file_extension_; // standard extension for generated output
std::string title_; // title of document being rendered
int imagenum_; // index of last image added
bool happy_; // I get grumpy when the disk fills up, etc.
};
/**
* Renders tesseract output into a plain UTF-8 text string
*/
class TESS_API TessTextRenderer : public TessResultRenderer {
public:
// outputbase: presumably the output file name excluding extension, as for
// the TessResultRenderer constructor - TODO confirm.
explicit TessTextRenderer(const char *outputbase);
protected:
// Overrides the base-class hook that renders the OCR results of one image.
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
* Renders tesseract output into an hocr text string
*/
class TESS_API TessHOcrRenderer : public TessResultRenderer {
public:
// outputbase: presumably the output file name excluding extension, as for
// the TessResultRenderer constructor - TODO confirm.
// font_info: whether to print font information (see font_info_ below).
explicit TessHOcrRenderer(const char *outputbase, bool font_info);
// Overload without font_info.
// NOTE(review): the default used for font_info_ is not visible from here.
explicit TessHOcrRenderer(const char *outputbase);
protected:
// Overrides of the base-class document begin / per-image / end hooks.
bool BeginDocumentHandler() override;
bool AddImageHandler(TessBaseAPI *api) override;
bool EndDocumentHandler() override;
private:
bool font_info_; // whether to print font information
};
/**
* Renders tesseract output into an alto text string
*/
class TESS_API TessAltoRenderer : public TessResultRenderer {
public:
// outputbase: presumably the output file name excluding extension, as for
// the TessResultRenderer constructor - TODO confirm.
explicit TessAltoRenderer(const char *outputbase);
protected:
// Overrides of the base-class document begin / per-image / end hooks.
bool BeginDocumentHandler() override;
bool AddImageHandler(TessBaseAPI *api) override;
bool EndDocumentHandler() override;
private:
// NOTE(review): unlike the other private members in this header, this one has
// no trailing underscore; its exact role is not visible from here.
bool begin_document;
};
/**
* Renders Tesseract output into a TSV string
*/
class TESS_API TessTsvRenderer : public TessResultRenderer {
public:
// outputbase: presumably the output file name excluding extension, as for
// the TessResultRenderer constructor - TODO confirm.
// font_info: whether to print font information (see font_info_ below).
explicit TessTsvRenderer(const char *outputbase, bool font_info);
// Overload without font_info.
// NOTE(review): the default used for font_info_ is not visible from here.
explicit TessTsvRenderer(const char *outputbase);
protected:
// Overrides of the base-class document begin / per-image / end hooks.
bool BeginDocumentHandler() override;
bool AddImageHandler(TessBaseAPI *api) override;
bool EndDocumentHandler() override;
private:
bool font_info_; // whether to print font information
};
/**
* Renders tesseract output into searchable PDF
*/
class TESS_API TessPDFRenderer : public TessResultRenderer {
public:
// datadir is the location of the TESSDATA. We need it because
// we load a custom PDF font from this location.
// textonly: skip images if set (see textonly_ below); defaults to false.
TessPDFRenderer(const char *outputbase, const char *datadir,
bool textonly = false);
protected:
// Overrides of the base-class document begin / per-image / end hooks.
bool BeginDocumentHandler() override;
bool AddImageHandler(TessBaseAPI *api) override;
bool EndDocumentHandler() override;
private:
// We don't want to have every image in memory at once,
// so we store some metadata as we go along producing
// PDFs one page at a time. At the end, that metadata is
// used to make everything that isn't easily handled in a
// streaming fashion.
long int obj_; // counter for PDF objects
std::vector<uint64_t> offsets_; // offset of every PDF object in bytes
std::vector<long int> pages_; // object number for every /Page object
std::string datadir_; // where to find the custom font
bool textonly_; // skip images if set
// Bookkeeping only. DIY = Do It Yourself.
void AppendPDFObjectDIY(size_t objectsize);
// Bookkeeping + emit data.
void AppendPDFObject(const char *data);
// Create the /Contents object for an entire page.
// NOTE(review): returns a raw char*; who frees it is not visible from here.
char *GetPDFTextObjects(TessBaseAPI *api, double width, double height);
// Turn an image into a PDF object. Only transcode if we have to.
// The object and its size are returned through pdf_object / pdf_object_size.
// NOTE(review): ownership of *pdf_object is not visible from here - confirm.
static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum,
char **pdf_object, long int *pdf_object_size,
int jpg_quality);
};
/**
* Renders tesseract output in UNLV format.
* NOTE(review): the previous comment here was identical to the one on
* TessTextRenderer ("plain UTF-8 text string"); the UNLV wording is taken
* from the class name -- confirm the exact output format in the
* implementation.
*/
class TESS_API TessUnlvRenderer : public TessResultRenderer {
public:
explicit TessUnlvRenderer(const char *outputbase);
protected:
// Overrides the per-image handler declared by TessResultRenderer.
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
* Renders tesseract output into a plain UTF-8 text string for LSTMBox.
* Only the per-image handler of TessResultRenderer is overridden.
*/
class TESS_API TessLSTMBoxRenderer : public TessResultRenderer {
public:
explicit TessLSTMBoxRenderer(const char *outputbase);
protected:
// Overrides the per-image handler declared by TessResultRenderer.
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
* Renders tesseract output as box text.
* NOTE(review): the previous comment here was identical to the one on
* TessTextRenderer ("plain UTF-8 text string"); the box-text wording is
* taken from the class name -- confirm the exact output format in the
* implementation.
*/
class TESS_API TessBoxTextRenderer : public TessResultRenderer {
public:
explicit TessBoxTextRenderer(const char *outputbase);
protected:
// Overrides the per-image handler declared by TessResultRenderer.
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
* Renders tesseract output into a plain UTF-8 text string in WordStr format.
* Only the per-image handler of TessResultRenderer is overridden.
*/
class TESS_API TessWordStrBoxRenderer : public TessResultRenderer {
public:
explicit TessWordStrBoxRenderer(const char *outputbase);
protected:
// Overrides the per-image handler declared by TessResultRenderer.
bool AddImageHandler(TessBaseAPI *api) override;
};
// The OSD renderer is only declared when DISABLED_LEGACY_ENGINE is not defined.
#ifndef DISABLED_LEGACY_ENGINE
/**
* Renders tesseract output into an osd text string.
* Only the per-image handler of TessResultRenderer is overridden.
*/
class TESS_API TessOsdRenderer : public TessResultRenderer {
public:
explicit TessOsdRenderer(const char *outputbase);
protected:
// Overrides the per-image handler declared by TessResultRenderer.
bool AddImageHandler(TessBaseAPI *api) override;
};
#endif // ndef DISABLED_LEGACY_ENGINE
} // namespace tesseract.
#endif // TESSERACT_API_RENDERER_H_

View File

@ -1,250 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: resultiterator.h
// Description: Iterator for tesseract results that is capable of
// iterating in proper reading order over Bi Directional
// (e.g. mixed Hebrew and English) text.
// Author: David Eger
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
#include "export.h" // for TESS_API, TESS_LOCAL
#include "ltrresultiterator.h" // for LTRResultIterator
#include "publictypes.h" // for PageIteratorLevel
#include "unichar.h" // for StrongScriptDirection
#include <set> // for std::pair
#include <vector> // for std::vector
namespace tesseract {
// Iterator over tesseract results that walks the page in proper reading
// order, including bidirectional (e.g. mixed Hebrew and English) text.
// Extends LTRResultIterator, whose order is strictly left-to-right.
class TESS_API ResultIterator : public LTRResultIterator {
public:
// Factory for a ResultIterator built from an LTRResultIterator.
// NOTE(review): the constructor it presumably wraps resets to the beginning
// of the paragraph rather than keeping resit's position (see the protected
// constructor). Ownership of the returned pointer is not stated here.
static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);
/**
* ResultIterator is copy constructible!
* The default copy constructor works just fine for us.
*/
~ResultIterator() override = default;
// ============= Moving around within the page ============.
/**
* Moves the iterator to point to the start of the page to begin
* an iteration.
*/
void Begin() override;
/**
* Moves to the start of the next object at the given level in the
* page hierarchy in the appropriate reading order and returns false if
* the end of the page was reached.
* NOTE that RIL_SYMBOL will skip non-text blocks, but all other
* PageIteratorLevel level values will visit each non-text block once.
* Think of non text blocks as containing a single para, with a single line,
* with a single imaginary word.
* Calls to Next with different levels may be freely intermixed.
* This function iterates words in right-to-left scripts correctly, if
* the appropriate language has been loaded into Tesseract.
*/
bool Next(PageIteratorLevel level) override;
/**
* IsAtBeginningOf() returns whether we're at the logical beginning of the
* given level. (as opposed to ResultIterator's left-to-right top-to-bottom
* order). Otherwise, this acts the same as PageIterator::IsAtBeginningOf().
* For a full description, see pageiterator.h
*/
bool IsAtBeginningOf(PageIteratorLevel level) const override;
/**
* Implement PageIterator's IsAtFinalElement correctly in a BiDi context.
* For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we
* point at the last word in a paragraph. See PageIterator for full comment.
*/
bool IsAtFinalElement(PageIteratorLevel level,
PageIteratorLevel element) const override;
// ============= Functions that refer to words only ============.
// Returns the number of blanks before the current word.
int BlanksBeforeWord() const;
// ============= Accessing data ==============.
/**
* Returns the null terminated UTF-8 encoded text string for the current
* object at the given level. Use delete [] to free after use.
* NOTE(review): declared virtual without override; whether this overrides
* or hides a member of LTRResultIterator cannot be seen from this header.
*/
virtual char *GetUTF8Text(PageIteratorLevel level) const;
/**
* Returns the LSTM choices for every LSTM timestep for the current word.
* Each choice is a (const char *, float) pair.
* NOTE(review): ownership of the returned pointer is not documented here.
*/
virtual std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
*GetRawLSTMTimesteps() const;
/**
* Presumably the best LSTM choices per symbol of the current word, as
* (const char *, float) pairs -- undocumented in the original header; TODO
* confirm, including ownership of the returned pointer.
*/
virtual std::vector<std::vector<std::pair<const char *, float>>>
*GetBestLSTMSymbolChoices() const;
/**
* Return whether the current paragraph's dominant reading direction
* is left-to-right (as opposed to right-to-left).
*/
bool ParagraphIsLtr() const;
// ============= Exposed only for testing =============.
/**
* Yields the reading order as a sequence of indices and (optional)
* meta-marks for a set of words (given left-to-right).
* The meta marks are passed as negative values:
*   kMinorRunStart  Start of minor direction text.
*   kMinorRunEnd    End of minor direction text.
*   kComplexWord    The next indexed word contains both left-to-right and
*                   right-to-left characters and was treated as neutral.
* NOTE(review): the private overloads below describe kComplexWord as
* referring to the *previous* word; one of the two descriptions is
* presumably wrong -- verify against the implementation.
*
* For example, suppose we have five words in a text line,
* indexed [0,1,2,3,4] from the leftmost side of the text line.
* The following are all believable reading_orders:
*
* Left-to-Right (in ltr paragraph):
*   { 0, 1, 2, 3, 4 }
* Left-to-Right (in rtl paragraph):
*   { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }
* Right-to-Left (in rtl paragraph):
*   { 4, 3, 2, 1, 0 }
* Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph:
*   { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }
*/
static void CalculateTextlineOrder(
bool paragraph_is_ltr,
const std::vector<StrongScriptDirection> &word_dirs,
std::vector<int> *reading_order);
// Meta-mark values used in reading-order vectors; documented above as
// negative. The actual values are defined outside this header.
static const int kMinorRunStart;
static const int kMinorRunEnd;
static const int kComplexWord;
protected:
/**
* We presume the data associated with the given iterator will outlive us.
* NB: This is not public because it does something that is non-obvious:
* it resets to the beginning of the paragraph instead of staying wherever
* resit might have pointed.
*/
explicit ResultIterator(const LTRResultIterator &resit);
private:
/**
* Calculates the current paragraph's dominant writing direction.
* Typically, members should use current_paragraph_is_ltr_ instead.
*/
bool CurrentParagraphIsLtr() const;
/**
* Returns word indices as measured from resit->RestartRow() = index 0
* for the reading order of words within a textline given an iterator
* into the middle of the text line.
* In addition to non-negative word indices, the following negative values
* may be inserted:
*   kMinorRunStart  Start of minor direction text.
*   kMinorRunEnd    End of minor direction text.
*   kComplexWord    The previous word contains both left-to-right and
*                   right-to-left characters and was treated as neutral.
*/
void CalculateTextlineOrder(bool paragraph_is_ltr,
const LTRResultIterator &resit,
std::vector<int> *indices) const;
/** Same as above, but the caller's ssd gets filled in if ssd != nullptr. */
void CalculateTextlineOrder(bool paragraph_is_ltr,
const LTRResultIterator &resit,
std::vector<StrongScriptDirection> *ssd,
std::vector<int> *indices) const;
/**
* What is the index of the current word in a strict left-to-right reading
* of the row?
*/
int LTRWordIndex() const;
/**
* Given an iterator pointing at a word, returns the logical reading order
* of blob indices for the word.
*/
void CalculateBlobOrder(std::vector<int> *blob_indices) const;
/** Precondition: current_paragraph_is_ltr_ is set. */
void MoveToLogicalStartOfTextline();
/**
* Precondition: current_paragraph_is_ltr_ and in_minor_direction_
* are set.
*/
void MoveToLogicalStartOfWord();
/** Are we pointing at the final (reading order) symbol of the word? */
bool IsAtFinalSymbolOfWord() const;
/** Are we pointing at the first (reading order) symbol of the word? */
bool IsAtFirstSymbolOfWord() const;
/**
* Append any extra marks that should be appended to this word when printed.
* Mostly, these are Unicode BiDi control characters.
*/
void AppendSuffixMarks(std::string *text) const;
/** Appends the current word in reading order to the given buffer.*/
void AppendUTF8WordText(std::string *text) const;
/**
* Appends the text of the current text line, *assuming this iterator is
* positioned at the beginning of the text line*. This function
* updates the iterator to point to the first position past the text line.
* Each textline is terminated in a single newline character.
* If the textline ends a paragraph, it gets a second terminal newline.
*/
void IterateAndAppendUTF8TextlineText(std::string *text);
/**
* Appends the text of the current paragraph in reading order
* to the given buffer.
* Each textline is terminated in a single newline character, and the
* paragraph gets an extra newline at the end.
*/
void AppendUTF8ParagraphText(std::string *text) const;
/** Returns whether the bidi_debug flag is set to at least min_level. */
bool BidiDebug(int min_level) const;
/**
* Cached dominant direction of the current paragraph; the comment on
* CurrentParagraphIsLtr() directs members to read this instead of
* recomputing.
*/
bool current_paragraph_is_ltr_;
/**
* Is the currently pointed-at character at the beginning of
* a minor-direction run?
*/
bool at_beginning_of_minor_run_;
/** Is the currently pointed-at character in a minor-direction sequence? */
bool in_minor_direction_;
/**
* Should detected inter-word spaces be preserved, or "compressed" to a single
* space character (default behavior).
*/
bool preserve_interword_spaces_;
};
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_

View File

@ -1,174 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: unichar.h
// Description: Unicode character/ligature class.
// Author: Ray Smith
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCUTIL_UNICHAR_H_
#define TESSERACT_CCUTIL_UNICHAR_H_
#include "export.h"
#include <memory.h>
#include <cstring>
#include <string>
#include <vector>
namespace tesseract {
// Maximum number of chars (UTF-8 bytes) that can be stored in a UNICHAR; it
// is the size of the UNICHAR::chars array. Must be at least 4. Must not
// exceed 31 without changing the coding of length.
#define UNICHAR_LEN 30
// A UNICHAR_ID is the unique id of a unichar.
using UNICHAR_ID = int;
// A variable to indicate an invalid or uninitialized unichar id.
// Declared as plain int rather than UNICHAR_ID; the two are the same type.
static const int INVALID_UNICHAR_ID = -1;
// A special unichar that corresponds to INVALID_UNICHAR_ID.
static const char INVALID_UNICHAR[] = "__INVALID_UNICHAR__";
// Classifies a piece of text by the writing direction of the characters it
// contains. Unscoped enum with explicit values 0..3.
enum StrongScriptDirection {
DIR_NEUTRAL = 0, // Text contains only neutral characters.
DIR_LEFT_TO_RIGHT = 1, // Text contains no Right-to-Left characters.
DIR_RIGHT_TO_LEFT = 2, // Text contains no Left-to-Right characters.
DIR_MIX = 3, // Text contains a mixture of left-to-right
// and right-to-left characters.
};
// Element type for sequences of Unicode code points; used by
// UNICHAR::UTF8ToUTF32 and UNICHAR::UTF32ToUTF8.
using char32 = signed int;
// The UNICHAR class holds a single classification result. This may be
// a single Unicode character (stored as between 1 and 4 utf8 bytes) or
// multiple Unicode characters representing the NFKC expansion of a ligature
// such as fi, ffl etc. These are also stored as utf8.
// Storage is a fixed array of UNICHAR_LEN chars whose last element doubles
// as a length field (see chars below).
class TESS_API UNICHAR {
public:
// Creates an empty UNICHAR with an all-zero buffer.
// The memset is redundant with the {} initializer on chars, but harmless.
UNICHAR() {
memset(chars, 0, UNICHAR_LEN);
}
// Construct from a utf8 string. If len<0 then the string is null terminated.
// If the string is too long to fit in the UNICHAR then it takes only what
// will fit.
UNICHAR(const char *utf8_str, int len);
// Construct from a single UCS4 character.
explicit UNICHAR(int unicode);
// Default copy constructor and operator= are OK.
// Get the first character as UCS-4.
int first_uni() const;
// Get the length of the UTF8 string.
// The last array element holds the length when it is in [0, UNICHAR_LEN);
// any other value means the buffer is completely full, so UNICHAR_LEN is
// returned.
int utf8_len() const {
int len = chars[UNICHAR_LEN - 1];
return len >= 0 && len < UNICHAR_LEN ? len : UNICHAR_LEN;
}
// Get a UTF8 string, but NOT null terminated.
// Returns a pointer to the internal buffer; use utf8_len() for its length.
const char *utf8() const {
return chars;
}
// Get a terminated UTF8 string: Must delete[] it after use.
char *utf8_str() const;
// Get the number of bytes in the first character of the given utf8 string.
static int utf8_step(const char *utf8_str);
// A class to simplify iterating over and accessing elements of a UTF8
// string. Note that unlike the UNICHAR class, const_iterator does NOT COPY or
// take ownership of the underlying byte array. It also does not permit
// modification of the array (as the name suggests).
//
// Example:
//   for (UNICHAR::const_iterator it = UNICHAR::begin(str, str_len);
//        it != UNICHAR::end(str, str_len);
//        ++it) {
//     printf("UCS-4 symbol code = %d\n", *it);
//     char buf[5];
//     int char_len = it.get_utf8(buf); buf[char_len] = '\0';
//     printf("Char = %s\n", buf);
//   }
class TESS_API const_iterator {
using CI = const_iterator;
public:
// Step to the next UTF8 character.
// If the current position is at an illegal UTF8 character, then print an
// error message and step by one byte. If the current position is at a
// null byte, don't step past it.
const_iterator &operator++();
// Return the UCS-4 value at the current position.
// If the current position is at an illegal UTF8 value, return a single
// space character.
int operator*() const;
// Store the UTF-8 encoding of the current codepoint into buf, which must be
// at least 4 bytes long. Return the number of bytes written.
// If the current position is at an illegal UTF8 value, writes a single
// space character and returns 1.
// Note that this method does not null-terminate the buffer.
int get_utf8(char *buf) const;
// Returns the number of bytes of the current codepoint. Returns 1 if the
// current position is at an illegal UTF8 value.
int utf8_len() const;
// Returns true if the UTF-8 encoding at the current position is legal.
bool is_legal() const;
// Return the pointer into the string at the current position.
const char *utf8_data() const {
return it_;
}
// Iterator equality operators.
// Two iterators are equal when they point at the same address.
friend bool operator==(const CI &lhs, const CI &rhs) {
return lhs.it_ == rhs.it_;
}
friend bool operator!=(const CI &lhs, const CI &rhs) {
return !(lhs == rhs);
}
private:
// Only UNICHAR may construct iterators (through begin()/end()).
friend class UNICHAR;
explicit const_iterator(const char *it) : it_(it) {}
const char *it_; // Pointer into the string.
};
// Create a start/end iterator pointing to a string. Note that these methods
// are static and do NOT create a copy or take ownership of the underlying
// array.
static const_iterator begin(const char *utf8_str, int byte_length);
static const_iterator end(const char *utf8_str, int byte_length);
// Converts a utf-8 string to a vector of unicodes.
// Returns an empty vector if the input contains invalid UTF-8.
static std::vector<char32> UTF8ToUTF32(const char *utf8_str);
// Converts a vector of unicodes to a utf8 string.
// Returns an empty string if the input contains an invalid unicode.
static std::string UTF32ToUTF8(const std::vector<char32> &str32);
private:
// A UTF-8 representation of 1 or more Unicode characters.
// The last element (chars[UNICHAR_LEN - 1]) is a length if
// its value < UNICHAR_LEN, otherwise it is a genuine character.
char chars[UNICHAR_LEN]{};
};
} // namespace tesseract
#endif // TESSERACT_CCUTIL_UNICHAR_H_

View File

@ -1,34 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: version.h
// Description: Version information
//
// (C) Copyright 2018, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_API_VERSION_H_
#define TESSERACT_API_VERSION_H_
// clang-format off
// Version components. The @...@ tokens are placeholders: presumably this file
// is a template whose values are substituted by the build system at
// configure time -- TODO confirm. It does not compile as-is.
#define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
#define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
#define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@
// Single integer packing the version as (major << 16) | (minor << 8) | micro.
// The shifts bind tighter than '|', so no inner parentheses are needed.
#define TESSERACT_VERSION \
(TESSERACT_MAJOR_VERSION << 16 | \
TESSERACT_MINOR_VERSION << 8 | \
TESSERACT_MICRO_VERSION)
// Version as a string literal (also a placeholder until substituted).
#define TESSERACT_VERSION_STR "@PACKAGE_VERSION@"
// clang-format on
#endif // TESSERACT_API_VERSION_H_

View File

@ -1,812 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: baseapi.h
// Description: Simple API for calling tesseract.
// Author: Ray Smith
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_API_BASEAPI_H_
#define TESSERACT_API_BASEAPI_H_
#ifdef HAVE_CONFIG_H
# include "config_auto.h" // DISABLED_LEGACY_ENGINE
#endif
#include "export.h"
#include "pageiterator.h"
#include "publictypes.h"
#include "resultiterator.h"
#include "unichar.h"
#include "version.h"
#include <cstdio>
#include <vector> // for std::vector
// Forward declarations of the leptonica-style image and box container types
// that the API below passes around by pointer only.
struct Pix;
struct Pixa;
struct Boxa;
namespace tesseract {
// Forward declarations of tesseract types named in this header.
// NOTE(review): pageiterator.h and resultiterator.h are already included
// above, so the PageIterator / LTRResultIterator / ResultIterator entries
// are presumably redundant (harmless).
class PAGE_RES;
class ParagraphModel;
class BLOCK_LIST;
class ETEXT_DESC;
struct OSResults;
class UNICHARSET;
class Dawg;
class Dict;
class EquationDetect;
class PageIterator;
class ImageThresholder;
class LTRResultIterator;
class ResultIterator;
class MutableIterator;
class TessResultRenderer;
class Tesseract;
// Callback type that reads a whole file, named by filename, into *data.
// Returns false on failure.
using FileReader = bool (*)(const char *filename, std::vector<char> *data);
// Pointer to a const member function of Dict taking
// (void *, const UNICHARSET &, UNICHAR_ID, bool) and returning int.
// NOTE(review): the meaning of the arguments is not documented in this header.
using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID,
bool) const;
// Pointer to a non-const member function of Dict taking
// (const char *, const char *, int, const char *, int) and returning double.
// Presumably a probability for a character given context strings and their
// lengths, judging by the alias name -- TODO confirm against Dict.
using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *,
int, const char *, int);
/**
* Base class for all tesseract APIs.
* Specific classes can add ability to work on different inputs or produce
* different outputs.
* This class is mostly an interface layer on top of the Tesseract instance
* class to hide the data types so that users of this class don't have to
* include any other Tesseract headers.
*/
class TESS_API TessBaseAPI {
public:
TessBaseAPI();
virtual ~TessBaseAPI();
// Copy constructor and assignment operator are currently unsupported.
TessBaseAPI(TessBaseAPI const &) = delete;
TessBaseAPI &operator=(TessBaseAPI const &) = delete;
/**
* Returns the version identifier as a static string. Do not delete.
*/
static const char *Version();
/**
* If compiled with OpenCL AND an available OpenCL
* device is deemed faster than serial code, then
* "device" is populated with the cl_device_id
* and returns sizeof(cl_device_id)
* otherwise *device=nullptr and returns 0.
*/
static size_t getOpenCLDevice(void **device);
/**
* Set the name of the input file. Needed for training and
* reading a UNLV zone file, and for searchable PDF output.
*/
void SetInputName(const char *name);
/**
* These functions are required for searchable PDF output.
* We need our hands on the input file so that we can include
* it in the PDF without transcoding. If that is not possible,
* we need the original image. Finally, resolution metadata
* is stored in the PDF so we need that as well.
*/
const char *GetInputName();
// Takes ownership of the input pix.
void SetInputImage(Pix *pix);
Pix *GetInputImage();
int GetSourceYResolution();
const char *GetDatapath();
/** Set the name of the bonus output files. Needed only for debugging. */
void SetOutputName(const char *name);
/**
* Set the value of an internal "parameter."
* Supply the name of the parameter and the value as a string, just as
* you would in a config file.
* Returns false if the name lookup failed.
* Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
* Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.
* SetVariable may be used before Init, but settings will revert to
* defaults on End().
*
* Note: Must be called after Init(). Only works for non-init variables
* (init variables should be passed to Init()).
*/
bool SetVariable(const char *name, const char *value);
bool SetDebugVariable(const char *name, const char *value);
/**
* Returns true if the parameter was found among Tesseract parameters.
* Fills in value with the value of the parameter.
*/
bool GetIntVariable(const char *name, int *value) const;
bool GetBoolVariable(const char *name, bool *value) const;
bool GetDoubleVariable(const char *name, double *value) const;
/**
* Returns the pointer to the string that represents the value of the
* parameter if it was found among Tesseract parameters.
*/
const char *GetStringVariable(const char *name) const;
#ifndef DISABLED_LEGACY_ENGINE
/**
* Print Tesseract fonts table to the given file.
*/
void PrintFontsTable(FILE *fp) const;
#endif
/**
* Print Tesseract parameters to the given file.
*/
void PrintVariables(FILE *fp) const;
/**
* Get value of named variable as a string, if it exists.
*/
bool GetVariableAsString(const char *name, std::string *val) const;
/**
* Instances are now mostly thread-safe and totally independent,
* but some global parameters remain. Basically it is safe to use multiple
* TessBaseAPIs in different threads in parallel, UNLESS:
* you use SetVariable on some of the Params in classify and textord.
* If you do, then the effect will be to change it for all your instances.
*
* Start tesseract. Returns zero on success and -1 on failure.
* NOTE that the only members that may be called before Init are those
* listed above here in the class definition.
*
* The datapath must be the name of the tessdata directory.
* The language is (usually) an ISO 639-3 string or nullptr will default to
* eng. It is entirely safe (and eventually will be efficient too) to call
* Init multiple times on the same instance to change language, or just
* to reset the classifier.
* The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
* that multiple languages are to be loaded. Eg hin+eng will load Hindi and
* English. Languages may specify internally that they want to be loaded
* with one or more other languages, so the ~ sign is available to override
* that. Eg if hin were set to load eng by default, then hin+~eng would force
* loading only hin. The number of loaded languages is limited only by
* memory, with the caveat that loading additional languages will impact
* both speed and accuracy, as there is more work to do to decide on the
* applicable language, and there is more chance of hallucinating incorrect
* words.
* WARNING: On changing languages, all Tesseract parameters are reset
* back to their default values. (Which may vary between languages.)
* If you have a rare need to set a Variable that controls
* initialization for a second call to Init you should explicitly
* call End() and then use SetVariable before Init. This is only a very
* rare use case, since there are very few uses that require any parameters
* to be set before Init.
*
* If set_only_non_debug_params is true, only params that do not contain
* "debug" in the name will be set.
*/
int Init(const char *datapath, const char *language, OcrEngineMode mode,
char **configs, int configs_size,
const std::vector<std::string> *vars_vec,
const std::vector<std::string> *vars_values,
bool set_only_non_debug_params);
int Init(const char *datapath, const char *language, OcrEngineMode oem) {
return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
}
int Init(const char *datapath, const char *language) {
return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr,
false);
}
// In-memory version reads the traineddata file directly from the given
// data[data_size] array, and/or reads data via a FileReader.
int Init(const char *data, int data_size, const char *language,
OcrEngineMode mode, char **configs, int configs_size,
const std::vector<std::string> *vars_vec,
const std::vector<std::string> *vars_values,
bool set_only_non_debug_params, FileReader reader);
/**
* Returns the languages string used in the last valid initialization.
* If the last initialization specified "deu+hin" then that will be
* returned. If hin loaded eng automatically as well, then that will
* not be included in this list. To find the languages actually
* loaded use GetLoadedLanguagesAsVector.
* The returned string should NOT be deleted.
*/
const char *GetInitLanguagesAsString() const;
/**
* Returns the loaded languages in the vector of std::string.
* Includes all languages loaded by the last Init, including those loaded
* as dependencies of other loaded languages.
*/
void GetLoadedLanguagesAsVector(std::vector<std::string> *langs) const;
/**
* Returns the available languages in the sorted vector of std::string.
*/
void GetAvailableLanguagesAsVector(std::vector<std::string> *langs) const;
/**
* Init only for page layout analysis. Use only for calls to SetImage and
* AnalysePage. Calls that attempt recognition will generate an error.
*/
void InitForAnalysePage();
/**
* Read a "config" file containing a set of param, value pairs.
* Searches the standard places: tessdata/configs, tessdata/tessconfigs
* and also accepts a relative or absolute path name.
* Note: only non-init params will be set (init params are set by Init()).
*/
void ReadConfigFile(const char *filename);
/** Same as above, but only set debug params from the given config file. */
void ReadDebugConfigFile(const char *filename);
/**
* Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
* The mode is stored as an IntParam so it can also be modified by
* ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
*/
void SetPageSegMode(PageSegMode mode);
/** Return the current page segmentation mode. */
PageSegMode GetPageSegMode() const;
/**
* Recognize a rectangle from an image and return the result as a string.
* May be called many times for a single Init.
* Currently has no error checking.
* Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
* Palette color images will not work properly and must be converted to
* 24 bit.
* Binary images of 1 bit per pixel may also be given but they must be
* byte packed with the MSB of the first byte being the first pixel, and a
* 1 represents WHITE. For binary images set bytes_per_pixel=0.
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
*
* Note that TesseractRect is the simplified convenience interface.
* For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
* and one or more of the Get*Text functions below.
*/
char *TesseractRect(const unsigned char *imagedata, int bytes_per_pixel,
int bytes_per_line, int left, int top, int width,
int height);
/**
* Call between pages or documents etc to free up memory and forget
* adaptive data.
*/
void ClearAdaptiveClassifier();
/**
* @defgroup AdvancedAPI Advanced API
* The following methods break TesseractRect into pieces, so you can
* get hold of the thresholded image, get the text in different formats,
* get bounding boxes, confidences etc.
*/
/* @{ */
/**
* Provide an image for Tesseract to recognize. Format is as
* TesseractRect above. Copies the image buffer and converts to Pix.
* SetImage clears all recognition results, and sets the rectangle to the
* full image, so it may be followed immediately by a GetUTF8Text, and it
* will automatically perform recognition.
*/
void SetImage(const unsigned char *imagedata, int width, int height,
int bytes_per_pixel, int bytes_per_line);
/**
* Provide an image for Tesseract to recognize. As with SetImage above,
* Tesseract takes its own copy of the image, so it need not persist until
* after Recognize.
* Pix vs raw, which to use?
* Use Pix where possible. Tesseract uses Pix as its internal representation
* and it is therefore more efficient to provide a Pix directly.
*/
void SetImage(Pix *pix);
/**
* Set the resolution of the source image in pixels per inch so font size
* information can be calculated in results. Call this after SetImage().
*/
void SetSourceResolution(int ppi);
/**
* Restrict recognition to a sub-rectangle of the image. Call after SetImage.
* Each SetRectangle clears the recogntion results so multiple rectangles
* can be recognized with the same image.
*/
void SetRectangle(int left, int top, int width, int height);
/**
* Get a copy of the internal thresholded image from Tesseract.
* Caller takes ownership of the Pix and must pixDestroy it.
* May be called any time after SetImage, or after TesseractRect.
*/
Pix *GetThresholdedImage();
/**
* Get the result of page layout analysis as a leptonica-style
* Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
*/
Boxa *GetRegions(Pixa **pixa);
/**
* Get the textlines as a leptonica-style
* Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
* If raw_image is true, then extract from the original image instead of the
* thresholded image and pad by raw_padding pixels.
* If blockids is not nullptr, the block-id of each line is also returned as
* an array of one element per line. delete [] after use. If paraids is not
* nullptr, the paragraph-id of each line within its block is also returned as
* an array of one element per line. delete [] after use.
*/
Boxa *GetTextlines(bool raw_image, int raw_padding, Pixa **pixa,
int **blockids, int **paraids);
/*
Helper method to extract from the thresholded image. (most common usage)
*/
Boxa *GetTextlines(Pixa **pixa, int **blockids) {
  // Forward to the five-argument overload with the thresholded-image
  // defaults: no raw-image extraction, zero padding, and no paragraph ids.
  const bool from_raw_image = false;
  const int padding = 0;
  int **no_paraids = nullptr;
  return GetTextlines(from_raw_image, padding, pixa, blockids, no_paraids);
}
/**
* Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
* pair, in reading order. Enables downstream handling of non-rectangular
* regions.
* Can be called before or after Recognize.
* If blockids is not nullptr, the block-id of each line is also returned as
* an array of one element per line. delete [] after use.
*/
Boxa *GetStrips(Pixa **pixa, int **blockids);
/**
* Get the words as a leptonica-style
* Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
*/
Boxa *GetWords(Pixa **pixa);
/**
* Gets the individual connected (text) components (created
* after pages segmentation step, but before recognition)
* as a leptonica-style Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
* Note: the caller is responsible for calling boxaDestroy()
* on the returned Boxa array and pixaDestroy() on cc array.
*/
Boxa *GetConnectedComponents(Pixa **cc);
/**
* Get the given level kind of components (block, textline, word etc.) as a
* leptonica-style Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
* If blockids is not nullptr, the block-id of each component is also returned
* as an array of one element per component. delete [] after use.
* If paraids is not nullptr, the paragraph-id of each component within its
* block is also returned as an array of one element per component. delete []
* after use. If raw_image is true, then portions of the original image are
* extracted instead of the thresholded image and padded with raw_padding. If
* text_only is true, then only text components are returned.
*/
Boxa *GetComponentImages(PageIteratorLevel level, bool text_only,
bool raw_image, int raw_padding, Pixa **pixa,
int **blockids, int **paraids);
// Convenience overload for the most common usage: binary (thresholded)
// component images with no padding, without returning paragraph ids.
Boxa *GetComponentImages(const PageIteratorLevel level, const bool text_only,
                         Pixa **pixa, int **blockids) {
  const bool from_raw_image = false;
  const int padding = 0;
  int **no_paraids = nullptr;
  return GetComponentImages(level, text_only, from_raw_image, padding, pixa,
                            blockids, no_paraids);
}
/**
* Returns the scale factor of the thresholded image that would be returned by
* GetThresholdedImage() and the various GetX() methods that call
* GetComponentImages().
* Returns 0 if no thresholder has been set.
*/
int GetThresholdedImageScaleFactor() const;
/**
* Runs page layout analysis in the mode set by SetPageSegMode.
* May optionally be called prior to Recognize to get access to just
* the page layout results. Returns an iterator to the results.
* If merge_similar_words is true, words are combined where suitable for use
* with a line recognizer. Use if you want to use AnalyseLayout to find the
* textlines, and then want to process textline fragments with an external
* line recognizer.
* Returns nullptr on error or an empty page.
* The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
PageIterator *AnalyseLayout();
PageIterator *AnalyseLayout(bool merge_similar_words);
/**
* Recognize the image from SetAndThresholdImage, generating Tesseract
* internal structures. Returns 0 on success.
* Optional. The Get*Text functions below will call Recognize if needed.
* After Recognize, the output is kept internally until the next SetImage.
*/
int Recognize(ETEXT_DESC *monitor);
/**
* Methods to retrieve information after SetAndThresholdImage(),
* Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
*/
/**
* Turns images into symbolic text.
*
* filename can point to a single image, a multi-page TIFF,
* or a plain text list of image filenames.
*
* retry_config is useful for debugging. If not nullptr, you can fall
* back to an alternate configuration if a page fails for some
* reason.
*
* timeout_millisec terminates processing if any single page
* takes too long. Set to 0 for unlimited time.
*
* renderer is responsible for creating the output. For example,
* use the TessTextRenderer if you want plaintext output, or
* the TessPDFRenderer to produce searchable PDF.
*
* If tessedit_page_number is non-negative, will only process that
* single page. Works for multi-page tiff file, or filelist.
*
* Returns true if successful, false on error.
*/
bool ProcessPages(const char *filename, const char *retry_config,
int timeout_millisec, TessResultRenderer *renderer);
// Does the real work of ProcessPages.
bool ProcessPagesInternal(const char *filename, const char *retry_config,
int timeout_millisec, TessResultRenderer *renderer);
/**
* Turn a single image into symbolic text.
*
* The pix is the image processed. filename and page_index are
* metadata used by side-effect processes, such as reading a box
* file or formatting as hOCR.
*
* See ProcessPages for descriptions of other parameters.
*/
bool ProcessPage(Pix *pix, int page_index, const char *filename,
const char *retry_config, int timeout_millisec,
TessResultRenderer *renderer);
/**
* Get a reading-order iterator to the results of LayoutAnalysis and/or
* Recognize. The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
ResultIterator *GetIterator();
/**
* Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
* The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
MutableIterator *GetMutableIterator();
/**
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
*/
char *GetUTF8Text();
/**
* Make a HTML-formatted string with hOCR markup from the internal
* data structures.
* page_number is 0-based but will appear in the output as 1-based.
* monitor can be used to
* cancel the recognition
* receive progress callbacks
* Returned string must be freed with the delete [] operator.
*/
char *GetHOCRText(ETEXT_DESC *monitor, int page_number);
/**
* Make a HTML-formatted string with hOCR markup from the internal
* data structures.
* page_number is 0-based but will appear in the output as 1-based.
* Returned string must be freed with the delete [] operator.
*/
char *GetHOCRText(int page_number);
/**
* Make an XML-formatted string with Alto markup from the internal
* data structures.
*/
char *GetAltoText(ETEXT_DESC *monitor, int page_number);
/**
* Make an XML-formatted string with Alto markup from the internal
* data structures.
*/
char *GetAltoText(int page_number);
/**
* Make a TSV-formatted string from the internal data structures.
* page_number is 0-based but will appear in the output as 1-based.
* Returned string must be freed with the delete [] operator.
*/
char *GetTSVText(int page_number);
/**
* Make a box file for LSTM training from the internal data structures.
* Constructs coordinates in the original image - not just the rectangle.
* page_number is a 0-based page index that will appear in the box file.
* Returned string must be freed with the delete [] operator.
*/
char *GetLSTMBoxText(int page_number);
/**
* The recognized text is returned as a char* which is coded in the same
* format as a box file used in training.
* Constructs coordinates in the original image - not just the rectangle.
* page_number is a 0-based page index that will appear in the box file.
* Returned string must be freed with the delete [] operator.
*/
char *GetBoxText(int page_number);
/**
* The recognized text is returned as a char* which is coded in the same
* format as a WordStr box file used in training.
* page_number is a 0-based page index that will appear in the box file.
* Returned string must be freed with the delete [] operator.
*/
char *GetWordStrBoxText(int page_number);
/**
* The recognized text is returned as a char* which is coded
* as UNLV format Latin-1 with specific reject and suspect codes.
* Returned string must be freed with the delete [] operator.
*/
char *GetUNLVText();
/**
* Detect the orientation of the input image and apparent script (alphabet).
* orient_deg is the detected clockwise rotation of the input image in degrees
* (0, 90, 180, 270)
* orient_conf is the confidence (15.0 is reasonably confident)
* script_name is an ASCII string, the name of the script, e.g. "Latin"
* script_conf is confidence level in the script
* Returns true on success and writes values to each parameter as an output
*/
bool DetectOrientationScript(int *orient_deg, float *orient_conf,
const char **script_name, float *script_conf);
/**
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
* page_number is a 0-based page index that will appear in the osd file.
*/
char *GetOsdText(int page_number);
/** Returns the (average) confidence value between 0 and 100. */
int MeanTextConf();
/**
* Returns all word confidences (between 0 and 100) in an array, terminated
* by -1. The calling function must delete [] after use.
* The number of confidences should correspond to the number of space-
* delimited words in GetUTF8Text.
*/
int *AllWordConfidences();
#ifndef DISABLED_LEGACY_ENGINE
/**
* Applies the given word to the adaptive classifier if possible.
* The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can
* tell the boundaries of the graphemes.
* Assumes that SetImage/SetRectangle have been used to set the image
* to the given word. The mode arg should be PSM_SINGLE_WORD or
* PSM_CIRCLE_WORD, as that will be used to control layout analysis.
* The currently set PageSegMode is preserved.
* Returns false if adaption was not possible for some reason.
*/
bool AdaptToWordStr(PageSegMode mode, const char *wordstr);
#endif // ndef DISABLED_LEGACY_ENGINE
/**
* Free up recognition results and any stored image data, without actually
* freeing any recognition data that would be time-consuming to reload.
* Afterwards, you must call SetImage or TesseractRect before doing
* any Recognize or Get* operation.
*/
void Clear();
/**
* Close down tesseract and free up all memory. End() is equivalent to
* destructing and reconstructing your TessBaseAPI.
* Once End() has been used, none of the other API functions may be used
* other than Init and anything declared above it in the class definition.
*/
void End();
/**
* Clear any library-level memory caches.
* There are a variety of expensive-to-load constant data structures (mostly
* language dictionaries) that are cached globally -- surviving the Init()
* and End() of individual TessBaseAPI's. This function allows the clearing
* of these caches.
**/
static void ClearPersistentCache();
/**
* Check whether a word is valid according to Tesseract's language model
* @return 0 if the word is invalid, non-zero if valid.
* @warning temporary! This function will be removed from here and placed
* in a separate API at some future time.
*/
int IsValidWord(const char *word) const;
// Returns true if utf8_character is defined in the UniCharset.
bool IsValidCharacter(const char *utf8_character) const;
bool GetTextDirection(int *out_offset, float *out_slope);
/** Sets Dict::letter_is_okay_ function to point to the given function. */
void SetDictFunc(DictFunc f);
/** Sets Dict::probability_in_context_ function to point to the given
* function.
*/
void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
/**
* Estimates the Orientation And Script of the image.
* @return true if the image was processed successfully.
*/
bool DetectOS(OSResults *);
/**
* Return text orientation of each block as determined by an earlier run
* of layout analysis.
*/
void GetBlockTextOrientations(int **block_orientation,
bool **vertical_writing);
/** This method returns the string form of the specified unichar. */
const char *GetUnichar(int unichar_id) const;
/** Return the pointer to the i-th dawg loaded into tesseract_ object. */
const Dawg *GetDawg(int i) const;
/** Return the number of dawgs loaded into tesseract_ object. */
int NumDawgs() const;
/** Accessor for tesseract_, the underlying data object. */
Tesseract *tesseract() const { return tesseract_; }
/** Accessor for last_oem_requested_, the most recently requested mode. */
OcrEngineMode oem() const { return last_oem_requested_; }
void set_min_orientation_margin(double margin);
/* @} */
protected:
/** Common code for setting the image. Returns true if Init has been called.
*/
bool InternalSetImage();
/**
* Run the thresholder to make the thresholded image. If pix is not nullptr,
* the source is thresholded to pix instead of the internal IMAGE.
*/
virtual bool Threshold(Pix **pix);
/**
* Find lines from the image making the BLOCK_LIST.
* @return 0 on success.
*/
int FindLines();
/** Delete the pageres and block list ready for a new page. */
void ClearResults();
/**
* Return an LTR Result Iterator -- used only for training, as we really want
* to ignore all BiDi smarts at that point.
* delete once you're done with it.
*/
LTRResultIterator *GetLTRIterator();
/**
* Return the length of the output text string, as UTF8, assuming
* one newline per line and one per block, with a terminator,
* and assuming a single character reject marker for each rejected character.
* Also return the number of recognized blobs in blob_count.
*/
int TextLength(int *blob_count) const;
//// paragraphs.cpp ////////////////////////////////////////////////////
void DetectParagraphs(bool after_text_recognition);
/** Read-only access to page_res_, the page-level data. */
const PAGE_RES *GetPageRes() const { return page_res_; }
protected:
Tesseract *tesseract_; ///< The underlying data object.
Tesseract *osd_tesseract_; ///< For orientation & script detection.
EquationDetect *equ_detect_; ///< The equation detector.
FileReader reader_; ///< Reads files from any filesystem.
ImageThresholder *thresholder_; ///< Image thresholding module.
std::vector<ParagraphModel *> *paragraph_models_;
BLOCK_LIST *block_list_; ///< The page layout.
PAGE_RES *page_res_; ///< The page-level data.
std::string input_file_; ///< Name used by training code.
std::string output_file_; ///< Name used by debug code.
std::string datapath_; ///< Current location of tessdata.
std::string language_; ///< Last initialized language.
OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested.
bool recognition_done_; ///< page_res_ contains recognition data.
/**
* @defgroup ThresholderParams Thresholder Parameters
* Parameters saved from the Thresholder. Needed to rebuild coordinates.
*/
/* @{ */
int rect_left_;
int rect_top_;
int rect_width_;
int rect_height_;
int image_width_;
int image_height_;
/* @} */
private:
// A list of image filenames gets special consideration
bool ProcessPagesFileList(FILE *fp, std::string *buf,
const char *retry_config, int timeout_millisec,
TessResultRenderer *renderer,
int tessedit_page_number);
// TIFF supports multipage so gets special consideration.
bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size,
const char *filename, const char *retry_config,
int timeout_millisec,
TessResultRenderer *renderer,
int tessedit_page_number);
}; // class TessBaseAPI.
/** Escape a char string - replace &<>"' with HTML codes. */
std::string HOcrEscape(const char *text);
} // namespace tesseract
#endif // TESSERACT_API_BASEAPI_H_

View File

@ -1,484 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: capi.h
// Description: C-API TessBaseAPI
//
// (C) Copyright 2012, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef API_CAPI_H_
#define API_CAPI_H_
#include "export.h"
#ifdef __cplusplus
# include <tesseract/baseapi.h>
# include <tesseract/ocrclass.h>
# include <tesseract/pageiterator.h>
# include <tesseract/renderer.h>
# include <tesseract/resultiterator.h>
#endif
#include <stdbool.h>
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Fallback boolean vocabulary for the C API declarations below.
 * All three macros are defined together and only when BOOL is not already
 * defined; if the including code supplies its own BOOL, TRUE and FALSE are
 * NOT defined here and must come from the same place. */
#ifndef BOOL
# define BOOL int
# define TRUE 1
# define FALSE 0
#endif
#ifdef __cplusplus
// C++ translation units: every C API type name is an alias of the
// corresponding type in namespace tesseract.
using TessResultRenderer = tesseract::TessResultRenderer;
using TessBaseAPI = tesseract::TessBaseAPI;
using TessPageIterator = tesseract::PageIterator;
using TessResultIterator = tesseract::ResultIterator;
using TessMutableIterator = tesseract::MutableIterator;
using TessChoiceIterator = tesseract::ChoiceIterator;
using TessOcrEngineMode = tesseract::OcrEngineMode;
using TessPageSegMode = tesseract::PageSegMode;
using TessPageIteratorLevel = tesseract::PageIteratorLevel;
using TessOrientation = tesseract::Orientation;
using TessParagraphJustification = tesseract::ParagraphJustification;
using TessWritingDirection = tesseract::WritingDirection;
using TessTextlineOrder = tesseract::TextlineOrder;
using TessPolyBlockType = tesseract::PolyBlockType;
using ETEXT_DESC = tesseract::ETEXT_DESC;
#else
/* Plain-C translation units: the handle types are declared as structs with
 * no body in this header, so C code can only hold and pass pointers to them
 * (as the TESS_API functions below do). */
typedef struct TessResultRenderer TessResultRenderer;
typedef struct TessBaseAPI TessBaseAPI;
typedef struct TessPageIterator TessPageIterator;
typedef struct TessResultIterator TessResultIterator;
typedef struct TessMutableIterator TessMutableIterator;
typedef struct TessChoiceIterator TessChoiceIterator;
/* Plain-C definition of TessOcrEngineMode (the C++ branch aliases this name
 * to tesseract::OcrEngineMode). Values are written out explicitly; they are
 * the same 0-based numbers the enumerator order implies. */
typedef enum TessOcrEngineMode {
  OEM_TESSERACT_ONLY = 0,
  OEM_LSTM_ONLY = 1,
  OEM_TESSERACT_LSTM_COMBINED = 2,
  OEM_DEFAULT = 3
} TessOcrEngineMode;
/* Plain-C definition of TessPageSegMode (the C++ branch aliases this name to
 * tesseract::PageSegMode). Values are written out explicitly; they are the
 * same 0-based numbers the enumerator order implies, ending with PSM_COUNT. */
typedef enum TessPageSegMode {
  PSM_OSD_ONLY = 0,
  PSM_AUTO_OSD = 1,
  PSM_AUTO_ONLY = 2,
  PSM_AUTO = 3,
  PSM_SINGLE_COLUMN = 4,
  PSM_SINGLE_BLOCK_VERT_TEXT = 5,
  PSM_SINGLE_BLOCK = 6,
  PSM_SINGLE_LINE = 7,
  PSM_SINGLE_WORD = 8,
  PSM_CIRCLE_WORD = 9,
  PSM_SINGLE_CHAR = 10,
  PSM_SPARSE_TEXT = 11,
  PSM_SPARSE_TEXT_OSD = 12,
  PSM_RAW_LINE = 13,
  PSM_COUNT = 14
} TessPageSegMode;
/* Plain-C definition of TessPageIteratorLevel (the C++ branch aliases this
 * name to tesseract::PageIteratorLevel). Explicit values equal the implicit
 * 0-based numbering of the enumerator order. */
typedef enum TessPageIteratorLevel {
  RIL_BLOCK = 0,
  RIL_PARA = 1,
  RIL_TEXTLINE = 2,
  RIL_WORD = 3,
  RIL_SYMBOL = 4
} TessPageIteratorLevel;
/* Plain-C definition of TessPolyBlockType (the C++ branch aliases this name
 * to tesseract::PolyBlockType). Explicit values equal the implicit 0-based
 * numbering of the enumerator order, ending with PT_COUNT. */
typedef enum TessPolyBlockType {
  PT_UNKNOWN = 0,
  PT_FLOWING_TEXT = 1,
  PT_HEADING_TEXT = 2,
  PT_PULLOUT_TEXT = 3,
  PT_EQUATION = 4,
  PT_INLINE_EQUATION = 5,
  PT_TABLE = 6,
  PT_VERTICAL_TEXT = 7,
  PT_CAPTION_TEXT = 8,
  PT_FLOWING_IMAGE = 9,
  PT_HEADING_IMAGE = 10,
  PT_PULLOUT_IMAGE = 11,
  PT_HORZ_LINE = 12,
  PT_VERT_LINE = 13,
  PT_NOISE = 14,
  PT_COUNT = 15
} TessPolyBlockType;
/* Plain-C definition of TessOrientation (the C++ branch aliases this name to
 * tesseract::Orientation). Explicit values equal the implicit 0-based
 * numbering of the enumerator order. */
typedef enum TessOrientation {
  ORIENTATION_PAGE_UP = 0,
  ORIENTATION_PAGE_RIGHT = 1,
  ORIENTATION_PAGE_DOWN = 2,
  ORIENTATION_PAGE_LEFT = 3
} TessOrientation;
/* Plain-C definition of TessParagraphJustification (the C++ branch aliases
 * this name to tesseract::ParagraphJustification). Explicit values equal the
 * implicit 0-based numbering of the enumerator order. */
typedef enum TessParagraphJustification {
  JUSTIFICATION_UNKNOWN = 0,
  JUSTIFICATION_LEFT = 1,
  JUSTIFICATION_CENTER = 2,
  JUSTIFICATION_RIGHT = 3
} TessParagraphJustification;
/* Plain-C definition of TessWritingDirection (the C++ branch aliases this
 * name to tesseract::WritingDirection). Explicit values equal the implicit
 * 0-based numbering of the enumerator order. */
typedef enum TessWritingDirection {
  WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
  WRITING_DIRECTION_TOP_TO_BOTTOM = 2
} TessWritingDirection;
/* Plain-C definition of TessTextlineOrder (the C++ branch aliases this name
 * to tesseract::TextlineOrder). Explicit values equal the implicit 0-based
 * numbering of the enumerator order. */
typedef enum TessTextlineOrder {
  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2
} TessTextlineOrder;
typedef struct ETEXT_DESC ETEXT_DESC;
#endif
/* Callback type accepted by TessMonitorSetCancelFunc. The meaning of the
 * bool result and of `words` is defined by the library implementation and is
 * not visible in this header. */
typedef bool (*TessCancelFunc)(void *cancel_this, int words);
/* Callback type accepted by TessMonitorSetProgressFunc. It receives the
 * monitor and four int coordinates (left, right, top, bottom).
 * NOTE(review): presumably a bounding box of the area being processed -
 * confirm against the implementation. */
typedef bool (*TessProgressFunc)(ETEXT_DESC *ths, int left, int right, int top,
int bottom);
struct Pix;
struct Boxa;
struct Pixa;
/* General free functions */
TESS_API const char *TessVersion();
TESS_API void TessDeleteText(const char *text);
TESS_API void TessDeleteTextArray(char **arr);
TESS_API void TessDeleteIntArray(const int *arr);
/* Renderer API */
TESS_API TessResultRenderer *TessTextRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessHOcrRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessHOcrRendererCreate2(const char *outputbase,
BOOL font_info);
TESS_API TessResultRenderer *TessAltoRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessTsvRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessPDFRendererCreate(const char *outputbase,
const char *datadir,
BOOL textonly);
TESS_API TessResultRenderer *TessUnlvRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessBoxTextRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessLSTMBoxRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessWordStrBoxRendererCreate(
const char *outputbase);
TESS_API void TessDeleteResultRenderer(TessResultRenderer *renderer);
TESS_API void TessResultRendererInsert(TessResultRenderer *renderer,
TessResultRenderer *next);
TESS_API TessResultRenderer *TessResultRendererNext(
TessResultRenderer *renderer);
TESS_API BOOL TessResultRendererBeginDocument(TessResultRenderer *renderer,
const char *title);
TESS_API BOOL TessResultRendererAddImage(TessResultRenderer *renderer,
TessBaseAPI *api);
TESS_API BOOL TessResultRendererEndDocument(TessResultRenderer *renderer);
TESS_API const char *TessResultRendererExtention(TessResultRenderer *renderer);
TESS_API const char *TessResultRendererTitle(TessResultRenderer *renderer);
TESS_API int TessResultRendererImageNum(TessResultRenderer *renderer);
/* Base API */
TESS_API TessBaseAPI *TessBaseAPICreate();
TESS_API void TessBaseAPIDelete(TessBaseAPI *handle);
TESS_API size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI *handle, void **device);
TESS_API void TessBaseAPISetInputName(TessBaseAPI *handle, const char *name);
TESS_API const char *TessBaseAPIGetInputName(TessBaseAPI *handle);
TESS_API void TessBaseAPISetInputImage(TessBaseAPI *handle, struct Pix *pix);
TESS_API struct Pix *TessBaseAPIGetInputImage(TessBaseAPI *handle);
TESS_API int TessBaseAPIGetSourceYResolution(TessBaseAPI *handle);
TESS_API const char *TessBaseAPIGetDatapath(TessBaseAPI *handle);
TESS_API void TessBaseAPISetOutputName(TessBaseAPI *handle, const char *name);
TESS_API BOOL TessBaseAPISetVariable(TessBaseAPI *handle, const char *name,
const char *value);
TESS_API BOOL TessBaseAPISetDebugVariable(TessBaseAPI *handle, const char *name,
const char *value);
TESS_API BOOL TessBaseAPIGetIntVariable(const TessBaseAPI *handle,
const char *name, int *value);
TESS_API BOOL TessBaseAPIGetBoolVariable(const TessBaseAPI *handle,
const char *name, BOOL *value);
TESS_API BOOL TessBaseAPIGetDoubleVariable(const TessBaseAPI *handle,
const char *name, double *value);
TESS_API const char *TessBaseAPIGetStringVariable(const TessBaseAPI *handle,
const char *name);
TESS_API void TessBaseAPIPrintVariables(const TessBaseAPI *handle, FILE *fp);
TESS_API BOOL TessBaseAPIPrintVariablesToFile(const TessBaseAPI *handle,
const char *filename);
TESS_API int TessBaseAPIInit1(TessBaseAPI *handle, const char *datapath,
const char *language, TessOcrEngineMode oem,
char **configs, int configs_size);
TESS_API int TessBaseAPIInit2(TessBaseAPI *handle, const char *datapath,
const char *language, TessOcrEngineMode oem);
TESS_API int TessBaseAPIInit3(TessBaseAPI *handle, const char *datapath,
const char *language);
TESS_API int TessBaseAPIInit4(TessBaseAPI *handle, const char *datapath,
const char *language, TessOcrEngineMode mode,
char **configs, int configs_size, char **vars_vec,
char **vars_values, size_t vars_vec_size,
BOOL set_only_non_debug_params);
TESS_API int TessBaseAPIInit5(TessBaseAPI *handle, const char *data, int data_size,
const char *language, TessOcrEngineMode mode,
char **configs, int configs_size, char **vars_vec,
char **vars_values, size_t vars_vec_size,
BOOL set_only_non_debug_params);
TESS_API const char *TessBaseAPIGetInitLanguagesAsString(
const TessBaseAPI *handle);
TESS_API char **TessBaseAPIGetLoadedLanguagesAsVector(
const TessBaseAPI *handle);
TESS_API char **TessBaseAPIGetAvailableLanguagesAsVector(
const TessBaseAPI *handle);
TESS_API void TessBaseAPIInitForAnalysePage(TessBaseAPI *handle);
TESS_API void TessBaseAPIReadConfigFile(TessBaseAPI *handle,
const char *filename);
TESS_API void TessBaseAPIReadDebugConfigFile(TessBaseAPI *handle,
const char *filename);
TESS_API void TessBaseAPISetPageSegMode(TessBaseAPI *handle,
TessPageSegMode mode);
TESS_API TessPageSegMode TessBaseAPIGetPageSegMode(const TessBaseAPI *handle);
TESS_API char *TessBaseAPIRect(TessBaseAPI *handle,
const unsigned char *imagedata,
int bytes_per_pixel, int bytes_per_line,
int left, int top, int width, int height);
TESS_API void TessBaseAPIClearAdaptiveClassifier(TessBaseAPI *handle);
TESS_API void TessBaseAPISetImage(TessBaseAPI *handle,
const unsigned char *imagedata, int width,
int height, int bytes_per_pixel,
int bytes_per_line);
TESS_API void TessBaseAPISetImage2(TessBaseAPI *handle, struct Pix *pix);
TESS_API void TessBaseAPISetSourceResolution(TessBaseAPI *handle, int ppi);
TESS_API void TessBaseAPISetRectangle(TessBaseAPI *handle, int left, int top,
int width, int height);
TESS_API struct Pix *TessBaseAPIGetThresholdedImage(TessBaseAPI *handle);
TESS_API struct Boxa *TessBaseAPIGetRegions(TessBaseAPI *handle,
struct Pixa **pixa);
TESS_API struct Boxa *TessBaseAPIGetTextlines(TessBaseAPI *handle,
struct Pixa **pixa,
int **blockids);
TESS_API struct Boxa *TessBaseAPIGetTextlines1(TessBaseAPI *handle,
BOOL raw_image, int raw_padding,
struct Pixa **pixa,
int **blockids, int **paraids);
TESS_API struct Boxa *TessBaseAPIGetStrips(TessBaseAPI *handle,
struct Pixa **pixa, int **blockids);
TESS_API struct Boxa *TessBaseAPIGetWords(TessBaseAPI *handle,
struct Pixa **pixa);
TESS_API struct Boxa *TessBaseAPIGetConnectedComponents(TessBaseAPI *handle,
struct Pixa **cc);
TESS_API struct Boxa *TessBaseAPIGetComponentImages(TessBaseAPI *handle,
TessPageIteratorLevel level,
BOOL text_only,
struct Pixa **pixa,
int **blockids);
TESS_API struct Boxa *TessBaseAPIGetComponentImages1(
TessBaseAPI *handle, TessPageIteratorLevel level, BOOL text_only,
BOOL raw_image, int raw_padding, struct Pixa **pixa, int **blockids,
int **paraids);
TESS_API int TessBaseAPIGetThresholdedImageScaleFactor(
const TessBaseAPI *handle);
TESS_API TessPageIterator *TessBaseAPIAnalyseLayout(TessBaseAPI *handle);
TESS_API int TessBaseAPIRecognize(TessBaseAPI *handle, ETEXT_DESC *monitor);
TESS_API BOOL TessBaseAPIProcessPages(TessBaseAPI *handle, const char *filename,
const char *retry_config,
int timeout_millisec,
TessResultRenderer *renderer);
TESS_API BOOL TessBaseAPIProcessPage(TessBaseAPI *handle, struct Pix *pix,
int page_index, const char *filename,
const char *retry_config,
int timeout_millisec,
TessResultRenderer *renderer);
TESS_API TessResultIterator *TessBaseAPIGetIterator(TessBaseAPI *handle);
TESS_API TessMutableIterator *TessBaseAPIGetMutableIterator(
TessBaseAPI *handle);
TESS_API char *TessBaseAPIGetUTF8Text(TessBaseAPI *handle);
TESS_API char *TessBaseAPIGetHOCRText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetAltoText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetTsvText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetBoxText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetLSTMBoxText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetWordStrBoxText(TessBaseAPI *handle,
int page_number);
TESS_API char *TessBaseAPIGetUNLVText(TessBaseAPI *handle);
TESS_API int TessBaseAPIMeanTextConf(TessBaseAPI *handle);
TESS_API int *TessBaseAPIAllWordConfidences(TessBaseAPI *handle);
#ifndef DISABLED_LEGACY_ENGINE
TESS_API BOOL TessBaseAPIAdaptToWordStr(TessBaseAPI *handle,
TessPageSegMode mode,
const char *wordstr);
#endif // #ifndef DISABLED_LEGACY_ENGINE
TESS_API void TessBaseAPIClear(TessBaseAPI *handle);
TESS_API void TessBaseAPIEnd(TessBaseAPI *handle);
TESS_API int TessBaseAPIIsValidWord(TessBaseAPI *handle, const char *word);
TESS_API BOOL TessBaseAPIGetTextDirection(TessBaseAPI *handle, int *out_offset,
float *out_slope);
TESS_API const char *TessBaseAPIGetUnichar(TessBaseAPI *handle, int unichar_id);
TESS_API void TessBaseAPIClearPersistentCache(TessBaseAPI *handle);
#ifndef DISABLED_LEGACY_ENGINE
// Call TessDeleteText(*script_name) to free memory allocated by this
// function
TESS_API BOOL TessBaseAPIDetectOrientationScript(TessBaseAPI *handle,
int *orient_deg,
float *orient_conf,
const char **script_name,
float *script_conf);
#endif // #ifndef DISABLED_LEGACY_ENGINE
TESS_API void TessBaseAPISetMinOrientationMargin(TessBaseAPI *handle,
double margin);
TESS_API int TessBaseAPINumDawgs(const TessBaseAPI *handle);
TESS_API TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI *handle);
TESS_API void TessBaseGetBlockTextOrientations(TessBaseAPI *handle,
int **block_orientation,
bool **vertical_writing);
/* Page iterator */
TESS_API void TessPageIteratorDelete(TessPageIterator *handle);
TESS_API TessPageIterator *TessPageIteratorCopy(const TessPageIterator *handle);
TESS_API void TessPageIteratorBegin(TessPageIterator *handle);
TESS_API BOOL TessPageIteratorNext(TessPageIterator *handle,
TessPageIteratorLevel level);
TESS_API BOOL TessPageIteratorIsAtBeginningOf(const TessPageIterator *handle,
TessPageIteratorLevel level);
TESS_API BOOL TessPageIteratorIsAtFinalElement(const TessPageIterator *handle,
TessPageIteratorLevel level,
TessPageIteratorLevel element);
TESS_API BOOL TessPageIteratorBoundingBox(const TessPageIterator *handle,
TessPageIteratorLevel level,
int *left, int *top, int *right,
int *bottom);
TESS_API TessPolyBlockType
TessPageIteratorBlockType(const TessPageIterator *handle);
TESS_API struct Pix *TessPageIteratorGetBinaryImage(
const TessPageIterator *handle, TessPageIteratorLevel level);
TESS_API struct Pix *TessPageIteratorGetImage(const TessPageIterator *handle,
TessPageIteratorLevel level,
int padding,
struct Pix *original_image,
int *left, int *top);
TESS_API BOOL TessPageIteratorBaseline(const TessPageIterator *handle,
TessPageIteratorLevel level, int *x1,
int *y1, int *x2, int *y2);
TESS_API void TessPageIteratorOrientation(
TessPageIterator *handle, TessOrientation *orientation,
TessWritingDirection *writing_direction, TessTextlineOrder *textline_order,
float *deskew_angle);
/* NOTE(review): the four trailing pointer arguments look like out-parameters
 * (void return, non-const pointers) - confirm against the implementation. */
TESS_API void TessPageIteratorParagraphInfo(
TessPageIterator *handle, TessParagraphJustification *justification,
BOOL *is_list_item, BOOL *is_crown, int *first_line_indent);
/* Result iterator */
TESS_API void TessResultIteratorDelete(TessResultIterator *handle);
TESS_API TessResultIterator *TessResultIteratorCopy(
const TessResultIterator *handle);
TESS_API TessPageIterator *TessResultIteratorGetPageIterator(
TessResultIterator *handle);
TESS_API const TessPageIterator *TessResultIteratorGetPageIteratorConst(
const TessResultIterator *handle);
TESS_API TessChoiceIterator *TessResultIteratorGetChoiceIterator(
const TessResultIterator *handle);
TESS_API BOOL TessResultIteratorNext(TessResultIterator *handle,
TessPageIteratorLevel level);
/* Returns a mutable char*, unlike the const char* getters below.
 * NOTE(review): presumably a caller-owned buffer that must be released through
 * the C API's text-free function rather than free() - confirm. */
TESS_API char *TessResultIteratorGetUTF8Text(const TessResultIterator *handle,
TessPageIteratorLevel level);
TESS_API float TessResultIteratorConfidence(const TessResultIterator *handle,
TessPageIteratorLevel level);
TESS_API const char *TessResultIteratorWordRecognitionLanguage(
const TessResultIterator *handle);
/* NOTE(review): every argument after handle looks like an out-parameter; the
 * returned const char* is presumably library-owned - confirm. */
TESS_API const char *TessResultIteratorWordFontAttributes(
const TessResultIterator *handle, BOOL *is_bold, BOOL *is_italic,
BOOL *is_underlined, BOOL *is_monospace, BOOL *is_serif, BOOL *is_smallcaps,
int *pointsize, int *font_id);
TESS_API BOOL
TessResultIteratorWordIsFromDictionary(const TessResultIterator *handle);
TESS_API BOOL TessResultIteratorWordIsNumeric(const TessResultIterator *handle);
TESS_API BOOL
TessResultIteratorSymbolIsSuperscript(const TessResultIterator *handle);
TESS_API BOOL
TessResultIteratorSymbolIsSubscript(const TessResultIterator *handle);
TESS_API BOOL
TessResultIteratorSymbolIsDropcap(const TessResultIterator *handle);
/* Choice iterator */
TESS_API void TessChoiceIteratorDelete(TessChoiceIterator *handle);
TESS_API BOOL TessChoiceIteratorNext(TessChoiceIterator *handle);
/* Returns const char*, in contrast to TessResultIteratorGetUTF8Text above.
 * NOTE(review): presumably points at library-owned storage that must not be
 * freed by the caller - confirm. */
TESS_API const char *TessChoiceIteratorGetUTF8Text(
const TessChoiceIterator *handle);
TESS_API float TessChoiceIteratorConfidence(const TessChoiceIterator *handle);
/* Progress monitor */
/* The monitor handle is an ETEXT_DESC. A monitor obtained from
 * TessMonitorCreate is presumably released with TessMonitorDelete - confirm. */
TESS_API ETEXT_DESC *TessMonitorCreate();
TESS_API void TessMonitorDelete(ETEXT_DESC *monitor);
TESS_API void TessMonitorSetCancelFunc(ETEXT_DESC *monitor,
TessCancelFunc cancelFunc);
TESS_API void TessMonitorSetCancelThis(ETEXT_DESC *monitor, void *cancelThis);
TESS_API void *TessMonitorGetCancelThis(ETEXT_DESC *monitor);
TESS_API void TessMonitorSetProgressFunc(ETEXT_DESC *monitor,
TessProgressFunc progressFunc);
TESS_API int TessMonitorGetProgress(ETEXT_DESC *monitor);
/* NOTE(review): deadline is presumably in milliseconds (per the MSecs suffix)
 * and relative to the time of the call - confirm. */
TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC *monitor, int deadline);
#ifdef __cplusplus
}
#endif
#endif // API_CAPI_H_

View File

@ -1,37 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: export.h
// Description: Place holder
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_PLATFORM_H_
#define TESSERACT_PLATFORM_H_
#ifndef TESS_API
// TESS_API is the export/import decoration placed on public symbols.
// It is only defined here when the build has not already supplied one.
#  if !defined(TESS_EXPORTS) && !defined(TESS_IMPORTS)
// Neither building nor consuming a shared library: no decoration at all.
#    define TESS_API
#  elif !defined(_WIN32) && !defined(__CYGWIN__)
// Non-Windows toolchains use the same visibility attribute for both the
// exporting and the importing side.
#    define TESS_API __attribute__((visibility("default")))
#  elif defined(TESS_EXPORTS)
// Windows/Cygwin, building the library (TESS_EXPORTS wins if both are set).
#    define TESS_API __declspec(dllexport)
#  else
// Windows/Cygwin, consuming the library (only TESS_IMPORTS is set).
#    define TESS_API __declspec(dllimport)
#  endif
#endif
#endif // TESSERACT_PLATFORM_H_

View File

@ -1,235 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: ltrresultiterator.h
// Description: Iterator for tesseract results in strict left-to-right
// order that avoids using tesseract internal data structures.
// Author: Ray Smith
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
#define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
#include "export.h" // for TESS_API
#include "pageiterator.h" // for PageIterator
#include "publictypes.h" // for PageIteratorLevel
#include "unichar.h" // for StrongScriptDirection
namespace tesseract {
class BLOB_CHOICE_IT;
class PAGE_RES;
class WERD_RES;
class Tesseract;
// Class to iterate over tesseract results, providing access to all levels
// of the page hierarchy, without including any tesseract headers or having
// to handle any tesseract structures.
// WARNING! This class points to data held within the TessBaseAPI class, and
// therefore can only be used while the TessBaseAPI class still exists and
// has not been subjected to a call of Init, SetImage, Recognize, Clear, End
// DetectOS, or anything else that changes the internal PAGE_RES.
// See tesseract/publictypes.h for the definition of PageIteratorLevel.
// See also base class PageIterator, which contains the bulk of the interface.
// LTRResultIterator adds text-specific methods for access to OCR output.
class TESS_API LTRResultIterator : public PageIterator {
// ChoiceIterator (declared later in this header) is constructed from a
// const LTRResultIterator&; friendship gives it access to non-public members.
friend class ChoiceIterator;
public:
// page_res and tesseract come directly from the BaseAPI.
// The rectangle parameters are copied indirectly from the Thresholder,
// via the BaseAPI. They represent the coordinates of some rectangle in an
// original image (in top-left-origin coordinates) and therefore the top-left
// needs to be added to any output boxes in order to specify coordinates
// in the original image. See TessBaseAPI::SetRectangle.
// The scale and scaled_yres are in case the Thresholder scaled the image
// rectangle prior to thresholding. Any coordinates in tesseract's image
// must be divided by scale before adding (rect_left, rect_top).
// The scaled_yres indicates the effective resolution of the binary image
// that tesseract has been given by the Thresholder.
// After the constructor, Begin has already been called.
LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
int scaled_yres, int rect_left, int rect_top,
int rect_width, int rect_height);
~LTRResultIterator() override;
// LTRResultIterators may be copied! This makes it possible to iterate over
// all the objects at a lower level, while maintaining an iterator to
// objects at a higher level. These constructors DO NOT CALL Begin, so
// iterations will continue from the location of src.
// TODO: For now the copy constructor and operator= only need the base class
// versions, but if new data members are added, don't forget to add them!
// NOTE(review): no copy constructor or operator= is declared in this class,
// so the implicitly generated ones are used; they also copy the two raw
// separator pointers declared at the bottom of the class.
// ============= Moving around within the page ============.
// See PageIterator.
// ============= Accessing data ==============.
// Returns the null terminated UTF-8 encoded text string for the current
// object at the given level. Use delete [] to free after use.
char *GetUTF8Text(PageIteratorLevel level) const;
// Set the string inserted at the end of each text line. "\n" by default.
void SetLineSeparator(const char *new_line);
// Set the string inserted at the end of each paragraph. "\n" by default.
void SetParagraphSeparator(const char *new_para);
// Returns the mean confidence of the current object at the given level.
// The number should be interpreted as a percent probability. (0.0f-100.0f)
float Confidence(PageIteratorLevel level) const;
// ============= Functions that refer to words only ============.
// Returns the font attributes of the current word. If iterating at a higher
// level object than words, eg textlines, then this will return the
// attributes of the first word in that textline.
// The actual return value is a string representing a font name. It points
// to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as
// the iterator itself, ie rendered invalid by various members of
// TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
// Pointsize is returned in printers points (1/72 inch.)
const char *WordFontAttributes(bool *is_bold, bool *is_italic,
bool *is_underlined, bool *is_monospace,
bool *is_serif, bool *is_smallcaps,
int *pointsize, int *font_id) const;
// Return the name of the language used to recognize this word.
// On error, nullptr. Do not delete this pointer.
const char *WordRecognitionLanguage() const;
// Return the overall directionality of this word.
StrongScriptDirection WordDirection() const;
// Returns true if the current word was found in a dictionary.
bool WordIsFromDictionary() const;
// Returns the number of blanks before the current word.
int BlanksBeforeWord() const;
// Returns true if the current word is numeric.
bool WordIsNumeric() const;
// Returns true if the word contains blamer information.
bool HasBlamerInfo() const;
// Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
// of the current word.
const void *GetParamsTrainingBundle() const;
// Returns a pointer to the string with blamer information for this word.
// Assumes that the word's blamer_bundle is not nullptr.
const char *GetBlamerDebug() const;
// Returns a pointer to the string with misadaption information for this word.
// Assumes that the word's blamer_bundle is not nullptr.
const char *GetBlamerMisadaptionDebug() const;
// Returns true if a truth string was recorded for the current word.
bool HasTruthString() const;
// Returns true if the given string is equivalent to the truth string for
// the current word.
bool EquivalentToTruth(const char *str) const;
// Returns a null terminated UTF-8 encoded truth string for the current word.
// Use delete [] to free after use.
char *WordTruthUTF8Text() const;
// Returns a null terminated UTF-8 encoded normalized OCR string for the
// current word. Use delete [] to free after use.
char *WordNormedUTF8Text() const;
// Returns a pointer to serialized choice lattice.
// Fills lattice_size with the number of bytes in lattice data.
// NOTE(review): ownership of the returned buffer is not stated here - confirm
// before freeing or caching it.
const char *WordLattice(int *lattice_size) const;
// ============= Functions that refer to symbols only ============.
// Returns true if the current symbol is a superscript.
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
bool SymbolIsSuperscript() const;
// Returns true if the current symbol is a subscript.
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
bool SymbolIsSubscript() const;
// Returns true if the current symbol is a dropcap.
// If iterating at a higher level object than symbols, eg words, then
// this will return the attributes of the first symbol in that word.
bool SymbolIsDropcap() const;
protected:
// Separator strings; presumably the values installed by SetLineSeparator and
// SetParagraphSeparator above.
// NOTE(review): held as raw const char*. Whether the setters copy the text or
// keep the caller's pointer is not visible in this header - confirm before
// passing a temporary string.
const char *line_separator_;
const char *paragraph_separator_;
};
// Class to iterate over the classifier choices for a single RIL_SYMBOL.
// NOTE(review): a destructor is declared but no copy constructor or copy
// assignment, so the implicit ones copy the raw pointer members below.
// Confirm whether copying a ChoiceIterator is safe before doing so.
// NOTE(review): this class uses std::vector and std::pair, yet the header
// includes only project headers; it relies on one of them pulling in
// <vector> and <utility>.
class TESS_API ChoiceIterator {
public:
// Construction is from a LTRResultIterator that points to the symbol of
// interest. The ChoiceIterator allows a one-shot iteration over the
// choices for this symbol and after that it is useless.
explicit ChoiceIterator(const LTRResultIterator &result_it);
~ChoiceIterator();
// Moves to the next choice for the symbol and returns false if there
// are none left.
bool Next();
// ============= Accessing data ==============.
// Returns the null terminated UTF-8 encoded text string for the current
// choice.
// NOTE: Unlike LTRResultIterator::GetUTF8Text, the return points to an
// internal structure and should NOT be delete[]ed to free after use.
const char *GetUTF8Text() const;
// Returns the confidence of the current choice depending on the used language
// data. If only LSTM traineddata is used the value range is 0.0f - 1.0f. All
// choices for one symbol should roughly add up to 1.0f.
// If only traineddata of the legacy engine is used, the number should be
// interpreted as a percent probability. (0.0f-100.0f) In this case
// probabilities won't add up to 100. Each one stands on its own.
float Confidence() const;
// Returns a vector containing all timesteps, which belong to the currently
// selected symbol. A timestep is a vector containing pairs of symbols and
// floating point numbers. The number states the probability for the
// corresponding symbol.
// NOTE(review): returned as a raw pointer; ownership and lifetime are not
// stated here - presumably library-owned, confirm before deleting.
std::vector<std::vector<std::pair<const char *, float>>> *Timesteps() const;
private:
// clears the remaining spaces out of the results and adapt the probabilities
void filterSpaces();
// Pointer to the WERD_RES object owned by the API.
WERD_RES *word_res_;
// Iterator over the blob choices.
BLOB_CHOICE_IT *choice_it_;
// NOTE(review): presumably the LSTM-engine choice list and the cursor into it
// (the counterpart of choice_it_ above) - confirm in the implementation.
std::vector<std::pair<const char *, float>> *LSTM_choices_ = nullptr;
std::vector<std::pair<const char *, float>>::iterator LSTM_choice_it_;
// NOTE(review): no default initializer here; presumably set by the
// constructor - confirm.
const int *tstep_index_;
// regulates the rating granularity
double rating_coefficient_;
// leading blanks
int blanks_before_word_;
// true when there is lstm engine related trained data
bool oemLSTM_;
};
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_

View File

@ -1,158 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
/**********************************************************************
* File: ocrclass.h
* Description: Class definitions and constants for the OCR API.
* Author: Hewlett-Packard Co
*
* (C) Copyright 1996, Hewlett-Packard Co.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
/**********************************************************************
* This file contains typedefs for all the structures used by
* the HP OCR interface.
* The structures are designed to allow them to be used with any
* structure alignment up to 8.
**********************************************************************/
#ifndef CCUTIL_OCRCLASS_H_
#define CCUTIL_OCRCLASS_H_

#include <chrono>
#include <cstdint> // int8_t, int16_t, int32_t, uint8_t, uint16_t used below
#include <ctime>

namespace tesseract {

/**********************************************************************
 * EANYCODE_CHAR
 * Description of a single character. The character code is defined by
 * the character set of the current font.
 * Output text is sent as an array of these structures.
 * Spaces and line endings in the output are represented in the
 * structures of the surrounding characters. They are not directly
 * represented as characters.
 * The first character in a word has a positive value of blanks.
 * Missing information should be set to the defaults in the comments.
 * If word bounds are known, but not character bounds, then the top and
 * bottom of each character should be those of the word. The left of the
 * first and right of the last char in each word should be set. All other
 * lefts and rights should be set to -1.
 * If set, the values of right and bottom are left+width and top+height.
 * Most of the members come directly from the parameters to ocr_append_char.
 * The formatting member uses the enhancement parameter and combines the
 * line direction stuff into the top 3 bits.
 * The coding is 0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para,
 * 5=UL Para, 6=TB char, 7=BT char. API users do not need to know what
 * the coding is, only that it is backwards compatible with the previous
 * version.
 **********************************************************************/
struct EANYCODE_CHAR { /*single character */
  // It should be noted that the format for char_code for version 2.0 and beyond
  // is UTF8 which means that ASCII characters will come out as one structure
  // but other characters will be returned in two or more instances of this
  // structure with a single byte of the UTF8 code in each, but each will have
  // the same bounding box. Programs which want to handle languages with
  // different character sets will need to handle extended characters
  // appropriately, but *all* code needs to be prepared to receive UTF8 coded
  // characters for characters such as bullet and fancy quotes.
  //
  // Member order and widths are part of the structure layout; do not reorder.
  uint16_t char_code; /*character itself */
  int16_t left;       /*of char (-1) */
  int16_t right;      /*of char (-1) */
  int16_t top;        /*of char (-1) */
  int16_t bottom;     /*of char (-1) */
  int16_t font_index; /*what font (0) */
  uint8_t confidence; /*0=perfect, 100=reject (0/100) */
  uint8_t point_size; /*of char, 72=1 inch, (10) */
  int8_t blanks;      /*no of spaces before this char (1) */
  uint8_t formatting; /*char formatting (0) */
};

/**********************************************************************
 * ETEXT_DESC
 * Description of the output of the OCR engine.
 * This structure is used as both a progress monitor and the final
 * output header, since it needs to be a valid progress monitor while
 * the OCR engine is storing its output to shared memory.
 * During progress, all the buffer info is -1.
 * Progress starts at 0 and increases to 100 during OCR. No other constraint.
 * Additionally the progress callback contains the bounding box of the word that
 * is currently being processed.
 * Every progress callback, the OCR engine must set ocr_alive to 1.
 * The HP side will set ocr_alive to 0. Repeated failure to reset
 * to 1 indicates that the OCR engine is dead.
 * If the cancel function is not null then it is called with the number of
 * user words found. If it returns true then operation is cancelled.
 **********************************************************************/
class ETEXT_DESC;

// Callback signatures stored in ETEXT_DESC.
using CANCEL_FUNC = bool (*)(void *, int);
using PROGRESS_FUNC = bool (*)(int, int, int, int, int);
using PROGRESS_FUNC2 = bool (*)(ETEXT_DESC *, int, int, int, int);

class ETEXT_DESC { // output header
public:
  int16_t count{0};    /// chars in this buffer(0)
  int16_t progress{0}; /// percent complete increasing (0-100)
  /** Progress monitor covers word recognition and it does not cover layout
   * analysis.
   * See Ray comment in https://github.com/tesseract-ocr/tesseract/pull/27 */
  int8_t more_to_come{0};       /// true if not last
  volatile int8_t ocr_alive{0}; /// ocr sets to 1, HP 0
  int8_t err_code{0};           /// for errcode use
  CANCEL_FUNC cancel{nullptr};  /// returns true to cancel
  PROGRESS_FUNC progress_callback{
      nullptr}; /// called whenever progress increases
  PROGRESS_FUNC2 progress_callback2; /// monitor-aware progress callback
  void *cancel_this{nullptr};        /// this or other data for cancel
  std::chrono::steady_clock::time_point end_time;
  /// Time to stop. Expected to be set only
  /// by call to set_deadline_msecs().
  EANYCODE_CHAR text[1]{}; /// character data

  // Installs the default monitor-aware callback and leaves end_time at the
  // clock's epoch, which deadline_exceeded() treats as "no deadline set".
  // (A value-initialized time_point already holds a zero duration, so no
  // separate assignment is needed in the body.)
  ETEXT_DESC() : progress_callback2(&default_progress_func), end_time() {}

  // Sets the end time to be deadline_msecs milliseconds from now.
  // Values <= 0 are ignored: end_time keeps whatever it held before.
  void set_deadline_msecs(int32_t deadline_msecs) {
    if (deadline_msecs > 0) {
      end_time = std::chrono::steady_clock::now() +
                 std::chrono::milliseconds(deadline_msecs);
    }
  }

  // Returns false if we've not passed the end_time, or have not set a deadline.
  bool deadline_exceeded() const {
    // An end_time sitting exactly at the epoch means no deadline was set.
    if (end_time.time_since_epoch() ==
        std::chrono::steady_clock::duration::zero()) {
      return false;
    }
    return std::chrono::steady_clock::now() > end_time;
  }

private:
  // Default value of progress_callback2: forwards to the older
  // progress_callback (passing ths->progress as its first argument) when one
  // is installed, and returns true when none is.
  static bool default_progress_func(ETEXT_DESC *ths, int left, int right,
                                    int top, int bottom) {
    if (ths->progress_callback != nullptr) {
      return (*(ths->progress_callback))(ths->progress, left, right, top,
                                         bottom);
    }
    return true;
  }
};

} // namespace tesseract

#endif // CCUTIL_OCRCLASS_H_

View File

@ -1,139 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: osdetect.h
// Description: Orientation and script detection.
// Author: Samuel Charron
// Ranjith Unnikrishnan
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCMAIN_OSDETECT_H_
#define TESSERACT_CCMAIN_OSDETECT_H_
#include "export.h" // for TESS_API
#include <vector> // for std::vector
namespace tesseract {
class BLOBNBOX;
class BLOBNBOX_CLIST;
class BLOB_CHOICE_LIST;
class TO_BLOCK_LIST;
class UNICHARSET;
class Tesseract;
// Max number of scripts in ICU + "NULL" + Japanese and Korean + Fraktur
// (116 + 1 + 2 + 1 = 120). Also used as the second dimension of
// OSResults::scripts_na.
const int kMaxNumberOfScripts = 116 + 1 + 2 + 1;
// Holds one orientation id and one script id together with a confidence for
// each. Every field starts at zero.
// NOTE(review): sconfidence / oconfidence are presumably the script and
// orientation confidences respectively - confirm.
struct OSBestResult {
  // Kept user-provided (not defaulted) so the type's construction semantics
  // stay the same; the zero values come from the member initializers below.
  OSBestResult() {}

  int orientation_id = 0;
  int script_id = 0;
  float sconfidence = 0.0f;
  float oconfidence = 0.0f;
};
struct OSResults {
OSResults() : unicharset(nullptr) {
for (int i = 0; i < 4; ++i) {
for (int j = 0; j < kMaxNumberOfScripts; ++j) {
scripts_na[i][j] = 0;
}
orientations[i] = 0;
}
}
void update_best_orientation();
// Set the estimate of the orientation to the given id.
void set_best_orientation(int orientation_id);
// Update/Compute the best estimate of the script assuming the given
// orientation id.
void update_best_script(int orientation_id);
// Return the index of the script with the highest score for this orientation.
TESS_API int get_best_script(int orientation_id) const;
// Accumulate scores with given OSResults instance and update the best script.
void accumulate(const OSResults &osr);
// Print statistics.
void print_scores(void) const;
void print_scores(int orientation_id) const;
// Array holding scores for each orientation id [0,3].
// Orientation ids [0..3] map to [0, 270, 180, 90] degree orientations of the
// page respectively, where the values refer to the amount of clockwise
// rotation to be applied to the page for the text to be upright and readable.
float orientations[4];
// Script confidence scores for each of 4 possible orientations.
float scripts_na[4][kMaxNumberOfScripts];
UNICHARSET *unicharset;
OSBestResult best_result;
};
// Declaration only: none of the member functions are defined in this header.
// NOTE(review): judging by the names, this feeds per-blob choice scores into
// an OSResults to estimate page orientation - confirm in the implementation.
class OrientationDetector {
public:
// NOTE(review): both arguments are raw pointers and the class holds members
// of the same pointer types (osr_, allowed_scripts_) - presumably non-owning,
// so the caller would have to keep both objects alive; confirm.
OrientationDetector(const std::vector<int> *allowed_scripts,
OSResults *results);
bool detect_blob(BLOB_CHOICE_LIST *scores);
int get_orientation();
private:
OSResults *osr_;
const std::vector<int> *allowed_scripts_;
};
// Declaration only: none of the member functions are defined in this header.
// NOTE(review): judging by the names, this feeds per-blob choice scores into
// an OSResults to estimate the script - confirm in the implementation.
class ScriptDetector {
public:
// NOTE(review): all three arguments are raw pointers matching the pointer
// members osr_, tess_ and allowed_scripts_ - presumably non-owning; confirm.
ScriptDetector(const std::vector<int> *allowed_scripts, OSResults *osr,
tesseract::Tesseract *tess);
void detect_blob(BLOB_CHOICE_LIST *scores);
bool must_stop(int orientation) const;
private:
OSResults *osr_;
// Static script-name strings; declared here, defined outside this header.
static const char *korean_script_;
static const char *japanese_script_;
static const char *fraktur_script_;
// NOTE(review): presumably script ids looked up once (e.g. in the
// constructor) for the scripts named by each member - confirm.
int korean_id_;
int japanese_id_;
int katakana_id_;
int hiragana_id_;
int han_id_;
int hangul_id_;
int latin_id_;
int fraktur_id_;
tesseract::Tesseract *tess_;
const std::vector<int> *allowed_scripts_;
};
// Free-function entry points for orientation and script detection.
// Only OrientationIdToValue is marked TESS_API in this group; the other
// declarations carry no export decoration.
// NOTE(review): the meaning of the int return values is not stated in this
// header - confirm in the implementation before relying on them.
int orientation_and_script_detection(const char *filename, OSResults *,
tesseract::Tesseract *);
int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr,
tesseract::Tesseract *tess);
int os_detect_blobs(const std::vector<int> *allowed_scripts,
BLOBNBOX_CLIST *blob_list, OSResults *osr,
tesseract::Tesseract *tess);
bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s,
OSResults *, tesseract::Tesseract *tess);
// Helper method to convert an orientation index to its value in degrees.
// The value represents the amount of clockwise rotation in degrees that must be
// applied for the text to be upright (readable).
// NOTE(review): presumably the same id-to-degrees mapping documented on
// OSResults::orientations (ids 0..3 -> 0, 270, 180, 90) - confirm.
TESS_API int OrientationIdToValue(const int &id);
} // namespace tesseract
#endif // TESSERACT_CCMAIN_OSDETECT_H_

View File

@ -1,364 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: pageiterator.h
// Description: Iterator for tesseract page structure that avoids using
// tesseract internal data structures.
// Author: Ray Smith
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_
#define TESSERACT_CCMAIN_PAGEITERATOR_H_
#include "export.h"
#include "publictypes.h"
struct Pix;
struct Pta;
namespace tesseract {
struct BlamerBundle;
class C_BLOB_IT;
class PAGE_RES;
class PAGE_RES_IT;
class WERD;
class Tesseract;
/**
* Class to iterate over tesseract page structure, providing access to all
* levels of the page hierarchy, without including any tesseract headers or
* having to handle any tesseract structures.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End
* DetectOS, or anything else that changes the internal PAGE_RES.
* See tesseract/publictypes.h for the definition of PageIteratorLevel.
* See also ResultIterator, derived from PageIterator, which adds in the
* ability to access OCR output with text-specific methods.
*/
class TESS_API PageIterator {
public:
/**
* page_res and tesseract come directly from the BaseAPI.
* The rectangle parameters are copied indirectly from the Thresholder,
* via the BaseAPI. They represent the coordinates of some rectangle in an
* original image (in top-left-origin coordinates) and therefore the top-left
* needs to be added to any output boxes in order to specify coordinates
* in the original image. See TessBaseAPI::SetRectangle.
* The scale and scaled_yres are in case the Thresholder scaled the image
* rectangle prior to thresholding. Any coordinates in tesseract's image
* must be divided by scale before adding (rect_left, rect_top).
* The scaled_yres indicates the effective resolution of the binary image
* that tesseract has been given by the Thresholder.
* After the constructor, Begin has already been called.
*/
PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
int scaled_yres, int rect_left, int rect_top, int rect_width,
int rect_height);
virtual ~PageIterator();
/**
* Page/ResultIterators may be copied! This makes it possible to iterate over
* all the objects at a lower level, while maintaining an iterator to
* objects at a higher level. These constructors DO NOT CALL Begin, so
* iterations will continue from the location of src.
*/
PageIterator(const PageIterator &src);
const PageIterator &operator=(const PageIterator &src);
/** Are we positioned at the same location as other? */
bool PositionedAtSameWord(const PAGE_RES_IT *other) const;
// ============= Moving around within the page ============.
/**
* Moves the iterator to point to the start of the page to begin an
* iteration.
*/
virtual void Begin();
/**
* Moves the iterator to the beginning of the paragraph.
* This class implements this functionality by moving it to the zero indexed
* blob of the first (leftmost) word on the first row of the paragraph.
*/
virtual void RestartParagraph();
/**
* Return whether this iterator points anywhere in the first textline of a
* paragraph.
*/
bool IsWithinFirstTextlineOfParagraph() const;
/**
* Moves the iterator to the beginning of the text line.
* This class implements this functionality by moving it to the zero indexed
* blob of the first (leftmost) word of the row.
*/
virtual void RestartRow();
/**
* Moves to the start of the next object at the given level in the
* page hierarchy, and returns false if the end of the page was reached.
* NOTE that RIL_SYMBOL will skip non-text blocks, but all other
* PageIteratorLevel level values will visit each non-text block once.
* Think of non text blocks as containing a single para, with a single line,
* with a single imaginary word.
* Calls to Next with different levels may be freely intermixed.
* This function iterates words in right-to-left scripts correctly, if
* the appropriate language has been loaded into Tesseract.
*/
virtual bool Next(PageIteratorLevel level);
/**
* Returns true if the iterator is at the start of an object at the given
* level.
*
* For instance, suppose an iterator it is pointed to the first symbol of the
* first word of the third line of the second paragraph of the first block in
* a page, then:
* it.IsAtBeginningOf(RIL_BLOCK) = false
* it.IsAtBeginningOf(RIL_PARA) = false
* it.IsAtBeginningOf(RIL_TEXTLINE) = true
* it.IsAtBeginningOf(RIL_WORD) = true
* it.IsAtBeginningOf(RIL_SYMBOL) = true
*/
virtual bool IsAtBeginningOf(PageIteratorLevel level) const;
/**
* Returns whether the iterator is positioned at the last element in a
* given level. (e.g. the last word in a line, the last line in a block)
*
* Here's some two-paragraph example
* text. It starts off innocuously
* enough but quickly turns bizarre.
* The author inserts a cornucopia
* of words to guard against confused
* references.
*
* Now take an iterator it pointed to the start of "bizarre."
* it.IsAtFinalElement(RIL_PARA, RIL_SYMBOL) = false
* it.IsAtFinalElement(RIL_PARA, RIL_WORD) = true
* it.IsAtFinalElement(RIL_BLOCK, RIL_WORD) = false
*/
virtual bool IsAtFinalElement(PageIteratorLevel level,
PageIteratorLevel element) const;
/**
* Returns whether this iterator is positioned
* before other: -1
* equal to other: 0
* after other: 1
*/
int Cmp(const PageIterator &other) const;
// ============= Accessing data ==============.
// Coordinate system:
// Integer coordinates are at the cracks between the pixels.
// The top-left corner of the top-left pixel in the image is at (0,0).
// The bottom-right corner of the bottom-right pixel in the image is at
// (width, height).
// Every bounding box goes from the top-left of the top-left contained
// pixel to the bottom-right of the bottom-right contained pixel, so
// the bounding box of the single top-left pixel in the image is:
// (0,0)->(1,1).
// If an image rectangle has been set in the API, then returned coordinates
// relate to the original (full) image, rather than the rectangle.
/**
* Controls what to include in a bounding box. Bounding boxes of all levels
* between RIL_WORD and RIL_BLOCK can include or exclude potential diacritics.
* Between layout analysis and recognition, it isn't known where all
* diacritics belong, so this control is used to include or exclude some
* diacritics that are above or below the main body of the word. In most cases
* where the placement is obvious, and after recognition, it doesn't make as
* much difference, as the diacritics will already be included in the word.
*/
void SetBoundingBoxComponents(bool include_upper_dots,
bool include_lower_dots) {
include_upper_dots_ = include_upper_dots;
include_lower_dots_ = include_lower_dots;
}
/**
* Returns the bounding rectangle of the current object at the given level.
* See comment on coordinate system above.
* Returns false if there is no such object at the current position.
* The returned bounding box is guaranteed to match the size and position
* of the image returned by GetBinaryImage, but may clip foreground pixels
* from a grey image. The padding argument to GetImage can be used to expand
* the image to include more foreground pixels. See GetImage below.
*/
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right,
int *bottom) const;
bool BoundingBox(PageIteratorLevel level, int padding, int *left, int *top,
int *right, int *bottom) const;
/**
* Returns the bounding rectangle of the object in a coordinate system of the
* working image rectangle having its origin at (rect_left_, rect_top_) with
* respect to the original image and is scaled by a factor scale_.
*/
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top,
int *right, int *bottom) const;
/** Returns whether there is no object of a given level. */
bool Empty(PageIteratorLevel level) const;
/**
* Returns the type of the current block.
* See tesseract/publictypes.h for PolyBlockType.
*/
PolyBlockType BlockType() const;
/**
* Returns the polygon outline of the current block. The returned Pta must
* be ptaDestroy-ed after use. Note that the returned Pta lists the vertices
* of the polygon, and the last edge is the line segment between the last
* point and the first point. nullptr will be returned if the iterator is
* at the end of the document or layout analysis was not used.
*/
Pta *BlockPolygon() const;
/**
* Returns a binary image of the current object at the given level.
* The position and size match the return from BoundingBoxInternal, and so
* this could be upscaled with respect to the original input image.
* Use pixDestroy to delete the image after use.
*/
Pix *GetBinaryImage(PageIteratorLevel level) const;
/**
* Returns an image of the current object at the given level in greyscale
* if available in the input. To guarantee a binary image use BinaryImage.
* NOTE that in order to give the best possible image, the bounds are
* expanded slightly over the binary connected component, by the supplied
* padding, so the top-left position of the returned image is returned
* in (left,top). These will most likely not match the coordinates
* returned by BoundingBox.
* If you do not supply an original image, you will get a binary one.
* Use pixDestroy to delete the image after use.
*/
Pix *GetImage(PageIteratorLevel level, int padding, Pix *original_img,
int *left, int *top) const;
/**
* Returns the baseline of the current object at the given level.
* The baseline is the line that passes through (x1, y1) and (x2, y2).
* WARNING: with vertical text, baselines may be vertical!
* Returns false if there is no baseline at the current position.
*/
bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2,
int *y2) const;
// Returns the attributes of the current row.
void RowAttributes(float *row_height, float *descenders,
float *ascenders) const;
/**
* Returns orientation for the block the iterator points to.
* orientation, writing_direction, textline_order: see publictypes.h
* deskew_angle: after rotating the block so the text orientation is
* upright, how many radians does one have to rotate the
* block anti-clockwise for it to be level?
* -Pi/4 <= deskew_angle <= Pi/4
*/
void Orientation(tesseract::Orientation *orientation,
tesseract::WritingDirection *writing_direction,
tesseract::TextlineOrder *textline_order,
float *deskew_angle) const;
/**
* Returns information about the current paragraph, if available.
*
* justification -
* LEFT if ragged right, or fully justified and script is left-to-right.
* RIGHT if ragged left, or fully justified and script is right-to-left.
* unknown if it looks like source code or we have very few lines.
* is_list_item -
* true if we believe this is a member of an ordered or unordered list.
* is_crown -
* true if the first line of the paragraph is aligned with the other
* lines of the paragraph even though subsequent paragraphs have first
* line indents. This typically indicates that this is the continuation
* of a previous paragraph or that it is the very first paragraph in
* the chapter.
* first_line_indent -
* For LEFT aligned paragraphs, the first text line of paragraphs of
* this kind are indented this many pixels from the left edge of the
* rest of the paragraph.
* for RIGHT aligned paragraphs, the first text line of paragraphs of
* this kind are indented this many pixels from the right edge of the
* rest of the paragraph.
* NOTE 1: This value may be negative.
* NOTE 2: if *is_crown == true, the first line of this paragraph is
* actually flush, and first_line_indent is set to the "common"
* first_line_indent for subsequent paragraphs in this block
* of text.
*/
void ParagraphInfo(tesseract::ParagraphJustification *justification,
bool *is_list_item, bool *is_crown,
int *first_line_indent) const;
// If the current WERD_RES (it_->word()) is not nullptr, sets the BlamerBundle
// of the current word to the given pointer (takes ownership of the pointer)
// and returns true.
// Can only be used when iterating on the word level.
bool SetWordBlamerBundle(BlamerBundle *blamer_bundle);
protected:
/**
* Sets up the internal data for iterating the blobs of a new word, then
* moves the iterator to the given offset.
*/
void BeginWord(int offset);
/** Pointer to the page_res owned by the API. */
PAGE_RES *page_res_;
/** Pointer to the Tesseract object owned by the API. */
Tesseract *tesseract_;
/**
* The iterator to the page_res_. Owned by this ResultIterator.
* A pointer just to avoid dragging in Tesseract includes.
*/
PAGE_RES_IT *it_;
/**
* The current input WERD being iterated. If there is an output from OCR,
* then word_ is nullptr. Owned by the API
*/
WERD *word_;
/** The length of the current word_. */
int word_length_;
/** The current blob index within the word. */
int blob_index_;
/**
* Iterator to the blobs within the word. If nullptr, then we are iterating
* OCR results in the box_word.
* Owned by this ResultIterator.
*/
C_BLOB_IT *cblob_it_;
/** Control over what to include in bounding boxes. */
bool include_upper_dots_;
bool include_lower_dots_;
/** Parameters saved from the Thresholder. Needed to rebuild coordinates.*/
int scale_;
int scaled_yres_;
int rect_left_;
int rect_top_;
int rect_width_;
int rect_height_;
};
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_PAGEITERATOR_H_

View File

@ -1,281 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: publictypes.h
// Description: Types used in both the API and internally
// Author: Ray Smith
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
#define TESSERACT_CCSTRUCT_PUBLICTYPES_H_
namespace tesseract {
// This file contains types that are used both by the API and internally
// to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
// dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT.
// Restated: It is OK for low-level Tesseract files to include publictypes.h,
// but not for the low-level tesseract code to include top-level API code.
// This file should not use other Tesseract types, as that would drag
// their includes into the API-level.
/** Printers' points per inch; the unit in which point sizes are returned. */
constexpr int kPointsPerInch{72};

/**
 * Lowest resolution considered believable. Serves as the fallback when no
 * other information is available, since under-estimating is safer than
 * over-estimating.
 */
constexpr int kMinCredibleResolution{70};

/** Highest resolution considered believable. */
constexpr int kMaxCredibleResolution{2400};

/**
 * Ratio between the median blob size and the likely resolution, used to
 * estimate the resolution when none is supplied. Essentially 1 / (usual text
 * size in inches).
 */
constexpr int kResolutionEstimationFactor{10};
/**
 * Region classification for a POLY_BLOCK or ColPartition.
 * Keep in sync with kPBColors in polyblk.cpp, with the PTIs*Type predicates
 * that follow, and with kPolyBlockNames in layout_test.cc.
 * Used extensively by ColPartition and POLY_BLOCK.
 */
enum PolyBlockType {
  PT_UNKNOWN,         // Type is not yet known. Keep as the first element.
  PT_FLOWING_TEXT,    // Text that lives inside a column.
  PT_HEADING_TEXT,    // Text that spans more than one column.
  PT_PULLOUT_TEXT,    // Text that is in a cross-column pull-out region.
  PT_EQUATION,        // Partition belonging to an equation region.
  PT_INLINE_EQUATION, // Partition has inline equation.
  PT_TABLE,           // Partition belonging to a table region.
  PT_VERTICAL_TEXT,   // Text-line runs vertically.
  PT_CAPTION_TEXT,    // Text that belongs to an image.
  PT_FLOWING_IMAGE,   // Image that lives inside a column.
  PT_HEADING_IMAGE,   // Image that spans more than one column.
  PT_PULLOUT_IMAGE,   // Image that is in a cross-column pull-out region.
  PT_HORZ_LINE,       // Horizontal Line.
  PT_VERT_LINE,       // Vertical Line.
  PT_NOISE,           // Lies outside of any column.
  PT_COUNT            // Number of entries above; keep as the last element.
};

/** Returns true for the line types (horizontal or vertical line). */
inline bool PTIsLineType(PolyBlockType type) {
  switch (type) {
    case PT_HORZ_LINE:
    case PT_VERT_LINE:
      return true;
    default:
      return false;
  }
}

/** Returns true for the image types (flowing, heading or pull-out image). */
inline bool PTIsImageType(PolyBlockType type) {
  switch (type) {
    case PT_FLOWING_IMAGE:
    case PT_HEADING_IMAGE:
    case PT_PULLOUT_IMAGE:
      return true;
    default:
      return false;
  }
}

/**
 * Returns true for the text types. Tables and inline equations count as
 * text; PT_EQUATION does not.
 */
inline bool PTIsTextType(PolyBlockType type) {
  switch (type) {
    case PT_FLOWING_TEXT:
    case PT_HEADING_TEXT:
    case PT_PULLOUT_TEXT:
    case PT_TABLE:
    case PT_VERTICAL_TEXT:
    case PT_CAPTION_TEXT:
    case PT_INLINE_EQUATION:
      return true;
    default:
      return false;
  }
}

/** Returns true for the pull-out (inter-column) types, image or text. */
inline bool PTIsPulloutType(PolyBlockType type) {
  switch (type) {
    case PT_PULLOUT_IMAGE:
    case PT_PULLOUT_TEXT:
      return true;
    default:
      return false;
  }
}
/**
* +------------------+ Orientation Example:
* | 1 Aaaa Aaaa Aaaa | ====================
* | Aaa aa aaa aa | To left is a diagram of some (1) English and
* | aaaaaa A aa aaa. | (2) Chinese text and a (3) photo credit.
* | 2 |
* | ####### c c C | Upright Latin characters are represented as A and a.
* | ####### c c c | '<' represents a latin character rotated
* | < ####### c c c | anti-clockwise 90 degrees.
* | < ####### c c |
* | < ####### . c | Upright Chinese characters are represented C and c.
* | 3 ####### c |
* +------------------+ NOTA BENE: enum values here should match goodoc.proto
* If you orient your head so that "up" aligns with Orientation,
* then the characters will appear "right side up" and readable.
*
* In the example above, both the English and Chinese paragraphs are oriented
* so their "up" is the top of the page (page up). The photo credit is read
* with one's head turned leftward ("up" is to page left).
*
* The values of this enum match the convention of Tesseract's osdetect.h
*/
enum Orientation {
// Values are spelled out explicitly: per the comment above they must match
// goodoc.proto and follow the convention of Tesseract's osdetect.h.
// Text "up" is the top of the page (the English and Chinese paragraphs in the
// example above).
ORIENTATION_PAGE_UP = 0,
// Presumably text "up" points to the right edge of the page.
ORIENTATION_PAGE_RIGHT = 1,
// Presumably text "up" points to the bottom of the page.
ORIENTATION_PAGE_DOWN = 2,
// Text "up" is to page left (the photo credit in the example above).
ORIENTATION_PAGE_LEFT = 3,
};
/**
* The grapheme clusters within a line of text are laid out logically
* in this direction, judged when looking at the text line rotated so that
* its Orientation is "page up".
*
* For English text, the writing direction is left-to-right. For the
* Chinese text in the above example, the writing direction is top-to-bottom.
*/
enum WritingDirection {
// Direction is judged with the text line rotated so that its Orientation is
// "page up" (see the comment above).
// E.g. English text.
WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
// E.g. the Chinese text in the Orientation example.
WRITING_DIRECTION_TOP_TO_BOTTOM = 2,
};
/**
* The text lines are read in the given sequence.
*
* In English, the order is top-to-bottom.
* In Chinese, vertical text lines are read right-to-left. Mongolian is
* written in vertical columns top to bottom like Chinese, but the lines
* order left-to right.
*
* Note that only some combinations make sense. For example,
* WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM
*/
enum TextlineOrder {
// Order in which successive text lines are read.
// E.g. Mongolian: vertical lines, ordered left to right.
TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
// E.g. Chinese vertical text lines.
TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
// E.g. English.
TEXTLINE_ORDER_TOP_TO_BOTTOM = 2,
};
/**
 * Page layout analysis modes. Apart from OSD_ONLY, the entries *must* stay
 * ordered by decreasing amount of layout analysis, because the PSM_*
 * predicate functions below rely on ordered comparisons.
 */
enum PageSegMode {
  PSM_OSD_ONLY = 0,      ///< Orientation and script detection only.
  PSM_AUTO_OSD = 1,      ///< Automatic page segmentation with orientation and
                         ///< script detection. (OSD)
  PSM_AUTO_ONLY = 2,     ///< Automatic page segmentation, but no OSD, or OCR.
  PSM_AUTO = 3,          ///< Fully automatic page segmentation, but no OSD.
  PSM_SINGLE_COLUMN = 4, ///< Assume a single column of text of variable sizes.
  PSM_SINGLE_BLOCK_VERT_TEXT = 5, ///< Assume a single uniform block of
                                  ///< vertically aligned text.
  PSM_SINGLE_BLOCK = 6, ///< Assume a single uniform block of text. (Default.)
  PSM_SINGLE_LINE = 7,  ///< Treat the image as a single text line.
  PSM_SINGLE_WORD = 8,  ///< Treat the image as a single word.
  PSM_CIRCLE_WORD = 9,  ///< Treat the image as a single word in a circle.
  PSM_SINGLE_CHAR = 10, ///< Treat the image as a single character.
  PSM_SPARSE_TEXT = 11, ///< Find as much text as possible in no particular
                        ///< order.
  PSM_SPARSE_TEXT_OSD = 12, ///< Sparse text with orientation and script det.
  PSM_RAW_LINE = 13, ///< Treat the image as a single text line, bypassing
                     ///< hacks that are Tesseract-specific.
  PSM_COUNT          ///< Number of enum entries.
};

/**
 * Predicates telling which components of layout analysis a PageSegMode
 * enables.
 * *They depend critically on the order of the elements of PageSegMode.*
 * NOTE that the argument is an int for compatibility with INT_PARAM.
 */

/** True when orientation and script detection (OSD) is part of the mode. */
inline bool PSM_OSD_ENABLED(int pageseg_mode) {
  if (pageseg_mode == PSM_SPARSE_TEXT_OSD) {
    return true;
  }
  return pageseg_mode <= PSM_AUTO_OSD;
}

/** True when orientation detection is part of the mode. */
inline bool PSM_ORIENTATION_ENABLED(int pageseg_mode) {
  if (pageseg_mode == PSM_SPARSE_TEXT_OSD) {
    return true;
  }
  return pageseg_mode <= PSM_AUTO;
}

/** True when column finding is part of the mode. */
inline bool PSM_COL_FIND_ENABLED(int pageseg_mode) {
  return !(pageseg_mode < PSM_AUTO_OSD || pageseg_mode > PSM_AUTO);
}

/** True for the two sparse-text modes. */
inline bool PSM_SPARSE(int pageseg_mode) {
  switch (pageseg_mode) {
    case PSM_SPARSE_TEXT:
    case PSM_SPARSE_TEXT_OSD:
      return true;
    default:
      return false;
  }
}

/** True when block finding is part of the mode. */
inline bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode) {
  return !(pageseg_mode < PSM_AUTO_OSD || pageseg_mode > PSM_SINGLE_COLUMN);
}

/** True when text-line finding is part of the mode. */
inline bool PSM_LINE_FIND_ENABLED(int pageseg_mode) {
  return !(pageseg_mode < PSM_AUTO_OSD || pageseg_mode > PSM_SINGLE_BLOCK);
}

/** True when word finding is part of the mode. */
inline bool PSM_WORD_FIND_ENABLED(int pageseg_mode) {
  if (pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD) {
    return true;
  }
  const bool at_least_auto_osd = pageseg_mode >= PSM_AUTO_OSD;
  const bool at_most_single_line = pageseg_mode <= PSM_SINGLE_LINE;
  return at_least_auto_osd && at_most_single_line;
}
/**
* enum of the elements of the page hierarchy, used in ResultIterator
* to provide functions that operate on each level without having to
* have 5x as many functions.
*/
enum PageIteratorLevel {
// Levels are listed from coarsest to finest: each level lives inside the one
// listed before it, as the per-entry comments describe.
RIL_BLOCK, // Block of text/image/separator line.
RIL_PARA, // Paragraph within a block.
RIL_TEXTLINE, // Line within a paragraph.
RIL_WORD, // Word within a textline.
RIL_SYMBOL // Symbol/character within a word.
};
/**
* JUSTIFICATION_UNKNOWN
* The alignment is not clearly one of the other options. This could happen
* for example if there are only one or two lines of text or the text looks
* like source code or poetry.
*
* NOTA BENE: Fully justified paragraphs (text aligned to both left and right
* margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text
* is written with a left-to-right script and with JUSTIFICATION_RIGHT if
* their text is written in a right-to-left script.
*
* Interpretation for text read in vertical lines:
* "Left" is wherever the starting reading position is.
*
* JUSTIFICATION_LEFT
* Each line, except possibly the first, is flush to the same left tab stop.
*
* JUSTIFICATION_CENTER
* The text lines of the paragraph are centered about a line going
* down through their middle of the text lines.
*
* JUSTIFICATION_RIGHT
* Each line, except possibly the first, is flush to the same right tab stop.
*/
enum ParagraphJustification {
// Alignment is not clearly one of the other options (e.g. only one or two
// lines of text, or text that looks like source code or poetry).
JUSTIFICATION_UNKNOWN,
// Each line, except possibly the first, is flush to the same left tab stop.
// Also used for fully justified paragraphs in a left-to-right script.
JUSTIFICATION_LEFT,
// Text lines are centered about a line going down through their middle.
JUSTIFICATION_CENTER,
// Each line, except possibly the first, is flush to the same right tab stop.
// Also used for fully justified paragraphs in a right-to-left script.
JUSTIFICATION_RIGHT,
};
/**
* When Tesseract/Cube is initialized we can choose to instantiate/load/run
* only the Tesseract part, only the Cube part or both along with the combiner.
* The preference of which engine to use is stored in tessedit_ocr_engine_mode.
*
* ATTENTION: When modifying this enum, please make sure to make the
* appropriate changes to all the enums mirroring it (e.g. OCREngine in
* cityblock/workflow/detection/detection_storage.proto). Such enums will
* mention the connection to OcrEngineMode in the comments.
*/
enum OcrEngineMode {
// NOTE(review): the comment above this enum still describes a "Cube" engine
// and a combiner, while the entries below are Tesseract and LSTM modes --
// confirm and update that description.
OEM_TESSERACT_ONLY, // Run Tesseract only - fastest; deprecated
OEM_LSTM_ONLY, // Run just the LSTM line recognizer.
OEM_TESSERACT_LSTM_COMBINED, // Run the LSTM recognizer, but allow fallback
// to Tesseract when things get difficult.
// deprecated
OEM_DEFAULT, // Specify this mode when calling init_*(),
// to indicate that any of the above modes
// should be automatically inferred from the
// variables in the language-specific config,
// command-line configs, or if not specified
// in any of the above should be set to the
// default OEM_TESSERACT_ONLY.
OEM_COUNT // Number of OEMs; last entry, so it equals the count of modes above.
};
} // namespace tesseract.
#endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_

View File

@ -1,311 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: renderer.h
// Description: Rendering interface to inject into TessBaseAPI
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_API_RENDERER_H_
#define TESSERACT_API_RENDERER_H_
#include "export.h"
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
// complexity of includes here. Use forward declarations wherever possible
// and hide includes of complex types in baseapi.cpp.
#include <cstdint>
#include <cstdio> // for FILE
#include <string> // for std::string
#include <vector> // for std::vector
struct Pix;
namespace tesseract {
class TessBaseAPI;
/**
* Interface for rendering tesseract results into a document, such as text,
* HOCR or pdf. This class is abstract. Specific classes handle individual
* formats. This interface is then used to inject the renderer class into
* tesseract when processing images.
*
* For simplicity implementing this with tesseract version 3.01,
* the renderer contains document state that is cleared from document
* to document just as the TessBaseAPI is. This way the base API can just
* delegate its rendering functionality to injected renderers, and the
* renderers can manage the associated state needed for the specific formats
* in addition to the heuristics for producing it.
*/
class TESS_API TessResultRenderer {
public:
// Virtual so that concrete renderers can be destroyed through a base pointer.
virtual ~TessResultRenderer();
// Takes ownership of the pointer, so it must be a new'd instance.
// Renderers aren't ordered; this appends the sequence reachable from the
// `next` parameter to the sequence reachable from the existing next().
// The renderers should be unique across both lists.
// NOTE(review): the class holds an owned renderer chain and a FILE*, yet no
// copy operations are deleted here -- confirm that copying is never intended.
void insert(TessResultRenderer *next);
// Returns the next renderer or nullptr.
TessResultRenderer *next() {
return next_;
}
/**
* Starts a new document with the given title.
* This clears the contents of the output data.
* Title should use UTF-8 encoding.
*/
bool BeginDocument(const char *title);
/**
* Adds the recognized text from the source image to the current document.
* Invalid if BeginDocument not yet called.
*
* Note that this API is a bit weird but is designed to fit into the
* current TessBaseAPI implementation where the api has lots of state
* information that we might want to add in.
*/
bool AddImage(TessBaseAPI *api);
/**
* Finishes the document and finalizes the output data.
* Invalid if BeginDocument not yet called.
*/
bool EndDocument();
// Returns the stored extension pointer (standard extension for generated
// output). The pointer is returned as stored; it is not copied.
const char *file_extension() const {
return file_extension_;
}
// Returns the title of the document being rendered. The pointer refers to
// the internal buffer of title_.
const char *title() const {
return title_.c_str();
}
// Is everything fine? Otherwise something went wrong.
bool happy() const {
return happy_;
}
/**
* Returns the index of the last image given to AddImage
* (i.e. images are incremented whether the image succeeded or not)
*
* This is always defined. It means either the number of the
* current image, the last image ended, or in the completed document
* depending on when in the document lifecycle you are looking at it.
* Will return -1 if a document was never started.
*/
int imagenum() const {
return imagenum_;
}
protected:
/**
* Called by concrete classes.
*
* outputbase is the name of the output file excluding
* extension. For example, "/path/to/chocolate-chip-cookie-recipe"
*
* extension indicates the file extension to be used for output
* files. For example "pdf" will produce a .pdf file, and "hocr"
* will produce .hocr files.
*
* NOTE(review): file_extension_ is a raw const char*, so presumably the
* `extension` argument must outlive the renderer -- confirm in the
* implementation.
*/
TessResultRenderer(const char *outputbase, const char *extension);
// Hook for specialized handling in BeginDocument()
virtual bool BeginDocumentHandler();
// This must be overridden to render the OCR'd results
// (pure virtual: it is what makes this class abstract).
virtual bool AddImageHandler(TessBaseAPI *api) = 0;
// Hook for specialized handling in EndDocument()
virtual bool EndDocumentHandler();
// Renderers can call this to append '\0' terminated strings into
// the output string returned by GetOutput.
// This method will grow the output buffer if needed.
// NOTE(review): no GetOutput is declared in this class and the only output
// member visible is fout_ -- this comment may be stale; confirm.
void AppendString(const char *s);
// Renderers can call this to append binary byte sequences into
// the output string returned by GetOutput. Note that s is not necessarily
// '\0' terminated (and can contain '\0' within it).
// This method will grow the output buffer if needed.
void AppendData(const char *s, int len);
private:
TessResultRenderer *next_; // Can link multiple renderers together
FILE *fout_; // output file pointer
const char *file_extension_; // standard extension for generated output
std::string title_; // title of document being rendered
int imagenum_; // index of last image added
bool happy_; // I get grumpy when the disk fills up, etc.
};
/**
* Renders tesseract output into a plain UTF-8 text string
*/
class TESS_API TessTextRenderer : public TessResultRenderer {
public:
// outputbase: presumably the output file name without extension, as described
// for the TessResultRenderer constructor.
explicit TessTextRenderer(const char *outputbase);
protected:
// Implements the pure virtual per-image hook of TessResultRenderer.
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
* Renders tesseract output into an hocr text string
*/
class TESS_API TessHOcrRenderer : public TessResultRenderer {
public:
// outputbase: presumably the output file name without extension.
// font_info: whether to print font information (see font_info_).
explicit TessHOcrRenderer(const char *outputbase, bool font_info);
// Overload without font_info.
// NOTE(review): the value font_info_ takes here is set in the implementation,
// which is not visible in this header -- confirm the default.
explicit TessHOcrRenderer(const char *outputbase);
protected:
// Overrides of the three TessResultRenderer document hooks.
bool BeginDocumentHandler() override;
bool AddImageHandler(TessBaseAPI *api) override;
bool EndDocumentHandler() override;
private:
bool font_info_; // whether to print font information
};
/**
* Renders tesseract output into an alto text string
*/
class TESS_API TessAltoRenderer : public TessResultRenderer {
public:
// outputbase: presumably the output file name without extension.
explicit TessAltoRenderer(const char *outputbase);
protected:
// Overrides of the three TessResultRenderer document hooks.
bool BeginDocumentHandler() override;
bool AddImageHandler(TessBaseAPI *api) override;
bool EndDocumentHandler() override;
private:
// NOTE(review): undocumented state flag; presumably tracks whether the
// document header has been emitted -- confirm in the implementation. Unlike
// the other members in this header it has no trailing underscore.
bool begin_document;
};
/**
* Renders Tesseract output into a TSV string
*/
class TESS_API TessTsvRenderer : public TessResultRenderer {
public:
// outputbase: presumably the output file name without extension.
// font_info: whether to print font information (see font_info_).
explicit TessTsvRenderer(const char *outputbase, bool font_info);
// Overload without font_info.
// NOTE(review): the value font_info_ takes here is set in the implementation,
// which is not visible in this header -- confirm the default.
explicit TessTsvRenderer(const char *outputbase);
protected:
// Overrides of the three TessResultRenderer document hooks.
bool BeginDocumentHandler() override;
bool AddImageHandler(TessBaseAPI *api) override;
bool EndDocumentHandler() override;
private:
bool font_info_; // whether to print font information
};
/**
* Renders tesseract output into searchable PDF
*/
class TESS_API TessPDFRenderer : public TessResultRenderer {
public:
// datadir is the location of the TESSDATA. We need it because
// we load a custom PDF font from this location.
// textonly: images are skipped if set (see textonly_); defaults to false.
TessPDFRenderer(const char *outputbase, const char *datadir,
bool textonly = false);
protected:
// Overrides of the three TessResultRenderer document hooks.
bool BeginDocumentHandler() override;
bool AddImageHandler(TessBaseAPI *api) override;
bool EndDocumentHandler() override;
private:
// We don't want to have every image in memory at once,
// so we store some metadata as we go along producing
// PDFs one page at a time. At the end, that metadata is
// used to make everything that isn't easily handled in a
// streaming fashion.
long int obj_; // counter for PDF objects
std::vector<uint64_t> offsets_; // offset of every PDF object in bytes
std::vector<long int> pages_; // object number for every /Page object
std::string datadir_; // where to find the custom font
bool textonly_; // skip images if set
// Bookkeeping only. DIY = Do It Yourself.
void AppendPDFObjectDIY(size_t objectsize);
// Bookkeeping + emit data.
void AppendPDFObject(const char *data);
// Create the /Contents object for an entire page.
// NOTE(review): returns a raw char*; ownership and how to free it are not
// stated here -- confirm in the implementation.
char *GetPDFTextObjects(TessBaseAPI *api, double width, double height);
// Turn an image into a PDF object. Only transcode if we have to.
// NOTE(review): pdf_object and pdf_object_size look like output parameters;
// who frees *pdf_object is not stated here -- confirm.
static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum,
char **pdf_object, long int *pdf_object_size,
int jpg_quality);
};
/**
* Renders tesseract output into a plain UTF-8 text string
*/
class TESS_API TessUnlvRenderer : public TessResultRenderer {
// NOTE(review): the description above this class is word-for-word the one used
// for TessTextRenderer; presumably this renderer emits UNLV-format output
// instead -- confirm and update that description.
public:
// outputbase: presumably the output file name without extension.
explicit TessUnlvRenderer(const char *outputbase);
protected:
// Implements the pure virtual per-image hook of TessResultRenderer.
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
* Renders tesseract output into a plain UTF-8 text string for LSTMBox
*/
class TESS_API TessLSTMBoxRenderer : public TessResultRenderer {
public:
// outputbase: presumably the output file name without extension.
explicit TessLSTMBoxRenderer(const char *outputbase);
protected:
// Implements the pure virtual per-image hook of TessResultRenderer.
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
* Renders tesseract output into a plain UTF-8 text string
*/
class TESS_API TessBoxTextRenderer : public TessResultRenderer {
// NOTE(review): the description above this class is word-for-word the one used
// for TessTextRenderer; presumably this renderer emits box-file text
// instead -- confirm and update that description.
public:
// outputbase: presumably the output file name without extension.
explicit TessBoxTextRenderer(const char *outputbase);
protected:
// Implements the pure virtual per-image hook of TessResultRenderer.
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
* Renders tesseract output into a plain UTF-8 text string in WordStr format
*/
class TESS_API TessWordStrBoxRenderer : public TessResultRenderer {
public:
// outputbase: presumably the output file name without extension.
explicit TessWordStrBoxRenderer(const char *outputbase);
protected:
// Implements the pure virtual per-image hook of TessResultRenderer.
bool AddImageHandler(TessBaseAPI *api) override;
};
#ifndef DISABLED_LEGACY_ENGINE
/**
* Renders tesseract output into an osd text string
*/
class TESS_API TessOsdRenderer : public TessResultRenderer {
// Only declared when DISABLED_LEGACY_ENGINE is not defined (see the
// surrounding #ifndef).
public:
// outputbase: presumably the output file name without extension.
explicit TessOsdRenderer(const char *outputbase);
protected:
// Implements the pure virtual per-image hook of TessResultRenderer.
bool AddImageHandler(TessBaseAPI *api) override;
};
#endif // ndef DISABLED_LEGACY_ENGINE
} // namespace tesseract.
#endif // TESSERACT_API_RENDERER_H_

View File

@ -1,250 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: resultiterator.h
// Description: Iterator for tesseract results that is capable of
// iterating in proper reading order over Bi Directional
// (e.g. mixed Hebrew and English) text.
// Author: David Eger
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
#include "export.h" // for TESS_API, TESS_LOCAL
#include "ltrresultiterator.h" // for LTRResultIterator
#include "publictypes.h" // for PageIteratorLevel
#include "unichar.h" // for StrongScriptDirection
#include <set> // for std::pair
#include <vector> // for std::vector
namespace tesseract {
class TESS_API ResultIterator : public LTRResultIterator {
public:
static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);
/**
* ResultIterator is copy constructible!
* The default copy constructor works just fine for us.
*/
~ResultIterator() override = default;
// ============= Moving around within the page ============.
/**
* Moves the iterator to point to the start of the page to begin
* an iteration.
*/
void Begin() override;
/**
* Moves to the start of the next object at the given level in the
* page hierarchy in the appropriate reading order and returns false if
* the end of the page was reached.
* NOTE that RIL_SYMBOL will skip non-text blocks, but all other
* PageIteratorLevel level values will visit each non-text block once.
* Think of non text blocks as containing a single para, with a single line,
* with a single imaginary word.
* Calls to Next with different levels may be freely intermixed.
* This function iterates words in right-to-left scripts correctly, if
* the appropriate language has been loaded into Tesseract.
*/
bool Next(PageIteratorLevel level) override;
/**
* IsAtBeginningOf() returns whether we're at the logical beginning of the
* given level. (as opposed to ResultIterator's left-to-right top-to-bottom
* order). Otherwise, this acts the same as PageIterator::IsAtBeginningOf().
* For a full description, see pageiterator.h
*/
bool IsAtBeginningOf(PageIteratorLevel level) const override;
/**
* Implement PageIterator's IsAtFinalElement correctly in a BiDi context.
* For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we
* point at the last word in a paragraph. See PageIterator for full comment.
*/
bool IsAtFinalElement(PageIteratorLevel level,
PageIteratorLevel element) const override;
// ============= Functions that refer to words only ============.
// Returns the number of blanks before the current word.
int BlanksBeforeWord() const;
// ============= Accessing data ==============.
/**
* Returns the null terminated UTF-8 encoded text string for the current
* object at the given level. Use delete [] to free after use.
*/
virtual char *GetUTF8Text(PageIteratorLevel level) const;
/**
* Returns the LSTM choices for every LSTM timestep for the current word.
*/
virtual std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
*GetRawLSTMTimesteps() const;
virtual std::vector<std::vector<std::pair<const char *, float>>>
*GetBestLSTMSymbolChoices() const;
/**
* Return whether the current paragraph's dominant reading direction
* is left-to-right (as opposed to right-to-left).
*/
bool ParagraphIsLtr() const;
// ============= Exposed only for testing =============.
/**
* Yields the reading order as a sequence of indices and (optional)
* meta-marks for a set of words (given left-to-right).
* The meta marks are passed as negative values:
* kMinorRunStart Start of minor direction text.
* kMinorRunEnd End of minor direction text.
* kComplexWord The next indexed word contains both left-to-right and
* right-to-left characters and was treated as neutral.
*
* For example, suppose we have five words in a text line,
* indexed [0,1,2,3,4] from the leftmost side of the text line.
* The following are all believable reading_orders:
*
* Left-to-Right (in ltr paragraph):
* { 0, 1, 2, 3, 4 }
* Left-to-Right (in rtl paragraph):
* { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }
* Right-to-Left (in rtl paragraph):
* { 4, 3, 2, 1, 0 }
* Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph:
* { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }
*/
static void CalculateTextlineOrder(
bool paragraph_is_ltr,
const std::vector<StrongScriptDirection> &word_dirs,
std::vector<int> *reading_order);
static const int kMinorRunStart;
static const int kMinorRunEnd;
static const int kComplexWord;
protected:
/**
* We presume the data associated with the given iterator will outlive us.
* NB: This is private because it does something that is non-obvious:
* it resets to the beginning of the paragraph instead of staying wherever
* resit might have pointed.
*/
explicit ResultIterator(const LTRResultIterator &resit);
private:
/**
* Calculates the current paragraph's dominant writing direction.
* Typically, members should use current_paragraph_ltr_ instead.
*/
bool CurrentParagraphIsLtr() const;
/**
* Returns word indices as measured from resit->RestartRow() = index 0
* for the reading order of words within a textline given an iterator
* into the middle of the text line.
* In addition to non-negative word indices, the following negative values
* may be inserted:
* kMinorRunStart Start of minor direction text.
* kMinorRunEnd End of minor direction text.
* kComplexWord The previous word contains both left-to-right and
* right-to-left characters and was treated as neutral.
*/
void CalculateTextlineOrder(bool paragraph_is_ltr,
const LTRResultIterator &resit,
std::vector<int> *indices) const;
/** Same as above, but the caller's ssd gets filled in if ssd != nullptr. */
void CalculateTextlineOrder(bool paragraph_is_ltr,
const LTRResultIterator &resit,
std::vector<StrongScriptDirection> *ssd,
std::vector<int> *indices) const;
/**
* What is the index of the current word in a strict left-to-right reading
* of the row?
*/
int LTRWordIndex() const;
/**
* Given an iterator pointing at a word, returns the logical reading order
* of blob indices for the word.
*/
void CalculateBlobOrder(std::vector<int> *blob_indices) const;
/** Precondition: current_paragraph_is_ltr_ is set. */
void MoveToLogicalStartOfTextline();
/**
* Precondition: current_paragraph_is_ltr_ and in_minor_direction_
* are set.
*/
void MoveToLogicalStartOfWord();
/** Are we pointing at the final (reading order) symbol of the word? */
bool IsAtFinalSymbolOfWord() const;
/** Are we pointing at the first (reading order) symbol of the word? */
bool IsAtFirstSymbolOfWord() const;
/**
* Append any extra marks that should be appended to this word when printed.
* Mostly, these are Unicode BiDi control characters.
*/
void AppendSuffixMarks(std::string *text) const;
/** Appends the current word in reading order to the given buffer.*/
void AppendUTF8WordText(std::string *text) const;
/**
* Appends the text of the current text line, *assuming this iterator is
* positioned at the beginning of the text line*. This function
* updates the iterator to point to the first position past the text line.
* Each textline is terminated in a single newline character.
* If the textline ends a paragraph, it gets a second terminal newline.
*/
void IterateAndAppendUTF8TextlineText(std::string *text);
/**
* Appends the text of the current paragraph in reading order
* to the given buffer.
* Each textline is terminated in a single newline character, and the
* paragraph gets an extra newline at the end.
*/
void AppendUTF8ParagraphText(std::string *text) const;
/** Returns whether the bidi_debug flag is set to at least min_level. */
bool BidiDebug(int min_level) const;
bool current_paragraph_is_ltr_;
/**
* Is the currently pointed-at character at the beginning of
* a minor-direction run?
*/
bool at_beginning_of_minor_run_;
/** Is the currently pointed-at character in a minor-direction sequence? */
bool in_minor_direction_;
/**
* Should detected inter-word spaces be preserved, or "compressed" to a single
* space character (default behavior).
*/
bool preserve_interword_spaces_;
};
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_

View File

@ -1,174 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: unichar.h
// Description: Unicode character/ligature class.
// Author: Ray Smith
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCUTIL_UNICHAR_H_
#define TESSERACT_CCUTIL_UNICHAR_H_
#include "export.h"
#include <memory.h>
#include <cstring>
#include <string>
#include <vector>
namespace tesseract {
// Maximum number of characters that can be stored in a UNICHAR. Must be
// at least 4. Must not exceed 31 without changing the coding of length.
// This is also the size of the char buffer inside UNICHAR, whose last
// element doubles as a length field (see UNICHAR::utf8_len()).
#define UNICHAR_LEN 30
// A UNICHAR_ID is the unique id of a unichar.
using UNICHAR_ID = int;
// A variable to indicate an invalid or uninitialized unichar id.
// Declared static in a header, so each including translation unit gets its
// own copy.
static const int INVALID_UNICHAR_ID = -1;
// A special unichar that corresponds to INVALID_UNICHAR_ID.
static const char INVALID_UNICHAR[] = "__INVALID_UNICHAR__";
// Classification of a piece of text by the writing direction of the
// characters it contains. The numeric values are spelled out explicitly;
// NOTE(review): presumably because they are relied on elsewhere — confirm
// before renumbering.
enum StrongScriptDirection {
DIR_NEUTRAL = 0, // Text contains only neutral characters.
DIR_LEFT_TO_RIGHT = 1, // Text contains no Right-to-Left characters.
DIR_RIGHT_TO_LEFT = 2, // Text contains no Left-to-Right characters.
DIR_MIX = 3, // Text contains a mixture of left-to-right
// and right-to-left characters.
};
// Element type of the code point vectors used by UNICHAR::UTF8ToUTF32 and
// UNICHAR::UTF32ToUTF8. NOTE(review): the name suggests 32 bits, but the
// width is whatever the platform's int is — confirm if that matters.
using char32 = signed int;
// The UNICHAR class holds a single classification result. This may be
// a single Unicode character (stored as between 1 and 4 utf8 bytes) or
// multiple Unicode characters representing the NFKC expansion of a ligature
// such as fi, ffl etc. These are also stored as utf8.
class TESS_API UNICHAR {
public:
  // Creates an empty UNICHAR: every byte of the buffer, including the
  // trailing length byte, is zero.
  UNICHAR() {
    memset(chars, 0, sizeof(chars));
  }

  // Builds a UNICHAR from a UTF-8 string. A negative len means utf8_str is
  // null terminated. Input too long to fit in the UNICHAR is cut down to
  // what will fit.
  UNICHAR(const char *utf8_str, int len);

  // Builds a UNICHAR from a single UCS-4 character.
  explicit UNICHAR(int unicode);

  // The compiler-generated copy constructor and operator= are fine.

  // Returns the first character as UCS-4.
  int first_uni() const;

  // Returns the length of the stored UTF-8 string. The final buffer byte
  // holds the length when its value lies in [0, UNICHAR_LEN); any other
  // value is treated as a genuine character, i.e. the buffer is full.
  int utf8_len() const {
    const int stored = chars[UNICHAR_LEN - 1];
    if (stored < 0 || stored >= UNICHAR_LEN) {
      return UNICHAR_LEN;
    }
    return stored;
  }

  // Returns the stored UTF-8 bytes. The result is NOT null terminated.
  const char *utf8() const {
    return chars;
  }

  // Returns a terminated UTF-8 string: the caller must delete[] it after use.
  char *utf8_str() const;

  // Returns the number of bytes in the first character of the given UTF-8
  // string.
  static int utf8_step(const char *utf8_str);

  // Helper for walking over and reading the elements of a UTF-8 string.
  // Unlike UNICHAR itself, const_iterator does NOT COPY or take ownership of
  // the underlying byte array, and (as the name says) it does not allow the
  // array to be modified.
  //
  // Example:
  //   for (UNICHAR::const_iterator it = UNICHAR::begin(str, str_len);
  //        it != UNICHAR::end(str, str_len);
  //        ++it) {
  //     printf("UCS-4 symbol code = %d\n", *it);
  //     char buf[5];
  //     int char_len = it.get_utf8(buf); buf[char_len] = '\0';
  //     printf("Char = %s\n", buf);
  //   }
  class TESS_API const_iterator {
    using CI = const_iterator;

  public:
    // Advances to the next UTF-8 character.
    // At an illegal UTF-8 character an error message is printed and the
    // iterator advances by one byte. At a null character the iterator does
    // not step past it.
    const_iterator &operator++();

    // Returns the UCS-4 value at the current position, or a single space
    // character if the current position holds an illegal UTF-8 value.
    int operator*() const;

    // Writes the UTF-8 encoding of the current code point into buf, which
    // must be at least 4 bytes long, and returns the number of bytes written.
    // At an illegal UTF-8 value a single space character is written and 1 is
    // returned.
    // The buffer is not null terminated by this method.
    int get_utf8(char *buf) const;

    // Returns the byte count of the current code point, or 1 if the current
    // position holds an illegal UTF-8 value.
    int utf8_len() const;

    // Returns true if the UTF-8 encoding at the current position is legal.
    bool is_legal() const;

    // Returns the pointer into the string at the current position.
    const char *utf8_data() const {
      return it_;
    }

    // Iterators compare equal exactly when they point at the same byte.
    friend bool operator==(const CI &lhs, const CI &rhs) {
      return lhs.it_ == rhs.it_;
    }
    friend bool operator!=(const CI &lhs, const CI &rhs) {
      return lhs.it_ != rhs.it_;
    }

  private:
    // Only UNICHAR (via begin()/end()) may create iterators.
    friend class UNICHAR;
    explicit const_iterator(const char *it) : it_(it) {}

    const char *it_; // Pointer into the string.
  };

  // Create a start/end iterator over a string. Both are static and neither
  // copies nor takes ownership of the underlying array.
  static const_iterator begin(const char *utf8_str, int byte_length);
  static const_iterator end(const char *utf8_str, int byte_length);

  // Converts a UTF-8 string to a vector of unicodes.
  // Returns an empty vector if the input contains invalid UTF-8.
  static std::vector<char32> UTF8ToUTF32(const char *utf8_str);

  // Converts a vector of unicodes to a UTF-8 string.
  // Returns an empty string if the input contains an invalid unicode.
  static std::string UTF32ToUTF8(const std::vector<char32> &str32);

private:
  // UTF-8 representation of one or more Unicode characters.
  // The last element (chars[UNICHAR_LEN - 1]) is a length if its value is
  // < UNICHAR_LEN; otherwise it is a genuine character.
  char chars[UNICHAR_LEN]{};
};
} // namespace tesseract
#endif // TESSERACT_CCUTIL_UNICHAR_H_

View File

@ -1,34 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: version.h
// Description: Version information
//
// (C) Copyright 2018, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_API_VERSION_H_
#define TESSERACT_API_VERSION_H_
// clang-format off
// NOTE(review): the @NAME@ tokens below look like build-system substitution
// placeholders, i.e. this file appears to be an unconfigured template —
// confirm how it is meant to be generated before including it directly.
#define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
#define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
#define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@
// Single integer combining the three components: major shifted left by 16,
// minor shifted left by 8, micro unshifted. The fields only stay distinct if
// minor and micro each fit in 8 bits.
#define TESSERACT_VERSION \
(TESSERACT_MAJOR_VERSION << 16 | \
TESSERACT_MINOR_VERSION << 8 | \
TESSERACT_MICRO_VERSION)
// Version as a string literal.
#define TESSERACT_VERSION_STR "@PACKAGE_VERSION@"
// clang-format on
#endif // TESSERACT_API_VERSION_H_

View File

@ -1,812 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: baseapi.h
// Description: Simple API for calling tesseract.
// Author: Ray Smith
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_API_BASEAPI_H_
#define TESSERACT_API_BASEAPI_H_
#ifdef HAVE_CONFIG_H
# include "config_auto.h" // DISABLED_LEGACY_ENGINE
#endif
#include "export.h"
#include "pageiterator.h"
#include "publictypes.h"
#include "resultiterator.h"
#include "unichar.h"
#include "version.h"
#include <cstdio>
#include <vector> // for std::vector
// Forward declarations of the image/box types that appear in this API by
// pointer only. The comments in this header describe Boxa and Pixa as
// leptonica-style containers and Pix as Tesseract's internal image
// representation.
struct Pix;
struct Pixa;
struct Boxa;
namespace tesseract {
// Forward declarations of Tesseract types that this header names without
// needing their definitions.
// NOTE(review): PageIterator, LTRResultIterator and ResultIterator are also
// reachable through the pageiterator.h / resultiterator.h includes above, so
// those entries look redundant — confirm before removing.
class PAGE_RES;
class ParagraphModel;
class BLOCK_LIST;
class ETEXT_DESC;
struct OSResults;
class UNICHARSET;
class Dawg;
class Dict;
class EquationDetect;
class PageIterator;
class ImageThresholder;
class LTRResultIterator;
class ResultIterator;
class MutableIterator;
class TessResultRenderer;
class Tesseract;
// Function to read a std::vector<char> from a whole file.
// Returns false on failure.
// This is a plain function pointer; it is the type of the `reader` argument
// of the in-memory TessBaseAPI::Init overload.
using FileReader = bool (*)(const char *filename, std::vector<char> *data);
// Pointer to a const member function of Dict. This is the type accepted by
// TessBaseAPI::SetDictFunc, which is documented as setting
// Dict::letter_is_okay_. The meaning of the individual parameters is not
// visible from this header.
using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID,
bool) const;
// Pointer to a (non-const) member function of Dict. This is the type
// accepted by TessBaseAPI::SetProbabilityInContextFunc, which is documented
// as setting Dict::probability_in_context_.
using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *,
int, const char *, int);
/**
* Base class for all tesseract APIs.
* Specific classes can add ability to work on different inputs or produce
* different outputs.
* This class is mostly an interface layer on top of the Tesseract instance
* class to hide the data types so that users of this class don't have to
* include any other Tesseract headers.
*/
class TESS_API TessBaseAPI {
public:
TessBaseAPI();
virtual ~TessBaseAPI();
// Copy constructor and assignment operator are currently unsupported.
TessBaseAPI(TessBaseAPI const &) = delete;
TessBaseAPI &operator=(TessBaseAPI const &) = delete;
/**
* Returns the version identifier as a static string. Do not delete.
*/
static const char *Version();
/**
* If compiled with OpenCL AND an available OpenCL
* device is deemed faster than serial code, then
* "device" is populated with the cl_device_id
* and returns sizeof(cl_device_id)
* otherwise *device=nullptr and returns 0.
*/
static size_t getOpenCLDevice(void **device);
/**
* Set the name of the input file. Needed for training and
* reading a UNLV zone file, and for searchable PDF output.
*/
void SetInputName(const char *name);
/**
* These functions are required for searchable PDF output.
* We need our hands on the input file so that we can include
* it in the PDF without transcoding. If that is not possible,
* we need the original image. Finally, resolution metadata
* is stored in the PDF so we need that as well.
*/
const char *GetInputName();
// Takes ownership of the input pix.
void SetInputImage(Pix *pix);
Pix *GetInputImage();
int GetSourceYResolution();
const char *GetDatapath();
/** Set the name of the bonus output files. Needed only for debugging. */
void SetOutputName(const char *name);
/**
* Set the value of an internal "parameter."
* Supply the name of the parameter and the value as a string, just as
* you would in a config file.
* Returns false if the name lookup failed.
* Eg SetVariable("tessedit_char_blacklist", "xyz"); to ignore x, y and z.
* Or SetVariable("classify_bln_numeric_mode", "1"); to set numeric-only mode.
* SetVariable may be used before Init, but settings will revert to
* defaults on End().
*
* Note: Must be called after Init(). Only works for non-init variables
* (init variables should be passed to Init()).
*/
bool SetVariable(const char *name, const char *value);
bool SetDebugVariable(const char *name, const char *value);
/**
* Returns true if the parameter was found among Tesseract parameters.
* Fills in value with the value of the parameter.
*/
bool GetIntVariable(const char *name, int *value) const;
bool GetBoolVariable(const char *name, bool *value) const;
bool GetDoubleVariable(const char *name, double *value) const;
/**
* Returns the pointer to the string that represents the value of the
* parameter if it was found among Tesseract parameters.
*/
const char *GetStringVariable(const char *name) const;
#ifndef DISABLED_LEGACY_ENGINE
/**
* Print Tesseract fonts table to the given file.
*/
void PrintFontsTable(FILE *fp) const;
#endif
/**
* Print Tesseract parameters to the given file.
*/
void PrintVariables(FILE *fp) const;
/**
* Get value of named variable as a string, if it exists.
*/
bool GetVariableAsString(const char *name, std::string *val) const;
/**
* Instances are now mostly thread-safe and totally independent,
* but some global parameters remain. Basically it is safe to use multiple
* TessBaseAPIs in different threads in parallel, UNLESS:
* you use SetVariable on some of the Params in classify and textord.
* If you do, then the effect will be to change it for all your instances.
*
* Start tesseract. Returns zero on success and -1 on failure.
* NOTE that the only members that may be called before Init are those
* listed above here in the class definition.
*
* The datapath must be the name of the tessdata directory.
* The language is (usually) an ISO 639-3 string or nullptr will default to
* eng. It is entirely safe (and eventually will be efficient too) to call
* Init multiple times on the same instance to change language, or just
* to reset the classifier.
* The language may be a string of the form [~]<lang>[+[~]<lang>]* indicating
* that multiple languages are to be loaded. Eg hin+eng will load Hindi and
* English. Languages may specify internally that they want to be loaded
* with one or more other languages, so the ~ sign is available to override
* that. Eg if hin were set to load eng by default, then hin+~eng would force
* loading only hin. The number of loaded languages is limited only by
* memory, with the caveat that loading additional languages will impact
* both speed and accuracy, as there is more work to do to decide on the
* applicable language, and there is more chance of hallucinating incorrect
* words.
* WARNING: On changing languages, all Tesseract parameters are reset
* back to their default values. (Which may vary between languages.)
* If you have a rare need to set a Variable that controls
* initialization for a second call to Init you should explicitly
* call End() and then use SetVariable before Init. This is only a very
* rare use case, since there are very few uses that require any parameters
* to be set before Init.
*
* If set_only_non_debug_params is true, only params that do not contain
* "debug" in the name will be set.
*/
int Init(const char *datapath, const char *language, OcrEngineMode mode,
char **configs, int configs_size,
const std::vector<std::string> *vars_vec,
const std::vector<std::string> *vars_values,
bool set_only_non_debug_params);
/**
 * Convenience overload: starts Tesseract with the given engine mode, passing
 * no config files and no variable overrides, and with
 * set_only_non_debug_params set to false. Returns the result of the full
 * Init overload.
 */
int Init(const char *datapath, const char *language, OcrEngineMode oem) {
  char **const no_configs = nullptr;
  const std::vector<std::string> *const no_vars = nullptr;
  return Init(datapath, language, oem, no_configs, 0, no_vars, no_vars, false);
}
/**
 * Convenience overload: same as the three-argument Init with the engine mode
 * fixed to OEM_DEFAULT.
 */
int Init(const char *datapath, const char *language) {
  // The three-argument overload supplies the remaining "nothing extra"
  // arguments (no configs, no variables, set_only_non_debug_params false).
  return Init(datapath, language, OEM_DEFAULT);
}
// In-memory version reads the traineddata file directly from the given
// data[data_size] array, and/or reads data via a FileReader.
int Init(const char *data, int data_size, const char *language,
OcrEngineMode mode, char **configs, int configs_size,
const std::vector<std::string> *vars_vec,
const std::vector<std::string> *vars_values,
bool set_only_non_debug_params, FileReader reader);
/**
* Returns the languages string used in the last valid initialization.
* If the last initialization specified "deu+hin" then that will be
* returned. If hin loaded eng automatically as well, then that will
* not be included in this list. To find the languages actually
* loaded use GetLoadedLanguagesAsVector.
* The returned string should NOT be deleted.
*/
const char *GetInitLanguagesAsString() const;
/**
* Returns the loaded languages in the vector of std::string.
* Includes all languages loaded by the last Init, including those loaded
* as dependencies of other loaded languages.
*/
void GetLoadedLanguagesAsVector(std::vector<std::string> *langs) const;
/**
* Returns the available languages in the sorted vector of std::string.
*/
void GetAvailableLanguagesAsVector(std::vector<std::string> *langs) const;
/**
* Init only for page layout analysis. Use only for calls to SetImage and
* AnalysePage. Calls that attempt recognition will generate an error.
*/
void InitForAnalysePage();
/**
* Read a "config" file containing a set of param, value pairs.
* Searches the standard places: tessdata/configs, tessdata/tessconfigs
* and also accepts a relative or absolute path name.
* Note: only non-init params will be set (init params are set by Init()).
*/
void ReadConfigFile(const char *filename);
/** Same as above, but only set debug params from the given config file. */
void ReadDebugConfigFile(const char *filename);
/**
* Set the current page segmentation mode. Defaults to PSM_SINGLE_BLOCK.
* The mode is stored as an IntParam so it can also be modified by
* ReadConfigFile or SetVariable("tessedit_pageseg_mode", mode as string).
*/
void SetPageSegMode(PageSegMode mode);
/** Return the current page segmentation mode. */
PageSegMode GetPageSegMode() const;
/**
* Recognize a rectangle from an image and return the result as a string.
* May be called many times for a single Init.
* Currently has no error checking.
* Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
* Palette color images will not work properly and must be converted to
* 24 bit.
* Binary images of 1 bit per pixel may also be given but they must be
* byte packed with the MSB of the first byte being the first pixel, and a
* 1 represents WHITE. For binary images set bytes_per_pixel=0.
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
*
* Note that TesseractRect is the simplified convenience interface.
* For advanced uses, use SetImage, (optionally) SetRectangle, Recognize,
* and one or more of the Get*Text functions below.
*/
char *TesseractRect(const unsigned char *imagedata, int bytes_per_pixel,
int bytes_per_line, int left, int top, int width,
int height);
/**
* Call between pages or documents etc to free up memory and forget
* adaptive data.
*/
void ClearAdaptiveClassifier();
/**
* @defgroup AdvancedAPI Advanced API
* The following methods break TesseractRect into pieces, so you can
* get hold of the thresholded image, get the text in different formats,
* get bounding boxes, confidences etc.
*/
/* @{ */
/**
* Provide an image for Tesseract to recognize. Format is as
* TesseractRect above. Copies the image buffer and converts to Pix.
* SetImage clears all recognition results, and sets the rectangle to the
* full image, so it may be followed immediately by a GetUTF8Text, and it
* will automatically perform recognition.
*/
void SetImage(const unsigned char *imagedata, int width, int height,
int bytes_per_pixel, int bytes_per_line);
/**
* Provide an image for Tesseract to recognize. As with SetImage above,
* Tesseract takes its own copy of the image, so it need not persist until
* after Recognize.
* Pix vs raw, which to use?
* Use Pix where possible. Tesseract uses Pix as its internal representation
* and it is therefore more efficient to provide a Pix directly.
*/
void SetImage(Pix *pix);
/**
* Set the resolution of the source image in pixels per inch so font size
* information can be calculated in results. Call this after SetImage().
*/
void SetSourceResolution(int ppi);
/**
* Restrict recognition to a sub-rectangle of the image. Call after SetImage.
* Each SetRectangle clears the recognition results so multiple rectangles
* can be recognized with the same image.
*/
void SetRectangle(int left, int top, int width, int height);
/**
* Get a copy of the internal thresholded image from Tesseract.
* Caller takes ownership of the Pix and must pixDestroy it.
* May be called any time after SetImage, or after TesseractRect.
*/
Pix *GetThresholdedImage();
/**
* Get the result of page layout analysis as a leptonica-style
* Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
*/
Boxa *GetRegions(Pixa **pixa);
/**
* Get the textlines as a leptonica-style
* Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
* If raw_image is true, then extract from the original image instead of the
* thresholded image and pad by raw_padding pixels.
* If blockids is not nullptr, the block-id of each line is also returned as
* an array of one element per line. delete [] after use. If paraids is not
* nullptr, the paragraph-id of each line within its block is also returned as
* an array of one element per line. delete [] after use.
*/
Boxa *GetTextlines(bool raw_image, int raw_padding, Pixa **pixa,
int **blockids, int **paraids);
/*
Helper method to extract from the thresholded image. (most common usage)
*/
Boxa *GetTextlines(Pixa **pixa, int **blockids) {
  // Forward to the full overload: extract from the thresholded image
  // (raw_image false), with no padding, and without requesting
  // paragraph ids.
  const bool raw_image = false;
  const int raw_padding = 0;
  int **const no_paraids = nullptr;
  return GetTextlines(raw_image, raw_padding, pixa, blockids, no_paraids);
}
/**
* Get textlines and strips of image regions as a leptonica-style Boxa, Pixa
* pair, in reading order. Enables downstream handling of non-rectangular
* regions.
* Can be called before or after Recognize.
* If blockids is not nullptr, the block-id of each line is also returned as
* an array of one element per line. delete [] after use.
*/
Boxa *GetStrips(Pixa **pixa, int **blockids);
/**
* Get the words as a leptonica-style
* Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
*/
Boxa *GetWords(Pixa **pixa);
/**
* Gets the individual connected (text) components (created
* after pages segmentation step, but before recognition)
* as a leptonica-style Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
* Note: the caller is responsible for calling boxaDestroy()
* on the returned Boxa array and pixaDestroy() on cc array.
*/
Boxa *GetConnectedComponents(Pixa **cc);
/**
* Get the given level kind of components (block, textline, word etc.) as a
* leptonica-style Boxa, Pixa pair, in reading order.
* Can be called before or after Recognize.
* If blockids is not nullptr, the block-id of each component is also returned
* as an array of one element per component. delete [] after use.
* If paraids is not nullptr, the paragraph-id of each component within its
* block is also returned as an array of one element per component. delete []
* after use. If raw_image is true, then portions of the original image are
* extracted instead of the thresholded image and padded with raw_padding. If
* text_only is true, then only text components are returned.
*/
Boxa *GetComponentImages(PageIteratorLevel level, bool text_only,
bool raw_image, int raw_padding, Pixa **pixa,
int **blockids, int **paraids);
// Helper function to get binary images with no padding (most common usage).
Boxa *GetComponentImages(const PageIteratorLevel level, const bool text_only,
                         Pixa **pixa, int **blockids) {
  // Forward to the full overload: raw_image false (so the thresholded image
  // is used), no padding, and no paragraph ids requested.
  const bool raw_image = false;
  const int raw_padding = 0;
  int **const no_paraids = nullptr;
  return GetComponentImages(level, text_only, raw_image, raw_padding, pixa,
                            blockids, no_paraids);
}
/**
* Returns the scale factor of the thresholded image that would be returned by
* GetThresholdedImage() and the various GetX() methods that call
* GetComponentImages().
* Returns 0 if no thresholder has been set.
*/
int GetThresholdedImageScaleFactor() const;
/**
* Runs page layout analysis in the mode set by SetPageSegMode.
* May optionally be called prior to Recognize to get access to just
* the page layout results. Returns an iterator to the results.
* If merge_similar_words is true, words are combined where suitable for use
* with a line recognizer. Use if you want to use AnalyseLayout to find the
* textlines, and then want to process textline fragments with an external
* line recognizer.
* Returns nullptr on error or an empty page.
* The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
PageIterator *AnalyseLayout();
PageIterator *AnalyseLayout(bool merge_similar_words);
/**
* Recognize the image from SetAndThresholdImage, generating Tesseract
* internal structures. Returns 0 on success.
* Optional. The Get*Text functions below will call Recognize if needed.
* After Recognize, the output is kept internally until the next SetImage.
*/
int Recognize(ETEXT_DESC *monitor);
/**
* Methods to retrieve information after SetAndThresholdImage(),
* Recognize() or TesseractRect(). (Recognize is called implicitly if needed.)
*/
/**
* Turns images into symbolic text.
*
* filename can point to a single image, a multi-page TIFF,
* or a plain text list of image filenames.
*
* retry_config is useful for debugging. If not nullptr, you can fall
* back to an alternate configuration if a page fails for some
* reason.
*
* timeout_millisec terminates processing if any single page
* takes too long. Set to 0 for unlimited time.
*
* renderer is responsible for creating the output. For example,
* use the TessTextRenderer if you want plaintext output, or
* the TessPDFRenderer to produce searchable PDF.
*
* If tessedit_page_number is non-negative, will only process that
* single page. Works for multi-page tiff file, or filelist.
*
* Returns true if successful, false on error.
*/
bool ProcessPages(const char *filename, const char *retry_config,
int timeout_millisec, TessResultRenderer *renderer);
// Does the real work of ProcessPages.
bool ProcessPagesInternal(const char *filename, const char *retry_config,
int timeout_millisec, TessResultRenderer *renderer);
/**
* Turn a single image into symbolic text.
*
* The pix is the image processed. filename and page_index are
* metadata used by side-effect processes, such as reading a box
* file or formatting as hOCR.
*
* See ProcessPages for descriptions of other parameters.
*/
bool ProcessPage(Pix *pix, int page_index, const char *filename,
const char *retry_config, int timeout_millisec,
TessResultRenderer *renderer);
/**
* Get a reading-order iterator to the results of LayoutAnalysis and/or
* Recognize. The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
ResultIterator *GetIterator();
/**
* Get a mutable iterator to the results of LayoutAnalysis and/or Recognize.
* The returned iterator must be deleted after use.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
* DetectOS, or anything else that changes the internal PAGE_RES.
*/
MutableIterator *GetMutableIterator();
/**
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
*/
char *GetUTF8Text();
/**
* Make a HTML-formatted string with hOCR markup from the internal
* data structures.
* page_number is 0-based but will appear in the output as 1-based.
* monitor can be used to
* cancel the recognition
* receive progress callbacks
* Returned string must be freed with the delete [] operator.
*/
char *GetHOCRText(ETEXT_DESC *monitor, int page_number);
/**
* Make a HTML-formatted string with hOCR markup from the internal
* data structures.
* page_number is 0-based but will appear in the output as 1-based.
* Returned string must be freed with the delete [] operator.
*/
char *GetHOCRText(int page_number);
/**
* Make an XML-formatted string with Alto markup from the internal
* data structures.
*/
char *GetAltoText(ETEXT_DESC *monitor, int page_number);
/**
* Make an XML-formatted string with Alto markup from the internal
* data structures.
*/
char *GetAltoText(int page_number);
/**
* Make a TSV-formatted string from the internal data structures.
* page_number is 0-based but will appear in the output as 1-based.
* Returned string must be freed with the delete [] operator.
*/
char *GetTSVText(int page_number);
/**
* Make a box file for LSTM training from the internal data structures.
* Constructs coordinates in the original image - not just the rectangle.
* page_number is a 0-based page index that will appear in the box file.
* Returned string must be freed with the delete [] operator.
*/
char *GetLSTMBoxText(int page_number);
/**
* The recognized text is returned as a char* which is coded in the same
* format as a box file used in training.
* Constructs coordinates in the original image - not just the rectangle.
* page_number is a 0-based page index that will appear in the box file.
* Returned string must be freed with the delete [] operator.
*/
char *GetBoxText(int page_number);
/**
* The recognized text is returned as a char* which is coded in the same
* format as a WordStr box file used in training.
* page_number is a 0-based page index that will appear in the box file.
* Returned string must be freed with the delete [] operator.
*/
char *GetWordStrBoxText(int page_number);
/**
* The recognized text is returned as a char* which is coded
* as UNLV format Latin-1 with specific reject and suspect codes.
* Returned string must be freed with the delete [] operator.
*/
char *GetUNLVText();
/**
* Detect the orientation of the input image and apparent script (alphabet).
* orient_deg is the detected clockwise rotation of the input image in degrees
* (0, 90, 180, 270)
* orient_conf is the confidence (15.0 is reasonably confident)
* script_name is an ASCII string, the name of the script, e.g. "Latin"
* script_conf is confidence level in the script
* Returns true on success and writes values to each parameter as an output
*/
bool DetectOrientationScript(int *orient_deg, float *orient_conf,
const char **script_name, float *script_conf);
/**
* The recognized text is returned as a char* which is coded
* as UTF8 and must be freed with the delete [] operator.
* page_number is a 0-based page index that will appear in the osd file.
*/
char *GetOsdText(int page_number);
/** Returns the (average) confidence value between 0 and 100. */
int MeanTextConf();
/**
* Returns all word confidences (between 0 and 100) in an array, terminated
* by -1. The calling function must delete [] after use.
* The number of confidences should correspond to the number of space-
* delimited words in GetUTF8Text.
*/
int *AllWordConfidences();
#ifndef DISABLED_LEGACY_ENGINE
/**
* Applies the given word to the adaptive classifier if possible.
* The word must be SPACE-DELIMITED UTF-8 - l i k e t h i s , so it can
* tell the boundaries of the graphemes.
* Assumes that SetImage/SetRectangle have been used to set the image
* to the given word. The mode arg should be PSM_SINGLE_WORD or
* PSM_CIRCLE_WORD, as that will be used to control layout analysis.
* The currently set PageSegMode is preserved.
* Returns false if adaption was not possible for some reason.
*/
bool AdaptToWordStr(PageSegMode mode, const char *wordstr);
#endif // ndef DISABLED_LEGACY_ENGINE
/**
* Free up recognition results and any stored image data, without actually
* freeing any recognition data that would be time-consuming to reload.
* Afterwards, you must call SetImage or TesseractRect before doing
* any Recognize or Get* operation.
*/
void Clear();
/**
* Close down tesseract and free up all memory. End() is equivalent to
* destructing and reconstructing your TessBaseAPI.
* Once End() has been used, none of the other API functions may be used
* other than Init and anything declared above it in the class definition.
*/
void End();
/**
* Clear any library-level memory caches.
* There are a variety of expensive-to-load constant data structures (mostly
* language dictionaries) that are cached globally -- surviving the Init()
* and End() of individual TessBaseAPI's. This function allows the clearing
* of these caches.
**/
static void ClearPersistentCache();
/**
* Check whether a word is valid according to Tesseract's language model
* @return 0 if the word is invalid, non-zero if valid.
* @warning temporary! This function will be removed from here and placed
* in a separate API at some future time.
*/
int IsValidWord(const char *word) const;
// Returns true if utf8_character is defined in the UniCharset.
bool IsValidCharacter(const char *utf8_character) const;
bool GetTextDirection(int *out_offset, float *out_slope);
/** Sets Dict::letter_is_okay_ function to point to the given function. */
void SetDictFunc(DictFunc f);
/** Sets Dict::probability_in_context_ function to point to the given
* function.
*/
void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
/**
* Estimates the Orientation And Script of the image.
* @return true if the image was processed successfully.
*/
bool DetectOS(OSResults *);
/**
* Return text orientation of each block as determined by an earlier run
* of layout analysis.
*/
void GetBlockTextOrientations(int **block_orientation,
bool **vertical_writing);
/** This method returns the string form of the specified unichar. */
const char *GetUnichar(int unichar_id) const;
/** Return the pointer to the i-th dawg loaded into tesseract_ object. */
const Dawg *GetDawg(int i) const;
/** Return the number of dawgs loaded into tesseract_ object. */
int NumDawgs() const;
/** Read-only accessor: hands back the pointer currently held in tesseract_. */
Tesseract *tesseract() const {
  return this->tesseract_;
}
/** Read-only accessor for last_oem_requested_ (the engine mode last requested). */
OcrEngineMode oem() const {
  return this->last_oem_requested_;
}
void set_min_orientation_margin(double margin);
/* @} */
protected:
/** Common code for setting the image. Returns true if Init has been called.
*/
bool InternalSetImage();
/**
* Run the thresholder to make the thresholded image. If pix is not nullptr,
* the source is thresholded to pix instead of the internal IMAGE.
*/
virtual bool Threshold(Pix **pix);
/**
* Find lines from the image making the BLOCK_LIST.
* @return 0 on success.
*/
int FindLines();
/** Delete the pageres and block list ready for a new page. */
void ClearResults();
/**
* Return an LTR Result Iterator -- used only for training, as we really want
* to ignore all BiDi smarts at that point.
* delete once you're done with it.
*/
LTRResultIterator *GetLTRIterator();
/**
* Return the length of the output text string, as UTF8, assuming
* one newline per line and one per block, with a terminator,
* and assuming a single character reject marker for each rejected character.
* Also return the number of recognized blobs in blob_count.
*/
int TextLength(int *blob_count) const;
//// paragraphs.cpp ////////////////////////////////////////////////////
void DetectParagraphs(bool after_text_recognition);
/** Read-only accessor for page_res_, the page-level result data. */
const PAGE_RES *GetPageRes() const {
  return this->page_res_;
}
protected:
Tesseract *tesseract_; ///< The underlying data object.
Tesseract *osd_tesseract_; ///< For orientation & script detection.
EquationDetect *equ_detect_; ///< The equation detector.
FileReader reader_; ///< Reads files from any filesystem.
ImageThresholder *thresholder_; ///< Image thresholding module.
std::vector<ParagraphModel *> *paragraph_models_;
BLOCK_LIST *block_list_; ///< The page layout.
PAGE_RES *page_res_; ///< The page-level data.
std::string input_file_; ///< Name used by training code.
std::string output_file_; ///< Name used by debug code.
std::string datapath_; ///< Current location of tessdata.
std::string language_; ///< Last initialized language.
OcrEngineMode last_oem_requested_; ///< Last ocr language mode requested.
bool recognition_done_; ///< page_res_ contains recognition data.
/**
* @defgroup ThresholderParams Thresholder Parameters
* Parameters saved from the Thresholder. Needed to rebuild coordinates.
*/
/* @{ */
int rect_left_;
int rect_top_;
int rect_width_;
int rect_height_;
int image_width_;
int image_height_;
/* @} */
private:
// A list of image filenames gets special consideration
bool ProcessPagesFileList(FILE *fp, std::string *buf,
const char *retry_config, int timeout_millisec,
TessResultRenderer *renderer,
int tessedit_page_number);
// TIFF supports multipage so gets special consideration.
bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size,
const char *filename, const char *retry_config,
int timeout_millisec,
TessResultRenderer *renderer,
int tessedit_page_number);
}; // class TessBaseAPI.
/**
 * Escape a char string: the characters & < > " ' are replaced with HTML
 * codes. The escaped text is returned by value as a std::string.
 */
std::string HOcrEscape(const char *text);
} // namespace tesseract
#endif // TESSERACT_API_BASEAPI_H_

View File

@ -1,484 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: capi.h
// Description: C-API TessBaseAPI
//
// (C) Copyright 2012, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef API_CAPI_H_
#define API_CAPI_H_
#include "export.h"
#ifdef __cplusplus
# include <tesseract/baseapi.h>
# include <tesseract/ocrclass.h>
# include <tesseract/pageiterator.h>
# include <tesseract/renderer.h>
# include <tesseract/resultiterator.h>
#endif
#include <stdbool.h>
#include <stdio.h>
#ifdef __cplusplus
extern "C" {
#endif
/* C-compatible boolean used throughout this C API. Each macro carries its own
 * guard, so an environment that already supplies BOOL (but not TRUE / FALSE),
 * or TRUE / FALSE (but not BOOL), still ends up with all three names defined
 * and none of them redefined. */
#ifndef BOOL
#  define BOOL int
#endif
#ifndef TRUE
#  define TRUE 1
#endif
#ifndef FALSE
#  define FALSE 0
#endif
/*
 * Public type names of the C API.
 * - Compiled as C++: every name below is a typedef of the corresponding
 *   tesseract:: class or enum, so C and C++ callers share one set of names.
 * - Compiled as C: the classes become opaque structs (usable only through
 *   pointers) and the enums are declared here directly.
 * NOTE(review): the C enumerators presumably have to stay in the same order
 * as the tesseract:: enums they stand in for - confirm against the C++
 * headers before editing either side.
 */
#ifdef __cplusplus
typedef tesseract::TessResultRenderer TessResultRenderer;
typedef tesseract::TessBaseAPI TessBaseAPI;
typedef tesseract::PageIterator TessPageIterator;
typedef tesseract::ResultIterator TessResultIterator;
typedef tesseract::MutableIterator TessMutableIterator;
typedef tesseract::ChoiceIterator TessChoiceIterator;
typedef tesseract::OcrEngineMode TessOcrEngineMode;
typedef tesseract::PageSegMode TessPageSegMode;
typedef tesseract::PageIteratorLevel TessPageIteratorLevel;
typedef tesseract::Orientation TessOrientation;
typedef tesseract::ParagraphJustification TessParagraphJustification;
typedef tesseract::WritingDirection TessWritingDirection;
typedef tesseract::TextlineOrder TessTextlineOrder;
typedef tesseract::PolyBlockType TessPolyBlockType;
typedef tesseract::ETEXT_DESC ETEXT_DESC;
#else
/* Opaque handles: C code never sees the layout of these types. */
typedef struct TessResultRenderer TessResultRenderer;
typedef struct TessBaseAPI TessBaseAPI;
typedef struct TessPageIterator TessPageIterator;
typedef struct TessResultIterator TessResultIterator;
typedef struct TessMutableIterator TessMutableIterator;
typedef struct TessChoiceIterator TessChoiceIterator;
/* C declarations of the enums. No enumerator has an explicit value, so each
 * enum is numbered implicitly from 0 in declaration order. */
typedef enum TessOcrEngineMode {
OEM_TESSERACT_ONLY,
OEM_LSTM_ONLY,
OEM_TESSERACT_LSTM_COMBINED,
OEM_DEFAULT
} TessOcrEngineMode;
/* The trailing PSM_COUNT entry equals the number of modes listed before it. */
typedef enum TessPageSegMode {
PSM_OSD_ONLY,
PSM_AUTO_OSD,
PSM_AUTO_ONLY,
PSM_AUTO,
PSM_SINGLE_COLUMN,
PSM_SINGLE_BLOCK_VERT_TEXT,
PSM_SINGLE_BLOCK,
PSM_SINGLE_LINE,
PSM_SINGLE_WORD,
PSM_CIRCLE_WORD,
PSM_SINGLE_CHAR,
PSM_SPARSE_TEXT,
PSM_SPARSE_TEXT_OSD,
PSM_RAW_LINE,
PSM_COUNT
} TessPageSegMode;
typedef enum TessPageIteratorLevel {
RIL_BLOCK,
RIL_PARA,
RIL_TEXTLINE,
RIL_WORD,
RIL_SYMBOL
} TessPageIteratorLevel;
/* The trailing PT_COUNT entry equals the number of types listed before it. */
typedef enum TessPolyBlockType {
PT_UNKNOWN,
PT_FLOWING_TEXT,
PT_HEADING_TEXT,
PT_PULLOUT_TEXT,
PT_EQUATION,
PT_INLINE_EQUATION,
PT_TABLE,
PT_VERTICAL_TEXT,
PT_CAPTION_TEXT,
PT_FLOWING_IMAGE,
PT_HEADING_IMAGE,
PT_PULLOUT_IMAGE,
PT_HORZ_LINE,
PT_VERT_LINE,
PT_NOISE,
PT_COUNT
} TessPolyBlockType;
typedef enum TessOrientation {
ORIENTATION_PAGE_UP,
ORIENTATION_PAGE_RIGHT,
ORIENTATION_PAGE_DOWN,
ORIENTATION_PAGE_LEFT
} TessOrientation;
typedef enum TessParagraphJustification {
JUSTIFICATION_UNKNOWN,
JUSTIFICATION_LEFT,
JUSTIFICATION_CENTER,
JUSTIFICATION_RIGHT
} TessParagraphJustification;
typedef enum TessWritingDirection {
WRITING_DIRECTION_LEFT_TO_RIGHT,
WRITING_DIRECTION_RIGHT_TO_LEFT,
WRITING_DIRECTION_TOP_TO_BOTTOM
} TessWritingDirection;
typedef enum TessTextlineOrder {
TEXTLINE_ORDER_LEFT_TO_RIGHT,
TEXTLINE_ORDER_RIGHT_TO_LEFT,
TEXTLINE_ORDER_TOP_TO_BOTTOM
} TessTextlineOrder;
typedef struct ETEXT_DESC ETEXT_DESC;
#endif
/* Callback signature accepted by TessMonitorSetCancelFunc.
 * NOTE(review): cancel_this is presumably the pointer registered through
 * TessMonitorSetCancelThis - confirm. */
typedef bool (*TessCancelFunc)(void *cancel_this, int words);
/* Callback signature accepted by TessMonitorSetProgressFunc. */
typedef bool (*TessProgressFunc)(ETEXT_DESC *ths, int left, int right, int top,
int bottom);
/* Forward declarations only: this header uses these types solely through
 * pointers, so their definitions are not needed here. */
struct Pix;
struct Boxa;
struct Pixa;
/* General free functions */
TESS_API const char *TessVersion();
/* NOTE(review): presumably the release helpers for buffers handed out by this
 * API - one for a single string, one for an array of strings, one for an
 * array of ints. Which API call pairs with which helper is not stated in this
 * header; confirm against the implementation before relying on it. */
TESS_API void TessDeleteText(const char *text);
TESS_API void TessDeleteTextArray(char **arr);
TESS_API void TessDeleteIntArray(const int *arr);
/* Renderer API */
/* Factory functions: each returns a TessResultRenderer handle and takes the
 * outputbase string; the hOCR "2" variant and the PDF variant take extra
 * options. */
TESS_API TessResultRenderer *TessTextRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessHOcrRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessHOcrRendererCreate2(const char *outputbase,
BOOL font_info);
TESS_API TessResultRenderer *TessAltoRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessTsvRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessPDFRendererCreate(const char *outputbase,
const char *datadir,
BOOL textonly);
TESS_API TessResultRenderer *TessUnlvRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessBoxTextRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessLSTMBoxRendererCreate(const char *outputbase);
TESS_API TessResultRenderer *TessWordStrBoxRendererCreate(
const char *outputbase);
/* Operations on an existing renderer handle.
 * NOTE(review): Insert/Next suggest renderers can be chained; whether deleting
 * one renderer also deletes the renderers linked after it is not stated
 * here - confirm. */
TESS_API void TessDeleteResultRenderer(TessResultRenderer *renderer);
TESS_API void TessResultRendererInsert(TessResultRenderer *renderer,
TessResultRenderer *next);
TESS_API TessResultRenderer *TessResultRendererNext(
TessResultRenderer *renderer);
TESS_API BOOL TessResultRendererBeginDocument(TessResultRenderer *renderer,
const char *title);
TESS_API BOOL TessResultRendererAddImage(TessResultRenderer *renderer,
TessBaseAPI *api);
TESS_API BOOL TessResultRendererEndDocument(TessResultRenderer *renderer);
/* NOTE: "Extention" (sic) is the spelling of the exported symbol; callers
 * must use it exactly as written. */
TESS_API const char *TessResultRendererExtention(TessResultRenderer *renderer);
TESS_API const char *TessResultRendererTitle(TessResultRenderer *renderer);
TESS_API int TessResultRendererImageNum(TessResultRenderer *renderer);
/* Base API */
/* Handle lifetime: TessBaseAPICreate returns a TessBaseAPI handle and
 * TessBaseAPIDelete takes one. Every other function below receives the handle
 * as its first argument. */
TESS_API TessBaseAPI *TessBaseAPICreate();
TESS_API void TessBaseAPIDelete(TessBaseAPI *handle);
TESS_API size_t TessBaseAPIGetOpenCLDevice(TessBaseAPI *handle, void **device);
TESS_API void TessBaseAPISetInputName(TessBaseAPI *handle, const char *name);
TESS_API const char *TessBaseAPIGetInputName(TessBaseAPI *handle);
TESS_API void TessBaseAPISetInputImage(TessBaseAPI *handle, struct Pix *pix);
TESS_API struct Pix *TessBaseAPIGetInputImage(TessBaseAPI *handle);
TESS_API int TessBaseAPIGetSourceYResolution(TessBaseAPI *handle);
TESS_API const char *TessBaseAPIGetDatapath(TessBaseAPI *handle);
TESS_API void TessBaseAPISetOutputName(TessBaseAPI *handle, const char *name);
/* Variable access by name. The typed getters take an out-pointer `value`.
 * NOTE(review): the BOOL results presumably report whether the named
 * variable was found / set - confirm. */
TESS_API BOOL TessBaseAPISetVariable(TessBaseAPI *handle, const char *name,
const char *value);
TESS_API BOOL TessBaseAPISetDebugVariable(TessBaseAPI *handle, const char *name,
const char *value);
TESS_API BOOL TessBaseAPIGetIntVariable(const TessBaseAPI *handle,
const char *name, int *value);
TESS_API BOOL TessBaseAPIGetBoolVariable(const TessBaseAPI *handle,
const char *name, BOOL *value);
TESS_API BOOL TessBaseAPIGetDoubleVariable(const TessBaseAPI *handle,
const char *name, double *value);
TESS_API const char *TessBaseAPIGetStringVariable(const TessBaseAPI *handle,
const char *name);
TESS_API void TessBaseAPIPrintVariables(const TessBaseAPI *handle, FILE *fp);
TESS_API BOOL TessBaseAPIPrintVariablesToFile(const TessBaseAPI *handle,
const char *filename);
/* Initialisation variants. Init1-Init4 take a datapath string; Init5 takes a
 * data pointer plus data_size in its place. Init4 and Init5 additionally
 * accept vars_vec / vars_values with a vars_vec_size count and the
 * set_only_non_debug_params flag.
 * NOTE(review): the meaning of the int result is not documented in this
 * header - confirm against TessBaseAPI::Init. */
TESS_API int TessBaseAPIInit1(TessBaseAPI *handle, const char *datapath,
const char *language, TessOcrEngineMode oem,
char **configs, int configs_size);
TESS_API int TessBaseAPIInit2(TessBaseAPI *handle, const char *datapath,
const char *language, TessOcrEngineMode oem);
TESS_API int TessBaseAPIInit3(TessBaseAPI *handle, const char *datapath,
const char *language);
TESS_API int TessBaseAPIInit4(TessBaseAPI *handle, const char *datapath,
const char *language, TessOcrEngineMode mode,
char **configs, int configs_size, char **vars_vec,
char **vars_values, size_t vars_vec_size,
BOOL set_only_non_debug_params);
TESS_API int TessBaseAPIInit5(TessBaseAPI *handle, const char *data, int data_size,
const char *language, TessOcrEngineMode mode,
char **configs, int configs_size, char **vars_vec,
char **vars_values, size_t vars_vec_size,
BOOL set_only_non_debug_params);
/* Language queries.
 * NOTE(review): the char** results are presumably released with
 * TessDeleteTextArray - confirm. */
TESS_API const char *TessBaseAPIGetInitLanguagesAsString(
const TessBaseAPI *handle);
TESS_API char **TessBaseAPIGetLoadedLanguagesAsVector(
const TessBaseAPI *handle);
TESS_API char **TessBaseAPIGetAvailableLanguagesAsVector(
const TessBaseAPI *handle);
TESS_API void TessBaseAPIInitForAnalysePage(TessBaseAPI *handle);
TESS_API void TessBaseAPIReadConfigFile(TessBaseAPI *handle,
const char *filename);
TESS_API void TessBaseAPIReadDebugConfigFile(TessBaseAPI *handle,
const char *filename);
TESS_API void TessBaseAPISetPageSegMode(TessBaseAPI *handle,
TessPageSegMode mode);
TESS_API TessPageSegMode TessBaseAPIGetPageSegMode(const TessBaseAPI *handle);
/* Image input. Raw-buffer variants describe the pixels with bytes_per_pixel
 * and bytes_per_line; the "2" variant of SetImage takes a struct Pix instead.
 * NOTE(review): whether the API copies imagedata / pix or keeps referring to
 * the caller's buffer is not stated here - confirm before freeing inputs. */
TESS_API char *TessBaseAPIRect(TessBaseAPI *handle,
const unsigned char *imagedata,
int bytes_per_pixel, int bytes_per_line,
int left, int top, int width, int height);
TESS_API void TessBaseAPIClearAdaptiveClassifier(TessBaseAPI *handle);
TESS_API void TessBaseAPISetImage(TessBaseAPI *handle,
const unsigned char *imagedata, int width,
int height, int bytes_per_pixel,
int bytes_per_line);
TESS_API void TessBaseAPISetImage2(TessBaseAPI *handle, struct Pix *pix);
TESS_API void TessBaseAPISetSourceResolution(TessBaseAPI *handle, int ppi);
TESS_API void TessBaseAPISetRectangle(TessBaseAPI *handle, int left, int top,
int width, int height);
/* Layout queries. Most return a struct Boxa* and take additional out-pointers
 * (pixa / cc, blockids, paraids).
 * NOTE(review): ownership of the returned Pix / Boxa objects and of the
 * out-pointer arrays is not stated in this header - confirm. */
TESS_API struct Pix *TessBaseAPIGetThresholdedImage(TessBaseAPI *handle);
TESS_API struct Boxa *TessBaseAPIGetRegions(TessBaseAPI *handle,
struct Pixa **pixa);
TESS_API struct Boxa *TessBaseAPIGetTextlines(TessBaseAPI *handle,
struct Pixa **pixa,
int **blockids);
TESS_API struct Boxa *TessBaseAPIGetTextlines1(TessBaseAPI *handle,
BOOL raw_image, int raw_padding,
struct Pixa **pixa,
int **blockids, int **paraids);
TESS_API struct Boxa *TessBaseAPIGetStrips(TessBaseAPI *handle,
struct Pixa **pixa, int **blockids);
TESS_API struct Boxa *TessBaseAPIGetWords(TessBaseAPI *handle,
struct Pixa **pixa);
TESS_API struct Boxa *TessBaseAPIGetConnectedComponents(TessBaseAPI *handle,
struct Pixa **cc);
TESS_API struct Boxa *TessBaseAPIGetComponentImages(TessBaseAPI *handle,
TessPageIteratorLevel level,
BOOL text_only,
struct Pixa **pixa,
int **blockids);
TESS_API struct Boxa *TessBaseAPIGetComponentImages1(
TessBaseAPI *handle, TessPageIteratorLevel level, BOOL text_only,
BOOL raw_image, int raw_padding, struct Pixa **pixa, int **blockids,
int **paraids);
TESS_API int TessBaseAPIGetThresholdedImageScaleFactor(
const TessBaseAPI *handle);
/* Analysis and recognition entry points. The monitor argument of
 * TessBaseAPIRecognize is an ETEXT_DESC, the type returned by
 * TessMonitorCreate. */
TESS_API TessPageIterator *TessBaseAPIAnalyseLayout(TessBaseAPI *handle);
TESS_API int TessBaseAPIRecognize(TessBaseAPI *handle, ETEXT_DESC *monitor);
TESS_API BOOL TessBaseAPIProcessPages(TessBaseAPI *handle, const char *filename,
const char *retry_config,
int timeout_millisec,
TessResultRenderer *renderer);
TESS_API BOOL TessBaseAPIProcessPage(TessBaseAPI *handle, struct Pix *pix,
int page_index, const char *filename,
const char *retry_config,
int timeout_millisec,
TessResultRenderer *renderer);
/* Iterators over the results; the matching *IteratorDelete functions are
 * declared in the iterator sections of this header. */
TESS_API TessResultIterator *TessBaseAPIGetIterator(TessBaseAPI *handle);
TESS_API TessMutableIterator *TessBaseAPIGetMutableIterator(
TessBaseAPI *handle);
/* Text output.
 * NOTE(review): the C++ methods documented in baseapi.h (e.g. GetUNLVText,
 * GetOsdText, AllWordConfidences) require the caller to delete [] their
 * results. These functions presumably hand out the same buffers, to be
 * released with TessDeleteText / TessDeleteIntArray - confirm. */
TESS_API char *TessBaseAPIGetUTF8Text(TessBaseAPI *handle);
TESS_API char *TessBaseAPIGetHOCRText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetAltoText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetTsvText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetBoxText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetLSTMBoxText(TessBaseAPI *handle, int page_number);
TESS_API char *TessBaseAPIGetWordStrBoxText(TessBaseAPI *handle,
int page_number);
TESS_API char *TessBaseAPIGetUNLVText(TessBaseAPI *handle);
TESS_API int TessBaseAPIMeanTextConf(TessBaseAPI *handle);
TESS_API int *TessBaseAPIAllWordConfidences(TessBaseAPI *handle);
/* Declared only when the legacy engine is compiled in. */
#ifndef DISABLED_LEGACY_ENGINE
TESS_API BOOL TessBaseAPIAdaptToWordStr(TessBaseAPI *handle,
TessPageSegMode mode,
const char *wordstr);
#endif // #ifndef DISABLED_LEGACY_ENGINE
TESS_API void TessBaseAPIClear(TessBaseAPI *handle);
TESS_API void TessBaseAPIEnd(TessBaseAPI *handle);
TESS_API int TessBaseAPIIsValidWord(TessBaseAPI *handle, const char *word);
TESS_API BOOL TessBaseAPIGetTextDirection(TessBaseAPI *handle, int *out_offset,
float *out_slope);
TESS_API const char *TessBaseAPIGetUnichar(TessBaseAPI *handle, int unichar_id);
TESS_API void TessBaseAPIClearPersistentCache(TessBaseAPI *handle);
#ifndef DISABLED_LEGACY_ENGINE
// Call TessDeleteText(*script_name) to free memory allocated by this
// function.
// NOTE(review): confirm this ownership rule still matches the implementation.
TESS_API BOOL TessBaseAPIDetectOrientationScript(TessBaseAPI *handle,
int *orient_deg,
float *orient_conf,
const char **script_name,
float *script_conf);
#endif // #ifndef DISABLED_LEGACY_ENGINE
TESS_API void TessBaseAPISetMinOrientationMargin(TessBaseAPI *handle,
double margin);
TESS_API int TessBaseAPINumDawgs(const TessBaseAPI *handle);
TESS_API TessOcrEngineMode TessBaseAPIOem(const TessBaseAPI *handle);
/* NOTE: vertical_writing is declared with bool (from <stdbool.h> when
 * compiled as C), not with the BOOL macro used by the rest of this API. */
TESS_API void TessBaseGetBlockTextOrientations(TessBaseAPI *handle,
int **block_orientation,
bool **vertical_writing);
/* Page iterator */
/* Operations on a TessPageIterator handle. TessPageIteratorCopy returns a new
 * TessPageIterator*.
 * NOTE(review): presumably each copy needs its own TessPageIteratorDelete
 * call - confirm. */
TESS_API void TessPageIteratorDelete(TessPageIterator *handle);
TESS_API TessPageIterator *TessPageIteratorCopy(const TessPageIterator *handle);
TESS_API void TessPageIteratorBegin(TessPageIterator *handle);
TESS_API BOOL TessPageIteratorNext(TessPageIterator *handle,
TessPageIteratorLevel level);
TESS_API BOOL TessPageIteratorIsAtBeginningOf(const TessPageIterator *handle,
TessPageIteratorLevel level);
TESS_API BOOL TessPageIteratorIsAtFinalElement(const TessPageIterator *handle,
TessPageIteratorLevel level,
TessPageIteratorLevel element);
/* Geometry queries; coordinates come back through the int out-pointers. */
TESS_API BOOL TessPageIteratorBoundingBox(const TessPageIterator *handle,
TessPageIteratorLevel level,
int *left, int *top, int *right,
int *bottom);
TESS_API TessPolyBlockType
TessPageIteratorBlockType(const TessPageIterator *handle);
/* NOTE(review): ownership of the returned struct Pix is not stated in this
 * header - confirm whether the caller must destroy it. */
TESS_API struct Pix *TessPageIteratorGetBinaryImage(
const TessPageIterator *handle, TessPageIteratorLevel level);
TESS_API struct Pix *TessPageIteratorGetImage(const TessPageIterator *handle,
TessPageIteratorLevel level,
int padding,
struct Pix *original_image,
int *left, int *top);
TESS_API BOOL TessPageIteratorBaseline(const TessPageIterator *handle,
TessPageIteratorLevel level, int *x1,
int *y1, int *x2, int *y2);
TESS_API void TessPageIteratorOrientation(
TessPageIterator *handle, TessOrientation *orientation,
TessWritingDirection *writing_direction, TessTextlineOrder *textline_order,
float *deskew_angle);
TESS_API void TessPageIteratorParagraphInfo(
TessPageIterator *handle, TessParagraphJustification *justification,
BOOL *is_list_item, BOOL *is_crown, int *first_line_indent);
/* Result iterator */
TESS_API void TessResultIteratorDelete(TessResultIterator *handle);
TESS_API TessResultIterator *TessResultIteratorCopy(
const TessResultIterator *handle);
TESS_API TessPageIterator *TessResultIteratorGetPageIterator(
TessResultIterator *handle);
TESS_API const TessPageIterator *TessResultIteratorGetPageIteratorConst(
const TessResultIterator *handle);
TESS_API TessChoiceIterator *TessResultIteratorGetChoiceIterator(
const TessResultIterator *handle);
TESS_API BOOL TessResultIteratorNext(TessResultIterator *handle,
TessPageIteratorLevel level);
/* NOTE(review): presumably forwards to LTRResultIterator::GetUTF8Text, whose
 * documentation says the result must be freed by the caller; from C that
 * would be TessDeleteText - confirm. */
TESS_API char *TessResultIteratorGetUTF8Text(const TessResultIterator *handle,
TessPageIteratorLevel level);
TESS_API float TessResultIteratorConfidence(const TessResultIterator *handle,
TessPageIteratorLevel level);
TESS_API const char *TessResultIteratorWordRecognitionLanguage(
const TessResultIterator *handle);
/* NOTE(review): presumably forwards to LTRResultIterator::WordFontAttributes,
 * whose documentation says the returned font name points to an internal
 * table and must not be deleted - confirm. Attributes are reported through
 * BOOL / int out-pointers. */
TESS_API const char *TessResultIteratorWordFontAttributes(
const TessResultIterator *handle, BOOL *is_bold, BOOL *is_italic,
BOOL *is_underlined, BOOL *is_monospace, BOOL *is_serif, BOOL *is_smallcaps,
int *pointsize, int *font_id);
TESS_API BOOL
TessResultIteratorWordIsFromDictionary(const TessResultIterator *handle);
TESS_API BOOL TessResultIteratorWordIsNumeric(const TessResultIterator *handle);
TESS_API BOOL
TessResultIteratorSymbolIsSuperscript(const TessResultIterator *handle);
TESS_API BOOL
TessResultIteratorSymbolIsSubscript(const TessResultIterator *handle);
TESS_API BOOL
TessResultIteratorSymbolIsDropcap(const TessResultIterator *handle);
/* Choice iterator (handles come from TessResultIteratorGetChoiceIterator).
 * NOTE(review): presumably TessChoiceIteratorGetUTF8Text forwards to
 * ChoiceIterator::GetUTF8Text, whose documentation says the result points to
 * an internal structure and must not be freed - confirm. */
TESS_API void TessChoiceIteratorDelete(TessChoiceIterator *handle);
TESS_API BOOL TessChoiceIteratorNext(TessChoiceIterator *handle);
TESS_API const char *TessChoiceIteratorGetUTF8Text(
const TessChoiceIterator *handle);
TESS_API float TessChoiceIteratorConfidence(const TessChoiceIterator *handle);
/* Progress monitor */
/* TessMonitorCreate returns an ETEXT_DESC*, the type that
 * TessBaseAPIRecognize accepts as its monitor argument; TessMonitorDelete
 * takes such a pointer. */
TESS_API ETEXT_DESC *TessMonitorCreate();
TESS_API void TessMonitorDelete(ETEXT_DESC *monitor);
/* Cancel / progress callbacks use the TessCancelFunc and TessProgressFunc
 * signatures declared near the top of this header. */
TESS_API void TessMonitorSetCancelFunc(ETEXT_DESC *monitor,
TessCancelFunc cancelFunc);
TESS_API void TessMonitorSetCancelThis(ETEXT_DESC *monitor, void *cancelThis);
TESS_API void *TessMonitorGetCancelThis(ETEXT_DESC *monitor);
TESS_API void TessMonitorSetProgressFunc(ETEXT_DESC *monitor,
TessProgressFunc progressFunc);
TESS_API int TessMonitorGetProgress(ETEXT_DESC *monitor);
/* NOTE(review): going by the name, deadline is presumably in milliseconds;
 * whether it is relative or absolute is not stated here - confirm. */
TESS_API void TessMonitorSetDeadlineMSecs(ETEXT_DESC *monitor, int deadline);
#ifdef __cplusplus
}
#endif
#endif // API_CAPI_H_

View File

@ -1,37 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: export.h
// Description: Place holder
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_PLATFORM_H_
#define TESSERACT_PLATFORM_H_
// TESS_API decorates every exported class and function. A definition supplied
// by the build (TESS_API already defined) is left alone; otherwise it is
// chosen as follows:
//   - neither TESS_EXPORTS nor TESS_IMPORTS defined -> empty;
//   - not Windows/Cygwin, either macro defined      -> default ELF visibility;
//   - Windows/Cygwin with TESS_EXPORTS              -> dllexport;
//   - Windows/Cygwin with only TESS_IMPORTS         -> dllimport.
#ifndef TESS_API
#  if !defined(TESS_EXPORTS) && !defined(TESS_IMPORTS)
#    define TESS_API
#  elif !defined(_WIN32) && !defined(__CYGWIN__)
#    define TESS_API __attribute__((visibility("default")))
#  elif defined(TESS_EXPORTS)
#    define TESS_API __declspec(dllexport)
#  else
#    define TESS_API __declspec(dllimport)
#  endif
#endif
#endif // TESSERACT_PLATFORM_H_

View File

@ -1,235 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: ltrresultiterator.h
// Description: Iterator for tesseract results in strict left-to-right
// order that avoids using tesseract internal data structures.
// Author: Ray Smith
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
#define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
#include "export.h" // for TESS_API
#include "pageiterator.h" // for PageIterator
#include "publictypes.h" // for PageIteratorLevel
#include "unichar.h" // for StrongScriptDirection
namespace tesseract {
class BLOB_CHOICE_IT;
class PAGE_RES;
class WERD_RES;
class Tesseract;
// LTRResultIterator walks tesseract's results in strict left-to-right order
// and gives access to every level of the page hierarchy, without requiring
// callers to include tesseract's own headers or to handle its structures.
// WARNING: the iterator points at data held inside the TessBaseAPI object. It
// may only be used while that TessBaseAPI still exists and has not been put
// through Init, SetImage, Recognize, Clear, End, DetectOS, or any other call
// that changes the internal PAGE_RES.
// PageIteratorLevel is defined in tesseract/publictypes.h. The bulk of the
// interface lives in the base class PageIterator; this class adds the
// text-specific accessors for OCR output.
class TESS_API LTRResultIterator : public PageIterator {
  friend class ChoiceIterator;

public:
  // Construction.
  //  * page_res and tesseract are taken directly from the BaseAPI.
  //  * rect_left/rect_top/rect_width/rect_height are copied (indirectly, via
  //    the BaseAPI) from the Thresholder. They describe a rectangle of the
  //    original image in top-left-origin coordinates, so (rect_left, rect_top)
  //    has to be added to output boxes to express them in original-image
  //    coordinates. See TessBaseAPI::SetRectangle.
  //  * scale and scaled_yres cover the case where the Thresholder scaled the
  //    rectangle before thresholding: coordinates in tesseract's image are
  //    divided by scale before (rect_left, rect_top) is added, and scaled_yres
  //    is the effective resolution of the binary image tesseract was given.
  // Begin has already been called by the time the constructor returns.
  LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
                    int scaled_yres, int rect_left, int rect_top,
                    int rect_width, int rect_height);

  ~LTRResultIterator() override;

  // Copying is allowed. It lets a caller iterate over lower-level objects
  // while keeping a second iterator parked on a higher-level object. Copies
  // do NOT call Begin, so iteration carries on from wherever the source was.
  // TODO: the base-class copy constructor and operator= are sufficient for
  // now; if data members are added here, remember to copy them as well.

  // ---------------- Moving around within the page ----------------
  // See PageIterator.

  // ---------------- Accessing data ----------------

  // Text of the current object at `level`, as a null-terminated UTF-8 string.
  // The caller frees it with delete [].
  char *GetUTF8Text(PageIteratorLevel level) const;

  // Sets the string inserted at the end of each text line ("\n" by default).
  void SetLineSeparator(const char *new_line);

  // Sets the string inserted at the end of each paragraph ("\n" by default).
  void SetParagraphSeparator(const char *new_para);

  // Mean confidence of the current object at `level`, to be read as a percent
  // probability (0.0f-100.0f).
  float Confidence(PageIteratorLevel level) const;

  // ---------------- Functions that refer to words only ----------------

  // Font attributes of the current word. When iterating at a level above
  // words (e.g. textlines) the first word of that object is described.
  // The return value is a font-name string that points into an internal table
  // and MUST NOT be deleted. It lives as long as the iterator, i.e. it is
  // invalidated by various TessBaseAPI members, including Init, SetImage and
  // End, and by deleting the TessBaseAPI.
  // pointsize is reported in printers points (1/72 inch).
  const char *WordFontAttributes(bool *is_bold, bool *is_italic,
                                 bool *is_underlined, bool *is_monospace,
                                 bool *is_serif, bool *is_smallcaps,
                                 int *pointsize, int *font_id) const;

  // Name of the language used to recognize this word, or nullptr on error.
  // The pointer must not be deleted.
  const char *WordRecognitionLanguage() const;

  // Overall directionality of this word.
  StrongScriptDirection WordDirection() const;

  // True if the current word was found in a dictionary.
  bool WordIsFromDictionary() const;

  // Number of blanks preceding the current word.
  int BlanksBeforeWord() const;

  // True if the current word is numeric.
  bool WordIsNumeric() const;

  // True if the word carries blamer information.
  bool HasBlamerInfo() const;

  // Pointer to the ParamsTrainingBundle stored in the current word's
  // BlamerBundle.
  const void *GetParamsTrainingBundle() const;

  // String holding the blamer information for this word.
  // Precondition: the word's blamer_bundle is not nullptr.
  const char *GetBlamerDebug() const;

  // String holding the misadaption information for this word.
  // Precondition: the word's blamer_bundle is not nullptr.
  const char *GetBlamerMisadaptionDebug() const;

  // True if a truth string was recorded for the current word.
  bool HasTruthString() const;

  // True if `str` is equivalent to the truth string of the current word.
  bool EquivalentToTruth(const char *str) const;

  // Truth string of the current word as null-terminated UTF-8.
  // The caller frees it with delete [].
  char *WordTruthUTF8Text() const;

  // Normalized OCR string of the current word as null-terminated UTF-8.
  // The caller frees it with delete [].
  char *WordNormedUTF8Text() const;

  // Pointer to the serialized choice lattice; the number of bytes of lattice
  // data is written to *lattice_size.
  const char *WordLattice(int *lattice_size) const;

  // ---------------- Functions that refer to symbols only ----------------
  // For each of the three queries below: when iterating at a level above
  // symbols (e.g. words), the first symbol of that object is examined.

  // True if the current symbol is a superscript.
  bool SymbolIsSuperscript() const;

  // True if the current symbol is a subscript.
  bool SymbolIsSubscript() const;

  // True if the current symbol is a dropcap.
  bool SymbolIsDropcap() const;

protected:
  const char *line_separator_;      // see SetLineSeparator
  const char *paragraph_separator_; // see SetParagraphSeparator
};
// ChoiceIterator steps through the classifier choices of one RIL_SYMBOL.
class TESS_API ChoiceIterator {
public:
  // Built from an LTRResultIterator positioned on the symbol of interest.
  // The iteration over that symbol's choices is one-shot: once it has been
  // run through, the ChoiceIterator is of no further use.
  explicit ChoiceIterator(const LTRResultIterator &result_it);

  ~ChoiceIterator();

  // Advances to the next choice for the symbol; returns false when no
  // choices are left.
  bool Next();

  // ---------------- Accessing data ----------------

  // Text of the current choice as a null-terminated UTF-8 string.
  // NOTE: unlike LTRResultIterator::GetUTF8Text, the result points into an
  // internal structure and must NOT be delete[]ed.
  const char *GetUTF8Text() const;

  // Confidence of the current choice; its scale depends on the language data
  // in use:
  //  * LSTM traineddata only: range 0.0f - 1.0f, and the choices of one
  //    symbol should roughly add up to 1.0f.
  //  * legacy-engine traineddata only: a percent probability (0.0f-100.0f);
  //    the values do not add up to 100, each one stands on its own.
  float Confidence() const;

  // All timesteps belonging to the currently selected symbol. One timestep is
  // a vector of (symbol, probability) pairs, the float being the probability
  // of the paired symbol.
  std::vector<std::vector<std::pair<const char *, float>>> *Timesteps() const;

private:
  // Removes the remaining spaces from the results and adapts the
  // probabilities accordingly.
  void filterSpaces();

  // The WERD_RES object; it is owned by the API, not by this iterator.
  WERD_RES *word_res_;
  // Iterator over the blob choices.
  BLOB_CHOICE_IT *choice_it_;
  std::vector<std::pair<const char *, float>> *LSTM_choices_ = nullptr;
  std::vector<std::pair<const char *, float>>::iterator LSTM_choice_it_;
  const int *tstep_index_;
  // Regulates the rating granularity.
  double rating_coefficient_;
  // Leading blanks.
  int blanks_before_word_;
  // True when LSTM-engine-related trained data is present.
  bool oemLSTM_;
};
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_

View File

@ -1,158 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
/**********************************************************************
* File: ocrclass.h
* Description: Class definitions and constants for the OCR API.
* Author: Hewlett-Packard Co
*
* (C) Copyright 1996, Hewlett-Packard Co.
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*
**********************************************************************/
/**********************************************************************
* This file contains typedefs for all the structures used by
* the HP OCR interface.
* The structures are designed to allow them to be used with any
* structure alignment up to 8.
**********************************************************************/
#ifndef CCUTIL_OCRCLASS_H_
#define CCUTIL_OCRCLASS_H_
#include <chrono>
#include <ctime>
namespace tesseract {
/**********************************************************************
* EANYCODE_CHAR
* Description of a single character. The character code is defined by
* the character set of the current font.
* Output text is sent as an array of these structures.
* Spaces and line endings in the output are represented in the
* structures of the surrounding characters. They are not directly
* represented as characters.
* The first character in a word has a positive value of blanks.
* Missing information should be set to the defaults in the comments.
* If word bounds are known, but not character bounds, then the top and
* bottom of each character should be those of the word. The left of the
* first and right of the last char in each word should be set. All other
* lefts and rights should be set to -1.
* If set, the values of right and bottom are left+width and top+height.
* Most of the members come directly from the parameters to ocr_append_char.
* The formatting member uses the enhancement parameter and combines the
* line direction stuff into the top 3 bits.
* The coding is 0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para,
* 5=UL Para, 6=TB char, 7=BT char. API users do not need to know what
* the coding is, only that it is backwards compatible with the previous
* version.
**********************************************************************/
struct EANYCODE_CHAR { /*single character */
// It should be noted that the format for char_code for version 2.0 and beyond
// is UTF8 which means that ASCII characters will come out as one structure
// but other characters will be returned in two or more instances of this
// structure with a single byte of the UTF8 code in each, but each will have
// the same bounding box. Programs which want to handle languages with
// different character sets will need to handle extended characters
// appropriately, but *all* code needs to be prepared to receive UTF8 coded
// characters for characters such as bullet and fancy quotes.
// The value in parentheses at the end of each member comment is the default
// to use when the information is missing.
uint16_t char_code; /*character itself */
int16_t left; /*of char (-1) */
int16_t right; /*of char (-1) */
int16_t top; /*of char (-1) */
int16_t bottom; /*of char (-1) */
int16_t font_index; /*what font (0) */
uint8_t confidence; /*0=perfect, 100=reject (0/100) */
uint8_t point_size; /*of char, 72=1 inch, (10) */
int8_t blanks; /*no of spaces before this char (1) */
uint8_t formatting; /*char formatting (0) */
};
/**********************************************************************
* ETEXT_DESC
* Description of the output of the OCR engine.
* This structure is used as both a progress monitor and the final
* output header, since it needs to be a valid progress monitor while
* the OCR engine is storing its output to shared memory.
* During progress, all the buffer info is -1.
* Progress starts at 0 and increases to 100 during OCR. No other constraint.
* Additionally the progress callback contains the bounding box of the word that
* is currently being processed.
* Every progress callback, the OCR engine must set ocr_alive to 1.
* The HP side will set ocr_alive to 0. Repeated failure to reset
* to 1 indicates that the OCR engine is dead.
* If the cancel function is not null then it is called with the number of
* user words found. If it returns true then operation is cancelled.
**********************************************************************/
class ETEXT_DESC;
// Callback signatures stored in ETEXT_DESC. Each returns bool; the member
// comments inside the class say how each one is used.
using CANCEL_FUNC = bool (*)(void *, int);
using PROGRESS_FUNC = bool (*)(int, int, int, int, int);
using PROGRESS_FUNC2 = bool (*)(ETEXT_DESC *, int, int, int, int);
// Output header of the OCR engine; it also serves as the progress monitor.
class ETEXT_DESC {
public:
  // Chars in this buffer (0).
  int16_t count = 0;
  // Percent complete, increasing (0-100).
  // The progress monitor covers word recognition and does not cover layout
  // analysis.
  // See Ray comment in https://github.com/tesseract-ocr/tesseract/pull/27
  int16_t progress = 0;
  // True if this is not the last buffer.
  int8_t more_to_come = 0;
  // The OCR engine sets this to 1, HP sets it to 0.
  volatile int8_t ocr_alive = 0;
  // For error code use.
  int8_t err_code = 0;
  // Returns true to cancel.
  CANCEL_FUNC cancel = nullptr;
  // Called whenever progress increases.
  PROGRESS_FUNC progress_callback = nullptr;
  // Monitor-aware progress callback; the constructor points it at
  // default_progress_func.
  PROGRESS_FUNC2 progress_callback2;
  // `this` or other data for cancel.
  void *cancel_this = nullptr;
  // Time to stop. Expected to be set only by a call to set_deadline_msecs().
  // A zero time-since-epoch means "no deadline".
  std::chrono::steady_clock::time_point end_time;
  // Character data.
  EANYCODE_CHAR text[1]{};

  // Installs the default monitor-aware callback and leaves the deadline unset
  // (end_time value-initialised, i.e. zero time since epoch).
  ETEXT_DESC() : progress_callback2(&default_progress_func), end_time() {}

  // Sets the end time to be deadline_msecs milliseconds from now.
  // Non-positive values leave the current end time untouched.
  void set_deadline_msecs(int32_t deadline_msecs) {
    if (deadline_msecs <= 0) {
      return;
    }
    const auto budget = std::chrono::milliseconds(deadline_msecs);
    end_time = std::chrono::steady_clock::now() + budget;
  }

  // Returns false if we've not passed the end_time, or have not set a
  // deadline. The clock is only read when a deadline has been set.
  bool deadline_exceeded() const {
    const bool deadline_set = end_time.time_since_epoch() !=
                              std::chrono::steady_clock::duration::zero();
    return deadline_set && std::chrono::steady_clock::now() > end_time;
  }

private:
  // Default value of progress_callback2: forwards to the older
  // progress_callback (passing ths->progress as its first argument) when one
  // is installed, and otherwise returns true.
  static bool default_progress_func(ETEXT_DESC *ths, int left, int right,
                                    int top, int bottom) {
    const PROGRESS_FUNC legacy = ths->progress_callback;
    if (legacy == nullptr) {
      return true;
    }
    return legacy(ths->progress, left, right, top, bottom);
  }
};
} // namespace tesseract
#endif // CCUTIL_OCRCLASS_H_

View File

@ -1,139 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: osdetect.h
// Description: Orientation and script detection.
// Author: Samuel Charron
// Ranjith Unnikrishnan
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCMAIN_OSDETECT_H_
#define TESSERACT_CCMAIN_OSDETECT_H_
#include "export.h" // for TESS_API
#include <vector> // for std::vector
namespace tesseract {
class BLOBNBOX;
class BLOBNBOX_CLIST;
class BLOB_CHOICE_LIST;
class TO_BLOCK_LIST;
class UNICHARSET;
class Tesseract;
// Upper bound on the number of scripts: those in ICU (116), plus "NULL" (1),
// plus Japanese and Korean (2), plus Fraktur (1).
constexpr int kMaxNumberOfScripts = 116 + 1 + 2 + 1;
// Holds one orientation id and one script id together with two confidence
// values. Every field starts out as zero.
struct OSBestResult {
  // The defaults live on the members; the constructor is kept user-provided
  // so the type remains a non-aggregate, exactly as before.
  OSBestResult() {}
  int orientation_id = 0;
  int script_id = 0;
  // NOTE(review): presumably the script / orientation confidences - confirm.
  float sconfidence = 0.0f;
  float oconfidence = 0.0f;
};
// Score tables for orientation and script detection, plus the best result.
struct OSResults {
  // Starts with no unicharset and every score set to zero.
  OSResults() : unicharset(nullptr) {
    for (float &orientation_score : orientations) {
      orientation_score = 0;
    }
    for (auto &script_row : scripts_na) {
      for (float &script_score : script_row) {
        script_score = 0;
      }
    }
  }
  // NOTE(review): presumably recomputes the best orientation from the
  // orientations table - only the declaration is visible here.
  void update_best_orientation();
  // Sets the orientation estimate to the given id.
  void set_best_orientation(int orientation_id);
  // Updates/computes the best script estimate under the assumption of the
  // given orientation id.
  void update_best_script(int orientation_id);
  // Returns the index of the highest-scoring script for this orientation.
  TESS_API int get_best_script(int orientation_id) const;
  // Adds in the scores of the given OSResults instance and updates the best
  // script.
  void accumulate(const OSResults &osr);
  // Print statistics.
  void print_scores(void) const;
  void print_scores(int orientation_id) const;
  // Scores for each orientation id [0,3].
  // Orientation ids [0..3] correspond to page orientations of
  // [0, 270, 180, 90] degrees respectively; each value is the clockwise
  // rotation that has to be applied to the page to make the text upright and
  // readable.
  float orientations[4];
  // Script confidence scores, one row per possible orientation (4 rows).
  float scripts_na[4][kMaxNumberOfScripts];
  UNICHARSET *unicharset;
  OSBestResult best_result;
};
// Orientation detector interface. Only declarations are visible here; the
// member functions are defined elsewhere.
class OrientationDetector {
public:
// Takes a list of allowed script ids and the OSResults object to work with.
// NOTE(review): both are stored as raw pointers below, so they presumably
// must outlive the detector - confirm against the implementation.
OrientationDetector(const std::vector<int> *allowed_scripts,
OSResults *results);
// NOTE(review): the meaning of the bool result is not visible from this
// declaration - check the definition before relying on it.
bool detect_blob(BLOB_CHOICE_LIST *scores);
int get_orientation();
private:
OSResults *osr_;
const std::vector<int> *allowed_scripts_;
};
// Script detector interface. Only declarations are visible here; the member
// functions and the static strings are defined elsewhere.
class ScriptDetector {
public:
// Takes a list of allowed script ids, the OSResults object to work with and
// the Tesseract instance.
// NOTE(review): all three are stored as raw pointers below, so they
// presumably must outlive the detector - confirm against the implementation.
ScriptDetector(const std::vector<int> *allowed_scripts, OSResults *osr,
tesseract::Tesseract *tess);
void detect_blob(BLOB_CHOICE_LIST *scores);
// NOTE(review): presumably reports whether detection can stop early for the
// given orientation - confirm against the definition.
bool must_stop(int orientation) const;
private:
OSResults *osr_;
// Script name strings; only declared here.
static const char *korean_script_;
static const char *japanese_script_;
static const char *fraktur_script_;
// NOTE(review): presumably the ids of the named scripts in the unicharset
// in use - confirm where they are assigned.
int korean_id_;
int japanese_id_;
int katakana_id_;
int hiragana_id_;
int han_id_;
int hangul_id_;
int latin_id_;
int fraktur_id_;
tesseract::Tesseract *tess_;
const std::vector<int> *allowed_scripts_;
};
// Entry points for orientation and script detection. Only the declarations
// are visible here.
// NOTE(review): the meaning of the int / bool return values is not visible
// from these declarations - check the definitions before relying on them.
int orientation_and_script_detection(const char *filename, OSResults *,
tesseract::Tesseract *);
int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr,
tesseract::Tesseract *tess);
int os_detect_blobs(const std::vector<int> *allowed_scripts,
BLOBNBOX_CLIST *blob_list, OSResults *osr,
tesseract::Tesseract *tess);
bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s,
OSResults *, tesseract::Tesseract *tess);
// Helper method to convert an orientation index to its value in degrees.
// The value represents the amount of clockwise rotation in degrees that must be
// applied for the text to be upright (readable).
// This is the only function in this group that is exported with TESS_API.
TESS_API int OrientationIdToValue(const int &id);
} // namespace tesseract
#endif // TESSERACT_CCMAIN_OSDETECT_H_

View File

@ -1,364 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: pageiterator.h
// Description: Iterator for tesseract page structure that avoids using
// tesseract internal data structures.
// Author: Ray Smith
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_
#define TESSERACT_CCMAIN_PAGEITERATOR_H_
#include "export.h"
#include "publictypes.h"
struct Pix;
struct Pta;
namespace tesseract {
struct BlamerBundle;
class C_BLOB_IT;
class PAGE_RES;
class PAGE_RES_IT;
class WERD;
class Tesseract;
/**
* Class to iterate over tesseract page structure, providing access to all
* levels of the page hierarchy, without including any tesseract headers or
* having to handle any tesseract structures.
* WARNING! This class points to data held within the TessBaseAPI class, and
* therefore can only be used while the TessBaseAPI class still exists and
* has not been subjected to a call of Init, SetImage, Recognize, Clear, End,
* DetectOS, or anything else that changes the internal PAGE_RES.
* See tesseract/publictypes.h for the definition of PageIteratorLevel.
* See also ResultIterator, derived from PageIterator, which adds in the
* ability to access OCR output with text-specific methods.
*/
class TESS_API PageIterator {
public:
/**
* page_res and tesseract come directly from the BaseAPI.
* The rectangle parameters are copied indirectly from the Thresholder,
* via the BaseAPI. They represent the coordinates of some rectangle in an
* original image (in top-left-origin coordinates) and therefore the top-left
* needs to be added to any output boxes in order to specify coordinates
* in the original image. See TessBaseAPI::SetRectangle.
* The scale and scaled_yres are in case the Thresholder scaled the image
* rectangle prior to thresholding. Any coordinates in tesseract's image
* must be divided by scale before adding (rect_left, rect_top).
* The scaled_yres indicates the effective resolution of the binary image
* that tesseract has been given by the Thresholder.
* After the constructor, Begin has already been called.
*/
PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
int scaled_yres, int rect_left, int rect_top, int rect_width,
int rect_height);
virtual ~PageIterator();
/**
* Page/ResultIterators may be copied! This makes it possible to iterate over
* all the objects at a lower level, while maintaining an iterator to
* objects at a higher level. These constructors DO NOT CALL Begin, so
* iterations will continue from the location of src.
*/
PageIterator(const PageIterator &src);
const PageIterator &operator=(const PageIterator &src);
/** Are we positioned at the same location as other? */
bool PositionedAtSameWord(const PAGE_RES_IT *other) const;
// ============= Moving around within the page ============.
/**
* Moves the iterator to point to the start of the page to begin an
* iteration.
*/
virtual void Begin();
/**
* Moves the iterator to the beginning of the paragraph.
* This class implements this functionality by moving it to the zero indexed
* blob of the first (leftmost) word on the first row of the paragraph.
*/
virtual void RestartParagraph();
/**
* Return whether this iterator points anywhere in the first textline of a
* paragraph.
*/
bool IsWithinFirstTextlineOfParagraph() const;
/**
* Moves the iterator to the beginning of the text line.
* This class implements this functionality by moving it to the zero indexed
* blob of the first (leftmost) word of the row.
*/
virtual void RestartRow();
/**
* Moves to the start of the next object at the given level in the
* page hierarchy, and returns false if the end of the page was reached.
* NOTE that RIL_SYMBOL will skip non-text blocks, but all other
* PageIteratorLevel level values will visit each non-text block once.
* Think of non text blocks as containing a single para, with a single line,
* with a single imaginary word.
* Calls to Next with different levels may be freely intermixed.
* This function iterates words in right-to-left scripts correctly, if
* the appropriate language has been loaded into Tesseract.
*/
virtual bool Next(PageIteratorLevel level);
/**
* Returns true if the iterator is at the start of an object at the given
* level.
*
* For instance, suppose an iterator it is pointed to the first symbol of the
* first word of the third line of the second paragraph of the first block in
* a page, then:
* it.IsAtBeginningOf(RIL_BLOCK) = false
* it.IsAtBeginningOf(RIL_PARA) = false
* it.IsAtBeginningOf(RIL_TEXTLINE) = true
* it.IsAtBeginningOf(RIL_WORD) = true
* it.IsAtBeginningOf(RIL_SYMBOL) = true
*/
virtual bool IsAtBeginningOf(PageIteratorLevel level) const;
/**
* Returns whether the iterator is positioned at the last element in a
* given level. (e.g. the last word in a line, the last line in a block)
*
* Here's some two-paragraph example
* text. It starts off innocuously
* enough but quickly turns bizarre.
* The author inserts a cornucopia
* of words to guard against confused
* references.
*
* Now take an iterator it pointed to the start of "bizarre."
* it.IsAtFinalElement(RIL_PARA, RIL_SYMBOL) = false
* it.IsAtFinalElement(RIL_PARA, RIL_WORD) = true
* it.IsAtFinalElement(RIL_BLOCK, RIL_WORD) = false
*/
virtual bool IsAtFinalElement(PageIteratorLevel level,
PageIteratorLevel element) const;
/**
* Returns whether this iterator is positioned
* before other: -1
* equal to other: 0
* after other: 1
*/
int Cmp(const PageIterator &other) const;
// ============= Accessing data ==============.
// Coordinate system:
// Integer coordinates are at the cracks between the pixels.
// The top-left corner of the top-left pixel in the image is at (0,0).
// The bottom-right corner of the bottom-right pixel in the image is at
// (width, height).
// Every bounding box goes from the top-left of the top-left contained
// pixel to the bottom-right of the bottom-right contained pixel, so
// the bounding box of the single top-left pixel in the image is:
// (0,0)->(1,1).
// If an image rectangle has been set in the API, then returned coordinates
// relate to the original (full) image, rather than the rectangle.
/**
* Controls what to include in a bounding box. Bounding boxes of all levels
* between RIL_WORD and RIL_BLOCK can include or exclude potential diacritics.
* Between layout analysis and recognition, it isn't known where all
* diacritics belong, so this control is used to include or exclude some
* diacritics that are above or below the main body of the word. In most cases
* where the placement is obvious, and after recognition, it doesn't make as
* much difference, as the diacritics will already be included in the word.
* This is the only member function defined inline: it just stores the two
* flags.
*/
void SetBoundingBoxComponents(bool include_upper_dots,
bool include_lower_dots) {
include_upper_dots_ = include_upper_dots;
include_lower_dots_ = include_lower_dots;
}
/**
* Returns the bounding rectangle of the current object at the given level.
* See comment on coordinate system above.
* Returns false if there is no such object at the current position.
* The returned bounding box is guaranteed to match the size and position
* of the image returned by GetBinaryImage, but may clip foreground pixels
* from a grey image. The padding argument to GetImage can be used to expand
* the image to include more foreground pixels. See GetImage below.
*/
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right,
int *bottom) const;
bool BoundingBox(PageIteratorLevel level, int padding, int *left, int *top,
int *right, int *bottom) const;
/**
* Returns the bounding rectangle of the object in a coordinate system of the
* working image rectangle having its origin at (rect_left_, rect_top_) with
* respect to the original image and is scaled by a factor scale_.
*/
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top,
int *right, int *bottom) const;
/** Returns whether there is no object of a given level. */
bool Empty(PageIteratorLevel level) const;
/**
* Returns the type of the current block.
* See tesseract/publictypes.h for PolyBlockType.
*/
PolyBlockType BlockType() const;
/**
* Returns the polygon outline of the current block. The returned Pta must
* be ptaDestroy-ed after use. Note that the returned Pta lists the vertices
* of the polygon, and the last edge is the line segment between the last
* point and the first point. nullptr will be returned if the iterator is
* at the end of the document or layout analysis was not used.
*/
Pta *BlockPolygon() const;
/**
* Returns a binary image of the current object at the given level.
* The position and size match the return from BoundingBoxInternal, and so
* this could be upscaled with respect to the original input image.
* Use pixDestroy to delete the image after use.
*/
Pix *GetBinaryImage(PageIteratorLevel level) const;
/**
* Returns an image of the current object at the given level in greyscale
* if available in the input. To guarantee a binary image use GetBinaryImage.
* NOTE that in order to give the best possible image, the bounds are
* expanded slightly over the binary connected component, by the supplied
* padding, so the top-left position of the returned image is returned
* in (left,top). These will most likely not match the coordinates
* returned by BoundingBox.
* If you do not supply an original image, you will get a binary one.
* Use pixDestroy to delete the image after use.
*/
Pix *GetImage(PageIteratorLevel level, int padding, Pix *original_img,
int *left, int *top) const;
/**
* Returns the baseline of the current object at the given level.
* The baseline is the line that passes through (x1, y1) and (x2, y2).
* WARNING: with vertical text, baselines may be vertical!
* Returns false if there is no baseline at the current position.
*/
bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2,
int *y2) const;
// Returns the attributes of the current row.
void RowAttributes(float *row_height, float *descenders,
float *ascenders) const;
/**
* Returns orientation for the block the iterator points to.
* orientation, writing_direction, textline_order: see publictypes.h
* deskew_angle: after rotating the block so the text orientation is
* upright, how many radians does one have to rotate the
* block anti-clockwise for it to be level?
* -Pi/4 <= deskew_angle <= Pi/4
*/
void Orientation(tesseract::Orientation *orientation,
tesseract::WritingDirection *writing_direction,
tesseract::TextlineOrder *textline_order,
float *deskew_angle) const;
/**
* Returns information about the current paragraph, if available.
*
* justification -
* LEFT if ragged right, or fully justified and script is left-to-right.
* RIGHT if ragged left, or fully justified and script is right-to-left.
* unknown if it looks like source code or we have very few lines.
* is_list_item -
* true if we believe this is a member of an ordered or unordered list.
* is_crown -
* true if the first line of the paragraph is aligned with the other
* lines of the paragraph even though subsequent paragraphs have first
* line indents. This typically indicates that this is the continuation
* of a previous paragraph or that it is the very first paragraph in
* the chapter.
* first_line_indent -
* For LEFT aligned paragraphs, the first text line of paragraphs of
* this kind are indented this many pixels from the left edge of the
* rest of the paragraph.
* for RIGHT aligned paragraphs, the first text line of paragraphs of
* this kind are indented this many pixels from the right edge of the
* rest of the paragraph.
* NOTE 1: This value may be negative.
* NOTE 2: if *is_crown == true, the first line of this paragraph is
* actually flush, and first_line_indent is set to the "common"
* first_line_indent for subsequent paragraphs in this block
* of text.
*/
void ParagraphInfo(tesseract::ParagraphJustification *justification,
bool *is_list_item, bool *is_crown,
int *first_line_indent) const;
// If the current WERD_RES (it_->word()) is not nullptr, sets the BlamerBundle
// of the current word to the given pointer (takes ownership of the pointer)
// and returns true.
// Can only be used when iterating on the word level.
bool SetWordBlamerBundle(BlamerBundle *blamer_bundle);
protected:
/**
* Sets up the internal data for iterating the blobs of a new word, then
* moves the iterator to the given offset.
*/
void BeginWord(int offset);
/** Pointer to the page_res owned by the API. */
PAGE_RES *page_res_;
/** Pointer to the Tesseract object owned by the API. */
Tesseract *tesseract_;
/**
* The iterator to the page_res_. Owned by this iterator object.
* A pointer just to avoid dragging in Tesseract includes.
*/
PAGE_RES_IT *it_;
/**
* The current input WERD being iterated. If there is an output from OCR,
* then word_ is nullptr. Owned by the API.
*/
WERD *word_;
/** The length of the current word_. */
int word_length_;
/** The current blob index within the word. */
int blob_index_;
/**
* Iterator to the blobs within the word. If nullptr, then we are iterating
* OCR results in the box_word.
* Owned by this iterator object.
*/
C_BLOB_IT *cblob_it_;
/** Control over what to include in bounding boxes. */
bool include_upper_dots_;
bool include_lower_dots_;
/** Parameters saved from the Thresholder. Needed to rebuild coordinates.*/
int scale_;
int scaled_yres_;
int rect_left_;
int rect_top_;
int rect_width_;
int rect_height_;
};
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_PAGEITERATOR_H_

View File

@ -1,281 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: publictypes.h
// Description: Types used in both the API and internally
// Author: Ray Smith
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H_
#define TESSERACT_CCSTRUCT_PUBLICTYPES_H_
namespace tesseract {
// This file contains types that are used both by the API and internally
// to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
// dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT.
// Restated: It is OK for low-level Tesseract files to include publictypes.h,
// but not for the low-level tesseract code to include top-level API code.
// This file should not use other Tesseract types, as that would drag
// their includes into the API-level.
/** Printers' points per inch; point sizes are returned in this unit. */
constexpr int kPointsPerInch{72};
/**
 * Lowest resolution considered believable. It doubles as the default when no
 * other information is available, because under-estimating is safer than
 * over-estimating.
 */
constexpr int kMinCredibleResolution{70};
/** Highest resolution considered believable. */
constexpr int kMaxCredibleResolution{2400};
/**
 * Ratio of median blob size to likely resolution, used to estimate the
 * resolution when none is provided. Essentially 1 / (usual text size in
 * inches).
 */
constexpr int kResolutionEstimationFactor{10};
/**
* Possible types for a POLY_BLOCK or ColPartition.
* Must be kept in sync with kPBColors in polyblk.cpp and PTIs*Type functions
* below, as well as kPolyBlockNames in layout_test.cc.
* Used extensively by ColPartition, and POLY_BLOCK.
* No enumerator has an explicit value, so they are numbered from 0 in
* declaration order.
*/
enum PolyBlockType {
PT_UNKNOWN, // Type is not yet known. Keep as the first element.
PT_FLOWING_TEXT, // Text that lives inside a column.
PT_HEADING_TEXT, // Text that spans more than one column.
PT_PULLOUT_TEXT, // Text that is in a cross-column pull-out region.
PT_EQUATION, // Partition belonging to an equation region.
PT_INLINE_EQUATION, // Partition has inline equation.
PT_TABLE, // Partition belonging to a table region.
PT_VERTICAL_TEXT, // Text-line runs vertically.
PT_CAPTION_TEXT, // Text that belongs to an image.
PT_FLOWING_IMAGE, // Image that lives inside a column.
PT_HEADING_IMAGE, // Image that spans more than one column.
PT_PULLOUT_IMAGE, // Image that is in a cross-column pull-out region.
PT_HORZ_LINE, // Horizontal Line.
PT_VERT_LINE, // Vertical Line.
PT_NOISE, // Lies outside of any column.
PT_COUNT // Count of the enumerators above (its value is 15).
};
/** Returns true if PolyBlockType is a line type (horizontal or vertical). */
inline bool PTIsLineType(PolyBlockType type) {
  switch (type) {
    case PT_HORZ_LINE:
    case PT_VERT_LINE:
      return true;
    default:
      return false;
  }
}
/** Returns true if PolyBlockType is one of the three image types. */
inline bool PTIsImageType(PolyBlockType type) {
  switch (type) {
    case PT_FLOWING_IMAGE:
    case PT_HEADING_IMAGE:
    case PT_PULLOUT_IMAGE:
      return true;
    default:
      return false;
  }
}
/**
 * Returns true if PolyBlockType is of text type.
 * The text types are the flowing/heading/pullout/vertical/caption text
 * types plus PT_TABLE and PT_INLINE_EQUATION; PT_EQUATION is not one of them.
 */
inline bool PTIsTextType(PolyBlockType type) {
  switch (type) {
    case PT_FLOWING_TEXT:
    case PT_HEADING_TEXT:
    case PT_PULLOUT_TEXT:
    case PT_TABLE:
    case PT_VERTICAL_TEXT:
    case PT_CAPTION_TEXT:
    case PT_INLINE_EQUATION:
      return true;
    default:
      return false;
  }
}
/** Returns true if PolyBlockType is a pullout (inter-column) image or text. */
inline bool PTIsPulloutType(PolyBlockType type) {
  switch (type) {
    case PT_PULLOUT_IMAGE:
    case PT_PULLOUT_TEXT:
      return true;
    default:
      return false;
  }
}
/**
* +------------------+  Orientation Example:
* | 1 Aaaa Aaaa Aaaa |  ====================
* | Aaa aa aaa aa    |  To left is a diagram of some (1) English and
* | aaaaaa A aa aaa. |  (2) Chinese text and a (3) photo credit.
* |                2 |
* |   #######  c c C |  Upright Latin characters are represented as A and a.
* |   #######  c c c |  '<' represents a latin character rotated
* | < #######  c c c |  anti-clockwise 90 degrees.
* | < #######  c   c |
* | < #######  .   c |  Upright Chinese characters are represented C and c.
* | 3 #######      c |
* +------------------+  NOTA BENE: enum values here should match goodoc.proto
*
* If you orient your head so that "up" aligns with Orientation,
* then the characters will appear "right side up" and readable.
*
* In the example above, both the English and Chinese paragraphs are oriented
* so their "up" is the top of the page (page up). The photo credit is read
* with one's head turned leftward ("up" is to page left).
*
* The values of this enum match the convention of Tesseract's osdetect.h
*/
enum Orientation {
ORIENTATION_PAGE_UP = 0,
ORIENTATION_PAGE_RIGHT = 1,
ORIENTATION_PAGE_DOWN = 2,
ORIENTATION_PAGE_LEFT = 3,
};
/**
* The grapheme clusters within a line of text are laid out logically
* in this direction, judged when looking at the text line rotated so that
* its Orientation is "page up".
*
* For English text, the writing direction is left-to-right. For the
* Chinese text in the above example, the writing direction is top-to-bottom.
*/
enum WritingDirection {
WRITING_DIRECTION_LEFT_TO_RIGHT = 0, // E.g. English text.
WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
WRITING_DIRECTION_TOP_TO_BOTTOM = 2, // E.g. the Chinese text in the example.
};
/**
* The text lines are read in the given sequence.
*
* In English, the order is top-to-bottom.
* In Chinese, vertical text lines are read right-to-left. Mongolian is
* written in vertical columns top to bottom like Chinese, but the lines
* order left-to right.
*
* Note that only some combinations make sense. For example,
* WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM
*/
enum TextlineOrder {
  // Successive text lines are placed left to right.
  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
  // Successive text lines are placed right to left.
  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
  // Successive text lines are placed top to bottom (e.g. English).
  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2
};
/**
* Possible modes for page layout analysis. These *must* be kept in order
* of decreasing amount of layout analysis to be done, except for OSD_ONLY,
* so that the inequality tests in the inline functions below work.
*/
enum PageSegMode {
  /// Orientation and script detection only.
  PSM_OSD_ONLY = 0,
  /// Automatic page segmentation with orientation and script detection (OSD).
  PSM_AUTO_OSD = 1,
  /// Automatic page segmentation, but no OSD, or OCR.
  PSM_AUTO_ONLY = 2,
  /// Fully automatic page segmentation, but no OSD.
  PSM_AUTO = 3,
  /// Assume a single column of text of variable sizes.
  PSM_SINGLE_COLUMN = 4,
  /// Assume a single uniform block of vertically aligned text.
  PSM_SINGLE_BLOCK_VERT_TEXT = 5,
  /// Assume a single uniform block of text. (Default.)
  PSM_SINGLE_BLOCK = 6,
  /// Treat the image as a single text line.
  PSM_SINGLE_LINE = 7,
  /// Treat the image as a single word.
  PSM_SINGLE_WORD = 8,
  /// Treat the image as a single word in a circle.
  PSM_CIRCLE_WORD = 9,
  /// Treat the image as a single character.
  PSM_SINGLE_CHAR = 10,
  /// Find as much text as possible in no particular order.
  PSM_SPARSE_TEXT = 11,
  /// Sparse text with orientation and script det.
  PSM_SPARSE_TEXT_OSD = 12,
  /// Treat the image as a single text line, bypassing hacks that are
  /// Tesseract-specific.
  PSM_RAW_LINE = 13,
  /// Number of enum entries. Left implicit so it always follows the last mode.
  PSM_COUNT
};
/**
* Inline functions that act on a PageSegMode to determine whether components of
* layout analysis are enabled.
* *Depend critically on the order of elements of PageSegMode.*
* NOTE that arg is an int for compatibility with INT_PARAM.
*/
// True for any mode value up to and including PSM_AUTO_OSD, and for
// PSM_SPARSE_TEXT_OSD.
inline bool PSM_OSD_ENABLED(int pageseg_mode) {
  if (pageseg_mode == PSM_SPARSE_TEXT_OSD) {
    return true;
  }
  return pageseg_mode <= PSM_AUTO_OSD;
}
// True for any mode value up to and including PSM_AUTO, and for
// PSM_SPARSE_TEXT_OSD.
inline bool PSM_ORIENTATION_ENABLED(int pageseg_mode) {
  const bool up_to_auto = pageseg_mode <= PSM_AUTO;
  const bool sparse_with_osd = pageseg_mode == PSM_SPARSE_TEXT_OSD;
  return up_to_auto || sparse_with_osd;
}
// True for modes in the closed range [PSM_AUTO_OSD, PSM_AUTO].
inline bool PSM_COL_FIND_ENABLED(int pageseg_mode) {
  if (pageseg_mode < PSM_AUTO_OSD) {
    return false;
  }
  return pageseg_mode <= PSM_AUTO;
}
// True only for the two sparse-text modes.
inline bool PSM_SPARSE(int pageseg_mode) {
  switch (pageseg_mode) {
    case PSM_SPARSE_TEXT:
    case PSM_SPARSE_TEXT_OSD:
      return true;
    default:
      return false;
  }
}
// True for modes in the closed range [PSM_AUTO_OSD, PSM_SINGLE_COLUMN].
inline bool PSM_BLOCK_FIND_ENABLED(int pageseg_mode) {
  const bool outside_range =
      pageseg_mode < PSM_AUTO_OSD || pageseg_mode > PSM_SINGLE_COLUMN;
  return !outside_range;
}
// True for modes in the closed range [PSM_AUTO_OSD, PSM_SINGLE_BLOCK].
inline bool PSM_LINE_FIND_ENABLED(int pageseg_mode) {
  const bool at_least_auto_osd = pageseg_mode >= PSM_AUTO_OSD;
  const bool at_most_single_block = pageseg_mode <= PSM_SINGLE_BLOCK;
  return at_least_auto_osd && at_most_single_block;
}
// True for modes in the closed range [PSM_AUTO_OSD, PSM_SINGLE_LINE] and for
// the two sparse-text modes.
inline bool PSM_WORD_FIND_ENABLED(int pageseg_mode) {
  if (pageseg_mode == PSM_SPARSE_TEXT || pageseg_mode == PSM_SPARSE_TEXT_OSD) {
    return true;
  }
  return pageseg_mode >= PSM_AUTO_OSD && pageseg_mode <= PSM_SINGLE_LINE;
}
/**
* enum of the elements of the page hierarchy, used in ResultIterator
* to provide functions that operate on each level without having to
* have 5x as many functions.
*/
enum PageIteratorLevel {
  // Block of text/image/separator line.
  RIL_BLOCK = 0,
  // Paragraph within a block.
  RIL_PARA = 1,
  // Line within a paragraph.
  RIL_TEXTLINE = 2,
  // Word within a textline.
  RIL_WORD = 3,
  // Symbol/character within a word.
  RIL_SYMBOL = 4
};
/**
* JUSTIFICATION_UNKNOWN
* The alignment is not clearly one of the other options. This could happen
* for example if there are only one or two lines of text or the text looks
* like source code or poetry.
*
* NOTA BENE: Fully justified paragraphs (text aligned to both left and right
* margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text
* is written with a left-to-right script and with JUSTIFICATION_RIGHT if
* their text is written in a right-to-left script.
*
* Interpretation for text read in vertical lines:
* "Left" is wherever the starting reading position is.
*
* JUSTIFICATION_LEFT
* Each line, except possibly the first, is flush to the same left tab stop.
*
* JUSTIFICATION_CENTER
* The text lines of the paragraph are centered about a line going
* down through the middle of the text lines.
*
* JUSTIFICATION_RIGHT
* Each line, except possibly the first, is flush to the same right tab stop.
*/
enum ParagraphJustification {
  // The alignment is not clearly one of the other options.
  JUSTIFICATION_UNKNOWN = 0,
  // Each line, except possibly the first, is flush to the same left tab stop.
  JUSTIFICATION_LEFT = 1,
  // The text lines are centered about a line through their middle.
  JUSTIFICATION_CENTER = 2,
  // Each line, except possibly the first, is flush to the same right tab stop.
  JUSTIFICATION_RIGHT = 3
};
/**
* When Tesseract/Cube is initialized we can choose to instantiate/load/run
* only the Tesseract part, only the Cube part or both along with the combiner.
* The preference of which engine to use is stored in tessedit_ocr_engine_mode.
*
* ATTENTION: When modifying this enum, please make sure to make the
* appropriate changes to all the enums mirroring it (e.g. OCREngine in
* cityblock/workflow/detection/detection_storage.proto). Such enums will
* mention the connection to OcrEngineMode in the comments.
*/
enum OcrEngineMode {
  // Run Tesseract only - fastest; deprecated
  OEM_TESSERACT_ONLY = 0,
  // Run just the LSTM line recognizer.
  OEM_LSTM_ONLY = 1,
  // Run the LSTM recognizer, but allow fallback to Tesseract when things get
  // difficult. deprecated
  OEM_TESSERACT_LSTM_COMBINED = 2,
  // Specify this mode when calling init_*(), to indicate that any of the
  // above modes should be automatically inferred from the variables in the
  // language-specific config, command-line configs, or if not specified in
  // any of the above should be set to the default OEM_TESSERACT_ONLY.
  OEM_DEFAULT = 3,
  // Number of OEMs. Left implicit so it always follows the last real mode.
  OEM_COUNT
};
} // namespace tesseract.
#endif // TESSERACT_CCSTRUCT_PUBLICTYPES_H_

View File

@ -1,311 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: renderer.h
// Description: Rendering interface to inject into TessBaseAPI
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_API_RENDERER_H_
#define TESSERACT_API_RENDERER_H_
#include "export.h"
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
// complexity of includes here. Use forward declarations wherever possible
// and hide includes of complex types in baseapi.cpp.
#include <cstdint>
#include <string> // for std::string
#include <vector> // for std::vector
struct Pix;
namespace tesseract {
class TessBaseAPI;
/**
* Interface for rendering tesseract results into a document, such as text,
* HOCR or pdf. This class is abstract. Specific classes handle individual
* formats. This interface is then used to inject the renderer class into
* tesseract when processing images.
*
* For simplicity implementing this with tesseract version 3.01,
* the renderer contains document state that is cleared from document
* to document just as the TessBaseAPI is. This way the base API can just
* delegate its rendering functionality to injected renderers, and the
* renderers can manage the associated state needed for the specific formats
* in addition to the heuristics for producing it.
*/
class TESS_API TessResultRenderer {
public:
// Virtual destructor: this class is a polymorphic base (it declares a pure
// virtual AddImageHandler()).
virtual ~TessResultRenderer();
// Takes ownership of pointer so must be new'd instance.
// Renderers aren't ordered, but appends the sequences of next parameter
// and existing next(). The renderers should be unique across both lists.
void insert(TessResultRenderer *next);
// Returns the next renderer in the chain, or nullptr if there is none.
TessResultRenderer *next() {
return next_;
}
/**
* Starts a new document with the given title.
* This clears the contents of the output data.
* Title should use UTF-8 encoding.
*/
bool BeginDocument(const char *title);
/**
* Adds the recognized text from the source image to the current document.
* Invalid if BeginDocument not yet called.
*
* Note that this API is a bit weird but is designed to fit into the
* current TessBaseAPI implementation where the api has lots of state
* information that we might want to add in.
*/
bool AddImage(TessBaseAPI *api);
/**
* Finishes the document and finalizes the output data.
* Invalid if BeginDocument not yet called.
*/
bool EndDocument();
// Returns the stored extension pointer unchanged (no copy is made here).
const char *file_extension() const {
return file_extension_;
}
// Returns the document title as a C string. The pointer refers to the
// internal std::string, so it is only valid while that string is unchanged.
const char *title() const {
return title_.c_str();
}
// Is everything fine? Otherwise something went wrong.
bool happy() const {
return happy_;
}
/**
* Returns the index of the last image given to AddImage
* (i.e. images are incremented whether the image succeeded or not)
*
* This is always defined. It means either the number of the
* current image, the last image ended, or in the completed document
* depending on when in the document lifecycle you are looking at it.
* Will return -1 if a document was never started.
*/
int imagenum() const {
return imagenum_;
}
protected:
/**
* Called by concrete classes.
*
* outputbase is the name of the output file excluding
* extension. For example, "/path/to/chocolate-chip-cookie-recipe"
*
* extension indicates the file extension to be used for output
* files. For example "pdf" will produce a .pdf file, and "hocr"
* will produce .hocr files.
*/
TessResultRenderer(const char *outputbase, const char *extension);
// Hook for specialized handling in BeginDocument()
virtual bool BeginDocumentHandler();
// This must be overridden to render the OCR'd results
virtual bool AddImageHandler(TessBaseAPI *api) = 0;
// Hook for specialized handling in EndDocument()
virtual bool EndDocumentHandler();
// Renderers can call this to append '\0' terminated strings to the output.
// NOTE(review): older wording referred to "the output string returned by
// GetOutput", but no GetOutput() is declared in this class; given the fout_
// member the data presumably goes to the output file - confirm.
void AppendString(const char *s);
// Renderers can call this to append binary byte sequences to the output.
// Note that s is not necessarily '\0' terminated (and can contain '\0'
// within it); len gives the number of bytes.
// NOTE(review): same stale GetOutput reference as AppendString - confirm
// the destination against the implementation.
void AppendData(const char *s, int len);
private:
TessResultRenderer *next_; // Can link multiple renderers together
// NOTE(review): FILE is declared by <cstdio>, which is not among the
// includes visible in this header; presumably it arrives transitively.
FILE *fout_; // output file pointer
const char *file_extension_; // standard extension for generated output
std::string title_; // title of document being rendered
int imagenum_; // index of last image added
bool happy_; // I get grumpy when the disk fills up, etc.
};
/**
* Renders tesseract output as plain UTF-8 text.
*
* Only AddImageHandler() is overridden; no other hook is redeclared here.
*/
class TESS_API TessTextRenderer : public TessResultRenderer {
public:
// outputbase: presumably the output file name without extension, as
// documented on the TessResultRenderer constructor - confirm.
explicit TessTextRenderer(const char *outputbase);
protected:
// Renders the OCR results held by api for one image.
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
* Renders tesseract output as hOCR text.
*
* All three document hooks (begin / add image / end) are overridden.
*/
class TESS_API TessHOcrRenderer : public TessResultRenderer {
public:
// font_info selects whether font information is printed (see font_info_).
explicit TessHOcrRenderer(const char *outputbase, bool font_info);
// NOTE(review): the value font_info_ takes with this overload is not visible
// in this header - confirm in the implementation.
explicit TessHOcrRenderer(const char *outputbase);
protected:
bool BeginDocumentHandler() override;
bool AddImageHandler(TessBaseAPI *api) override;
bool EndDocumentHandler() override;
private:
bool font_info_; // whether to print font information
};
/**
* Renders tesseract output as ALTO text.
*
* All three document hooks (begin / add image / end) are overridden.
*/
class TESS_API TessAltoRenderer : public TessResultRenderer {
public:
explicit TessAltoRenderer(const char *outputbase);
protected:
bool BeginDocumentHandler() override;
bool AddImageHandler(TessBaseAPI *api) override;
bool EndDocumentHandler() override;
private:
// NOTE(review): unlike the other members in this header, this one has no
// trailing underscore; its exact meaning is not visible here - presumably it
// tracks whether the document header has been emitted. Confirm.
bool begin_document;
};
/**
* Renders Tesseract output as TSV text.
*
* All three document hooks (begin / add image / end) are overridden.
*/
class TESS_API TessTsvRenderer : public TessResultRenderer {
public:
// font_info selects whether font information is printed (see font_info_).
explicit TessTsvRenderer(const char *outputbase, bool font_info);
// NOTE(review): the value font_info_ takes with this overload is not visible
// in this header - confirm in the implementation.
explicit TessTsvRenderer(const char *outputbase);
protected:
bool BeginDocumentHandler() override;
bool AddImageHandler(TessBaseAPI *api) override;
bool EndDocumentHandler() override;
private:
bool font_info_; // whether to print font information
};
/**
* Renders tesseract output into searchable PDF.
*
* All three document hooks (begin / add image / end) are overridden; the
* private members hold per-document bookkeeping so that pages can be emitted
* one at a time.
*/
class TESS_API TessPDFRenderer : public TessResultRenderer {
public:
// datadir is the location of the TESSDATA. We need it because
// we load a custom PDF font from this location.
// textonly defaults to false; when set, images are skipped (see textonly_).
TessPDFRenderer(const char *outputbase, const char *datadir,
bool textonly = false);
protected:
bool BeginDocumentHandler() override;
bool AddImageHandler(TessBaseAPI *api) override;
bool EndDocumentHandler() override;
private:
// We don't want to have every image in memory at once,
// so we store some metadata as we go along producing
// PDFs one page at a time. At the end, that metadata is
// used to make everything that isn't easily handled in a
// streaming fashion.
long int obj_; // counter for PDF objects
std::vector<uint64_t> offsets_; // offset of every PDF object in bytes
std::vector<long int> pages_; // object number for every /Page object
std::string datadir_; // where to find the custom font
bool textonly_; // skip images if set
// Bookkeeping only. DIY = Do It Yourself.
void AppendPDFObjectDIY(size_t objectsize);
// Bookkeeping + emit data.
void AppendPDFObject(const char *data);
// Create the /Contents object for an entire page.
// NOTE(review): returns a raw char*; ownership and how to free it are not
// visible in this header - confirm in the implementation.
char *GetPDFTextObjects(TessBaseAPI *api, double width, double height);
// Turn an image into a PDF object. Only transcode if we have to.
// The object and its size are handed back through the pdf_object and
// pdf_object_size out-parameters.
static bool imageToPDFObj(Pix *pix, const char *filename, long int objnum,
char **pdf_object, long int *pdf_object_size,
int jpg_quality);
};
/**
* Renders tesseract output into a text string.
* NOTE(review): the previous description ("plain UTF-8 text string") was
* identical to TessTextRenderer's; going by the class name this renderer
* presumably emits the UNLV format - confirm against the implementation.
*/
class TESS_API TessUnlvRenderer : public TessResultRenderer {
public:
explicit TessUnlvRenderer(const char *outputbase);
protected:
// Renders the OCR results held by api for one image.
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
* Renders tesseract output into a plain UTF-8 text string for LSTMBox.
*
* Only AddImageHandler() is overridden; no other hook is redeclared here.
*/
class TESS_API TessLSTMBoxRenderer : public TessResultRenderer {
public:
explicit TessLSTMBoxRenderer(const char *outputbase);
protected:
// Renders the OCR results held by api for one image.
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
* Renders tesseract output into a plain UTF-8 text string.
* NOTE(review): this description was identical to TessTextRenderer's; going
* by the class name the output is presumably box-file text - confirm against
* the implementation.
*/
class TESS_API TessBoxTextRenderer : public TessResultRenderer {
public:
explicit TessBoxTextRenderer(const char *outputbase);
protected:
// Renders the OCR results held by api for one image.
bool AddImageHandler(TessBaseAPI *api) override;
};
/**
* Renders tesseract output into a plain UTF-8 text string in WordStr format.
*
* Only AddImageHandler() is overridden; no other hook is redeclared here.
*/
class TESS_API TessWordStrBoxRenderer : public TessResultRenderer {
public:
explicit TessWordStrBoxRenderer(const char *outputbase);
protected:
// Renders the OCR results held by api for one image.
bool AddImageHandler(TessBaseAPI *api) override;
};
#ifndef DISABLED_LEGACY_ENGINE
/**
* Renders tesseract output into an osd text string.
*
* This class is only declared when DISABLED_LEGACY_ENGINE is not defined
* (see the surrounding preprocessor guard).
*/
class TESS_API TessOsdRenderer : public TessResultRenderer {
public:
explicit TessOsdRenderer(const char *outputbase);
protected:
// Renders the OSD results held by api for one image.
bool AddImageHandler(TessBaseAPI *api) override;
};
#endif // ndef DISABLED_LEGACY_ENGINE
} // namespace tesseract.
#endif // TESSERACT_API_RENDERER_H_

View File

@ -1,250 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: resultiterator.h
// Description: Iterator for tesseract results that is capable of
// iterating in proper reading order over Bi Directional
// (e.g. mixed Hebrew and English) text.
// Author: David Eger
//
// (C) Copyright 2011, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
#include "export.h" // for TESS_API, TESS_LOCAL
#include "ltrresultiterator.h" // for LTRResultIterator
#include "publictypes.h" // for PageIteratorLevel
#include "unichar.h" // for StrongScriptDirection
#include <set> // for std::pair
#include <vector> // for std::vector
namespace tesseract {
// Iterator over OCR results that walks the page in reading order, including
// bidirectional (mixed left-to-right / right-to-left) text. Extends
// LTRResultIterator and overrides its navigation functions.
class TESS_API ResultIterator : public LTRResultIterator {
public:
// Factory: builds a ResultIterator from resit. Per the constructor comment
// below, the new iterator is reset to the beginning of the paragraph.
// NOTE(review): returns a raw pointer; presumably the caller owns it -
// confirm.
static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);
/**
* ResultIterator is copy constructible!
* The default copy constructor works just fine for us.
*/
~ResultIterator() override = default;
// ============= Moving around within the page ============.
/**
* Moves the iterator to point to the start of the page to begin
* an iteration.
*/
void Begin() override;
/**
* Moves to the start of the next object at the given level in the
* page hierarchy in the appropriate reading order and returns false if
* the end of the page was reached.
* NOTE that RIL_SYMBOL will skip non-text blocks, but all other
* PageIteratorLevel level values will visit each non-text block once.
* Think of non text blocks as containing a single para, with a single line,
* with a single imaginary word.
* Calls to Next with different levels may be freely intermixed.
* This function iterates words in right-to-left scripts correctly, if
* the appropriate language has been loaded into Tesseract.
*/
bool Next(PageIteratorLevel level) override;
/**
* IsAtBeginningOf() returns whether we're at the logical beginning of the
* given level. (as opposed to ResultIterator's left-to-right top-to-bottom
* order). Otherwise, this acts the same as PageIterator::IsAtBeginningOf().
* For a full description, see pageiterator.h
*/
bool IsAtBeginningOf(PageIteratorLevel level) const override;
/**
* Implement PageIterator's IsAtFinalElement correctly in a BiDi context.
* For instance, IsAtFinalElement(RIL_PARA, RIL_WORD) returns whether we
* point at the last word in a paragraph. See PageIterator for full comment.
*/
bool IsAtFinalElement(PageIteratorLevel level,
PageIteratorLevel element) const override;
// ============= Functions that refer to words only ============.
// Returns the number of blanks before the current word.
int BlanksBeforeWord() const;
// ============= Accessing data ==============.
/**
* Returns the null terminated UTF-8 encoded text string for the current
* object at the given level. Use delete [] to free after use.
*/
virtual char *GetUTF8Text(PageIteratorLevel level) const;
/**
* Returns the LSTM choices for every LSTM timestep for the current word.
* Each innermost pair is a (text, float) entry.
* NOTE(review): ownership of the returned pointer is not visible in this
* header - confirm before freeing or caching it.
*/
virtual std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
*GetRawLSTMTimesteps() const;
/**
* NOTE(review): undocumented in the original; by its name, presumably the
* best LSTM choices per symbol of the current word, each as a (text, float)
* pair - confirm. Ownership of the returned pointer is likewise not visible
* here.
*/
virtual std::vector<std::vector<std::pair<const char *, float>>>
*GetBestLSTMSymbolChoices() const;
/**
* Return whether the current paragraph's dominant reading direction
* is left-to-right (as opposed to right-to-left).
*/
bool ParagraphIsLtr() const;
// ============= Exposed only for testing =============.
/**
* Yields the reading order as a sequence of indices and (optional)
* meta-marks for a set of words (given left-to-right).
* The meta marks are passed as negative values:
* kMinorRunStart Start of minor direction text.
* kMinorRunEnd End of minor direction text.
* kComplexWord The next indexed word contains both left-to-right and
* right-to-left characters and was treated as neutral.
* NOTE(review): the private overloads below describe kComplexWord as marking
* the *previous* word; one of the two descriptions is wrong - confirm.
*
* For example, suppose we have five words in a text line,
* indexed [0,1,2,3,4] from the leftmost side of the text line.
* The following are all believable reading_orders:
*
* Left-to-Right (in ltr paragraph):
* { 0, 1, 2, 3, 4 }
* Left-to-Right (in rtl paragraph):
* { kMinorRunStart, 0, 1, 2, 3, 4, kMinorRunEnd }
* Right-to-Left (in rtl paragraph):
* { 4, 3, 2, 1, 0 }
* Left-to-Right except for an RTL phrase in words 2, 3 in an ltr paragraph:
* { 0, 1, kMinorRunStart, 3, 2, kMinorRunEnd, 4 }
*/
static void CalculateTextlineOrder(
bool paragraph_is_ltr,
const std::vector<StrongScriptDirection> &word_dirs,
std::vector<int> *reading_order);
// Meta-mark values used in reading-order sequences (see above). They are
// only declared here; their values are defined elsewhere.
static const int kMinorRunStart;
static const int kMinorRunEnd;
static const int kComplexWord;
protected:
/**
* We presume the data associated with the given iterator will outlive us.
* NB: This is protected (not public) because it does something that is
* non-obvious: it resets to the beginning of the paragraph instead of
* staying wherever resit might have pointed.
*/
explicit ResultIterator(const LTRResultIterator &resit);
private:
/**
* Calculates the current paragraph's dominant writing direction.
* Typically, members should use current_paragraph_is_ltr_ instead.
*/
bool CurrentParagraphIsLtr() const;
/**
* Returns word indices as measured from resit->RestartRow() = index 0
* for the reading order of words within a textline given an iterator
* into the middle of the text line.
* In addition to non-negative word indices, the following negative values
* may be inserted:
* kMinorRunStart Start of minor direction text.
* kMinorRunEnd End of minor direction text.
* kComplexWord The previous word contains both left-to-right and
* right-to-left characters and was treated as neutral.
*/
void CalculateTextlineOrder(bool paragraph_is_ltr,
const LTRResultIterator &resit,
std::vector<int> *indices) const;
/** Same as above, but the caller's ssd gets filled in if ssd != nullptr. */
void CalculateTextlineOrder(bool paragraph_is_ltr,
const LTRResultIterator &resit,
std::vector<StrongScriptDirection> *ssd,
std::vector<int> *indices) const;
/**
* What is the index of the current word in a strict left-to-right reading
* of the row?
*/
int LTRWordIndex() const;
/**
* Given an iterator pointing at a word, returns the logical reading order
* of blob indices for the word.
*/
void CalculateBlobOrder(std::vector<int> *blob_indices) const;
/** Precondition: current_paragraph_is_ltr_ is set. */
void MoveToLogicalStartOfTextline();
/**
* Precondition: current_paragraph_is_ltr_ and in_minor_direction_
* are set.
*/
void MoveToLogicalStartOfWord();
/** Are we pointing at the final (reading order) symbol of the word? */
bool IsAtFinalSymbolOfWord() const;
/** Are we pointing at the first (reading order) symbol of the word? */
bool IsAtFirstSymbolOfWord() const;
/**
* Append any extra marks that should be appended to this word when printed.
* Mostly, these are Unicode BiDi control characters.
*/
void AppendSuffixMarks(std::string *text) const;
/** Appends the current word in reading order to the given buffer.*/
void AppendUTF8WordText(std::string *text) const;
/**
* Appends the text of the current text line, *assuming this iterator is
* positioned at the beginning of the text line*. This function
* updates the iterator to point to the first position past the text line.
* Each textline is terminated in a single newline character.
* If the textline ends a paragraph, it gets a second terminal newline.
*/
void IterateAndAppendUTF8TextlineText(std::string *text);
/**
* Appends the text of the current paragraph in reading order
* to the given buffer.
* Each textline is terminated in a single newline character, and the
* paragraph gets an extra newline at the end.
*/
void AppendUTF8ParagraphText(std::string *text) const;
/** Returns whether the bidi_debug flag is set to at least min_level. */
bool BidiDebug(int min_level) const;
/** Cached dominant direction of the current paragraph (true = LTR). */
bool current_paragraph_is_ltr_;
/**
* Is the currently pointed-at character at the beginning of
* a minor-direction run?
*/
bool at_beginning_of_minor_run_;
/** Is the currently pointed-at character in a minor-direction sequence? */
bool in_minor_direction_;
/**
* Should detected inter-word spaces be preserved, or "compressed" to a single
* space character (default behavior).
*/
bool preserve_interword_spaces_;
};
} // namespace tesseract.
#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_

View File

@ -1,174 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: unichar.h
// Description: Unicode character/ligature class.
// Author: Ray Smith
//
// (C) Copyright 2006, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_CCUTIL_UNICHAR_H_
#define TESSERACT_CCUTIL_UNICHAR_H_
#include "export.h"
#include <memory.h>
#include <cstring>
#include <string>
#include <vector>
namespace tesseract {
// Size in bytes of the UTF-8 buffer held by a UNICHAR (its `chars` array).
// The last byte of that buffer doubles as a length field, so the value must
// be at least 4 and must not exceed 31 without changing the coding of length.
#define UNICHAR_LEN 30
// A UNICHAR_ID is the unique id of a unichar.
using UNICHAR_ID = int;
// Constant marking an invalid or uninitialized unichar id.
static const int INVALID_UNICHAR_ID = -1;
// A special unichar string that corresponds to INVALID_UNICHAR_ID.
static const char INVALID_UNICHAR[] = "__INVALID_UNICHAR__";
enum StrongScriptDirection {
  // Text contains only neutral characters.
  DIR_NEUTRAL = 0,
  // Text contains no Right-to-Left characters.
  DIR_LEFT_TO_RIGHT = 1,
  // Text contains no Left-to-Right characters.
  DIR_RIGHT_TO_LEFT = 2,
  // Text contains a mixture of left-to-right and right-to-left characters.
  DIR_MIX = 3
};
// Code point type used by UNICHAR::UTF8ToUTF32 / UNICHAR::UTF32ToUTF8.
// NOTE(review): `signed int` is presumably assumed to be at least 32 bits
// wide on all supported platforms - confirm.
using char32 = signed int;
// The UNICHAR class holds a single classification result. This may be
// a single Unicode character (stored as between 1 and 4 utf8 bytes) or
// multiple Unicode characters representing the NFKC expansion of a ligature
// such as fi, ffl etc. These are also stored as utf8.
class TESS_API UNICHAR {
public:
  // Builds an empty UNICHAR: the whole buffer, including the trailing
  // length byte, is zeroed.
  UNICHAR() {
    memset(chars, 0, sizeof(chars));
  }

  // Builds from a UTF-8 string. A negative len means the string is
  // null-terminated. Input that does not fit is truncated to what will fit.
  UNICHAR(const char *utf8_str, int len);

  // Builds from a single UCS-4 character.
  explicit UNICHAR(int unicode);

  // The compiler-generated copy constructor and operator= are sufficient.

  // Returns the first character as UCS-4.
  int first_uni() const;

  // Returns the length of the stored UTF-8 string. The last byte of the
  // buffer holds the length when it lies in [0, UNICHAR_LEN); any other value
  // means that byte is real character data and the buffer is full.
  int utf8_len() const {
    const int stored = chars[UNICHAR_LEN - 1];
    if (stored < 0 || stored >= UNICHAR_LEN) {
      return UNICHAR_LEN;
    }
    return stored;
  }

  // Returns the raw UTF-8 bytes. The result is NOT null-terminated; pair it
  // with utf8_len().
  const char *utf8() const {
    return chars;
  }

  // Returns a null-terminated UTF-8 copy: caller must delete[] it after use.
  char *utf8_str() const;

  // Returns the number of bytes in the first character of the given UTF-8
  // string.
  static int utf8_step(const char *utf8_str);

  // Helper for walking over the characters of a UTF-8 string. Unlike UNICHAR
  // itself, const_iterator does NOT copy or take ownership of the underlying
  // byte array, and it offers no way to modify it.
  //
  // Example:
  //   for (UNICHAR::const_iterator it = UNICHAR::begin(str, str_len);
  //        it != UNICHAR::end(str, str_len);
  //        ++it) {
  //     printf("UCS-4 symbol code = %d\n", *it);
  //     char buf[5];
  //     int char_len = it.get_utf8(buf); buf[char_len] = '\0';
  //     printf("Char = %s\n", buf);
  //   }
  class TESS_API const_iterator {
    using CI = const_iterator;

  public:
    // Advances to the next UTF-8 character. At an illegal UTF-8 sequence an
    // error message is printed and the iterator advances by one byte. At a
    // NUL byte the iterator does not advance.
    const_iterator &operator++();

    // Returns the UCS-4 value at the current position, or a single space
    // character if the bytes there are not legal UTF-8.
    int operator*() const;

    // Writes the UTF-8 encoding of the current code point into buf (which
    // must hold at least 4 bytes) and returns the number of bytes written.
    // At an illegal UTF-8 sequence a single space is written and 1 returned.
    // The buffer is not null-terminated by this call.
    int get_utf8(char *buf) const;

    // Returns the byte length of the current code point, or 1 at an illegal
    // UTF-8 sequence.
    int utf8_len() const;

    // Returns true if the UTF-8 encoding at the current position is legal.
    bool is_legal() const;

    // Returns the pointer into the string at the current position.
    const char *utf8_data() const {
      return it_;
    }

    // Two iterators are equal exactly when they point at the same byte.
    friend bool operator==(const CI &lhs, const CI &rhs) {
      return lhs.it_ == rhs.it_;
    }
    friend bool operator!=(const CI &lhs, const CI &rhs) {
      return lhs.it_ != rhs.it_;
    }

  private:
    friend class UNICHAR;
    explicit const_iterator(const char *it) : it_(it) {}
    const char *it_; // Pointer into the string.
  };

  // Start/end iterators over a string. Both are static and neither copies nor
  // takes ownership of the underlying array.
  static const_iterator begin(const char *utf8_str, int byte_length);
  static const_iterator end(const char *utf8_str, int byte_length);

  // Converts a UTF-8 string to a vector of code points.
  // Returns an empty vector if the input contains invalid UTF-8.
  static std::vector<char32> UTF8ToUTF32(const char *utf8_str);

  // Converts a vector of code points to a UTF-8 string.
  // Returns an empty string if the input contains an invalid code point.
  static std::string UTF32ToUTF8(const std::vector<char32> &str32);

private:
  // UTF-8 bytes of one or more Unicode characters. The final element
  // (chars[UNICHAR_LEN - 1]) is a length when its value < UNICHAR_LEN,
  // otherwise it is a genuine character byte.
  char chars[UNICHAR_LEN]{};
};
} // namespace tesseract
#endif // TESSERACT_CCUTIL_UNICHAR_H_

View File

@ -1,34 +0,0 @@
// SPDX-License-Identifier: Apache-2.0
// File: version.h
// Description: Version information
//
// (C) Copyright 2018, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef TESSERACT_API_VERSION_H_
#define TESSERACT_API_VERSION_H_
// clang-format off
// Individual version components. The @...@ tokens are placeholders.
// NOTE(review): presumably substituted with numeric values by the build
// system's configure step; as written here they are not valid C++ - confirm.
#define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
#define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
#define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@
// Single integer combining the three components: major shifted left by 16
// bits, minor shifted left by 8 bits, micro unshifted, OR-ed together.
#define TESSERACT_VERSION \
(TESSERACT_MAJOR_VERSION << 16 | \
TESSERACT_MINOR_VERSION << 8 | \
TESSERACT_MICRO_VERSION)
// Version as a string literal (its placeholder is substituted the same way).
#define TESSERACT_VERSION_STR "@PACKAGE_VERSION@"
// clang-format on
#endif // TESSERACT_API_VERSION_H_